VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 100184

Last change on this file since 100184 was 100148, checked in by vboxsync, 12 months ago

VMM/IEM: Made the python scripts pick up and deal with the IEM_MC_DEFER_TO_CIMPL_[0-5]_RET short-hand macros. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 258.5 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 100148 2023-06-10 19:44:02Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 100148 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes don't have pywin32.
62
# Python 3 hacks:
# Python 3 has no separate 'long' type; alias the name to 'int' so it resolves
# on both major versions.
if sys.version_info >= (3, 0):
    long = int;     # pylint: disable=redefined-builtin,invalid-name
66
67
## Values of the X86_EFL_XXX constants, keyed by constant name.
## Single bit flags are spelled as shifts (bit number on the right).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,
    'X86_EFL_1':            1 << 1,
    'X86_EFL_PF':           1 << 2,
    'X86_EFL_AF':           1 << 4,
    'X86_EFL_ZF':           1 << 6,
    'X86_EFL_SF':           1 << 7,
    'X86_EFL_TF':           1 << 8,
    'X86_EFL_IF':           1 << 9,
    'X86_EFL_DF':           1 << 10,
    'X86_EFL_OF':           1 << 11,
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),  # two bit field
    'X86_EFL_NT':           1 << 14,
    'X86_EFL_RF':           1 << 16,
    'X86_EFL_VM':           1 << 17,
    'X86_EFL_AC':           1 << 18,
    'X86_EFL_VIF':          1 << 19,
    'X86_EFL_VIP':          1 << 20,
    'X86_EFL_ID':           1 << 21,
    'X86_EFL_LIVE_MASK':    0x003f7fd5,             # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':     1 << 1,
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Each mnemonic maps to an X86_EFL_XXX constant name; a leading '!' marks the
## negated (flag clear) form of the mnemonic.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): 'po'/'pe' look swapped relative to the x86 definition of PF
    #               (PF=1 means even parity) - confirm against the users of this
    #               table before changing anything.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0 (bit number on the right hand side of the shift).
g_kdX86Cr0Constants = {
    'X86_CR0_PE':   1 << 0,
    'X86_CR0_MP':   1 << 1,
    'X86_CR0_EM':   1 << 2,
    'X86_CR0_TS':   1 << 3,
    'X86_CR0_ET':   1 << 4,
    'X86_CR0_NE':   1 << 5,
    'X86_CR0_WP':   1 << 16,
    'X86_CR0_AM':   1 << 18,
    'X86_CR0_NW':   1 << 29,
    'X86_CR0_CD':   1 << 30,
    'X86_CR0_PG':   1 << 31,
};
152
## Constants and values for CR4 (bit number on the right hand side of the shift).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,
    'X86_CR4_PVI':          1 << 1,
    'X86_CR4_TSD':          1 << 2,
    'X86_CR4_DE':           1 << 3,
    'X86_CR4_PSE':          1 << 4,
    'X86_CR4_PAE':          1 << 5,
    'X86_CR4_MCE':          1 << 6,
    'X86_CR4_PGE':          1 << 7,
    'X86_CR4_PCE':          1 << 8,
    'X86_CR4_OSFXSR':       1 << 9,
    'X86_CR4_OSXMMEEXCPT':  1 << 10,
    'X86_CR4_VMXE':         1 << 13,
    'X86_CR4_SMXE':         1 << 14,
    'X86_CR4_PCIDE':        1 << 17,
    'X86_CR4_OSXSAVE':      1 << 18,
    'X86_CR4_SMEP':         1 << 20,
    'X86_CR4_SMAP':         1 << 21,
    'X86_CR4_PKE':          1 << 22,
};
174
## XSAVE components (XCR0), bit number on the right hand side of the shift.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,
    'XSAVE_C_SSE':          1 << 1,
    'XSAVE_C_YMM':          1 << 2,
    'XSAVE_C_BNDREGS':      1 << 3,
    'XSAVE_C_BNDCSR':       1 << 4,
    'XSAVE_C_OPMASK':       1 << 5,
    'XSAVE_C_ZMM_HI256':    1 << 6,
    'XSAVE_C_ZMM_16HI':     1 << 7,
    'XSAVE_C_PKRU':         1 << 9,
    'XSAVE_C_LWP':          1 << 62,
    'XSAVE_C_X':            1 << 63,
    'XSAVE_C_ALL_AVX':      (1 << 2) | (1 << 6) | (1 << 7),             # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  (1 << 1) | (1 << 2) | (1 << 6) | (1 << 7),  # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## Every location starts out with its own empty list.
g_kdOpLocations = {
    sLocation: [] for sLocation in (
        'reg',      ## modrm.reg
        'rm',       ## modrm.rm
        'imm',      ## immediate instruction data
        'vvvv',     ## VEX.vvvv

        # fixed registers.
        'AL',
        'rAX',
        'rDX',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',
    )
};
214
## \@op[1-4] types
##
## Maps an operand type name to a 5-tuple describing it.
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
## Note! Several names carry a _RO/_WO/_RW or Zx/Hi suffix and share the
##       handler, location and format string of the unsuffixed name.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '', ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '', ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '', ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '', ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '', ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '', ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '', ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '', ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '', ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '', ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '', ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '', ),

    # The flags register.
    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '', ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '', ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '', ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '', ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '', ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '', ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '', ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '', ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '', ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '', ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Value fields:
##    - 0: the encoding name (the names used here are keys of g_kdEncodings).
##    - 1: list with the location of each operand (g_kdOpLocations names), or
##         None when the form does not specify any.
##    - 2: the opcode sub-selector string ('' if the form has none).
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]         opcodesub      ),
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],         '',            ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],         '11 mr/reg',   ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],         '!11 mr/reg',  ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '',            ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '11 mr/reg',   ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '!11 mr/reg',  ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],         '',            ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],         '11 mr/reg',   ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],         '!11 mr/reg',  ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '',            ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '11 mr/reg',   ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '!11 mr/reg',  ),
    # NOTE(review): unlike the other _REG/_MEM forms, the M_REG/M_MEM (and
    #               VEX_M_REG/VEX_M_MEM) entries have an empty sub-selector -
    #               confirm whether that is intentional.
    'M':            ( 'ModR/M',     [ 'rm', ],               '',            ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],               '',            ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],               '',            ),
    'R':            ( 'ModR/M',     [ 'reg', ],              '',            ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '',            ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '11 mr/reg',   ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '!11 mr/reg',  ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '',            ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '11 mr/reg',   ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '!11 mr/reg',  ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],               '' ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],               '' ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],               '' ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],              '' ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '',            ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg',   ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg',  ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '',            ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg',   ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg',  ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '',            ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '11 mr/reg',   ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '!11 mr/reg',  ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '',            ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg',   ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg',  ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '',            ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '11 mr/reg',   ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '!11 mr/reg',  ),

    'FIXED':        ( 'fixed',      None,                    '',            ),
};
417
## \@oppfx values.
## Every prefix starts out with its own empty list.
g_kdPrefixes = { sPrefix: [] for sPrefix in ( 'none', '0x66', '0xf3', '0xf2', ) };
425
## Special \@opcode tag values.
## Every tag value starts out with its own empty list.
g_kdSpecialOpcodes = {
    sSpecial: [] for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## Every non-alias entry must resolve to itself in the first field; 'vex.l=1'
## used to resolve to 'vex.l=0', which made the two VEX.L values
## indistinguishable after alias resolution.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    'vex.l=1':              [ 'vex.l=1',            'L1',   ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
};
456
## Valid values for \@openc
## Each value is a single entry list holding a BS3CG1ENC_XXX constant name, or
## None for 'prefix'.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## Describes how much a CPU decodes of an unused/invalid encoding.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
475
## CPU names, each mapped to an empty tuple.
g_kdCpuNames = { sCpu: () for sCpu in ( '8086', '80186', '80286', '80386', '80486', ) };
483
## \@opcpuid
## Maps the \@opcpuid tag value to the name of the X86_CPUID_XXX feature bit
## constant.
##
## Note! 'pclmul', 'monitor' and 'smx' used to name the neighbouring
##       CPUID.1:ECX bits (DTES64, CPLDS and TM2); they now name their own
##       feature bits (ECX bit 1, 3 and 6 respectively).
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name; hints mapped to an
## empty string have no DISOPTYPE_XXX counterpart in this table.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',                   ##<
    'harmless':                  'DISOPTYPE_HARMLESS',                  ##<
    'controlflow':               'DISOPTYPE_CONTROLFLOW',               ##<
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',     ##<
    'dangerous':                 'DISOPTYPE_DANGEROUS',                 ##<
    'portio':                    'DISOPTYPE_PORTIO',                    ##<
    'privileged':                'DISOPTYPE_PRIVILEGED',                ##<
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',         ##<
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',        ##<
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',      ##<
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',          ##<
    'interrupt':                 'DISOPTYPE_INTERRUPT',                 ##<
    'illegal':                   'DISOPTYPE_ILLEGAL',                   ##<
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',           ##<
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',          ##<
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
572
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every type starts out with its own empty list.
g_kdXcptTypes = {
    sXcptType: [] for sXcptType in (
        'none',
        '1',
        '2',
        '3',
        '4',
        '4UA',
        '5',
        '5LZ',      # LZ = VEX.L must be zero.
        '6',
        '7',
        '7LZ',
        '8',
        '11',
        '12',
        'E1',
        'E1NF',
        'E2',
        'E3',
        'E3NF',
        'E4',
        'E4NF',
        'E5',
        'E5NF',
        'E6',
        'E6NF',
        'E7NF',
        'E9',
        'E9NF',
        'E10',
        'E11',
        'E12',
        'E12NF',
    )
};
608
609
610def _isValidOpcodeByte(sOpcode):
611 """
612 Checks if sOpcode is a valid lower case opcode byte.
613 Returns true/false.
614 """
615 if len(sOpcode) == 4:
616 if sOpcode[:2] == '0x':
617 if sOpcode[2] in '0123456789abcdef':
618 if sOpcode[3] in '0123456789abcdef':
619 return True;
620 return False;
621
622
623class InstructionMap(object):
624 """
625 Instruction map.
626
627 The opcode map provides the lead opcode bytes (empty for the one byte
628 opcode map). An instruction can be member of multiple opcode maps as long
629 as it uses the same opcode value within the map (because of VEX).
630 """
631
632 kdEncodings = {
633 'legacy': [],
634 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
635 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
636 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
637 'xop8': [], ##< XOP prefix with vvvvv = 8
638 'xop9': [], ##< XOP prefix with vvvvv = 9
639 'xop10': [], ##< XOP prefix with vvvvv = 10
640 };
641 ## Selectors.
642 ## 1. The first value is the number of table entries required by a
643 ## decoder or disassembler for this type of selector.
644 ## 2. The second value is how many entries per opcode byte if applicable.
645 kdSelectors = {
646 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
647 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
648 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
649 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
650 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
651 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
652 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
653 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
654 };
655
656 ## Define the subentry number according to the Instruction::sPrefix
657 ## value for 'byte+pfx' selected tables.
658 kiPrefixOrder = {
659 'none': 0,
660 '0x66': 1,
661 '0xf3': 2,
662 '0xf2': 3,
663 };
664
665 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
666 sEncoding = 'legacy', sDisParse = None):
667 assert sSelector in self.kdSelectors;
668 assert sEncoding in self.kdEncodings;
669 if asLeadOpcodes is None:
670 asLeadOpcodes = [];
671 else:
672 for sOpcode in asLeadOpcodes:
673 assert _isValidOpcodeByte(sOpcode);
674 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
675
676 self.sName = sName;
677 self.sIemName = sIemName;
678 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
679 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
680 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
681 self.aoInstructions = [] # type: Instruction
682 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
683
684 def copy(self, sNewName, sPrefixFilter = None):
685 """
686 Copies the table with filtering instruction by sPrefix if not None.
687 """
688 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
689 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
690 else self.sSelector,
691 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
692 if sPrefixFilter is None:
693 oCopy.aoInstructions = list(self.aoInstructions);
694 else:
695 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
696 return oCopy;
697
698 def getTableSize(self):
699 """
700 Number of table entries. This corresponds directly to the selector.
701 """
702 return self.kdSelectors[self.sSelector][0];
703
704 def getEntriesPerByte(self):
705 """
706 Number of table entries per opcode bytes.
707
708 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
709 the others it will just return 1.
710 """
711 return self.kdSelectors[self.sSelector][1];
712
713 def getInstructionIndex(self, oInstr):
714 """
715 Returns the table index for the instruction.
716 """
717 bOpcode = oInstr.getOpcodeByte();
718
719 # The byte selectors are simple. We need a full opcode byte and need just return it.
720 if self.sSelector == 'byte':
721 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
722 return bOpcode;
723
724 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
725 if self.sSelector == 'byte+pfx':
726 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
727 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
728 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
729
730 # The other selectors needs masking and shifting.
731 if self.sSelector == '/r':
732 return (bOpcode >> 3) & 0x7;
733
734 if self.sSelector == 'mod /r':
735 return (bOpcode >> 3) & 0x1f;
736
737 if self.sSelector == 'memreg /r':
738 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
739
740 if self.sSelector == '!11 /r':
741 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
742 return (bOpcode >> 3) & 0x7;
743
744 if self.sSelector == '11 /r':
745 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
746 return (bOpcode >> 3) & 0x7;
747
748 if self.sSelector == '11':
749 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
750 return bOpcode & 0x3f;
751
752 assert False, self.sSelector;
753 return -1;
754
755 def getInstructionsInTableOrder(self):
756 """
757 Get instructions in table order.
758
759 Returns array of instructions. Normally there is exactly one
760 instruction per entry. However the entry could also be None if
761 not instruction was specified for that opcode value. Or there
762 could be a list of instructions to deal with special encodings
763 where for instance prefix (e.g. REX.W) encodes a different
764 instruction or different CPUs have different instructions or
765 prefixes in the same place.
766 """
767 # Start with empty table.
768 cTable = self.getTableSize();
769 aoTable = [None] * cTable;
770
771 # Insert the instructions.
772 for oInstr in self.aoInstructions:
773 if oInstr.sOpcode:
774 idxOpcode = self.getInstructionIndex(oInstr);
775 assert idxOpcode < cTable, str(idxOpcode);
776
777 oExisting = aoTable[idxOpcode];
778 if oExisting is None:
779 aoTable[idxOpcode] = oInstr;
780 elif not isinstance(oExisting, list):
781 aoTable[idxOpcode] = list([oExisting, oInstr]);
782 else:
783 oExisting.append(oInstr);
784
785 return aoTable;
786
787
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per '_' separated word of
    the map name:
        - 'm' (modrm.mod==mem) and 'r' (modrm.mod==reg) suffixes as well as
          two digit lower case hex words are appended as '_<word>';
        - any other word gets 'grp' and 'map' turned into 'Grp' and 'Map',
          has its first character upper-cased and is appended directly.
    """
    asParts = ['g_aDisas',];
    for sPart in self.sName.split('_'):
        if sPart in ('m', 'r'):
            # Suffix indicating modrm.mod==mem / modrm.mod==reg.
            asParts.append('_' + sPart);
        elif len(sPart) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPart):
            # Opcode byte given as two hex digits.
            asParts.append('_' + sPart);
        else:
            sPart = sPart.replace('grp', 'Grp').replace('map', 'Map');
            asParts.append(sPart[0].upper() + sPart[1:]);
    return ''.join(asParts);
805
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name with the 'g_aDisas' prefix swapped for 'g_Disas'
    and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return sTableName.replace('g_aDisas', 'g_Disas') + 'Range';
811
def isVexMap(self):
    """ Returns True if the encoding of this map starts with 'vex', i.e. it is a VEX map. """
    return self.sEncoding[:3] == 'vex';
815
816
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values given as text and turns
    them into little endian byte arrays.  The fUnsigned constructor parameter
    gives the default stance on zero vs sign extending; prefixing a value
    with '+' or '-' always selects sign extending for that value.
    """

    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.fUnsigned = fUnsigned;
        ## Normal value sizes in bytes; get() treats the last entry as the largest.
        self.acbSizes  = acbSizes if acbSizes is not None else [1, 2, 4, 8, 16, 32];

    class BadValue(Exception):
        """ Bad value exception; the text is also available as sMessage. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lower case hex digit to its inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts the '0x' prefix by one.
        fExplicitSign = sValue[0] in ('-', '+');
        fSignExtend   = True if fExplicitSign else not self.fUnsigned;
        offDigits     = 1 if fExplicitSign else 0;
        fHex          = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Get the hex digits of the magnitude.  For negative values we take
        # the digits of (-n - 1) and invert each of them, which yields the
        # two's complement digits.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            # Make sure the top digit has the sign bit set.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Figure the natural digit count: The smallest normal size that fits,
        # or else a multiple of the largest normal size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad on the left, using 'f' to keep negative values negative.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded != 0:
            sHex = (('f' if fNegative else '0') * cNeeded) + sHex;

        # Convert to bytearray, least significant byte first, and return it.
        abValue = bytearray([int(sHex[offEnd - 2 : offEnd], 16) for offEnd in range(len(sHex), 0, -2)]);
        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            sResult = oXcpt.sMessage;
        else:
            sResult = True;
        return sResult;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  Mnemonics mapping to a '!' prefixed constant denote
    cleared flags, all others denote set flags.
    """

    ## Mnemonics denoting a cleared flag (dummy values).  isAndOrPair() relies
    ## on this to tell whether a value needs the AND+OR pair form.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics to byte arrays.

        Returns ((fSignExtend, abSet),) when only set-flags are given, and
        ((fSignExtend, abClear), (fSignExtend, abSet)) - i.e. the AND+OR pair
        form - when at least one cleared flag is given.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '0x%x' % (fClear,));
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. contains a cleared-flag mnemonic.

        The value is split on ',' exactly like get() does, and each mnemonic
        is looked up on its own.  A substring search over the whole value
        would also trigger on mnemonics which merely contain one of the
        kdZeroValueFlags keys, contradicting what get() computes.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
978
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants (used for the control register types).

    A value is a comma separated list of flag names; each name is upper-cased,
    prefixed with sConstantPrefix and looked up in kdConstantsAndValues.
    """

    ## NOTE(review): not referenced by this class in the code shown here - looks
    ## like a carry-over from TestTypeEflags; kept because it is a public attribute.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        ORs together the values of all the flags named in sValue and returns
        the result the same way TestType.get does.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each instance records which field is modified (sField, a kdFields key),
    how (sOp, one of kasOperators), with what value (sValue, kept as text)
    and the name of the value type (sType).
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' is constructed without fUnsigned and therefore gets
    ## the same fUnsigned=True default as 'uint' - confirm that is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Misspelled legacy name of xmm15.dw0, kept so existing users keep working.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a state modifier.

        sField must be a kdFields key and sOp one of kasOperators; all four
        arguments must be strings.  Violations trip assertions.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1266
1267
class TestSelector(object):
    """
    One selector for an instruction test: a variable compared against one of
    the values which are valid for that variable.
    """

    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector.  Asserts that sVariable is a kdVariables key, that
        sOp is in kasCompareOps and that sValue is valid for the variable.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: list(TestInOut)
1374 self.aoOutputs = [] # type: list(TestInOut)
1375 self.aoSelectors = [] # type: list(TestSelector)
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand: where it is found and what type it has.
    """

    def __init__(self, sWhere, sType):
        # Both values must be known to the global operand tables.
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType in g_kdOpTypes, sType;
        self.sType  = sType;    ##< g_kdOpTypes
        self.sWhere = sWhere;   ##< g_kdOpLocations

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding (type starts with E, G or M). """
        chKind = self.sType[0];
        return chKind in ('E', 'G', 'M');
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds what is known about one instruction: core attributes (mnemonic,
    opcode, operands, flags, ...), implementation attributes, info about
    where it was found, and some intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of field=value pairs, leaving out
        the fields which are None.  With fRepr set it is wrapped in angle
        brackets.
        """
        aoPairs = [('opcode', self.sOpcode),];
        if self.sPrefix:
            aoPairs.append(('prefix', self.sPrefix));
        aoPairs.append(('mnemonic', self.sMnemonic));
        aoPairs.extend([('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                        for iOperand, oOperand in enumerate(self.aoOperands)]);
        if self.aoMaps:
            aoPairs.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])));
        aoPairs.append(('encoding', self.sEncoding));
        if self.dHints:
            aoPairs.append(('hints', ','.join(self.dHints.keys())));
        aoPairs.append(('disenum', self.sDisEnum));
        if self.asCpuIds:
            aoPairs.append(('cpuid', ','.join(self.asCpuIds)));
        aoPairs.append(('group', self.sGroup));
        if self.fUnused:
            aoPairs.append(('unused', 'True'));
        if self.fInvalid:
            aoPairs.append(('invalid', 'True'));
        aoPairs.extend([
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ]);
        if self.fStub:
            aoPairs.append(('fStub', 'True'));
        if self.fUdStub:
            aoPairs.append(('fUdStub', 'True'));
        if self.cOpTags:
            aoPairs.append(('optags', str(self.cOpTags)));
        if self.iLineFnIemOpMacro != -1:
            aoPairs.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)));
        if self.iLineMnemonicMacro != -1:
            aoPairs.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)));

        sBody = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aoPairs if sValue is not None]);
        return ('<' + sBody + '>') if fRepr else sBody;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this instruction as its parent.  The arguments, when
        given (truthy), replace the map list, opcode, sub-opcode and prefix of
        the copy.  List and dictionary attributes are copied one level deep.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent = self;

        # Attributes that can be overridden by the caller.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    or self.sPrefix;
        oCopy.sOpcode    = sOpcode    or self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode or self.sSubOpcode;

        # Attributes taken over as-is (same object).
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup',
                      'fUnused', 'fInvalid', 'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Lists get a new list object holding the same elements. Deeper copy for aoOperands and aoTests?
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));
        oCopy.dHints = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode); # pylint type confusion workaround.
        cchOpcode = len(sOpcode);

        # Full hex byte form.
        if sOpcode.startswith('0x'):
            return int(sOpcode, 16);

        # The /r form: The digit goes into bits 3 and up.
        if cchOpcode == 2 and sOpcode.startswith('/') and sOpcode[1].isdigit():
            return int(sOpcode[1:]) << 3;

        # The 11/r form: Same, with the two top bits set.
        if cchOpcode == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form.  ORs in 0x80, i.e. bit 7 set and bit 6 clear.
        ## @todo this doesn't really work...
        if cchOpcode == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (zero if None or empty).
        """
        uMask = 0;
        for sFlag in asFlags or ():
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uMask |= g_kdX86EFlagsConstants[sConstant];
        return uMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        return bool(self.aoMaps) and all(oMap.isVexMap() for oMap in self.aoMaps);
1662
1663
1664
## All the instructions.
## This is a module level registry which starts out empty.
g_aoAllInstructions = [] # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## NOTE(review): presumably keyed by Instruction.sStats - confirm against the code filling it in.
g_dAllInstructionsByStat = {} # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Going by the type comment, each function name maps to a list of instructions.
g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: list(Instruction)
1676
## Instruction maps.
## Each entry starts with the map name.  Entries may further give a second
## positional string (NOTE(review): looks like the name of the IEM function
## table - confirm against InstructionMap.__init__), the lead opcode byte(s)
## (asLeadOpcodes), how an instruction is selected within the map (sSelector),
## the encoding (sEncoding) and a disassembler parse function (sDisParse).
g_aoInstructionMaps = [
    # The one byte map and the groups hanging off one byte opcodes.
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
    # The '_m' / '_r' map pairs share their lead opcode and differ in selector only.
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin

    # The two byte map (lead opcode 0x0f) and the groups hanging off it.
    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # The three byte maps (lead opcodes 0x0f 0x38 and 0x0f 0x3a).
    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    # VEX map 1 and its groups (sEncoding = 'vex1').
    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    # VEX maps 2 and 3.
    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    # The 3DNow! map (lead opcodes 0x0f 0x0f) and the XOP maps and groups.
    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## Lookup table: InstructionMap.sName -> InstructionMap, built from g_aoInstructionMaps.
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## Lookup table: InstructionMap.sIemName -> InstructionMap, built from g_aoInstructionMaps.
# NOTE(review): maps with an identical sIemName value overwrite each other here
#               (last one wins) - confirm what sIemName defaults to when not given.
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    A decoder function found in the instruction sources.

    Records the name, the defining macro arguments, the source location and
    the raw lines of the function.  It mainly exists so that variables used
    in microcode blocks can be looked up within the enclosing function.
    """

    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The raw lines the function is made up of.
        self.asLines    = [] # type: list(str)

    def complete(self, iEndLine, asLines):
        """
        Marks the function as complete by recording its end line and raw lines.

        May only be called once per function (asserted).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    A single statement in a microcode block.
    """

    def __init__(self, sName, asParams):
        ## The statement name: 'IEM_MC_XXX' or 'C++'.
        self.sName    = sName;
        ## The statement parameters (joined with ', ' when rendering).
        self.asParams = asParams;
        ## User data slot, None until somebody sets it.
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as one line of code, indented by cchIndent spaces
        and terminated by ';' and a newline.
        """
        return ''.join((' ' * cchIndent, self.sName, '(', ', '.join(self.asParams), ');\n',));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Returns the concatenated rendering of all the statements in aoStmts.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Depth-first search for the first statement whose name is in dNames.

        Conditional statements are checked themselves first, then their IF
        branch and finally their ELSE branch.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.

        Returns the matching statement, None if there is no match.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if isinstance(oStmt, McStmtCond):
                oHit = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                        or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
                if oHit:
                    return oHit;
        return None;

    def isCppStmt(self):
        """ Returns True if the statement name starts with 'C++', i.e. it is a C++ statement. """
        return self.sName.startswith('C++');
1820
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).

    Holds the statements of the IF branch and of the (optional) ELSE branch.
    """

    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        ## Statements of the IF branch (a copy of the list passed in, empty if none given).
        self.aoIfBranch   = list(aoIfBranch)   if aoIfBranch   is not None else [];
        ## Statements of the ELSE branch (a copy of the list passed in, empty if none given).
        self.aoElseBranch = list(aoElseBranch) if aoElseBranch is not None else [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with its branches indented by four more spaces.
        The IEM_MC_ELSE part is left out when the ELSE branch is empty.
        """
        sIndent = ' ' * cchIndent;
        asParts = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sIndent + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1838
class McStmtVar(McStmt):
    """ Variable declaration statement: IEM_MC_LOCAL_VAR, IEM_MC_LOCAL_CONST """

    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        McStmt.__init__(self, sName, asParams);
        ## The variable type, the variable name and the constant value (None if not const).
        (self.sType, self.sVarName, self.sConstValue) = (sType, sVarName, sConstValue);
1846
class McStmtArg(McStmtVar):
    """ Argument declaration statement: IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """

    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        assert sRefType in ('none', 'local');
1855
1856
class McStmtCall(McStmt):
    """ Call statement: IEM_MC_CALL_* """

    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        ## Index into asParams of the function being called.
        self.idxFn     = iFnParam;
        ## Index into asParams of the first parameter following the function.
        self.idxParams = iFnParam + 1;
        ## The function being called (asParams[iFnParam]).
        self.sFn       = asParams[iFnParam];
        ## The asParams entry at iRcNameParam, None if that index is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The raw code is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation when rendering.
        self.fDecode   = fDecode;
        ## Extra indentation added to whatever renderCode() is called with.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the raw code, appending the decode/normal annotation to every
        line of it (each newline is replaced by annotation + newline).
        """
        sEol  = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sEol);
1883
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation when rendering.
        self.fDecode   = fDecode;
        ## Extra indentation added to whatever renderCode() is called with.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the statement as 'if (cond)' followed by a braced block, plus
        an 'else' with a braced block when the ELSE branch isn't empty.
        """
        cchOuter    = cchIndent + self.cchIndent;
        sIndent     = ' ' * cchOuter;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asParts = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchOuter + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchOuter + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asParts);
1906
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.
    """

    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive as-is; the indentation is ignored and no annotation is added. """
        sDirective = self.asParams[0];
        return sDirective + '\n';
1916
1917
1918class McBlock(object):
1919 """
1920 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
1921 """
1922
1923 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
1924 ## The source file containing the block.
1925 self.sSrcFile = sSrcFile;
1926 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
1927 self.iBeginLine = iBeginLine;
1928 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
1929 self.offBeginLine = offBeginLine;
1930 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
1931 self.iEndLine = -1;
1932 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
1933 self.offEndLine = 0;
1934 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
1935 self.offAfterEnd = 0;
1936 ## The function the block resides in.
1937 self.oFunction = oFunction;
1938 ## The name of the function the block resides in. DEPRECATED.
1939 self.sFunction = oFunction.sName;
1940 ## The block number within the function.
1941 self.iInFunction = iInFunction;
1942 self.cchIndent = cchIndent if cchIndent else offBeginLine;
1943 self.asLines = [] # type: list(str) ##< The raw lines the block is made up of.
1944 ## Decoded statements in the block.
1945 self.aoStmts = [] # type: list(McStmt)
1946
1947 def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
1948 """
1949 Completes the microcode block.
1950 """
1951 assert self.iEndLine == -1;
1952 self.iEndLine = iEndLine;
1953 self.offEndLine = offEndLine;
1954 self.offAfterEnd = offAfterEnd;
1955 self.asLines = asLines;
1956
1957 def raiseDecodeError(self, sRawCode, off, sMessage):
1958 """ Raises a decoding error. """
1959 offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
1960 iLine = sRawCode.count('\n', 0, off);
1961 raise ParserException('%s:%d:%d: parsing error: %s'
1962 % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));
1963
1964 def raiseStmtError(self, sName, sMessage):
1965 """ Raises a statement parser error. """
1966 raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));
1967
1968 def checkStmtParamCount(self, sName, asParams, cParamsExpected):
1969 """ Check the parameter count, raising an error it doesn't match. """
1970 if len(asParams) != cParamsExpected:
1971 raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
1972 % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
1973 return True;
1974
1975 @staticmethod
1976 def parseMcGeneric(oSelf, sName, asParams):
1977 """ Generic parser that returns a plain McStmt object. """
1978 _ = oSelf;
1979 return McStmt(sName, asParams);
1980
1981 @staticmethod
1982 def parseMcGenericCond(oSelf, sName, asParams):
1983 """ Generic parser that returns a plain McStmtCond object. """
1984 _ = oSelf;
1985 return McStmtCond(sName, asParams);
1986
1987 @staticmethod
1988 def parseMcBegin(oSelf, sName, asParams):
1989 """ IEM_MC_BEGIN """
1990 oSelf.checkStmtParamCount(sName, asParams, 2);
1991 return McBlock.parseMcGeneric(oSelf, sName, asParams);
1992
1993 @staticmethod
1994 def parseMcArg(oSelf, sName, asParams):
1995 """ IEM_MC_ARG """
1996 oSelf.checkStmtParamCount(sName, asParams, 3);
1997 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));
1998
1999 @staticmethod
2000 def parseMcArgConst(oSelf, sName, asParams):
2001 """ IEM_MC_ARG_CONST """
2002 oSelf.checkStmtParamCount(sName, asParams, 4);
2003 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);
2004
2005 @staticmethod
2006 def parseMcArgLocalRef(oSelf, sName, asParams):
2007 """ IEM_MC_ARG_LOCAL_REF """
2008 oSelf.checkStmtParamCount(sName, asParams, 4);
2009 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');
2010
2011 @staticmethod
2012 def parseMcArgLocalEFlags(oSelf, sName, asParams):
2013 """ IEM_MC_ARG_LOCAL_EFLAGS """
2014 oSelf.checkStmtParamCount(sName, asParams, 3);
2015 # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
2016 return (
2017 McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
2018 McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
2019 'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
2020 );
2021
2022 @staticmethod
2023 def parseMcLocal(oSelf, sName, asParams):
2024 """ IEM_MC_LOCAL """
2025 oSelf.checkStmtParamCount(sName, asParams, 2);
2026 return McStmtVar(sName, asParams, asParams[0], asParams[1]);
2027
2028 @staticmethod
2029 def parseMcLocalConst(oSelf, sName, asParams):
2030 """ IEM_MC_LOCAL_CONST """
2031 oSelf.checkStmtParamCount(sName, asParams, 3);
2032 return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);
2033
2034 @staticmethod
2035 def parseMcCallAImpl(oSelf, sName, asParams):
2036 """ IEM_MC_CALL_AIMPL_3|4 """
2037 cArgs = int(sName[-1]);
2038 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2039 return McStmtCall(sName, asParams, 1, 0);
2040
2041 @staticmethod
2042 def parseMcCallVoidAImpl(oSelf, sName, asParams):
2043 """ IEM_MC_CALL_VOID_AIMPL_2|3 """
2044 cArgs = int(sName[-1]);
2045 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2046 return McStmtCall(sName, asParams, 0);
2047
2048 @staticmethod
2049 def parseMcCallAvxAImpl(oSelf, sName, asParams):
2050 """ IEM_MC_CALL_AVX_AIMPL_2|3 """
2051 cArgs = int(sName[-1]);
2052 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2053 return McStmtCall(sName, asParams, 0);
2054
2055 @staticmethod
2056 def parseMcCallFpuAImpl(oSelf, sName, asParams):
2057 """ IEM_MC_CALL_FPU_AIMPL_1|2|3 """
2058 cArgs = int(sName[-1]);
2059 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2060 return McStmtCall(sName, asParams, 0);
2061
2062 @staticmethod
2063 def parseMcCallMmxAImpl(oSelf, sName, asParams):
2064 """ IEM_MC_CALL_MMX_AIMPL_2|3 """
2065 cArgs = int(sName[-1]);
2066 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2067 return McStmtCall(sName, asParams, 0);
2068
2069 @staticmethod
2070 def parseMcCallSseAImpl(oSelf, sName, asParams):
2071 """ IEM_MC_CALL_SSE_AIMPL_2|3 """
2072 cArgs = int(sName[-1]);
2073 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2074 return McStmtCall(sName, asParams, 0);
2075
2076 @staticmethod
2077 def parseMcCallCImpl(oSelf, sName, asParams):
2078 """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 """
2079 cArgs = int(sName[-1]);
2080 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2081 return McStmtCall(sName, asParams, 1);
2082
2083 @staticmethod
2084 def stripComments(sCode):
2085 """ Returns sCode with comments removed. """
2086 off = 0;
2087 while off < len(sCode):
2088 off = sCode.find('/', off);
2089 if off < 0 or off + 1 >= len(sCode):
2090 break;
2091
2092 if sCode[off + 1] == '/':
2093 # C++ comment.
2094 offEnd = sCode.find('\n', off + 2);
2095 if offEnd < 0:
2096 return sCode[:off].rstrip();
2097 sCode = sCode[ : off] + sCode[offEnd : ];
2098 off += 1;
2099
2100 elif sCode[off + 1] == '*':
2101 # C comment
2102 offEnd = sCode.find('*/', off + 2);
2103 if offEnd < 0:
2104 return sCode[:off].rstrip();
2105 sSep = ' ';
2106 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2107 sSep = '';
2108 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2109 off += len(sSep);
2110
2111 else:
2112 # Not a comment.
2113 off += 1;
2114 return sCode;
2115
2116 @staticmethod
2117 def extractParam(sCode, offParam):
2118 """
2119 Extracts the parameter value at offParam in sCode.
2120 Returns stripped value and the end offset of the terminating ',' or ')'.
2121 """
2122 # Extract it.
2123 cNesting = 0;
2124 offStart = offParam;
2125 while offParam < len(sCode):
2126 ch = sCode[offParam];
2127 if ch == '(':
2128 cNesting += 1;
2129 elif ch == ')':
2130 if cNesting == 0:
2131 break;
2132 cNesting -= 1;
2133 elif ch == ',' and cNesting == 0:
2134 break;
2135 offParam += 1;
2136 return (sCode[offStart : offParam].strip(), offParam);
2137
2138 @staticmethod
2139 def extractParams(sCode, offOpenParen):
2140 """
2141 Parses a parameter list.
2142 Returns the list of parameter values and the offset of the closing parentheses.
2143 Returns (None, len(sCode)) on if no closing parentheses was found.
2144 """
2145 assert sCode[offOpenParen] == '(';
2146 asParams = [];
2147 off = offOpenParen + 1;
2148 while off < len(sCode):
2149 ch = sCode[off];
2150 if ch.isspace():
2151 off += 1;
2152 elif ch != ')':
2153 (sParam, off) = McBlock.extractParam(sCode, off);
2154 asParams.append(sParam);
2155 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2156 if sCode[off] == ',':
2157 off += 1;
2158 else:
2159 return (asParams, off);
2160 return (None, off);
2161
2162 @staticmethod
2163 def findClosingBraces(sCode, off, offStop):
2164 """
2165 Finds the matching '}' for the '{' at off in sCode.
2166 Returns offset of the matching '}' on success, otherwise -1.
2167
2168 Note! Does not take comments into account.
2169 """
2170 cDepth = 1;
2171 off += 1;
2172 while off < offStop:
2173 offClose = sCode.find('}', off, offStop);
2174 if offClose < 0:
2175 break;
2176 cDepth += sCode.count('{', off, offClose);
2177 cDepth -= 1;
2178 if cDepth == 0:
2179 return offClose;
2180 off = offClose + 1;
2181 return -1;
2182
2183 @staticmethod
2184 def countSpacesAt(sCode, off, offStop):
2185 """ Returns the number of space characters at off in sCode. """
2186 offStart = off;
2187 while off < offStop and sCode[off].isspace():
2188 off += 1;
2189 return off - offStart;
2190
2191 @staticmethod
2192 def skipSpacesAt(sCode, off, offStop):
2193 """ Returns first offset at or after off for a non-space character. """
2194 return off + McBlock.countSpacesAt(sCode, off, offStop);
2195
2196 @staticmethod
2197 def isSubstrAt(sStr, off, sSubStr):
2198 """ Returns true of sSubStr is found at off in sStr. """
2199 return sStr[off : off + len(sSubStr)] == sSubStr;
2200
    ## Matches the start of a C/C++ control statement: 'if (', 'else', 'while (', 'for (' or 'do'.
    #  Group 1 is the keyword, including the opening parenthesis for those taking one.
    #  Used by decodeCode to tell if/else apart from ordinary C++ statements.
    koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches a reference to one of the listed 'iem.s.' members, group 1 being the member name.
    #  NOTE(review): Presumably the decoder state variables - not referenced in
    #                the part of the file visible here, confirm against the users.
    koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');
2206
    def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
        """
        Decodes sRawCode[off : offStop].

        The code is split into IEM_MC_XXX statements, C/C++ statements, C/C++
        'if' statements and preprocessor directives.  Comments between the
        statements are skipped.  The branches of conditionals are decoded by
        recursive calls.

        Parameters:
            sRawCode    The raw code to decode.
            off         Where to start decoding.
            offStop     Where to stop decoding, negative means the end of
                        sRawCode.  Note that the searches for statement
                        terminators (';', '{') are not limited by this.
            iLevel      The recursion depth, incremented for each branch (MC
                        as well as C++) being descended into.

        Returns list of McStmt instances.
        Raises ParserException on failure.
        """
        if offStop < 0:
            offStop = len(sRawCode);
        aoStmts = [];
        while off < offStop:
            ch = sRawCode[off];

            #
            # Skip spaces and comments.
            #
            if ch.isspace():
                off += 1;

            elif ch == '/':
                ch = sRawCode[off + 1];
                if ch == '/': # C++ comment.
                    off = sRawCode.find('\n', off + 2);
                    if off < 0:
                        break; # The comment runs to the end of the code, so we're done.
                    off += 1;
                elif ch == '*': # C comment.
                    off = sRawCode.find('*/', off + 2);
                    if off < 0:
                        break; # Unterminated comment, treated as running to the end of the code.
                    off += 2;
                else:
                    self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

            #
            # Is it a MC statement.
            #
            elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
                # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
                # Extract it and strip comments from it.
                if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
                    offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
                else:
                    offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                    # A semicolon before the '{' means that the brace belongs to something else.
                    if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                    # Back up over the whitespace preceding the '{', so the statement text ends
                    # with the closing parenthesis.
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;

                sRawStmt = self.stripComments(sRawCode[off : offEnd]);

                # Isolate the statement name.
                offOpenParen = sRawStmt.find('(');
                if offOpenParen < 0:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
                sName = sRawStmt[: offOpenParen].strip();

                # Extract the parameters.
                (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
                if asParams is None:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
                if offCloseParen + 1 != len(sRawStmt):
                    self.raiseDecodeError(sRawCode, off,
                                          'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

                # Hand it to the handler.
                fnParser = g_dMcStmtParsers.get(sName);
                if not fnParser:
                    self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
                oStmt = fnParser(self, sName, asParams);
                # A parser may return several statements in a list or tuple
                # (parseMcArgLocalEFlags does this), in which case all are added.
                if not isinstance(oStmt, (list, tuple)):
                    aoStmts.append(oStmt);
                else:
                    aoStmts.extend(oStmt);

                #
                # If conditional, we need to parse the whole statement.
                #
                # For reasons of simplicity, we assume the following structure
                # and parse each branch in a recursive call:
                #   IEM_MC_IF_XXX() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ELSE() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ENDIF();
                #
                if sName.startswith('IEM_MC_IF_'):
                    # Conditionals are accepted at level 0 and 1 only.  Keep in mind that
                    # the C++ 'if' branches increment the level too.
                    if iLevel > 1:
                        self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                    # Find start of the IF block:
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                    if sRawCode[offBlock1] != '{':
                        self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                    # Find the end of it.
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                    # Is there an else section?
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
                        off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
                        if sRawCode[off] != '(':
                            self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
                        off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[off] != ')':
                            self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');

                        # Find start of the ELSE block.
                        offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[offBlock2] != '{':
                            self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');

                        # Find the end of it.
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                        off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                    # Parse past the endif statement.
                    if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
                        self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
                    off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ';':
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
                    off += 1;

                else:
                    # Advance.
                    off = offEnd + 1;

            #
            # Otherwise it must be a C/C++ statement of sorts.
            #
            else:
                # Find the end of the statement.  if and else requires special handling.
                sCondExpr = None;
                oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
                if oMatch:
                    if oMatch.group(1)[-1] == '(':
                        # Keyword with a parenthesized expression: offEnd ends up at the
                        # closing parenthesis (or end of code if there is none).
                        (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                    else:
                        offEnd = oMatch.end();
                    if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                        self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
                elif ch == '#':
                    # Preprocessor directive: It runs to the end of the line (or of the range).
                    offEnd = sRawCode.find('\n', off, offStop);
                    if offEnd < 0:
                        offEnd = offStop;
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;
                else:
                    # Ordinary statement: It runs up to and including the next semicolon.
                    offEnd = sRawCode.find(';', off);
                    if offEnd < 0:
                        self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

                # Check this and the following statement whether it might have
                # something to do with decoding.  This is a statement filter
                # criteria when generating the threaded functions blocks.
                # Note! Only the first two markers are looked for in the following
                #       statement too, the last two just in this one.
                offNextEnd = sRawCode.find(';', offEnd + 1);
                fDecode    = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                              or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                              );

                if not oMatch:
                    if ch != '#':
                        aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                    else:
                        aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                    off = offEnd + 1;
                elif oMatch.group(1).startswith('if'):
                    #
                    # if () xxx [else yyy] statement.
                    #
                    oStmt = McCppCond(sCondExpr, fDecode);
                    aoStmts.append(oStmt);
                    off = offEnd + 1;

                    # Following the if () we can either have a {} containing zero or more statements
                    # or we have a single statement.
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                    if sRawCode[offBlock1] == '{':
                        offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                        offBlock1 += 1;
                    else:
                        offBlock1End = sRawCode.find(';', offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                    # Note! For the single statement case the range ends at the semicolon.  The
                    #       recursive call still sees it as the statement terminator searches
                    #       aren't limited by offStop.
                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                    # The else is optional and can likewise be followed by {} or a single statement.
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                        offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                        if sRawCode[offBlock2] == '{':
                            offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                            offBlock2 += 1;
                        else:
                            offBlock2End = sRawCode.find(';', offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                        off = offBlock2End + 1;

                elif oMatch.group(1) == 'else':
                    # Problematic 'else' branch, typically involving #ifdefs.
                    # (An 'else' belonging to an 'if' is consumed by the 'if' handling above.)
                    self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

        return aoStmts;
2442
2443 def decode(self):
2444 """
2445 Decodes the block, populating self.aoStmts if necessary.
2446 Returns the statement list.
2447 Raises ParserException on failure.
2448 """
2449 if not self.aoStmts:
2450 self.aoStmts = self.decodeCode(''.join(self.asLines));
2451 return self.aoStmts;
2452
2453
2454## IEM_MC_XXX -> parser dictionary.
2455# The raw table was generated via the following command
2456# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2457# | sort | uniq | gawk "{printf """ %%-60s %%s\n""", $1, $2}"
2458g_dMcStmtParsers = {
2459 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2460 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': McBlock.parseMcGeneric,
2461 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2462 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': McBlock.parseMcGeneric,
2463 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2464 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': McBlock.parseMcGeneric,
2465 'IEM_MC_ADD_GREG_U16': McBlock.parseMcGeneric,
2466 'IEM_MC_ADD_GREG_U16_TO_LOCAL': McBlock.parseMcGeneric,
2467 'IEM_MC_ADD_GREG_U32': McBlock.parseMcGeneric,
2468 'IEM_MC_ADD_GREG_U32_TO_LOCAL': McBlock.parseMcGeneric,
2469 'IEM_MC_ADD_GREG_U64': McBlock.parseMcGeneric,
2470 'IEM_MC_ADD_GREG_U64_TO_LOCAL': McBlock.parseMcGeneric,
2471 'IEM_MC_ADD_GREG_U8': McBlock.parseMcGeneric,
2472 'IEM_MC_ADD_GREG_U8_TO_LOCAL': McBlock.parseMcGeneric,
2473 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': McBlock.parseMcGeneric,
2474 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': McBlock.parseMcGeneric,
2475 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': McBlock.parseMcGeneric,
2476 'IEM_MC_ADVANCE_RIP_AND_FINISH': McBlock.parseMcGeneric,
2477 'IEM_MC_AND_2LOCS_U32': McBlock.parseMcGeneric,
2478 'IEM_MC_AND_ARG_U16': McBlock.parseMcGeneric,
2479 'IEM_MC_AND_ARG_U32': McBlock.parseMcGeneric,
2480 'IEM_MC_AND_ARG_U64': McBlock.parseMcGeneric,
2481 'IEM_MC_AND_GREG_U16': McBlock.parseMcGeneric,
2482 'IEM_MC_AND_GREG_U32': McBlock.parseMcGeneric,
2483 'IEM_MC_AND_GREG_U64': McBlock.parseMcGeneric,
2484 'IEM_MC_AND_GREG_U8': McBlock.parseMcGeneric,
2485 'IEM_MC_AND_LOCAL_U16': McBlock.parseMcGeneric,
2486 'IEM_MC_AND_LOCAL_U32': McBlock.parseMcGeneric,
2487 'IEM_MC_AND_LOCAL_U64': McBlock.parseMcGeneric,
2488 'IEM_MC_AND_LOCAL_U8': McBlock.parseMcGeneric,
2489 'IEM_MC_ARG': McBlock.parseMcArg,
2490 'IEM_MC_ARG_CONST': McBlock.parseMcArgConst,
2491 'IEM_MC_ARG_LOCAL_EFLAGS': McBlock.parseMcArgLocalEFlags,
2492 'IEM_MC_ARG_LOCAL_REF': McBlock.parseMcArgLocalRef,
2493 'IEM_MC_ASSIGN': McBlock.parseMcGeneric,
2494 'IEM_MC_ASSIGN_TO_SMALLER': McBlock.parseMcGeneric,
2495 'IEM_MC_ASSIGN_U8_SX_U64': McBlock.parseMcGeneric,
2496 'IEM_MC_ASSIGN_U32_SX_U64': McBlock.parseMcGeneric,
2497 'IEM_MC_BEGIN': McBlock.parseMcGeneric,
2498 'IEM_MC_BSWAP_LOCAL_U16': McBlock.parseMcGeneric,
2499 'IEM_MC_BSWAP_LOCAL_U32': McBlock.parseMcGeneric,
2500 'IEM_MC_BSWAP_LOCAL_U64': McBlock.parseMcGeneric,
2501 'IEM_MC_CALC_RM_EFF_ADDR': McBlock.parseMcGeneric,
2502 'IEM_MC_CALL_AIMPL_3': McBlock.parseMcCallAImpl,
2503 'IEM_MC_CALL_AIMPL_4': McBlock.parseMcCallAImpl,
2504 'IEM_MC_CALL_AVX_AIMPL_2': McBlock.parseMcCallAvxAImpl,
2505 'IEM_MC_CALL_AVX_AIMPL_3': McBlock.parseMcCallAvxAImpl,
2506 'IEM_MC_CALL_CIMPL_0': McBlock.parseMcCallCImpl,
2507 'IEM_MC_CALL_CIMPL_1': McBlock.parseMcCallCImpl,
2508 'IEM_MC_CALL_CIMPL_2': McBlock.parseMcCallCImpl,
2509 'IEM_MC_CALL_CIMPL_3': McBlock.parseMcCallCImpl,
2510 'IEM_MC_CALL_CIMPL_4': McBlock.parseMcCallCImpl,
2511 'IEM_MC_CALL_CIMPL_5': McBlock.parseMcCallCImpl,
2512 'IEM_MC_CALL_FPU_AIMPL_1': McBlock.parseMcCallFpuAImpl,
2513 'IEM_MC_CALL_FPU_AIMPL_2': McBlock.parseMcCallFpuAImpl,
2514 'IEM_MC_CALL_FPU_AIMPL_3': McBlock.parseMcCallFpuAImpl,
2515 'IEM_MC_CALL_MMX_AIMPL_2': McBlock.parseMcCallMmxAImpl,
2516 'IEM_MC_CALL_MMX_AIMPL_3': McBlock.parseMcCallMmxAImpl,
2517 'IEM_MC_CALL_SSE_AIMPL_2': McBlock.parseMcCallSseAImpl,
2518 'IEM_MC_CALL_SSE_AIMPL_3': McBlock.parseMcCallSseAImpl,
2519 'IEM_MC_CALL_VOID_AIMPL_0': McBlock.parseMcCallVoidAImpl,
2520 'IEM_MC_CALL_VOID_AIMPL_1': McBlock.parseMcCallVoidAImpl,
2521 'IEM_MC_CALL_VOID_AIMPL_2': McBlock.parseMcCallVoidAImpl,
2522 'IEM_MC_CALL_VOID_AIMPL_3': McBlock.parseMcCallVoidAImpl,
2523 'IEM_MC_CALL_VOID_AIMPL_4': McBlock.parseMcCallVoidAImpl,
2524 'IEM_MC_CLEAR_EFL_BIT': McBlock.parseMcGeneric,
2525 'IEM_MC_CLEAR_FSW_EX': McBlock.parseMcGeneric,
2526 'IEM_MC_CLEAR_HIGH_GREG_U64': McBlock.parseMcGeneric,
2527 'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': McBlock.parseMcGeneric,
2528 'IEM_MC_CLEAR_XREG_U32_MASK': McBlock.parseMcGeneric,
2529 'IEM_MC_CLEAR_YREG_128_UP': McBlock.parseMcGeneric,
2530 'IEM_MC_COMMIT_EFLAGS': McBlock.parseMcGeneric,
2531 'IEM_MC_COPY_XREG_U128': McBlock.parseMcGeneric,
2532 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2533 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2534 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2535 'IEM_MC_DEFER_TO_CIMPL_0_RET': McBlock.parseMcGeneric,
2536 'IEM_MC_DEFER_TO_CIMPL_1_RET': McBlock.parseMcGeneric,
2537 'IEM_MC_DEFER_TO_CIMPL_2_RET': McBlock.parseMcGeneric,
2538 'IEM_MC_DEFER_TO_CIMPL_3_RET': McBlock.parseMcGeneric,
2539 'IEM_MC_END': McBlock.parseMcGeneric,
2540 'IEM_MC_FETCH_EFLAGS': McBlock.parseMcGeneric,
2541 'IEM_MC_FETCH_EFLAGS_U8': McBlock.parseMcGeneric,
2542 'IEM_MC_FETCH_FCW': McBlock.parseMcGeneric,
2543 'IEM_MC_FETCH_FSW': McBlock.parseMcGeneric,
2544 'IEM_MC_FETCH_GREG_U16': McBlock.parseMcGeneric,
2545 'IEM_MC_FETCH_GREG_U16_SX_U32': McBlock.parseMcGeneric,
2546 'IEM_MC_FETCH_GREG_U16_SX_U64': McBlock.parseMcGeneric,
2547 'IEM_MC_FETCH_GREG_U16_ZX_U32': McBlock.parseMcGeneric,
2548 'IEM_MC_FETCH_GREG_U16_ZX_U64': McBlock.parseMcGeneric,
2549 'IEM_MC_FETCH_GREG_U32': McBlock.parseMcGeneric,
2550 'IEM_MC_FETCH_GREG_U32_SX_U64': McBlock.parseMcGeneric,
2551 'IEM_MC_FETCH_GREG_U32_ZX_U64': McBlock.parseMcGeneric,
2552 'IEM_MC_FETCH_GREG_U64': McBlock.parseMcGeneric,
2553 'IEM_MC_FETCH_GREG_U64_ZX_U64': McBlock.parseMcGeneric,
2554 'IEM_MC_FETCH_GREG_U8': McBlock.parseMcGeneric,
2555 'IEM_MC_FETCH_GREG_U8_SX_U16': McBlock.parseMcGeneric,
2556 'IEM_MC_FETCH_GREG_U8_SX_U32': McBlock.parseMcGeneric,
2557 'IEM_MC_FETCH_GREG_U8_SX_U64': McBlock.parseMcGeneric,
2558 'IEM_MC_FETCH_GREG_U8_ZX_U16': McBlock.parseMcGeneric,
2559 'IEM_MC_FETCH_GREG_U8_ZX_U32': McBlock.parseMcGeneric,
2560 'IEM_MC_FETCH_GREG_U8_ZX_U64': McBlock.parseMcGeneric,
2561 'IEM_MC_FETCH_MEM_D80': McBlock.parseMcGeneric,
2562 'IEM_MC_FETCH_MEM_I16': McBlock.parseMcGeneric,
2563 'IEM_MC_FETCH_MEM_I32': McBlock.parseMcGeneric,
2564 'IEM_MC_FETCH_MEM_I64': McBlock.parseMcGeneric,
2565 'IEM_MC_FETCH_MEM_R32': McBlock.parseMcGeneric,
2566 'IEM_MC_FETCH_MEM_R64': McBlock.parseMcGeneric,
2567 'IEM_MC_FETCH_MEM_R80': McBlock.parseMcGeneric,
2568 'IEM_MC_FETCH_MEM_S32_SX_U64': McBlock.parseMcGeneric,
2569 'IEM_MC_FETCH_MEM_U128': McBlock.parseMcGeneric,
2570 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2571 'IEM_MC_FETCH_MEM_U128_NO_AC': McBlock.parseMcGeneric,
2572 'IEM_MC_FETCH_MEM_U16': McBlock.parseMcGeneric,
2573 'IEM_MC_FETCH_MEM_U16_DISP': McBlock.parseMcGeneric,
2574 'IEM_MC_FETCH_MEM_U16_SX_U32': McBlock.parseMcGeneric,
2575 'IEM_MC_FETCH_MEM_U16_SX_U64': McBlock.parseMcGeneric,
2576 'IEM_MC_FETCH_MEM_U16_ZX_U32': McBlock.parseMcGeneric,
2577 'IEM_MC_FETCH_MEM_U16_ZX_U64': McBlock.parseMcGeneric,
2578 'IEM_MC_FETCH_MEM_U256': McBlock.parseMcGeneric,
2579 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2580 'IEM_MC_FETCH_MEM_U256_NO_AC': McBlock.parseMcGeneric,
2581 'IEM_MC_FETCH_MEM_U32': McBlock.parseMcGeneric,
2582 'IEM_MC_FETCH_MEM_U32_DISP': McBlock.parseMcGeneric,
2583 'IEM_MC_FETCH_MEM_U32_SX_U64': McBlock.parseMcGeneric,
2584 'IEM_MC_FETCH_MEM_U32_ZX_U64': McBlock.parseMcGeneric,
2585 'IEM_MC_FETCH_MEM_U64': McBlock.parseMcGeneric,
2586 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': McBlock.parseMcGeneric,
2587 'IEM_MC_FETCH_MEM_U64_DISP': McBlock.parseMcGeneric,
2588 'IEM_MC_FETCH_MEM_U8': McBlock.parseMcGeneric,
2589 'IEM_MC_FETCH_MEM_U8_SX_U16': McBlock.parseMcGeneric,
2590 'IEM_MC_FETCH_MEM_U8_SX_U32': McBlock.parseMcGeneric,
2591 'IEM_MC_FETCH_MEM_U8_SX_U64': McBlock.parseMcGeneric,
2592 'IEM_MC_FETCH_MEM_U8_ZX_U16': McBlock.parseMcGeneric,
2593 'IEM_MC_FETCH_MEM_U8_ZX_U32': McBlock.parseMcGeneric,
2594 'IEM_MC_FETCH_MEM_U8_ZX_U64': McBlock.parseMcGeneric,
2595 'IEM_MC_FETCH_MEM_XMM': McBlock.parseMcGeneric,
2596 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': McBlock.parseMcGeneric,
2597 'IEM_MC_FETCH_MEM_XMM_NO_AC': McBlock.parseMcGeneric,
2598 'IEM_MC_FETCH_MEM_XMM_U32': McBlock.parseMcGeneric,
2599 'IEM_MC_FETCH_MEM_XMM_U64': McBlock.parseMcGeneric,
2600 'IEM_MC_FETCH_MEM_YMM': McBlock.parseMcGeneric,
2601 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': McBlock.parseMcGeneric,
2602 'IEM_MC_FETCH_MEM_YMM_NO_AC': McBlock.parseMcGeneric,
2603 'IEM_MC_FETCH_MEM16_U8': McBlock.parseMcGeneric,
2604 'IEM_MC_FETCH_MEM32_U8': McBlock.parseMcGeneric,
2605 'IEM_MC_FETCH_MREG_U32': McBlock.parseMcGeneric,
2606 'IEM_MC_FETCH_MREG_U64': McBlock.parseMcGeneric,
2607 'IEM_MC_FETCH_SREG_BASE_U32': McBlock.parseMcGeneric,
2608 'IEM_MC_FETCH_SREG_BASE_U64': McBlock.parseMcGeneric,
2609 'IEM_MC_FETCH_SREG_U16': McBlock.parseMcGeneric,
2610 'IEM_MC_FETCH_SREG_ZX_U32': McBlock.parseMcGeneric,
2611 'IEM_MC_FETCH_SREG_ZX_U64': McBlock.parseMcGeneric,
2612 'IEM_MC_FETCH_XREG_U128': McBlock.parseMcGeneric,
2613 'IEM_MC_FETCH_XREG_U16': McBlock.parseMcGeneric,
2614 'IEM_MC_FETCH_XREG_U32': McBlock.parseMcGeneric,
2615 'IEM_MC_FETCH_XREG_U64': McBlock.parseMcGeneric,
2616 'IEM_MC_FETCH_XREG_U8': McBlock.parseMcGeneric,
2617 'IEM_MC_FETCH_XREG_XMM': McBlock.parseMcGeneric,
2618 'IEM_MC_FETCH_YREG_2ND_U64': McBlock.parseMcGeneric,
2619 'IEM_MC_FETCH_YREG_U128': McBlock.parseMcGeneric,
2620 'IEM_MC_FETCH_YREG_U256': McBlock.parseMcGeneric,
2621 'IEM_MC_FETCH_YREG_U32': McBlock.parseMcGeneric,
2622 'IEM_MC_FETCH_YREG_U64': McBlock.parseMcGeneric,
2623 'IEM_MC_FLIP_EFL_BIT': McBlock.parseMcGeneric,
2624 'IEM_MC_FPU_FROM_MMX_MODE': McBlock.parseMcGeneric,
2625 'IEM_MC_FPU_STACK_DEC_TOP': McBlock.parseMcGeneric,
2626 'IEM_MC_FPU_STACK_FREE': McBlock.parseMcGeneric,
2627 'IEM_MC_FPU_STACK_INC_TOP': McBlock.parseMcGeneric,
2628 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': McBlock.parseMcGeneric,
2629 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': McBlock.parseMcGeneric,
2630 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': McBlock.parseMcGeneric,
2631 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': McBlock.parseMcGeneric,
2632 'IEM_MC_FPU_STACK_UNDERFLOW': McBlock.parseMcGeneric,
2633 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': McBlock.parseMcGeneric,
2634 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2635 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': McBlock.parseMcGeneric,
2636 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': McBlock.parseMcGeneric,
2637 'IEM_MC_FPU_TO_MMX_MODE': McBlock.parseMcGeneric,
2638 'IEM_MC_IF_CX_IS_NZ': McBlock.parseMcGenericCond,
2639 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2640 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2641 'IEM_MC_IF_ECX_IS_NZ': McBlock.parseMcGenericCond,
2642 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2643 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2644 'IEM_MC_IF_EFL_ANY_BITS_SET': McBlock.parseMcGenericCond,
2645 'IEM_MC_IF_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2646 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': McBlock.parseMcGenericCond,
2647 'IEM_MC_IF_EFL_BIT_SET': McBlock.parseMcGenericCond,
2648 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': McBlock.parseMcGenericCond,
2649 'IEM_MC_IF_EFL_BITS_EQ': McBlock.parseMcGenericCond,
2650 'IEM_MC_IF_EFL_BITS_NE': McBlock.parseMcGenericCond,
2651 'IEM_MC_IF_EFL_NO_BITS_SET': McBlock.parseMcGenericCond,
2652 'IEM_MC_IF_FCW_IM': McBlock.parseMcGenericCond,
2653 'IEM_MC_IF_FPUREG_IS_EMPTY': McBlock.parseMcGenericCond,
2654 'IEM_MC_IF_FPUREG_NOT_EMPTY': McBlock.parseMcGenericCond,
2655 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2656 'IEM_MC_IF_GREG_BIT_SET': McBlock.parseMcGenericCond,
2657 'IEM_MC_IF_LOCAL_IS_Z': McBlock.parseMcGenericCond,
2658 'IEM_MC_IF_MXCSR_XCPT_PENDING': McBlock.parseMcGenericCond,
2659 'IEM_MC_IF_RCX_IS_NZ': McBlock.parseMcGenericCond,
2660 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2661 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2662 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2663 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': McBlock.parseMcGenericCond,
2664 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': McBlock.parseMcGeneric,
2665 'IEM_MC_INT_CLEAR_ZMM_256_UP': McBlock.parseMcGeneric,
2666 'IEM_MC_LOCAL': McBlock.parseMcLocal,
2667 'IEM_MC_LOCAL_CONST': McBlock.parseMcLocalConst,
2668 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': McBlock.parseMcGeneric,
2669 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2670 'IEM_MC_MAYBE_RAISE_FPU_XCPT': McBlock.parseMcGeneric,
2671 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': McBlock.parseMcGeneric,
2672 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': McBlock.parseMcGeneric,
2673 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': McBlock.parseMcGeneric,
2674 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2675 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': McBlock.parseMcGeneric,
2676 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2677 'IEM_MC_MEM_COMMIT_AND_UNMAP': McBlock.parseMcGeneric,
2678 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': McBlock.parseMcGeneric,
2679 'IEM_MC_MEM_MAP': McBlock.parseMcGeneric,
2680 'IEM_MC_MEM_MAP_EX': McBlock.parseMcGeneric,
2681 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': McBlock.parseMcGeneric,
2682 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2683 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2684 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': McBlock.parseMcGeneric,
2685 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': McBlock.parseMcGeneric,
2686 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2687 'IEM_MC_MODIFIED_MREG': McBlock.parseMcGeneric,
2688 'IEM_MC_MODIFIED_MREG_BY_REF': McBlock.parseMcGeneric,
2689 'IEM_MC_OR_2LOCS_U32': McBlock.parseMcGeneric,
2690 'IEM_MC_OR_GREG_U16': McBlock.parseMcGeneric,
2691 'IEM_MC_OR_GREG_U32': McBlock.parseMcGeneric,
2692 'IEM_MC_OR_GREG_U64': McBlock.parseMcGeneric,
2693 'IEM_MC_OR_GREG_U8': McBlock.parseMcGeneric,
2694 'IEM_MC_OR_LOCAL_U16': McBlock.parseMcGeneric,
2695 'IEM_MC_OR_LOCAL_U32': McBlock.parseMcGeneric,
2696 'IEM_MC_OR_LOCAL_U8': McBlock.parseMcGeneric,
2697 'IEM_MC_POP_U16': McBlock.parseMcGeneric,
2698 'IEM_MC_POP_U32': McBlock.parseMcGeneric,
2699 'IEM_MC_POP_U64': McBlock.parseMcGeneric,
2700 'IEM_MC_PREPARE_AVX_USAGE': McBlock.parseMcGeneric,
2701 'IEM_MC_PREPARE_FPU_USAGE': McBlock.parseMcGeneric,
2702 'IEM_MC_PREPARE_SSE_USAGE': McBlock.parseMcGeneric,
2703 'IEM_MC_PUSH_FPU_RESULT': McBlock.parseMcGeneric,
2704 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2705 'IEM_MC_PUSH_FPU_RESULT_TWO': McBlock.parseMcGeneric,
2706 'IEM_MC_PUSH_U16': McBlock.parseMcGeneric,
2707 'IEM_MC_PUSH_U32': McBlock.parseMcGeneric,
2708 'IEM_MC_PUSH_U32_SREG': McBlock.parseMcGeneric,
2709 'IEM_MC_PUSH_U64': McBlock.parseMcGeneric,
2710 'IEM_MC_RAISE_DIVIDE_ERROR': McBlock.parseMcGeneric,
2711 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': McBlock.parseMcGeneric,
2712 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': McBlock.parseMcGeneric,
2713 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2714 'IEM_MC_REF_EFLAGS': McBlock.parseMcGeneric,
2715 'IEM_MC_REF_FPUREG': McBlock.parseMcGeneric,
2716 'IEM_MC_REF_GREG_I32': McBlock.parseMcGeneric,
2717 'IEM_MC_REF_GREG_I32_CONST': McBlock.parseMcGeneric,
2718 'IEM_MC_REF_GREG_I64': McBlock.parseMcGeneric,
2719 'IEM_MC_REF_GREG_I64_CONST': McBlock.parseMcGeneric,
2720 'IEM_MC_REF_GREG_U16': McBlock.parseMcGeneric,
2721 'IEM_MC_REF_GREG_U32': McBlock.parseMcGeneric,
2722 'IEM_MC_REF_GREG_U64': McBlock.parseMcGeneric,
2723 'IEM_MC_REF_GREG_U8': McBlock.parseMcGeneric,
2724 'IEM_MC_REF_LOCAL': McBlock.parseMcGeneric,
2725 'IEM_MC_REF_MREG_U32_CONST': McBlock.parseMcGeneric,
2726 'IEM_MC_REF_MREG_U64': McBlock.parseMcGeneric,
2727 'IEM_MC_REF_MREG_U64_CONST': McBlock.parseMcGeneric,
2728 'IEM_MC_REF_MXCSR': McBlock.parseMcGeneric,
2729 'IEM_MC_REF_XREG_R32_CONST': McBlock.parseMcGeneric,
2730 'IEM_MC_REF_XREG_R64_CONST': McBlock.parseMcGeneric,
2731 'IEM_MC_REF_XREG_U128': McBlock.parseMcGeneric,
2732 'IEM_MC_REF_XREG_U128_CONST': McBlock.parseMcGeneric,
2733 'IEM_MC_REF_XREG_U32_CONST': McBlock.parseMcGeneric,
2734 'IEM_MC_REF_XREG_U64_CONST': McBlock.parseMcGeneric,
2735 'IEM_MC_REF_XREG_XMM_CONST': McBlock.parseMcGeneric,
2736 'IEM_MC_REF_YREG_U128': McBlock.parseMcGeneric,
2737 'IEM_MC_REF_YREG_U128_CONST': McBlock.parseMcGeneric,
2738 'IEM_MC_REF_YREG_U64_CONST': McBlock.parseMcGeneric,
2739 'IEM_MC_REL_JMP_S16_AND_FINISH': McBlock.parseMcGeneric,
2740 'IEM_MC_REL_JMP_S32_AND_FINISH': McBlock.parseMcGeneric,
2741 'IEM_MC_REL_JMP_S8_AND_FINISH': McBlock.parseMcGeneric,
2742 'IEM_MC_RETURN_ON_FAILURE': McBlock.parseMcGeneric,
2743 'IEM_MC_SAR_LOCAL_S16': McBlock.parseMcGeneric,
2744 'IEM_MC_SAR_LOCAL_S32': McBlock.parseMcGeneric,
2745 'IEM_MC_SAR_LOCAL_S64': McBlock.parseMcGeneric,
2746 'IEM_MC_SET_EFL_BIT': McBlock.parseMcGeneric,
2747 'IEM_MC_SET_FPU_RESULT': McBlock.parseMcGeneric,
2748 'IEM_MC_SET_RIP_U16_AND_FINISH': McBlock.parseMcGeneric,
2749 'IEM_MC_SET_RIP_U32_AND_FINISH': McBlock.parseMcGeneric,
2750 'IEM_MC_SET_RIP_U64_AND_FINISH': McBlock.parseMcGeneric,
2751 'IEM_MC_SHL_LOCAL_S16': McBlock.parseMcGeneric,
2752 'IEM_MC_SHL_LOCAL_S32': McBlock.parseMcGeneric,
2753 'IEM_MC_SHL_LOCAL_S64': McBlock.parseMcGeneric,
2754 'IEM_MC_SHR_LOCAL_U8': McBlock.parseMcGeneric,
2755 'IEM_MC_SSE_UPDATE_MXCSR': McBlock.parseMcGeneric,
2756 'IEM_MC_STORE_FPU_RESULT': McBlock.parseMcGeneric,
2757 'IEM_MC_STORE_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2758 'IEM_MC_STORE_FPU_RESULT_THEN_POP': McBlock.parseMcGeneric,
2759 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2760 'IEM_MC_STORE_FPUREG_R80_SRC_REF': McBlock.parseMcGeneric,
2761 'IEM_MC_STORE_GREG_I64': McBlock.parseMcGeneric,
2762 'IEM_MC_STORE_GREG_U16': McBlock.parseMcGeneric,
2763 'IEM_MC_STORE_GREG_U16_CONST': McBlock.parseMcGeneric,
2764 'IEM_MC_STORE_GREG_U32': McBlock.parseMcGeneric,
2765 'IEM_MC_STORE_GREG_U32_CONST': McBlock.parseMcGeneric,
2766 'IEM_MC_STORE_GREG_U64': McBlock.parseMcGeneric,
2767 'IEM_MC_STORE_GREG_U64_CONST': McBlock.parseMcGeneric,
2768 'IEM_MC_STORE_GREG_U8': McBlock.parseMcGeneric,
2769 'IEM_MC_STORE_GREG_U8_CONST': McBlock.parseMcGeneric,
2770 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': McBlock.parseMcGeneric,
2771 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': McBlock.parseMcGeneric,
2772 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': McBlock.parseMcGeneric,
2773 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': McBlock.parseMcGeneric,
2774 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': McBlock.parseMcGeneric,
2775 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': McBlock.parseMcGeneric,
2776 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': McBlock.parseMcGeneric,
2777 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': McBlock.parseMcGeneric,
2778 'IEM_MC_STORE_MEM_U128': McBlock.parseMcGeneric,
2779 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2780 'IEM_MC_STORE_MEM_U16': McBlock.parseMcGeneric,
2781 'IEM_MC_STORE_MEM_U16_CONST': McBlock.parseMcGeneric,
2782 'IEM_MC_STORE_MEM_U256': McBlock.parseMcGeneric,
2783 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2784 'IEM_MC_STORE_MEM_U32': McBlock.parseMcGeneric,
2785 'IEM_MC_STORE_MEM_U32_CONST': McBlock.parseMcGeneric,
2786 'IEM_MC_STORE_MEM_U64': McBlock.parseMcGeneric,
2787 'IEM_MC_STORE_MEM_U64_CONST': McBlock.parseMcGeneric,
2788 'IEM_MC_STORE_MEM_U8': McBlock.parseMcGeneric,
2789 'IEM_MC_STORE_MEM_U8_CONST': McBlock.parseMcGeneric,
2790 'IEM_MC_STORE_MREG_U32_ZX_U64': McBlock.parseMcGeneric,
2791 'IEM_MC_STORE_MREG_U64': McBlock.parseMcGeneric,
2792 'IEM_MC_STORE_SREG_BASE_U32': McBlock.parseMcGeneric,
2793 'IEM_MC_STORE_SREG_BASE_U64': McBlock.parseMcGeneric,
2794 'IEM_MC_STORE_SSE_RESULT': McBlock.parseMcGeneric,
2795 'IEM_MC_STORE_XREG_HI_U64': McBlock.parseMcGeneric,
2796 'IEM_MC_STORE_XREG_R32': McBlock.parseMcGeneric,
2797 'IEM_MC_STORE_XREG_R64': McBlock.parseMcGeneric,
2798 'IEM_MC_STORE_XREG_U128': McBlock.parseMcGeneric,
2799 'IEM_MC_STORE_XREG_U16': McBlock.parseMcGeneric,
2800 'IEM_MC_STORE_XREG_U32': McBlock.parseMcGeneric,
2801 'IEM_MC_STORE_XREG_U32_U128': McBlock.parseMcGeneric,
2802 'IEM_MC_STORE_XREG_U32_ZX_U128': McBlock.parseMcGeneric,
2803 'IEM_MC_STORE_XREG_U64': McBlock.parseMcGeneric,
2804 'IEM_MC_STORE_XREG_U64_ZX_U128': McBlock.parseMcGeneric,
2805 'IEM_MC_STORE_XREG_U8': McBlock.parseMcGeneric,
2806 'IEM_MC_STORE_XREG_XMM': McBlock.parseMcGeneric,
2807 'IEM_MC_STORE_XREG_XMM_U32': McBlock.parseMcGeneric,
2808 'IEM_MC_STORE_XREG_XMM_U64': McBlock.parseMcGeneric,
2809 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2810 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2811 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
2812 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2813 'IEM_MC_SUB_GREG_U16': McBlock.parseMcGeneric,
2814 'IEM_MC_SUB_GREG_U32': McBlock.parseMcGeneric,
2815 'IEM_MC_SUB_GREG_U64': McBlock.parseMcGeneric,
2816 'IEM_MC_SUB_GREG_U8': McBlock.parseMcGeneric,
2817 'IEM_MC_SUB_LOCAL_U16': McBlock.parseMcGeneric,
2818 'IEM_MC_UPDATE_FPU_OPCODE_IP': McBlock.parseMcGeneric,
2819 'IEM_MC_UPDATE_FSW': McBlock.parseMcGeneric,
2820 'IEM_MC_UPDATE_FSW_CONST': McBlock.parseMcGeneric,
2821 'IEM_MC_UPDATE_FSW_THEN_POP': McBlock.parseMcGeneric,
2822 'IEM_MC_UPDATE_FSW_THEN_POP_POP': McBlock.parseMcGeneric,
2823 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': McBlock.parseMcGeneric,
2824 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2825};
2826
## Module-level list of microcode blocks (McBlock instances); empty at import time.
g_aoMcBlocks = [] # type: List[McBlock]
2829
2830
2831
class ParserException(Exception):
    """ Exception type raised by the parser; carries one preformatted message string. """
    def __init__(self, sMessage):
        # Hand the message to the base class so str(exc) and exc.args expose it.
        super(ParserException, self).__init__(sMessage);
2836
2837
2838class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2839 """
2840 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2841 """
2842
2843 ## @name Parser state.
2844 ## @{
2845 kiCode = 0;
2846 kiCommentMulti = 1;
2847 ## @}
2848
class Macro(object):
    """
    A preprocessor-style macro: name, optional parameter list, body text and
    the line number it was registered with.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The unexpanded macro body text.
        self.iLine  = iLine;    ##< Line number supplied by the creator (presumably the definition line).
        ## Matches one parameter name (group 2) together with any '##' paste
        ## operator and its surrounding whitespace on either side (groups 1
        ## and 3).  None for macros without parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): as written this is true for any whitespace character
        # (the '(' test can never fail for whitespace).  Kept unchanged so the
        # expanded text stays byte-identical; confirm the intended condition.
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs must hold one value per macro parameter,
        or be empty/None for a parameterless macro (both checked by assert).

        Returns the body with every parameter occurrence replaced by the
        corresponding argument.  A '##' operator adjoining a parameter is
        dropped along with its surrounding whitespace, so the value is pasted
        onto the neighbouring text.
        """
        _ = oParent;

        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        sBody = self.sBody;
        dArgs = dict(zip(self.asArgs, asArgs));

        def replaceArg(oMatch):
            """ Returns the replacement text for a single parameter match. """
            sPre = '';
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
                sPre = ' ';
            sPost = '';
            if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
                sPost = ' ';
            return sPre + dArgs[oMatch.group(2)] + sPost;

        # Substitute in one pass over the *original* body.  Re-scanning a
        # partially expanded body loses the word boundary in front of the
        # second parameter of 'a ## b' once the '##' has been consumed and the
        # first value ends in a word character, leaving 'b' unexpanded.  A
        # function replacement is inserted literally (no backslash processing)
        # and inserted values are never re-scanned.
        return self.oReArgMatch.sub(replaceArg, sBody);
2890
2891
def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
    """
    Sets up the parser state for one source file.

    sSrcFile            File name used as prefix in error messages.
    asLines             The lines of the file to parse.
    sDefaultMap         Key into g_dInstructionMaps selecting the default
                        instruction map (must exist, checked by assert).
    oInheritMacrosFrom  Optional object whose dMacros (copied) and oReMacros
                        (shared) are taken over.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: List[Instruction]
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str,SimpleParser.Macro]
    self.oReMacros      = None  # type: re ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        # Shallow copy, so macros added here do not leak back into the donor's dictionary.
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Counters, all starting at zero.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Precompiled regular expressions.  All of them are raw strings so that
    # regex escapes like \[ \d \s \* \A \Z are not (invalid) Python string
    # escape sequences; the resulting pattern text is unchanged.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine  = re.compile(r'^\s*#\s*define\s+(.*)$');
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');          ##< Simple, no arguments.
    self.oReHashUndef   = re.compile(r'^\s*#\s*undef\s+(.*)$');
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[0-5]_RET)\s*\(');

    # Debug switches; fDebug gates the output of self.debug().
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreProc  = False;

    # Maps each supported comment tag to the bound method handling it.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, @optest0..@optest47 and @optest[0]..@optest[47], share one handler.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    # Error messages collected by error(), errorOnLine() and errorComment().
    self.asErrors = [];
2976
def raiseError(self, sMessage):
    """
    Raises a ParserException carrying sMessage, prefixed with the source
    file name and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
2982
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
2988
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False, so callers can write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2996
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3004
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error located inside the current comment; the reported line
    number is self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,));
    return False;
3012
def printErrors(self):
    """
    Writes all collected error messages to stderr in one go.
    Returns the number of errors (zero when there is nothing to print).
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3021
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix to stderr, but only when
    self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3028
def stripComments(self, sLine):
    """
    Returns sLine with every comment matched by self.oReComment replaced by
    a single space.

    If a comment opener or closer is still present afterwards (the remains
    of a multi-line comment), an error is recorded via self.error().
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3039
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line declaring the table (it must contain the table name
    followed by '[').  The table body is read from self.asLines starting at
    self.iLine.

    Returns True when the closing brace is reached with the expected number
    of entries, otherwise False after recording an error.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables use four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those two tables have 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    # An explicit [256] or [32] dimension selects the one-entry-per-byte layout.
    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walk backwards so insertions/deletions do not disturb the indexes
        # still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                # One IEMOP_X4(x) stands for four consecutive x entries:
                # three copies are inserted here, the fourth replaces slot i below.
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Pick the first instruction that already matches this
                    # opcode/prefix or that can be completed to match it.
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    if not oInstr:
                        # No candidate fits: clone the first one for this
                        # opcode/prefix and register the copy everywhere.
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Every entry counts towards the table length, invalid ones included.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3145
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file, registers it in both
    g_aoAllInstructions and self.aoCurInstrs, and returns it.

    iLine defaults to the current line (self.iLine) when None.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3154
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Fills in oInstr.sMnemonic, and the operands if none are set yet, from an
    IEM stats base name like string: underscore separated, mnemonic first,
    operand types after it.

    Does nothing when the mnemonic is already set.  Returns False when an
    operand type is not found in g_kdOpTypes (operands added before that
    point are kept), True otherwise.
    """
    # Nothing to derive if the mnemonic is already known.
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    # Only derive operands when there are type words and none were specified.
    asTypes = asParts[1:];
    if not asTypes or oInstr.aoOperands:
        return True;

    for sType in asTypes:
        if sType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
3170
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction by filling in defaults for
    whatever was left unspecified and registering the instruction.

    iLine is recorded as the line the instruction was completed on; the
    instruction must not have been completed before (asserted).

    Always returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note: Neither specified instructions (cOpTags > 0) nor the unspecified
    #       legacy ones, where we generally only got things like
    #           /** Opcode 0x0f 0x00 /0. */
    #           FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #       to go on, receive any special treatment at present.

    def nameWithOperandTypes():
        """ Mnemonic followed by '_<type>' for each operand having a type. """
        return '_'.join([oInstr.sMnemonic,] + [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from the stats name, or failing that the
    # function name, if the mnemonic is missing.
    if oInstr.sMnemonic is None:
        sGuessFrom = oInstr.sStats;
        if sGuessFrom is None and oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace('iemOp_', '');
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom);

    # The disassembler op enum constant defaults to OP_<MNEMONIC>.
    if oInstr.sMnemonic is not None and oInstr.sDisEnum is None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name comes from the function name if we have
    # one, otherwise from the mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = nameWithOperandTypes();

    # The IEM function name comes from the mnemonic and operand types if we
    # have a mnemonic, otherwise from the statistics name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + nameWithOperandTypes();
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply the default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive the encoding from operands and maps.  (Instructions whose
    # first operand does not use ModR/M are left without an encoding.)
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            sBase = 'ModR/M' if oInstr.fUnused and oInstr.sSubOpcode else 'fixed';
            oInstr.sEncoding = 'VEX.' + sBase if oInstr.onlyInVexMaps() else sBase;
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Add to the opstat indexed dictionary, complaining about duplicates.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to the function indexed dictionary.  Multiple instructions per
    # function are allowed.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
3276
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Done with the current instruction(s).

    Each current instruction is completed via doneInstructionOne, using
    self.iLine as completion line, or self.iCommentLine + iLineInComment
    when iLineInComment is given.  The stub and instruction counters are
    updated and the current comment and instruction list are reset.

    When fEndOfFunction is set, the current function (if any) is completed
    with its source lines and the per-function state is cleared.

    Always returns True.
    """
    if iLineInComment is None:
        iDoneLine = self.iLine;
    else:
        iDoneLine = self.iCommentLine + iLineInComment;

    aoDone = self.aoCurInstrs;
    for oCur in aoDone:
        self.doneInstructionOne(oCur, iDoneLine);
        if oCur.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(aoDone);

    # Reset the per-instruction state.
    self.sComment    = '';
    self.aoCurInstrs = [];

    if not fEndOfFunction:
        return True;

    #self.debug('%s: oCurFunction=None' % (self.iLine, ));
    oFunction = self.oCurFunction;
    if oFunction:
        oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
    self.oCurFunction   = None;
    self.iMcBlockInFunc = 0;
    return True;
3297
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all current instructions to oValue.

    Unless fOverwrite is exactly True, only attributes that currently are
    None get replaced; any other existing value (empty strings included)
    is left untouched.
    """
    for oCur in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCur, sAttrib) is None:
            setattr(oCur, sAttrib, oValue);
3309
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue.

    Arrays that are too short are padded with None entries first.  Unless
    fOverwrite is exactly True, only entries that currently are None get
    replaced.
    """
    for oCur in self.aoCurInstrs:
        aoValues = getattr(oCur, sAttrib);
        cMissing = iEntry + 1 - len(aoValues);
        if cMissing > 0:
            aoValues.extend([None,] * cMissing);
        if fOverwrite is True or aoValues[iEntry] is None:
            aoValues[iEntry] = oValue;
3321
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line consists of the word 'Opcode' followed by
    something starting with '0x' or '0X', the words following 'Opcode' are
    stored (space separated, with any '0X' prefix normalized to '0x') as
    the sRawOldOpcodes attribute of the current instructions, provided
    they do not already have one.

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the 'Opcode' keyword and keep the opcode bytes and modifiers,
        # lower-casing the hex prefix but not the digits.
        asOpcodeWords = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodeWords));

    return False;
3337
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    A new instruction is added at self.iCommentLine + iTagLine if there
    are no current ones.  The tag count of every current instruction is
    incremented, and self.cTotalTagged is bumped for each instruction
    receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCur in self.aoCurInstrs:
        fFirstTag = oCur.cOpTags == 0;
        oCur.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
3347
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each non-empty section has its lines stripped and joined by a single
    space; empty sections are dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join(sLine.strip() for sLine in asLines) for asLines in aasSections if asLines];
3359
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a single string.

    The stripped lines of a section are joined by sLineSep, and every
    non-empty section but the one at index zero is preceded by
    sSectionSep.  There is no trailing section separator.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asParts = [];
    for iSection, asLines in enumerate(aasSections):
        if not asLines:
            continue;
        sSection = sLineSep.join(sLine.strip() for sLine in asLines);
        asParts.append(sSectionSep + sSection if iSection > 0 else sSection);
    return ''.join(asParts);
3377
3378
3379
3380 ## @name Tag parsers
3381 ## @{
3382
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened, given a trailing dot if it lacks one, and must
    be a single sentence of at most 180 characters.  It is stored as the
    sBrief attribute of the instruction, which must not be set already.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for the first dot that is followed by a space (or is the last
    # character); anything after such a dot means a second sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3412
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened into one string and appended to the description sections
    of the instruction.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if not aasSections:
        return True;

    asFlattened = self.flattenSections(aasSections);
    oInstr.asDescSections.extend(asFlattened);
    return True;
3427
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True on success, otherwise the result of self.errorComment
    (invalid name, or the instruction already has a mnemonic).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));

    oInstr.sMnemonic = sNewMnemonic;
    return True;
3450
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  Valid values are the keys of
    g_kdOpLocations.  When omitted, the default location of the type
    (second entry of its g_kdOpTypes value) is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  Valid values
    are the keys of g_kdOpTypes.

    The operand index is taken from the last character of the tag.

    Returns True on success, otherwise the result of self.errorComment
    (malformed value, unknown type or location, or operand already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Missing
    # lower numbered operands are represented by None placeholders.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3500
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every map name must be a key of g_dInstructionMaps and must not have
    been assigned to the instruction already.

    Returns True on success, otherwise the result of self.errorComment.
    A map name repeated within the value is reported but not fatal.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  The emptiness check must be
    # done on the string since split(',') never returns an empty list.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    if not sFlattened:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asMaps = sFlattened.split(',');
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
3535
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased; 'n/a' is stored as None, and a two character
    value gets '0x' prepended before being validated as an opcode byte.
    Anything but 'n/a' must also be a key of g_kdPrefixes.

    Returns True on success, otherwise the result of self.errorComment
    (missing or multiple values, invalid prefix, or prefix already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3573
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg  (TODO: mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    The 'reg' part is a single digit in the range 0 thru 7, the ModR/M reg
    field being three bits wide.

    Returns True on success, otherwise the result of self.errorComment
    (invalid value, or opcode already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sRegPrefix in ('/', '11/', '!11/'):
            if     len(sOpcode) == len(sRegPrefix) + 1 \
               and sOpcode.startswith(sRegPrefix) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3603
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored on the
    instruction is the first entry of the corresponding dictionary value.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sRawSubOpcode = self.flattenAllSections(aasSections);
    if sRawSubOpcode not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawSubOpcode,));
    sNewSubOpcode = g_kdSubOpcodes[sRawSubOpcode][0];

    # Refuse to replace an existing sub opcode.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));

    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
3630
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    Accepted are the keys of g_kdEncodings, the keys of g_dInstructionMaps
    and anything _isValidOpcodeByte approves of.

    Returns True on success, otherwise the result of self.errorComment
    (invalid value, or encoding already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    fKnown =    sNewEncoding in g_kdEncodings \
             or sNewEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sNewEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Refuse to replace an existing encoding.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));

    oInstr.sEncoding = sNewEncoding;
    return True;
3657
## Maps each EFLAGS tag to the name of the Instruction attribute that
#  receives the flag list of that tag.
kdOpFlagToAttr = dict((
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
));
3666
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of EFLAGS mnemonics (keys of
    g_kdEFlagsMnemonics), or the single word 'none' for an empty list.
    The resulting list is stored in the instruction attribute that
    self.kdOpFlagToAttr associates with the tag.

    Returns True on success.  Returns False if any of the flags are
    invalid, and the result of self.errorComment if the attribute has
    already been set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asNewFlags = sFlattened.split(',');
    if len(asNewFlags) == 1 and asNewFlags[0].lower() == 'none':
        asNewFlags = [];
    else:
        fAllValid = True;
        for idxFlag, sRawFlag in enumerate(asNewFlags):
            if sRawFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry without surrounding whitespace before giving up on it.
            sStripped = sRawFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asNewFlags[idxFlag] = sStripped;
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRawFlag,));
        if not fAllValid:
            return False;

    # Set them, refusing to replace an existing list.
    sAttrib    = self.kdOpFlagToAttr[sTag];
    asOldFlags = getattr(oInstr, sAttrib);
    if asOldFlags is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOldFlags, asNewFlags,));
    setattr(oInstr, sAttrib, asNewFlags);
    return True;
3698
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' gives an
    empty list.  Valid hints are added to the dHints dictionary of the
    instruction.

    Returns False if any of the hints are invalid, otherwise True.  Hints
    the instruction already has are reported but are not fatal.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: The whitespace split leaves nothing to strip off the individual values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3732
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.  The value must match self.oReDisEnum.

    Returns True on success.  Returns False when no value is given, and
    the result of self.errorComment on an invalid value or when the enum
    is already set.  Surplus values are reported and then ignored.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asValues = self.flattenAllSections(aasSections).split();
    if not asValues or len(asValues) > 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,));
    if not asValues:
        return False;

    # Only the first value is considered.
    sNewDisEnum = asValues[0];
    if self.oReDisEnum.match(sNewDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Refuse to replace an existing enum value.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,));

    oInstr.sDisEnum = sNewDisEnum;
    return True;
3761
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.  The name must be a
    key of g_kdCpuNames.

    Returns True on success.  Returns the result of self.errorComment when
    no value is given or the CPU name is invalid.  Surplus values and
    attempts at changing an already set CPU are reported, but are not
    fatal; in the latter case the existing value is kept.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if len(asCpus) != 1:
        fRc = self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));
        if not asCpus:
            return fRc;

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  This must not be done any earlier, or conflicting values
    # would go undetected.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3791
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of keys of
    g_kdCpuIdFlags; the single word 'none' gives an empty list.  Valid
    values are appended to the asCpuIds list of the instruction.

    Returns False if any of the values are invalid, otherwise True.  Values
    the instruction already has are reported but are not fatal.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: The whitespace split leaves nothing to strip off the individual values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3825
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching
    self.oReGroupName is required.

    Returns True on success, otherwise the result of self.errorComment
    (not exactly one value, invalid name, or group already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate the single value.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sNewGroup,) = asGroups;
    if self.oReGroupName.match(sNewGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Refuse to replace an existing group.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));

    oInstr.sGroup = sNewGroup;
    return True;
3851
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The style must be a key of g_kdInvalidStyles and can only be set once
    per instruction, regardless of which of the three tags is used.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate the single value.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sNewStyle,) = asStyles;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Refuse to replace an existing style.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));

    # Set it, flagging the instruction according to the tag used.
    oInstr.sInvalidStyle = sNewStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;
    return True;
3889
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is parsed as one test.  Tests that parse without fatal
    errors are appended to the aoTests list of the instruction, the others
    are reported via self.errorComment and dropped.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are processed back to front: outputs first, then the
        # inputs (after '->') and finally the selectors (after '/').
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The current list must be checked by
            # identity, as comparing the contents would let a repeated switcher
            # slip thru whenever the two lists happen to be equal.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            # Only the first compare operator found in the condition is considered.
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                # Only the first operator found in the item is considered.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional and defaults to that of the field.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed fine, otherwise complain.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4034
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number is expected to equal the number of tests the instruction
    already has; a mismatch is reported, but the test is parsed all the
    same.

    Returns whatever parseTagOpTest returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The number follows the seven characters of '@optest', with or without brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4050
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @optestign | @optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed and nothing is stored.

    See also @oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4062
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    The valid ones are appended to the asCopyTests list of the instruction.

    Returns the result of self.errorComment when no reference is given,
    otherwise True.  Invalid and duplicate references are reported but
    are not fatal.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
4091
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oponlytest | @oponly
    Value:  none

    Restricts testing to the instructions carrying this tag, which is handy
    for singling out one or two new instructions or tests.

    See also @optestignore.

    Returns True after registering the instruction, or the errorComment
    result if the tag was given a value.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value; anything but whitespace is an error.
    sValue = self.flattenAllSections(aasSections).strip();
    if not sValue:
        # Register the instruction in the global only-test list, once.
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);
        _ = iEndLine;
        return True;

    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));
4114
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value is accepted and it must be a key of g_kdXcptTypes.
    The type can only be set once per instruction.

    Returns True on success, otherwise the errorComment result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));
    # Set it, refusing to overwrite an earlier @opxcpttype.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
4141
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opfunction
    Value:  <VMM function name>

    Explicitly sets the IEM function name of the current instruction.
    Normally the name is picked up from the FNIEMOP_XXX macro invocation
    following the description, or generated from the mnemonic and operands,
    so this is meant for instructions whose implementation does not follow
    immediately or does not exist yet.

    The value must match self.oReFunctionName and the name may only be set
    once per instruction.

    Returns True on success, otherwise the errorComment result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then pick the applicable complaint (if any).
    sFunction = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sFunction) is None:
        sError = '%s: invalid VMM function name: "%s" (valid: %s)' \
               % (sTag, sFunction, self.oReFunctionName.pattern);
    elif oInstr.sFunction is not None:
        sError = '%s: attempting to overwrite VMM function name "%s" with "%s"' \
               % (sTag, oInstr.sFunction, sFunction,);
    else:
        oInstr.sFunction = sFunction;
        _ = iEndLine;
        return True;

    return self.errorComment(iTagLine, sError);
4169
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opstats
    Value:  <VMM statistics base name>

    Explicitly sets the statistics name of the current instruction.
    Normally the name is picked up from the IEMOP_MNEMONIC macro invocation,
    or generated from the mnemonic and operands, so this is meant for
    instructions whose implementation does not follow immediately or does
    not exist yet.

    The value must match self.oReStatsName and the name may only be set
    once per instruction.

    Returns True on success, otherwise the errorComment result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then pick the applicable complaint (if any).
    sStats = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sStats) is None:
        sError = '%s: invalid VMM statistics name: "%s" (valid: %s)' \
               % (sTag, sStats, self.oReStatsName.pattern);
    elif oInstr.sStats is not None:
        sError = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' \
               % (sTag, oInstr.sStats, sStats,);
    else:
        oInstr.sStats = sStats;
        _ = iEndLine;
        return True;

    return self.errorComment(iTagLine, sError);
4197
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdone
    Value:  none

    Explicitly flushes the instructions that have been specified so far.

    Returns the doneInstructions result, or the errorComment result if the
    tag was given a value.
    """
    sFlattened = self.flattenAllSections(aasSections);
    if sFlattened == '':
        _ = sTag; _ = iEndLine;
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sFlattened,));
4210
4211 ## @}
4212
4213
def parseComment(self):
    """
    Parses the comment currently held in self.sComment.

    Comments mentioning neither 'Opcode' nor '@' are rejected up front.
    Otherwise the text is split into lines, the decorative leading asterisks
    and the surrounding blank lines are dropped, an old style 'Opcode ...'
    first line is handed to parseCommentOldOpcode, and each @tag found is
    dispatched through self.dTagHandlers together with its value, which is
    passed as a list of sections (runs of lines separated by blank lines).

    Returns False if the comment was rejected, True otherwise.
    """
    #
    # Bail out early if there is nothing of interest in the comment.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False;

    #
    # Break the comment up into lines without leading asterisks and spaces,
    # then trim empty lines off both ends.  Each empty line dropped from the
    # top moves the comment start line down by one.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Old style comment: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Walk the lines collecting @op* tagged data.  Text preceding the first
    # tag is gathered under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # A new tag starts here, so wrap up the previous one first.
            #
            if len(aasSections) > 1 and not asCurSection:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # Begin the new tag; any text after the tag word opens its first section.
            #
            asSplit      = sLine.split(None, 1);
            sCurTag      = asSplit[0].lower();
            asCurSection = asSplit[1:];
            aasSections  = [asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # A blank line after some text starts a new section.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Wrap up the last tag.  Note that no "Did you mean" hints are issued here.
    #
    if len(aasSections) > 1 and not asCurSection:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Untagged text may not be mixed with @op* tags.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4314
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation starting at offStartInvocation in sInvocation.

    If the argument list is not closed within sInvocation, further lines
    are appended from self.asLines, starting at index self.iLine.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation if
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # The macro name is whatever sits between the start offset and the '('.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # Scan the arguments one character at a time, tracking parenthesis
    # nesting and string/character literals so that only top level commas
    # separate arguments.
    iNextLine    = self.iLine;
    cNesting     = 1;
    offCur       = offParen + 1;
    offArgStart  = offCur;
    offLineStart = 0;
    chInQuote    = None;
    while cNesting > 0:
        # Out of text?  Pull in the next source line, if any is left.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asResult, offCur - offLineStart, iNextLine - self.iLine);
            offLineStart = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        ch = sInvocation[offCur];

        if chInQuote:
            # Inside a literal: skip escaped characters, look for the end quote.
            if ch == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;
            elif ch == chInQuote:
                chInQuote = None;
        elif ch == '"' or ch == '\'':
            chInQuote = ch;
        elif ch == ',' or ch == ')':
            # A top level separator or the closing parenthesis ends an argument.
            if cNesting == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if ch == ')':
                cNesting -= 1;
        elif ch == '(':
            cNesting += 1;
        offCur += 1;

    return (asResult, offCur - offLineStart, iNextLine - self.iLine);
4371
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation.

    The occurrence only counts if the first non-blank character following
    the name is an opening parenthesis.  Only the first occurrence is
    examined; later ones are not searched for.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Use startswith() rather than indexing so that a macro name followed by
    # nothing but whitespace counts as "not found" instead of raising IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
4381
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that only
    returns the argument list.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[0];
4387
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    for which findAndParseMacroInvocation finds an invocation in sCode.

    Returns same as findAndParseMacroInvocation, i.e. None if none of the
    macros were found.
    """
    # The generator is lazy, so no macro after the first hit is looked up.
    oAttempts = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oAttempts if asRet is not None), None);
4397
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross checked against what the @op* tags have
    already established for the last current instruction (mnemonic,
    operands, encoding, @opcodesub and statistics name), filling in
    whatever is still unset.  The disassembler and IEM hints are merged
    into the instruction's hint dictionary.  Problems are reported via
    self.error and processing continues.

    sMacro is the macro name (for messages), sStats the a_Stats argument,
    sForm the a_Form argument, sUpper/sLower the upper and lower case
    mnemonics, sDisHints/sIemHints the '|' separated hint expressions and
    asOperands the list of a_OpN arguments.  sAsm is currently unused.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # A 'V' type operand requires a VEX or XOP form.  Test with
                # 'in' rather than the truthiness of str.find(), which is -1
                # (true) on a miss and 0 (false) on a hit at the very start.
                fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXopForm \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those covered by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4537
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower case mnemonic and the operands before
    handing everything over to workerIemOpMnemonicEx.

    Returns what workerIemOpMnemonicEx returns.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # Without operands the mnemonic alone serves as both.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4547
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Opens a new McBlock for the current function, makes it the current MC
    block and appends it to g_aoMcBlocks.  sCode is the code string the
    statement was found in, offBeginStatementInCodeStr the statement offset
    within sCode, and offBeginStatementInLine the offset recorded in the block.

    Returns True.  Errors out via raiseError if there is no current function
    or if a previous block has not been ended yet.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Preconditions: inside a function and not inside another block.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the statement's distance from the preceding newline in
    # sCode (expanded multiline macros embed newlines), or from the start of
    # sCode when there is none.
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline < 0:
        cchIndent = offBeginStatementInCodeStr;
    else:
        cchIndent = offBeginStatementInCodeStr - (offPrevNewline + 1);
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Create and register the new block, then bump the counters.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;
    g_aoMcBlocks.append(oNewBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4576
4577 @staticmethod
4578 def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
4579 """
4580 Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
4581 extracting a statement block from a string that's the result of macro
4582 expansion and therefore contains multiple "sub-lines" as it were.
4583
4584 Returns list of lines covering offBegin thru offEnd in sRawLine.
4585 """
4586
4587 off = sRawLine.find('\n', offEnd);
4588 if off > 0:
4589 sRawLine = sRawLine[:off + 1];
4590
4591 off = sRawLine.rfind('\n', 0, offBegin) + 1;
4592 sRawLine = sRawLine[off:];
4593 if not sRawLine.strip().startswith(sBeginStmt):
4594 sRawLine = sRawLine[offBegin - off:]
4595
4596 return [sLine + '\n' for sLine in sRawLine.split('\n')];
4597
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Gathers the source lines of the current MC block, trims whatever
    follows the 'IEM_MC_END();' statement on the final line, completes the
    block with them and clears self.oCurMcBlock.

    offEndStatementInLine is the offset of the IEM_MC_END statement.

    Returns True.  Errors out via raiseError if there is no open block, if
    IEM_MC_BEGIN is not the first word on its line, or if the final line
    does not hold a well formed 'IEM_MC_END();' statement.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    offFinalEnd   = sFinal.find('IEM_MC_END');
    offEndInFinal = offFinalEnd;
    if offFinalEnd < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by
    # whitespace.  The scan is bounds checked so that a truncated statement
    # is reported as a parse error rather than surfacing as an IndexError.
    for chExpected in ('(', ')', ';'):
        while offFinalEnd < len(sFinal) and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= len(sFinal) or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
4661
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[0-5]_RET macro invocation.

    Such a statement forms a complete MC block of its own: a McBlock is
    created, given a single McStmtCall statement, completed with the source
    lines covering the invocation and appended to g_aoMcBlocks.  self.iLine
    is advanced by the number of additional lines the invocation spans.

    sCode is the code string the statement was found in,
    offBeginStatementInCodeStr the statement offset within sCode,
    offBeginStatementInLine the offset recorded in the block, and cParams
    the digit from the macro name.

    Returns True.  Errors out via raiseError if there is no current function,
    if an IEM_MC_BEGIN block is open, or if the invocation cannot be parsed.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN block starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                       self.oCurFunction, self.iMcBlockInFunc, cchIndent);

    # Parse the statement.  The macro name plus cParams + 2 arguments are expected.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 3:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s!'
                        % (sStmt, len(asArgs), cParams + 3,));

    oMcBlock.aoStmts = [McStmtCall(asArgs[0], asArgs[1:], 1),];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        # The helper yields a trailing blank entry when the extracted text
        # ends with a newline; drop such entries (as workerIemMcEnd does) so
        # that the last entry is the one holding the terminating ';'.
        while asLines and asLines[-1].strip() == '':
            asLines.pop();
        assert asLines and asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    # The statement need only appear somewhere on the first line; it is not
    # required to be the first word on it.
    assert asLines[0].find(sStmt) >= 0;

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
4726
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    These are all defined using one of the FNIEMOP*_DEF* and FNIEMOP_*STUB*
    macros, so asArgs is the parsed macro invocation with the macro name as
    the 0th entry.  The entry following it is passed to DecoderFunction
    together with the full list.

    Returns True.
    """
    # Close the function currently being tracked, if any, giving it the
    # source lines from its first line up to the one before the current.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Start tracking the new one.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
4742
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is the code text to scan and offLine is added to the match
    offsets passed on to the IEM_MC_* workers.  The following is looked
    for, in this order:
        1. Decoder function definitions (FNIEMOP_DEF & friends, stubs).
        2. Worker function definitions (FNIEMOP_DEF_1, FNIEMOP_DEF_2).
        3. IEMOP_HLP_DONE_VEX_DECODING* invocations.
        4. IEMOP_MNEMONIC* invocations.
        5. IEM_MC_BEGIN, IEM_MC_END and IEM_MC_DEFER_TO_CIMPL_x_RET statements.

    Returns True if a function definition macro was processed or if sCode
    contains 'IEM_MC_', otherwise False.
    """

    #
    # Scan macro invocations.
    #
    # (Nothing is done unless there is an opening parenthesis somewhere
    # after the first character.)
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            # Make sure there is an instruction to attach the function to, and
            # note down the line; a second FNIEMOP_XXX for it is an error.
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            # The stub flags are derived from the macro name (asArgs[0]).
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # For the stub macros the instructions are wrapped up right away.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            # The L0 variants imply the 'vex_l_zero' hint.  Set it where missing,
            # complaining if the instruction has already had its mnemonic macro.
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        # Each variant is probed separately.  The asArgs indexes below follow
        # the parameter lists quoted in the comments, shifted by one since
        # asArgs[0] is the macro name.
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                # Only applied when there is exactly one current instruction.
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            # Group 1 of self.oReMcBeginEnd tells the statements apart: 'END',
            # 'BEGIN', or else something starting with 'DEFER_TO_CIMPL_' where
            # the character following that prefix is the parameter count.
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                elif oMatch.group(1) == 'BEGIN':
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
                else:
                    self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                     int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
            return True;

    return False;
4886
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The expression consists of a single capture group holding one
    alternative per macro in self.dMacros:
      - Macros with an argument list (asArgs is not None) match the name,
        optional whitespace and the opening parenthesis.
      - Macros without one match the name followed by a word boundary.
    self.oReMacros is set to None when self.dMacros is empty.

    Returns True.
    """
    if self.dMacros:
        asAlternatives = [];
        for sName, oMacro in self.dMacros.items():
            # Note! Raw strings are used for the fragments, since '\s' and '\('
            #       are invalid escape sequences in ordinary string literals.
            if oMacro.asArgs is not None:
                asAlternatives.append(sName + r'\s*\(');
            else:
                asAlternatives.append(sName + r'\b');
        self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
4907
def workerPreProcessDefine(self, sRest):
    """
    Processes a macro #define, sRest being the text following the directive
    word.

    Continuation lines are pulled in from self.asLines, advancing self.iLine.
    Only macros with 'IEM_MC_BEGIN' in the body are recorded in self.dMacros,
    the rest are ignored.

    Returns True for ignored macros, the result of
    workerPreProcessRecreateMacroRegex() when the macro was recorded, and the
    result of self.error() when the definition could not be parsed.
    """

    #
    # Join up any line continuations.  The escaping backslash is dropped,
    # the newline character is kept.
    #
    iFirstLine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine   = self.asLines[self.iLine];
        self.iLine += 1;
        sRest       = sRest[0:-2].rstrip() + '\n' + sNextLine;

    #
    # Split out the name, argument list and body.  The form with an argument
    # list is tried first, falling back on the simple macro form.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sName, sParams, sBody = oMatch.group(1, 2, 3);
        asArgs = [sParam.strip() for sParam in sParams.split(',')];
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sName, sBody = oMatch.group(1, 2);
        asArgs = None;
    assert sName == sName.strip();

    #
    # Only macros containing MC blocks are of interest.  MC blocks within
    # nested macro expansion are NOT supported, to avoid lots of extra work.
    #
    if sBody.find("IEM_MC_BEGIN") == -1:
        return True;

    #
    # Record the macro and refresh the macro regex.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iFirstLine);
    return self.workerPreProcessRecreateMacroRegex();
4957
def workerPreProcessUndef(self, sRest):
    """
    Processes a macro #undef, sRest being the text following the directive
    word.

    Returns True if the macro isn't in self.dMacros, otherwise the result of
    workerPreProcessRecreateMacroRegex() after removing it.
    """
    # Isolate the name, cutting the text at the first slash as a quick way
    # of stripping comments.  A slash in the very first position is left alone.
    offComment = sRest.find('/');
    sName = (sRest[:offComment] if offComment > 0 else sRest).strip();

    # Nothing to do unless it is one of the macros we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
4976
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Checks if sLine is a #define or #undef directive and passes what the
    regular expression captured (plus a newline) on to the matching worker.

    Returns the result of the worker, False if the line is neither.
    """
    for oRegex, fnWorker in ( (self.oReHashDefine, self.workerPreProcessDefine),
                              (self.oReHashUndef,  self.workerPreProcessUndef), ):
        oMatch = oRegex.match(sLine);
        if oMatch:
            return fnWorker(oMatch.group(1) + '\n');
    return False;
4989
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation oMatch found in sLine.

    Currently we ASSUME there is only one known macro on the line, the one
    oMatch matched, and that the whole invocation is on this line.

    Returns the line with the invocation replaced by the macro expansion.
    Calls self.raiseError() if the argument list isn't closed on this line
    or the number of arguments differs from the macro definition.
    """
    #
    # Figure out which macro it is.  A match ending with '(' is an
    # invocation with an argument list.
    #
    offInvocation = oMatch.start();
    sName         = oMatch.group(1);
    assert sName == sLine[oMatch.start() : oMatch.end()];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # Simple macro invocations w/o parameters are quickly dealt with.
    #
    if not fWithArgs:
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offInvocation] + oMacro.expandMacro(self) + sLine[oMatch.end():];

    #
    # Macro with parameters: Extract the arguments by scanning for the
    # closing parenthesis, splitting on commas at the outermost level only.
    #
    asActualArgs = [];
    cDepth       = 1;
    offScan      = oMatch.end();
    offArgStart  = offScan;
    while True:
        if offScan >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        chCur = sLine[offScan];
        if chCur == ',':
            if cDepth == 1:
                asActualArgs.append(sLine[offArgStart:offScan].strip());
                offArgStart = offScan + 1;
        elif chCur == '(':
            cDepth += 1;
        elif chCur == ')':
            cDepth -= 1;
            if cDepth == 0:
                asActualArgs.append(sLine[offArgStart:offScan].strip());
                break;
        offScan += 1;

    # An empty parameter list scans as a single empty argument, undo that.
    if len(oMacro.asArgs) == 0 and asActualArgs == ['']:
        asActualArgs = [];
    if len(oMacro.asArgs) != len(asActualArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Splice the expansion into the line, offScan being at the closing parenthesis.
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asActualArgs));
    return sLine[:offInvocation] + oMacro.expandMacro(self, asActualArgs) + sLine[offScan + 1 :];
5048
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards, switching between the code
    (kiCode) and multi-line comment (kiCommentMulti) states:
      - Lines matching self.oReMacros while in code are expanded first,
        the expanded text replacing the original entry in self.asLines.
      - Code snippets are handed to checkCodeForMacro().
      - The text of C-style comments is accumulated in self.sComment and
        handed to parseComment() when the comment ends.
      - Preprocessor lines (without any slash in them) are handed to
        checkPreProcessorDirectiveForDefineUndef().

    Returns number of errors (the result of self.printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;    # From here on self.iLine is the one-based number of the current line.
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Take the C-style comment path unless the first slash starts a C++ comment while in code.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        # IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            # Short comment closed on this line: leave it in the code and look for the next one.
                            offHit = sLine.find('/*', offEnd);

                        if offHit >= 0:
                            # Check the code up to the comment, then switch to collecting the comment text.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of the comment: parse it and continue with the rest of the line as code.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif ( self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    # End of file: complete any pending instructions and report the stub statistics for the file.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5154
## The parsed content of IEMAllInstructionsCommonBodyMacros.h.
# Starts out as None and is set by __parseFileByName() the first time it is called.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5157
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstructionsCommonBodyMacros.h is parsed too and
    kept in g_oParsedCommonBodyMacros.  It is looked for next to sSrcFile
    first, then next to this script.

    Returns a (number of errors, parser instance) tuple.
    Raises Exception if a file cannot be opened or read, and ParserException
    (after printing it to stderr) on parser trouble.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstructionsCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstructionsCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstructionsCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only expected to define macros, so finding
        # any instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must follow the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser);
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5219
5220
def __doTestCopying():
    """
    Carries out the asCopyTests instructions (@opcopytests).

    Each reference is looked up as a statistics name first
    (g_dAllInstructionsByStat), then as a function name
    (g_dAllInstructionsByFunction), and the tests of the instructions found
    are appended to those of the referencing instruction.

    Returns the number of errors, the messages having been written to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in (oDstInstr.asCopyTests or ()):
            # Resolve the reference.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests over, an instruction cannot copy from itself.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
5248
5249
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions, if
    there are any, by dropping aoTests from all the other instructions.

    Returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0;
    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5261
## The main instruction files paired with their default maps.
# Each entry is ( filename, default map name ).
g_aasAllInstrFilesAndDefaultMap = (
    ( 'IEMAllInstructionsCommon.cpp.h',    'one',       ),
    ( 'IEMAllInstructionsOneByte.cpp.h',   'one',       ),
    ( 'IEMAllInstructionsTwoByte0f.cpp.h', 'two0f',     ),
    ( 'IEMAllInstructionsThree0f38.cpp.h', 'three0f38', ),
    ( 'IEMAllInstructionsThree0f3a.cpp.h', 'three0f3a', ),
    ( 'IEMAllInstructionsVexMap1.cpp.h',   'vexmap1',   ),
    ( 'IEMAllInstructionsVexMap2.cpp.h',   'vexmap2',   ),
    ( 'IEMAllInstructionsVexMap3.cpp.h',   'vexmap3',   ),
    ( 'IEMAllInstructions3DNow.cpp.h',     '3dnow',     ),
);
5274
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap holds (filename, default map name) pairs.  A
    filename without a directory component which doesn't exist as given is
    taken to be relative to the directory of this script.

    Once all files are parsed the test copying and only-test filtering are
    applied, and the total stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # Note! max() avoids dividing by zero when no instructions were found at all.
    cTotalInstr = len(g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5305
5306
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from the first entry in
    g_aasAllInstrFilesAndDefaultMap with the same filename, ignoring the
    path and character case.

    Returns a list of the parsers on success.
    Raises exception on failure, which includes files not found in the table.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseNameLower = os.path.split(sFilename)[1].lower();
        sMap = next((asCur[1] for asCur in g_aasAllInstrFilesAndDefaultMap if asCur[0].lower() == sBaseNameLower),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    # The worker does the actual job.
    return __parseFilesWorker(asFilesAndDefaultMap);
5328
5329
def parseAll():
    """
    Parses every file listed in g_aasAllInstrFilesAndDefaultMap.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    aoParsers = __parseFilesWorker(g_aasAllInstrFilesAndDefaultMap);
    return aoParsers;
5338
5339
5340#
5341# Generators (may perhaps move later).
5342#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for the given instruction.

    The entry is an OP() macro invocation, or an OPVEX() one when the
    instruction has more than three operands, with the columns padded out
    to fixed offsets.

    Returns the entry as a single line (string).
    """
    cOperands = len(oInstr.aoOperands);
    if cOperands > 3:
        sMacro       = 'OPVEX';
        cMaxOperands = 4;
    else:
        sMacro       = 'OP';
        cMaxOperands = 3;
    assert cOperands <= cMaxOperands;

    #
    # Format string: mnemonic, a space and the comma separated operand formats.
    #
    sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sFmt = g_kdOpTypes[oOperand.sType][2];
        if sFmt[0] != '%':
            sFmt = sFmt.upper(); ## @todo remove upper() later.
        asOperandFmts.append(sFmt);
    if asOperandFmts:
        sTmp += ' ' + ','.join(asOperandFmts);
    sTmp += '",';
    asColumns = [ sTmp, ];

    #
    # Decoders.
    #
    dFixedParsers = {
        'VEX.ModR/M': 'IDX_ParseModRM,',
        'vex2':       'IDX_ParseVex2b,',
        'vex3':       'IDX_ParseVex3b,',
    };
    sEncoding = oInstr.sEncoding;
    asParsers = [];
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asParsers.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asParsers.extend(['0,'] * cOperands);
    elif sEncoding in dFixedParsers:
        asParsers.append(dFixedParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asParsers.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4, IDX_ParseGrp5,
        #IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9, IDX_ParseGrp10,
        #IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15, IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence, IDX_ParseEscFP,
        #IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, then pad with zeros.
    for oOperand in oInstr.aoOperands[len(asParsers):]:
        asParsers.append(g_kdOpTypes[oOperand.sType][0] + ',');
    asParsers.extend(['0,'] * (cMaxOperands - len(asParsers)));
    asColumns.extend(asParsers);

    #
    # Opcode and operands, the latter padded with OP_PARM_NONE.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags: The DISOPTYPE_ defines of the hints, in sorted hint order.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    asColumns.append((' | '.join(asFlags) or '0') + '),');

    #
    # Format the columns into a line.  Each column starts at its offset, or
    # after a single space if the line is already beyond it.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        if len(sLine) < offColumn:
            sLine = sLine.ljust(offColumn);
        else:
            sLine += ' ';
        sLine += sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
5467
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Decides whether the table should be output in the short form, i.e. with
    the leading and trailing None entries trimmed off.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index
    of the first entry to output and cInstructions the index following the
    last one.  For the full table this is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Find the end of the used range by skipping the trailing None entries.
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    # Find the first used entry, ending up at iEnd if there are none.
    iFirst = next((iEntry for iEntry in range(iEnd) if aoTableOrdered[iEntry] is not None), iEnd);

    # We go for the short table version when the number of entries trimmed
    # off is at least 1/30th of the table size (integer division).
    # NOTE(review): the comment here used to speak of saving more than 30%,
    #               which is not what 'len // 30' checks - confirm the intent.
    cTrimmed = iFirst + cEntries - iEnd;
    if cTrimmed < cEntries // 30:
        _ = oMap; # Use this for overriding.
        # Output the full table.
        return (0, cEntries, False);
    return (iFirst, iEnd, True);
5489
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and writes one DISOPCODE table plus a
    DISOPMAPDESC range record per instruction map to oDstFile.  Only maps
    whose name starts with 'vex' are output at the moment.

    Returns exit code: 0 on success, 1 if the parsing failed (error details
    are printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # Maps selected by opcode byte and prefix ('byte+pfx') are split up into
    # one map per prefix (none, 0x66, 0xf3 and 0xf2).
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oCurMap: oCurMap.sEncoding + ''.join(oCurMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    asHeaderLines = []; # Note! Collected, but not written anywhere by this function.
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        if fShortTable and (iInstrStart & ((0x10 * cEntriesPerByte) - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            # Start each group of 16 opcode bytes with a comment giving the high nibble.
            if (iInstr & ((0x10 * cEntriesPerByte) - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (0x10 * cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= 0x10 * cEntriesPerByte:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (0x10 * cEntriesPerByte,));
                    iInstr += 0x10 * cEntriesPerByte - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        # Skip the rest of the block just emitted (the last entry is
                        # stepped over at the bottom of the loop).
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # Note! The declaration must use the same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
5618
# When run as a script: generate the disassembler tables and exit with the result.
if __name__ == '__main__':
    iExitCode = generateDisassemblerTables();
    sys.exit(iExitCode);
5621
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use