#!/usr/bin/env python # -*- coding: utf-8 -*- # $Id: IEMAllInstructionsPython.py 66172 2017-03-20 23:36:10Z vboxsync $ """ IEM instruction extractor. This script/module parses the IEMAllInstruction*.cpp.h files next to it and collects information about the instructions. It can then be used to generate disassembler tables and tests. """ __copyright__ = \ """ Copyright (C) 2017 Oracle Corporation This file is part of VirtualBox Open Source Edition (OSE), as available from http://www.virtualbox.org. This file is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License (GPL) as published by the Free Software Foundation, in version 2 as it comes in the "COPYING" file of the VirtualBox OSE distribution. VirtualBox OSE is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. The contents of this file may alternatively be used under the terms of the Common Development and Distribution License Version 1.0 (CDDL) only, as it comes in the "COPYING.CDDL" file of the VirtualBox OSE distribution, in which case the provisions of the CDDL are applicable instead of those of the GPL. You may elect to license modified versions of this file under the terms and conditions of either the GPL or the CDDL or both. """ __version__ = "$Revision: 66172 $" # pylint: disable=anomalous-backslash-in-string # Standard python imports. import os import re import sys ## Only the main script needs to modify the path. #g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), # 'ValidationKit'); #sys.path.append(g_ksValidationKitDir); # #from common import utils; - Windows build boxes doesn't have pywin32. # Python 3 hacks: if sys.version_info[0] >= 3: long = int; # pylint: disable=redefined-builtin,invalid-name g_kdX86EFlagsConstants = { 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0) 'X86_EFL_1': 0x00000002, # RT_BIT_32(1) 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2) 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4) 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6) 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7) 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8) 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9) 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10) 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11) 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13)) 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14) 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16) 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17) 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18) 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19) 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20) 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21) 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5) 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1) }; ## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear. g_kdEFlagsMnemonics = { # Debugger flag notation (sorted by value): 'cf': 'X86_EFL_CF', ##< Carry Flag. 'nc': '!X86_EFL_CF', ##< No Carry. 'po': 'X86_EFL_PF', ##< Parity Pdd. 'pe': '!X86_EFL_PF', ##< Parity Even. 'af': 'X86_EFL_AF', ##< Aux Flag. 'na': '!X86_EFL_AF', ##< No Aux. 'zr': 'X86_EFL_ZF', ##< ZeRo. 'nz': '!X86_EFL_ZF', ##< No Zero. 'ng': 'X86_EFL_SF', ##< NeGative (sign). 'pl': '!X86_EFL_SF', ##< PLuss (sign). 'tf': 'X86_EFL_TF', ##< Trap flag. 'ei': 'X86_EFL_IF', ##< Enabled Interrupts. 'di': '!X86_EFL_IF', ##< Disabled Interrupts. 'dn': 'X86_EFL_DF', ##< DowN (string op direction). 'up': '!X86_EFL_DF', ##< UP (string op direction). 'ov': 'X86_EFL_OF', ##< OVerflow. 'nv': '!X86_EFL_OF', ##< No Overflow. 'nt': 'X86_EFL_NT', ##< Nested Task. 'rf': 'X86_EFL_RF', ##< Resume Flag. 'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode. 'ac': 'X86_EFL_AC', ##< Alignment Check. 'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag. 'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending. # Reference manual notation not covered above (sorted by value): 'pf': 'X86_EFL_PF', 'zf': 'X86_EFL_ZF', 'sf': 'X86_EFL_SF', 'if': 'X86_EFL_IF', 'df': 'X86_EFL_DF', 'of': 'X86_EFL_OF', 'iopl': 'X86_EFL_IOPL', 'id': 'X86_EFL_ID', }; ## \@op[1-4] locations g_kdOpLocations = { 'reg': [], ## modrm.reg 'rm': [], ## modrm.rm 'imm': [], ## immediate instruction data 'vvvv': [], ## VEX.vvvv # fixed registers. 'AL': [], 'rAX': [], 'rSI': [], 'rDI': [], 'rFLAGS': [], 'CS': [], 'DS': [], 'ES': [], 'FS': [], 'GS': [], 'SS': [], }; ## \@op[1-4] types ## ## Value fields: ## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM). ## - 1: the location (g_kdOpLocations). ## - 2: disassembler format string version of the type. ## - 3: disassembler OP_PARAM_XXX (XXX only). ## ## Note! See the A.2.1 in SDM vol 2 for the type names. g_kdOpTypes = { # Fixed addresses 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', ), # ModR/M.rm 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', ), 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', ), 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', ), 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ), # ModR/M.rm - memory only. 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', ), ##< Only used by BOUND. # ModR/M.reg 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', ), 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', ), 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ), 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ), # Immediate values. 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ), ##< NB! Could be IDX_ParseImmByteSX for some instructions. 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ), 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', ), 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ), 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', ), ##< o16: word, o32: dword, o64: qword 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ), ##< o16: word, o32|o64:dword # Address operands (no ModR/M). 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ), 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ), # Relative jump targets 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ), 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ), # DS:rSI 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', ), 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', ), # ES:rDI 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', ), 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', ), 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ), # Fixed registers. 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ), 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ), 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ), # 8086: push CS 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ), 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ), 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ), 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ), 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ), }; # IDX_ParseFixedReg # IDX_ParseVexDest ## IEMFORM_XXX mappings. g_kdIemForms = { # sEncoding, [ sWhere1, ... ] 'RM': ( 'ModR/M', [ 'reg', 'rm' ], ), 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], ), 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], ), 'MR': ( 'ModR/M', [ 'rm', 'reg' ], ), 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], ), 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], ), 'M': ( 'ModR/M', [ 'rm', ], ), 'M_REG': ( 'ModR/M', [ 'rm', ], ), 'M_MEM': ( 'ModR/M', [ 'rm', ], ), 'R': ( 'ModR/M', [ 'reg', ], ), 'RVM': ( 'ModR/M+VEX', [ 'reg', 'vvvv', 'rm'], ), 'MVR': ( 'ModR/M+VEX', [ 'rm', 'vvvv', 'reg'], ), 'FIXED': ( 'fixed', None, ) }; ## \@oppfx values. g_kdPrefixes = { '0x66': [], '0xf3': [], '0xf2': [], }; ## Special \@opcode tag values. g_kdSpecialOpcodes = { '/reg': [], 'mr/reg': [], '11 /reg': [], '!11 /reg': [], '11 mr/reg': [], '!11 mr/reg': [], }; ## Valid values for \@openc g_kdEncodings = { 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, etc). 'prefix': [ None, ], ##< Prefix }; ## \@opunused, \@opinvalid, \@opinvlstyle g_kdInvalidStyles = { 'immediate': [], ##< CPU stops decoding immediately after the opcode. 'intel-modrm': [], ##< Intel decodes ModR/M. 'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-byte immediate. 'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.) 'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-byte immediate. }; g_kdCpuNames = { '8086': (), '80186': (), '80286': (), '80386': (), '80486': (), }; ## \@opcpuid g_kdCpuIdFlags = { 'vme': 'X86_CPUID_FEATURE_EDX_VME', 'tsc': 'X86_CPUID_FEATURE_EDX_TSC', 'msr': 'X86_CPUID_FEATURE_EDX_MSR', 'cx8': 'X86_CPUID_FEATURE_EDX_CX8', 'sep': 'X86_CPUID_FEATURE_EDX_SEP', 'cmov': 'X86_CPUID_FEATURE_EDX_CMOV', 'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH', 'mmx': 'X86_CPUID_FEATURE_EDX_MMX', 'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR', 'sse': 'X86_CPUID_FEATURE_EDX_SSE', 'sse2': 'X86_CPUID_FEATURE_EDX_SSE2', 'sse3': 'X86_CPUID_FEATURE_ECX_SSE3', 'pclmul': 'X86_CPUID_FEATURE_ECX_DTES64', 'monitor': 'X86_CPUID_FEATURE_ECX_CPLDS', 'vmx': 'X86_CPUID_FEATURE_ECX_VMX', 'smx': 'X86_CPUID_FEATURE_ECX_TM2', 'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3', 'fma': 'X86_CPUID_FEATURE_ECX_FMA', 'cx16': 'X86_CPUID_FEATURE_ECX_CX16', 'pcid': 'X86_CPUID_FEATURE_ECX_PCID', 'sse41': 'X86_CPUID_FEATURE_ECX_SSE4_1', 'sse42': 'X86_CPUID_FEATURE_ECX_SSE4_2', 'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE', 'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT', 'aes': 'X86_CPUID_FEATURE_ECX_AES', 'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE', 'avx': 'X86_CPUID_FEATURE_ECX_AVX', 'f16c': 'X86_CPUID_FEATURE_ECX_F16C', 'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND', 'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX', '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX', '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW', 'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM', 'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L', 'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM', 'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A', '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF', 'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP', 'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4', }; ## \@ophints values. g_kdHints = { 'invalid': 'DISOPTYPE_INVALID', ##< 'harmless': 'DISOPTYPE_HARMLESS', ##< 'controlflow': 'DISOPTYPE_CONTROLFLOW', ##< 'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##< 'dangerous': 'DISOPTYPE_DANGEROUS', ##< 'portio': 'DISOPTYPE_PORTIO', ##< 'privileged': 'DISOPTYPE_PRIVILEGED', ##< 'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##< 'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##< 'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##< 'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##< 'interrupt': 'DISOPTYPE_INTERRUPT', ##< 'illegal': 'DISOPTYPE_ILLEGAL', ##< 'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0. 'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0. 'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss) */ 'portio_read': 'DISOPTYPE_PORTIO_READ', ##< 'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##< 'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode 'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode 'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size 'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes 'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte 'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b 'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes ## (only in 16 & 32 bits mode!) 'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet! 'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet! 'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet! 'ignores_op_size': '', ##< Ignores both operand size prefixes. 'lock_allowed': '', ##< Lock prefix allowed. }; def _isValidOpcodeByte(sOpcode): """ Checks if sOpcode is a valid lower case opcode byte. Returns true/false. """ if len(sOpcode) == 4: if sOpcode[:2] == '0x': if sOpcode[2] in '0123456789abcdef': if sOpcode[3] in '0123456789abcdef': return True; return False; class InstructionMap(object): """ Instruction map. The opcode map provides the lead opcode bytes (empty for the one byte opcode map). An instruction can be member of multiple opcode maps as long as it uses the same opcode value within the map (because of VEX). """ kdEncodings = { 'legacy': [], 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3 'xop8': [], ##< XOP prefix with vvvvv = 8 'xop9': [], ##< XOP prefix with vvvvv = 9 'xop10': [], ##< XOP prefix with vvvvv = 10 }; ## Selectors. ## The first value is the number of table entries required by a ## decoder or disassembler for this type of selector. kdSelectors = { 'byte': [ 256, ], ##< next opcode byte selects the instruction (default). '/r': [ 8, ], ##< modrm.reg selects the instruction. 'mod /r': [ 32, ], ##< modrm.reg and modrm.mod selects the instruction. '!11 /r': [ 8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11. '11 /r': [ 8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11. '11': [ 64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11. }; def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None): assert sSelector in self.kdSelectors; assert sEncoding in self.kdEncodings; if asLeadOpcodes is None: asLeadOpcodes = []; else: for sOpcode in asLeadOpcodes: assert _isValidOpcodeByte(sOpcode); assert sDisParse is None or sDisParse.startswith('IDX_Parse'); self.sName = sName; self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'. self.sSelector = sSelector; ##< The member selector, see kdSelectors. self.sEncoding = sEncoding; ##< The encoding, see kdSelectors. self.aoInstructions = []; # type: Instruction self.sDisParse = sDisParse; ##< IDX_ParseXXX. def getTableSize(self): """ Number of table entries. This corresponds directly to the selector. """ return self.kdSelectors[self.sSelector][0]; def getInstructionIndex(self, oInstr): """ Returns the table index for the instruction. """ bOpcode = oInstr.getOpcodeByte(); # The byte selector is simple. We need a full opcode byte and need just return it. if self.sSelector == 'byte': assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr); return bOpcode; # The other selectors needs masking and shifting. if self.sSelector == '/r': return (bOpcode >> 3) & 0x7; if self.sSelector == 'mod /r': return (bOpcode >> 3) & 0x1f; if self.sSelector == '!11 /r': assert (bOpcode & 0xc0) != 0xc, str(oInstr); return (bOpcode >> 3) & 0x7; if self.sSelector == '11 /r': assert (bOpcode & 0xc0) == 0xc, str(oInstr); return (bOpcode >> 3) & 0x7; if self.sSelector == '11': assert (bOpcode & 0xc0) == 0xc, str(oInstr); return bOpcode & 0x3f; assert False, self.sSelector; return -1; def getInstructionsInTableOrder(self): """ Get instructions in table order. Returns array of instructions. Normally there is exactly one instruction per entry. However the entry could also be None if not instruction was specified for that opcode value. Or there could be a list of instructions to deal with special encodings where for instance prefix (e.g. REX.W) encodes a different instruction or different CPUs have different instructions or prefixes in the same place. """ # Start with empty table. cTable = self.getTableSize(); aoTable = [None] * cTable; # Insert the instructions. for oInstr in self.aoInstructions: if oInstr.sOpcode: idxOpcode = self.getInstructionIndex(oInstr); assert idxOpcode < cTable, str(idxOpcode); oExisting = aoTable[idxOpcode]; if oExisting is None: aoTable[idxOpcode] = oInstr; elif not isinstance(oExisting, list): aoTable[idxOpcode] = list([oExisting, oInstr]); else: oExisting.append(oInstr); return aoTable; def getDisasTableName(self): """ Returns the disassembler table name for this map. """ sName = 'g_aDisas'; for sWord in self.sName.split('_'): if sWord == 'm': # suffix indicating modrm.mod==mem sName += '_m'; elif sWord == 'r': # suffix indicating modrm.mod==reg sName += '_r'; elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord): sName += '_' + sWord; else: sWord = sWord.replace('grp', 'Grp'); sWord = sWord.replace('map', 'Map'); sName += sWord[0].upper() + sWord[1:]; return sName; class TestType(object): """ Test value type. This base class deals with integer like values. The fUnsigned constructor parameter indicates the default stance on zero vs sign extending. It is possible to override fUnsigned=True by prefixing the value with '+' or '-'. """ def __init__(self, sName, acbSizes = None, fUnsigned = True): self.sName = sName; self.acbSizes = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes; # Normal sizes. self.fUnsigned = fUnsigned; class BadValue(Exception): """ Bad value exception. """ def __init__(self, sMessage): Exception.__init__(self, sMessage); self.sMessage = sMessage; ## For ascii ~ operator. kdHexInv = { '0': 'f', '1': 'e', '2': 'd', '3': 'c', '4': 'b', '5': 'a', '6': '9', '7': '8', '8': '7', '9': '6', 'a': '5', 'b': '4', 'c': '3', 'd': '2', 'e': '1', 'f': '0', }; def get(self, sValue): """ Get the shortest normal sized byte representation of oValue. Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ). The latter form is for AND+OR pairs where the first entry is what to AND with the field and the second the one or OR with. Raises BadValue if invalid value. """ if not sValue: raise TestType.BadValue('empty value'); # Deal with sign and detect hexadecimal or decimal. fSignExtend = not self.fUnsigned; if sValue[0] == '-' or sValue[0] == '+': fSignExtend = True; fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x'; else: fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x'; # try convert it to long integer. try: iValue = long(sValue, 16 if fHex else 10); except Exception as oXcpt: raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt)); # Convert the hex string and pad it to a decent value. Negative values # needs to be manually converted to something non-negative (~-n + 1). if iValue >= 0: sHex = hex(iValue); if sys.version_info[0] < 3: assert sHex[-1] == 'L'; sHex = sHex[:-1]; assert sHex[:2] == '0x'; sHex = sHex[2:]; else: sHex = hex(-iValue - 1); if sys.version_info[0] < 3: assert sHex[-1] == 'L'; sHex = sHex[:-1]; assert sHex[:2] == '0x'; sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex[2:]]); if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']: sHex = 'f' + sHex; cDigits = len(sHex); if cDigits <= self.acbSizes[-1] * 2: for cb in self.acbSizes: cNaturalDigits = cb * 2; if cDigits <= cNaturalDigits: break; else: cNaturalDigits = self.acbSizes[-1] * 2; cNaturalDigits = int((cDigits + cNaturalDigits - 1) / cNaturalDigits) * cNaturalDigits; assert isinstance(cNaturalDigits, int) if cNaturalDigits != cDigits: cNeeded = cNaturalDigits - cDigits; if iValue >= 0: sHex = ('0' * cNeeded) + sHex; else: sHex = ('f' * cNeeded) + sHex; # Invert and convert to bytearray and return it. abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]); return ((fSignExtend, abValue),); def validate(self, sValue): """ Returns True if value is okay, error message on failure. """ try: self.get(sValue); except TestType.BadValue as oXcpt: return oXcpt.sMessage; return True; def isAndOrPair(self, sValue): """ Checks if sValue is a pair. """ _ = sValue; return False; class TestTypeEflags(TestType): """ Special value parsing for EFLAGS/RFLAGS/FLAGS. """ kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }; def __init__(self, sName): TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True); def get(self, sValue): fClear = 0; fSet = 0; for sFlag in sValue.split(','): sConstant = g_kdEFlagsMnemonics.get(sFlag, None); if sConstant is None: raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue)) if sConstant[0] == '!': fClear |= g_kdX86EFlagsConstants[sConstant[1:]]; else: fSet |= g_kdX86EFlagsConstants[sConstant]; aoSet = TestType.get(self, '0x%x' % (fSet,)); if fClear != 0: aoClear = TestType.get(self, '%#x' % (fClear,)) assert self.isAndOrPair(sValue) is True; return (aoClear[0], aoSet[0]); assert self.isAndOrPair(sValue) is False; return aoSet; def isAndOrPair(self, sValue): for sZeroFlag in self.kdZeroValueFlags: if sValue.find(sZeroFlag) >= 0: return True; return False; class TestInOut(object): """ One input or output state modifier. This should be thought as values to modify BS3REGCTX and extended (needs to be structured) state. """ ## Assigned operators. kasOperators = [ '&|=', # Special AND(INV)+OR operator for use with EFLAGS. '&~=', '&=', '|=', '=' ]; ## Types kdTypes = { 'uint': TestType('uint', fUnsigned = True), 'int': TestType('int'), 'efl': TestTypeEflags('efl'), }; ## CPU context fields. kdFields = { # name: ( default type, [both|input|output], ) # Operands. 'op1': ( 'uint', 'both', ), ## \@op1 'op2': ( 'uint', 'both', ), ## \@op2 'op3': ( 'uint', 'both', ), ## \@op3 'op4': ( 'uint', 'both', ), ## \@op4 # Flags. 'efl': ( 'efl', 'both', ), 'efl_undef': ( 'uint', 'output', ), # 8-bit GPRs. 'al': ( 'uint', 'both', ), 'cl': ( 'uint', 'both', ), 'dl': ( 'uint', 'both', ), 'bl': ( 'uint', 'both', ), 'ah': ( 'uint', 'both', ), 'ch': ( 'uint', 'both', ), 'dh': ( 'uint', 'both', ), 'bh': ( 'uint', 'both', ), 'r8l': ( 'uint', 'both', ), 'r9l': ( 'uint', 'both', ), 'r10l': ( 'uint', 'both', ), 'r11l': ( 'uint', 'both', ), 'r12l': ( 'uint', 'both', ), 'r13l': ( 'uint', 'both', ), 'r14l': ( 'uint', 'both', ), 'r15l': ( 'uint', 'both', ), # 16-bit GPRs. 'ax': ( 'uint', 'both', ), 'dx': ( 'uint', 'both', ), 'cx': ( 'uint', 'both', ), 'bx': ( 'uint', 'both', ), 'sp': ( 'uint', 'both', ), 'bp': ( 'uint', 'both', ), 'si': ( 'uint', 'both', ), 'di': ( 'uint', 'both', ), 'r8w': ( 'uint', 'both', ), 'r9w': ( 'uint', 'both', ), 'r10w': ( 'uint', 'both', ), 'r11w': ( 'uint', 'both', ), 'r12w': ( 'uint', 'both', ), 'r13w': ( 'uint', 'both', ), 'r14w': ( 'uint', 'both', ), 'r15w': ( 'uint', 'both', ), # 32-bit GPRs. 'eax': ( 'uint', 'both', ), 'edx': ( 'uint', 'both', ), 'ecx': ( 'uint', 'both', ), 'ebx': ( 'uint', 'both', ), 'esp': ( 'uint', 'both', ), 'ebp': ( 'uint', 'both', ), 'esi': ( 'uint', 'both', ), 'edi': ( 'uint', 'both', ), 'r8d': ( 'uint', 'both', ), 'r9d': ( 'uint', 'both', ), 'r10d': ( 'uint', 'both', ), 'r11d': ( 'uint', 'both', ), 'r12d': ( 'uint', 'both', ), 'r13d': ( 'uint', 'both', ), 'r14d': ( 'uint', 'both', ), 'r15d': ( 'uint', 'both', ), # 64-bit GPRs. 'rax': ( 'uint', 'both', ), 'rdx': ( 'uint', 'both', ), 'rcx': ( 'uint', 'both', ), 'rbx': ( 'uint', 'both', ), 'rsp': ( 'uint', 'both', ), 'rbp': ( 'uint', 'both', ), 'rsi': ( 'uint', 'both', ), 'rdi': ( 'uint', 'both', ), 'r8': ( 'uint', 'both', ), 'r9': ( 'uint', 'both', ), 'r10': ( 'uint', 'both', ), 'r11': ( 'uint', 'both', ), 'r12': ( 'uint', 'both', ), 'r13': ( 'uint', 'both', ), 'r14': ( 'uint', 'both', ), 'r15': ( 'uint', 'both', ), # 16-bit, 32-bit or 64-bit registers according to operand size. 'oz.rax': ( 'uint', 'both', ), 'oz.rdx': ( 'uint', 'both', ), 'oz.rcx': ( 'uint', 'both', ), 'oz.rbx': ( 'uint', 'both', ), 'oz.rsp': ( 'uint', 'both', ), 'oz.rbp': ( 'uint', 'both', ), 'oz.rsi': ( 'uint', 'both', ), 'oz.rdi': ( 'uint', 'both', ), 'oz.r8': ( 'uint', 'both', ), 'oz.r9': ( 'uint', 'both', ), 'oz.r10': ( 'uint', 'both', ), 'oz.r11': ( 'uint', 'both', ), 'oz.r12': ( 'uint', 'both', ), 'oz.r13': ( 'uint', 'both', ), 'oz.r14': ( 'uint', 'both', ), 'oz.r15': ( 'uint', 'both', ), # Special ones. 'value.xcpt': ( 'uint', 'output', ), }; def __init__(self, sField, sOp, sValue, sType): assert sField in self.kdFields; assert sOp in self.kasOperators; self.sField = sField; self.sOp = sOp; self.sValue = sValue; self.sType = sType; assert isinstance(sField, str); assert isinstance(sOp, str); assert isinstance(sType, str); assert isinstance(sValue, str); class TestSelector(object): """ One selector for an instruction test. """ ## Selector compare operators. kasCompareOps = [ '==', '!=' ]; ## Selector variables and their valid values. kdVariables = { # Operand size. 'size': { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', }, # Execution ring. 'ring': { '0': 'ring_0', '1': 'ring_1', '2': 'ring_2', '3': 'ring_3', '0..2': 'ring_0_thru_2', '1..3': 'ring_1_thru_3', }, # Basic code mode. 'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', }, # cpu modes. 'mode': { 'real': 'mode_real', 'prot': 'mode_prot', 'long': 'mode_long', 'v86': 'mode_v86', 'smm': 'mode_smm', 'vmx': 'mode_vmx', 'svm': 'mode_svm', }, # paging on/off 'paging': { 'on': 'paging_on', 'off': 'paging_off', }, }; ## Selector shorthand predicates. ## These translates into variable expressions. kdPredicates = { 'o16': 'size==o16', 'o32': 'size==o32', 'o64': 'size==o64', 'ring0': 'ring==0', '!ring0': 'ring==1..3', 'ring1': 'ring==1', 'ring2': 'ring==2', 'ring3': 'ring==3', 'user': 'ring==3', 'supervisor': 'ring==0..2', 'real': 'mode==real', 'prot': 'mode==prot', 'long': 'mode==long', 'v86': 'mode==v86', 'smm': 'mode==smm', 'vmx': 'mode==vmx', 'svm': 'mode==svm', 'paging': 'paging==on', '!paging': 'paging==off', }; def __init__(self, sVariable, sOp, sValue): assert sVariable in self.kdVariables; assert sOp in self.kasCompareOps; assert sValue in self.kdVariables[sVariable]; self.sVariable = sVariable; self.sOp = sOp; self.sValue = sValue; class InstructionTest(object): """ Instruction test. """ def __init__(self, oInstr): # type: (InstructionTest, Instruction) self.oInstr = oInstr; # type: InstructionTest self.aoInputs = []; # type: list(TestInOut) self.aoOutputs = []; # type: list(TestInOut) self.aoSelectors = []; # type: list(TestSelector) def toString(self, fRepr = False): """ Converts it to string representation. """ asWords = []; if self.aoSelectors: for oSelector in self.aoSelectors: asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,)); asWords.append('/'); for oModifier in self.aoInputs: asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,)); asWords.append('->'); for oModifier in self.aoOutputs: asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,)); if fRepr: return '<' + ' '.join(asWords) + '>'; return ' '.join(asWords); def __str__(self): """ Provide string represenation. """ return self.toString(False); def __repr__(self): """ Provide unambigious string representation. """ return self.toString(True); class Operand(object): """ Instruction operand. """ def __init__(self, sWhere, sType): assert sWhere in g_kdOpLocations, sWhere; assert sType in g_kdOpTypes, sType; self.sWhere = sWhere; ##< g_kdOpLocations self.sType = sType; ##< g_kdOpTypes def usesModRM(self): """ Returns True if using some form of ModR/M encoding. """ return self.sType[0] in ['E', 'G', 'M']; class Instruction(object): # pylint: disable=too-many-instance-attributes """ Instruction. """ def __init__(self, sSrcFile, iLine): ## @name Core attributes. ## @{ self.sMnemonic = None; self.sBrief = None; self.asDescSections = []; # type: list(str) self.aoMaps = []; # type: list(InstructionMap) self.aoOperands = []; # type: list(Operand) self.sPrefix = None; ##< Single prefix: None, 0x66, 0xf3, 0xf2 self.sOpcode = None; # type: str self.sEncoding = None; self.asFlTest = None; self.asFlModify = None; self.asFlUndefined = None; self.asFlSet = None; self.asFlClear = None; self.dHints = {}; ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value). self.sDisEnum = None; ##< OP_XXXX value. Default is based on the uppercased mnemonic. self.asCpuIds = []; ##< The CPUID feature bit names for this instruction. If multiple, assume AND. self.asReqFeatures = []; ##< Which features are required to be enabled to run this instruction. self.aoTests = []; # type: list(InstructionTest) self.sMinCpu = None; ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is. self.oCpuExpr = None; ##< Some CPU restriction expression... self.sGroup = None; self.fUnused = False; ##< Unused instruction. self.fInvalid = False; ##< Invalid instruction (like UD2). self.sInvalidStyle = None; ##< Invalid behviour style ## @} ## @name Implementation attributes. ## @{ self.sStats = None; self.sFunction = None; self.fStub = False; self.fUdStub = False; ## @} ## @name Decoding info ## @{ self.sSrcFile = sSrcFile; self.iLineCreated = iLine; self.iLineCompleted = None; self.cOpTags = 0; self.iLineFnIemOpMacro = -1; self.iLineMnemonicMacro = -1; ## @} ## @name Intermediate input fields. ## @{ self.sRawDisOpNo = None; self.asRawDisParams = []; self.sRawIemOpFlags = None; self.sRawOldOpcodes = None; self.asCopyTests = []; ## @} def toString(self, fRepr = False): """ Turn object into a string. """ aasFields = []; aasFields.append(['opcode', self.sOpcode]); aasFields.append(['mnemonic', self.sMnemonic]); for iOperand, oOperand in enumerate(self.aoOperands): aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]); if self.aoMaps: aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]); aasFields.append(['encoding', self.sEncoding]); if self.dHints: aasFields.append(['hints', ','.join(self.dHints.keys())]); aasFields.append(['disenum', self.sDisEnum]); if self.asCpuIds: aasFields.append(['cpuid', ','.join(self.asCpuIds)]); aasFields.append(['group', self.sGroup]); if self.fUnused: aasFields.append(['unused', 'True']); if self.fInvalid: aasFields.append(['invalid', 'True']); aasFields.append(['invlstyle', self.sInvalidStyle]); aasFields.append(['fltest', self.asFlTest]); aasFields.append(['flmodify', self.asFlModify]); aasFields.append(['flundef', self.asFlUndefined]); aasFields.append(['flset', self.asFlSet]); aasFields.append(['flclear', self.asFlClear]); aasFields.append(['mincpu', self.sMinCpu]); aasFields.append(['stats', self.sStats]); aasFields.append(['sFunction', self.sFunction]); if self.fStub: aasFields.append(['fStub', 'True']); if self.fUdStub: aasFields.append(['fUdStub', 'True']); if self.cOpTags: aasFields.append(['optags', str(self.cOpTags)]); if self.iLineFnIemOpMacro != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]); if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]); sRet = '<' if fRepr else ''; for sField, sValue in aasFields: if sValue != None: if len(sRet) > 1: sRet += '; '; sRet += '%s=%s' % (sField, sValue,); if fRepr: sRet += '>'; return sRet; def __str__(self): """ Provide string represenation. """ return self.toString(False); def __repr__(self): """ Provide unambigious string representation. """ return self.toString(True); def getOpcodeByte(self): """ Decodes sOpcode into a byte range integer value. Raises exception if sOpcode is None or invalid. """ if self.sOpcode is None: raise Exception('No opcode byte for %s!' % (self,)); sOpcode = str(self.sOpcode); # pylint type confusion workaround. # Full hex byte form. if sOpcode[:2] == '0x': return int(sOpcode, 16); # The /r form: if sOpcode[0] == '/' and sOpcode[1].isdigit() and len(sOpcode) == 2: return int(sOpcode[1:]) << 3; raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,)); @staticmethod def _flagsToIntegerMask(asFlags): """ Returns the integer mask value for asFlags. """ uRet = 0; if asFlags: for sFlag in asFlags: sConstant = g_kdEFlagsMnemonics[sFlag]; assert sConstant[0] != '!', sConstant uRet |= g_kdX86EFlagsConstants[sConstant]; return uRet; def getTestedFlagsMask(self): """ Returns asFlTest into a integer mask value """ return self._flagsToIntegerMask(self.asFlTest); def getModifiedFlagsMask(self): """ Returns asFlModify into a integer mask value """ return self._flagsToIntegerMask(self.asFlModify); def getUndefinedFlagsMask(self): """ Returns asFlUndefined into a integer mask value """ return self._flagsToIntegerMask(self.asFlUndefined); def getSetFlagsMask(self): """ Returns asFlSet into a integer mask value """ return self._flagsToIntegerMask(self.asFlSet); def getClearedFlagsMask(self): """ Returns asFlClear into a integer mask value """ return self._flagsToIntegerMask(self.asFlClear); ## All the instructions. g_aoAllInstructions = []; # type: list(Instruction) ## All the instructions indexed by statistics name (opstat). g_dAllInstructionsByStat = {}; # type: dict(Instruction) ## All the instructions indexed by function name (opfunction). g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction)) ## Instructions tagged by oponlytest g_aoOnlyTestInstructions = []; # type: list(Instruction) ## Instruction maps. g_dInstructionMaps = { 'one': InstructionMap('one'), 'grp1_80': InstructionMap('grp1_80', asLeadOpcodes = ['0x80',]), 'grp1_81': InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'), 'grp1_82': InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'), 'grp1_83': InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'), 'grp1a': InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'), 'grp2_c0': InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'), 'grp2_c1': InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'), 'grp2_d0': InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'), 'grp2_d1': InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'), 'grp2_d2': InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'), 'grp2_d3': InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'), 'grp3_f6': InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'), 'grp3_f7': InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'), 'grp4': InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'), 'grp5': InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'), 'grp11_c6_m': InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'), 'grp11_c6_r': InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort 'grp11_c7_m': InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'), 'grp11_c7_r': InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin 'two0f': InstructionMap('two0f', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'), 'grp6': InstructionMap('grp6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'), 'grp7_m': InstructionMap('grp7_m', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'), 'grp7_r': InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'), 'grp8': InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'), 'grp9': InstructionMap('grp9', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'), 'grp10': InstructionMap('grp10', asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m 'grp12': InstructionMap('grp12', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'), 'grp13': InstructionMap('grp13', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'), 'grp14': InstructionMap('grp14', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'), 'grp15': InstructionMap('grp15', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'), 'grp16': InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'), 'grpA17': InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness 'grpP': InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch 'three0f38': InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]), 'three0f3a': InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]), 'vexmap1': InstructionMap('vexmap1', sEncoding = 'vex1'), 'vexgrp12': InstructionMap('vexgrp12', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'), 'vexgrp13': InstructionMap('vexgrp13', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'), 'vexgrp14': InstructionMap('vexgrp14', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'), 'vexgrp15': InstructionMap('vexgrp15', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'), 'vexgrp17': InstructionMap('vexgrp17', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'), 'vexmap2': InstructionMap('vexmap2', sEncoding = 'vex2'), 'vexmap3': InstructionMap('vexmap3', sEncoding = 'vex3'), 'xopmap8': InstructionMap('xopmap8', sEncoding = 'xop8'), 'xopmap9': InstructionMap('xopmap9', sEncoding = 'xop9'), 'xopgrp1': InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'), 'xopgrp2': InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'), 'xopgrp3': InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'), 'xopmap10': InstructionMap('xopmap10', sEncoding = 'xop10'), 'xopgrp4': InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'), }; class ParserException(Exception): """ Parser exception """ def __init__(self, sMessage): Exception.__init__(self, sMessage); class SimpleParser(object): """ Parser of IEMAllInstruction*.cpp.h instruction specifications. """ ## @name Parser state. ## @{ kiCode = 0; kiCommentMulti = 1; ## @} def __init__(self, sSrcFile, asLines, sDefaultMap): self.sSrcFile = sSrcFile; self.asLines = asLines; self.iLine = 0; self.iState = self.kiCode; self.sComment = ''; self.iCommentLine = 0; self.aoCurInstrs = []; assert sDefaultMap in g_dInstructionMaps; self.oDefaultMap = g_dInstructionMaps[sDefaultMap]; self.cTotalInstr = 0; self.cTotalStubs = 0; self.cTotalTagged = 0; self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$'); self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$'); self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$'); self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$'); self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$'); self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$'); self.fDebug = True; self.dTagHandlers = { '@opbrief': self.parseTagOpBrief, '@opdesc': self.parseTagOpDesc, '@opmnemonic': self.parseTagOpMnemonic, '@op1': self.parseTagOpOperandN, '@op2': self.parseTagOpOperandN, '@op3': self.parseTagOpOperandN, '@op4': self.parseTagOpOperandN, '@oppfx': self.parseTagOpPfx, '@opmaps': self.parseTagOpMaps, '@opcode': self.parseTagOpcode, '@openc': self.parseTagOpEnc, '@opfltest': self.parseTagOpEFlags, '@opflmodify': self.parseTagOpEFlags, '@opflundef': self.parseTagOpEFlags, '@opflset': self.parseTagOpEFlags, '@opflclear': self.parseTagOpEFlags, '@ophints': self.parseTagOpHints, '@opdisenum': self.parseTagOpDisEnum, '@opmincpu': self.parseTagOpMinCpu, '@opcpuid': self.parseTagOpCpuId, '@opgroup': self.parseTagOpGroup, '@opunused': self.parseTagOpUnusedInvalid, '@opinvalid': self.parseTagOpUnusedInvalid, '@opinvlstyle': self.parseTagOpUnusedInvalid, '@optest': self.parseTagOpTest, '@optestign': self.parseTagOpTestIgnore, '@optestignore': self.parseTagOpTestIgnore, '@opcopytests': self.parseTagOpCopyTests, '@oponlytest': self.parseTagOpOnlyTest, '@opstats': self.parseTagOpStats, '@opfunction': self.parseTagOpFunction, '@opdone': self.parseTagOpDone, }; self.asErrors = []; def raiseError(self, sMessage): """ Raise error prefixed with the source and line number. """ raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,)); def raiseCommentError(self, iLineInComment, sMessage): """ Similar to raiseError, but the line number is iLineInComment + self.iCommentLine. """ raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,)); def error(self, sMessage): """ Adds an error. returns False; """ self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,)); return False; def errorComment(self, iLineInComment, sMessage): """ Adds a comment error. returns False; """ self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,)); return False; def printErrors(self): """ Print the errors to stderr. Returns number of errors. """ if self.asErrors: sys.stderr.write(u''.join(self.asErrors)); return len(self.asErrors); def debug(self, sMessage): """ For debugging. """ if self.fDebug: print('debug: %s' % (sMessage,)); def addInstruction(self, iLine = None): """ Adds an instruction. """ oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine); g_aoAllInstructions.append(oInstr); self.aoCurInstrs.append(oInstr); return oInstr; def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats): """ Derives the mnemonic and operands from a IEM stats base name like string. """ if oInstr.sMnemonic is None: asWords = sStats.split('_'); oInstr.sMnemonic = asWords[0].lower(); if len(asWords) > 1 and not oInstr.aoOperands: for sType in asWords[1:]: if sType in g_kdOpTypes: oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType)); else: #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr)) return False; return True; def doneInstructionOne(self, oInstr, iLine): """ Complete the parsing by processing, validating and expanding raw inputs. """ assert oInstr.iLineCompleted is None; oInstr.iLineCompleted = iLine; # # Specified instructions. # if oInstr.cOpTags > 0: if oInstr.sStats is None: pass; # # Unspecified legacy stuff. We generally only got a few things to go on here. # /** Opcode 0x0f 0x00 /0. */ # FNIEMOPRM_DEF(iemOp_Grp6_sldt) # else: #if oInstr.sRawOldOpcodes: # #if oInstr.sMnemonic: pass; # # Common defaults. # # Guess mnemonic and operands from stats if the former is missing. if oInstr.sMnemonic is None: if oInstr.sStats is not None: self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats); elif oInstr.sFunction is not None: self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', '')); # Derive the disassembler op enum constant from the mnemonic. if oInstr.sDisEnum is None and oInstr.sMnemonic is not None: oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper(); # Derive the IEM statistics base name from mnemonic and operand types. if oInstr.sStats is None: if oInstr.sFunction is not None: oInstr.sStats = oInstr.sFunction.replace('iemOp_', ''); elif oInstr.sMnemonic is not None: oInstr.sStats = oInstr.sMnemonic; for oOperand in oInstr.aoOperands: if oOperand.sType: oInstr.sStats += '_' + oOperand.sType; # Derive the IEM function name from mnemonic and operand types. if oInstr.sFunction is None: if oInstr.sMnemonic is not None: oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic; for oOperand in oInstr.aoOperands: if oOperand.sType: oInstr.sFunction += '_' + oOperand.sType; elif oInstr.sStats: oInstr.sFunction = 'iemOp_' + oInstr.sStats; # Derive encoding from operands. if oInstr.sEncoding is None: if not oInstr.aoOperands: oInstr.sEncoding = 'fixed'; elif oInstr.aoOperands[0].usesModRM(): if len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv': oInstr.sEncoding = 'ModR/M+VEX'; else: oInstr.sEncoding = 'ModR/M'; # # Apply default map and then add the instruction to all it's groups. # if not oInstr.aoMaps: oInstr.aoMaps = [ self.oDefaultMap, ]; for oMap in oInstr.aoMaps: oMap.aoInstructions.append(oInstr); # # Check the opstat value and add it to the opstat indexed dictionary. # if oInstr.sStats: if oInstr.sStats not in g_dAllInstructionsByStat: g_dAllInstructionsByStat[oInstr.sStats] = oInstr; else: self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s' % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],)); # # Add to function indexed dictionary. We allow multiple instructions per function. # if oInstr.sFunction: if oInstr.sFunction not in g_dAllInstructionsByFunction: g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,]; else: g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr); #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags)); return True; def doneInstructions(self, iLineInComment = None): """ Done with current instruction. """ for oInstr in self.aoCurInstrs: self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment); if oInstr.fStub: self.cTotalStubs += 1; self.cTotalInstr += len(self.aoCurInstrs); self.sComment = ''; self.aoCurInstrs = []; return True; def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False): """ Sets the sAttrib of all current instruction to oValue. If fOverwrite is False, only None values and empty strings are replaced. """ for oInstr in self.aoCurInstrs: if fOverwrite is not True: oOldValue = getattr(oInstr, sAttrib); if oOldValue is not None: continue; setattr(oInstr, sAttrib, oValue); def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False): """ Sets the iEntry of the array sAttrib of all current instruction to oValue. If fOverwrite is False, only None values and empty strings are replaced. """ for oInstr in self.aoCurInstrs: aoArray = getattr(oInstr, sAttrib); while len(aoArray) <= iEntry: aoArray.append(None); if fOverwrite is True or aoArray[iEntry] is None: aoArray[iEntry] = oValue; def parseCommentOldOpcode(self, asLines): """ Deals with 'Opcode 0xff /4' like comments """ asWords = asLines[0].split(); if len(asWords) >= 2 \ and asWords[0] == 'Opcode' \ and ( asWords[1].startswith('0x') or asWords[1].startswith('0X')): asWords = asWords[:1]; for iWord, sWord in enumerate(asWords): if sWord.startswith('0X'): sWord = '0x' + sWord[:2]; asWords[iWord] = asWords; self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords)); return False; def ensureInstructionForOpTag(self, iTagLine): """ Ensure there is an instruction for the op-tag being parsed. """ if not self.aoCurInstrs: self.addInstruction(self.iCommentLine + iTagLine); for oInstr in self.aoCurInstrs: oInstr.cOpTags += 1; if oInstr.cOpTags == 1: self.cTotalTagged += 1; return self.aoCurInstrs[-1]; @staticmethod def flattenSections(aasSections): """ Flattens multiline sections into stripped single strings. Returns list of strings, on section per string. """ asRet = []; for asLines in aasSections: if asLines: asRet.append(' '.join([sLine.strip() for sLine in asLines])); return asRet; @staticmethod def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'): """ Flattens sections into a simple stripped string with newlines as section breaks. The final section does not sport a trailing newline. """ # Typical: One section with a single line. if len(aasSections) == 1 and len(aasSections[0]) == 1: return aasSections[0][0].strip(); sRet = ''; for iSection, asLines in enumerate(aasSections): if asLines: if iSection > 0: sRet += sSectionSep; sRet += sLineSep.join([sLine.strip() for sLine in asLines]); return sRet; ## @name Tag parsers ## @{ def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opbrief Value: Text description, multiple sections, appended. Brief description. If not given, it's the first sentence from @opdesc. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sBrief = self.flattenAllSections(aasSections); if not sBrief: return self.errorComment(iTagLine, '%s: value required' % (sTag,)); if sBrief[-1] != '.': sBrief = sBrief + '.'; if len(sBrief) > 180: return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief)); offDot = sBrief.find('.'); while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ': offDot = sBrief.find('.', offDot + 1); if offDot >= 0 and offDot != len(sBrief) - 1: return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief)); # Update the instruction. if oInstr.sBrief is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"' % (sTag, oInstr.sBrief, sBrief,)); _ = iEndLine; return True; def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opdesc Value: Text description, multiple sections, appended. It is used to describe instructions. """ oInstr = self.ensureInstructionForOpTag(iTagLine); if aasSections: oInstr.asDescSections.extend(self.flattenSections(aasSections)); return True; _ = sTag; _ = iEndLine; return True; def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: @opmenmonic Value: mnemonic The 'mnemonic' value must be a valid C identifier string. Because of prefixes, groups and whatnot, there times when the mnemonic isn't that of an actual assembler mnemonic. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sMnemonic = self.flattenAllSections(aasSections); if not self.oReMnemonic.match(sMnemonic): return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,)); if oInstr.sMnemonic is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sMnemonic,)); oInstr.sMnemonic = sMnemonic _ = iEndLine; return True; def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine): """ Tags: \@op1, \@op2, \@op3, \@op4 Value: [where:]type The 'where' value indicates where the operand is found, like the 'reg' part of the ModR/M encoding. See Instruction.kdOperandLocations for a list. The 'type' value indicates the operand type. These follow the types given in the opcode tables in the CPU reference manuals. See Instruction.kdOperandTypes for a list. """ oInstr = self.ensureInstructionForOpTag(iTagLine); idxOp = int(sTag[-1]) - 1; assert idxOp >= 0 and idxOp < 4; # flatten, split up, and validate the "where:type" value. sFlattened = self.flattenAllSections(aasSections); asSplit = sFlattened.split(':'); if len(asSplit) == 1: sType = asSplit[0]; sWhere = None; elif len(asSplit) == 2: (sWhere, sType) = asSplit; else: return self.errorComment(iTagLine, 'expected %s value on format "[:]" not "%s"' % (sTag, sFlattened,)); if sType not in g_kdOpTypes: return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s' % (sTag, sType, ', '.join(g_kdOpTypes.keys()),)); if sWhere is None: sWhere = g_kdOpTypes[sType][1]; elif sWhere not in g_kdOpLocations: return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s' % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),)); # Insert the operand, refusing to overwrite an existing one. while idxOp >= len(oInstr.aoOperands): oInstr.aoOperands.append(None); if oInstr.aoOperands[idxOp] is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"' % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType, sWhere, sType,)); oInstr.aoOperands[idxOp] = Operand(sWhere, sType); _ = iEndLine; return True; def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opmaps Value: map[,map2] Indicates which maps the instruction is in. There is a default map associated with each input file. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten, split up and validate the value. sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ','); asMaps = sFlattened.split(','); if not asMaps: return self.errorComment(iTagLine, '%s: value required' % (sTag,)); for sMap in asMaps: if sMap not in g_dInstructionMaps: return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)' % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),)); # Add the maps to the current list. Throw errors on duplicates. for oMap in oInstr.aoMaps: if oMap.sName in asMaps: return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName)); for sMap in asMaps: oMap = g_dInstructionMaps[sMap]; if oMap not in oInstr.aoMaps: oInstr.aoMaps.append(oMap); else: self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap)); _ = iEndLine; return True; def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@oppfx Value: none|0x66|0xf3|0xf2 Required prefix for the instruction. (In a (E)VEX context this is the value of the 'pp' field rather than an actual prefix.) """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sFlattened = self.flattenAllSections(aasSections); asPrefixes = sFlattened.split(); if len(asPrefixes) > 1: return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,)); sPrefix = asPrefixes[0].lower(); if sPrefix == 'none': sPrefix = None; else: if len(sPrefix) == 2: sPrefix = '0x' + sPrefix; if not _isValidOpcodeByte(sPrefix): return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,)); if sPrefix is not None and sPrefix not in g_kdPrefixes: return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,)); # Set it. if oInstr.sPrefix is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,)); oInstr.sPrefix = sPrefix; _ = iEndLine; return True; def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opcode Value: 0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg The opcode byte or sub-byte for the instruction in the context of a map. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sOpcode = self.flattenAllSections(aasSections); if sOpcode in g_kdSpecialOpcodes: pass; elif not _isValidOpcodeByte(sOpcode): return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,)); # Set it. if oInstr.sOpcode is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,)); oInstr.sOpcode = sOpcode; _ = iEndLine; return True; def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@openc Value: ModR/M|fixed|prefix| The instruction operand encoding style. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sEncoding = self.flattenAllSections(aasSections); if sEncoding in g_kdEncodings: pass; elif sEncoding in g_dInstructionMaps: pass; elif not _isValidOpcodeByte(sEncoding): return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,)); # Set it. if oInstr.sEncoding is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sEncoding, sEncoding,)); oInstr.sEncoding = sEncoding; _ = iEndLine; return True; ## EFlags tag to Instruction attribute name. kdOpFlagToAttr = { '@opfltest': 'asFlTest', '@opflmodify': 'asFlModify', '@opflundef': 'asFlUndefined', '@opflset': 'asFlSet', '@opflclear': 'asFlClear', }; def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine): """ Tags: \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear Value: """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten, split up and validate the values. asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(','); if len(asFlags) == 1 and asFlags[0].lower() == 'none': asFlags = []; else: fRc = True; for iFlag, sFlag in enumerate(asFlags): if sFlag not in g_kdEFlagsMnemonics: if sFlag.strip() in g_kdEFlagsMnemonics: asFlags[iFlag] = sFlag.strip(); else: fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,)); if not fRc: return False; # Set them. asOld = getattr(oInstr, self.kdOpFlagToAttr[sTag]); if asOld is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,)); setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags); _ = iEndLine; return True; def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@ophints Value: Comma or space separated list of flags and hints. This covers the disassembler flags table and more. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten as a space separated list, split it up and validate the values. asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split(); if len(asHints) == 1 and asHints[0].lower() == 'none': asHints = []; else: fRc = True; for iHint, sHint in enumerate(asHints): if sHint not in g_kdHints: if sHint.strip() in g_kdHints: sHint[iHint] = sHint.strip(); else: fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,)); if not fRc: return False; # Append them. for sHint in asHints: if sHint not in oInstr.dHints: oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed) else: self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,)); _ = iEndLine; return True; def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opdisenum Value: OP_XXXX This is for select a specific (legacy) disassembler enum value for the instruction. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and split. asWords = self.flattenAllSections(aasSections).split(); if len(asWords) != 1: self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,)); if not asWords: return False; sDisEnum = asWords[0]; if not self.oReDisEnum.match(sDisEnum): return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)' % (sTag, sDisEnum, self.oReDisEnum.pattern)); # Set it. if oInstr.sDisEnum is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,)); oInstr.sDisEnum = sDisEnum; _ = iEndLine; return True; def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opmincpu Value: Indicates when this instruction was introduced. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten the value, split into words, make sure there's just one, valid it. asCpus = self.flattenAllSections(aasSections).split(); if len(asCpus) > 1: self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),)); sMinCpu = asCpus[0]; if sMinCpu in g_kdCpuNames: oInstr.sMinCpu = sMinCpu; else: return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)' % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),)); # Set it. if oInstr.sMinCpu is None: oInstr.sMinCpu = sMinCpu; elif oInstr.sMinCpu != sMinCpu: self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,)); _ = iEndLine; return True; def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opcpuid Value: none | CPUID feature bit which is required for the instruction to be present. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten as a space separated list, split it up and validate the values. asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split(); if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none': asCpuIds = []; else: fRc = True; for iCpuId, sCpuId in enumerate(asCpuIds): if sCpuId not in g_kdCpuIdFlags: if sCpuId.strip() in g_kdCpuIdFlags: sCpuId[iCpuId] = sCpuId.strip(); else: fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,)); if not fRc: return False; # Append them. for sCpuId in asCpuIds: if sCpuId not in oInstr.asCpuIds: oInstr.asCpuIds.append(sCpuId); else: self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,)); _ = iEndLine; return True; def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opgroup Value: op_grp1[_subgrp2[_subsubgrp3]] Instruction grouping. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten as a space separated list, split it up and validate the values. asGroups = self.flattenAllSections(aasSections).split(); if len(asGroups) != 1: return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,)); sGroup = asGroups[0]; if not self.oReGroupName.match(sGroup): return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)' % (sTag, sGroup, self.oReGroupName.pattern)); # Set it. if oInstr.sGroup is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,)); oInstr.sGroup = sGroup; _ = iEndLine; return True; def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opunused, \@opinvalid, \@opinvlstyle Value: The \@opunused indicates the specification is for a currently unused instruction encoding. The \@opinvalid indicates the specification is for an invalid currently instruction encoding (like UD2). The \@opinvlstyle just indicates how CPUs decode the instruction when not supported (\@opcpuid, \@opmincpu) or disabled. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten as a space separated list, split it up and validate the values. asStyles = self.flattenAllSections(aasSections).split(); if len(asStyles) != 1: return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,)); sStyle = asStyles[0]; if sStyle not in g_kdInvalidStyles: return self.errorComment(iTagLine, '%s: invalid invalid behviour style: %s (valid: %s)' % (sTag, sStyle, g_kdInvalidStyles.keys(),)); # Set it. if oInstr.sInvlStyle is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)' % ( sTag, oInstr.sInvlStyle, sStyle,)); oInstr.sInvlStyle = sStyle; if sTag == '@opunused': oInstr.fUnused = True; elif sTag == '@opinvalid': oInstr.fInvalid = True; _ = iEndLine; return True; def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals """ Tag: \@optest Value: [[ ]?] -> Example: mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p? The main idea here is to generate basic instruction tests. The probably simplest way of handling the diverse input, would be to use it to produce size optimized byte code for a simple interpreter that modifies the register input and output states. An alternative to the interpreter would be creating multiple tables, but that becomes rather complicated wrt what goes where and then to use them in an efficient manner. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # # Do it section by section. # for asSectionLines in aasSections: # # Sort the input into outputs, inputs and selector conditions. # sFlatSection = self.flattenAllSections([asSectionLines,]); if not sFlatSection: self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections)); continue; oTest = InstructionTest(oInstr); asSelectors = []; asInputs = []; asOutputs = []; asCur = asOutputs; fRc = True; asWords = sFlatSection.split(); for iWord in range(len(asWords) - 1, -1, -1): sWord = asWords[iWord]; # Check for array switchers. if sWord == '->': if asCur != asOutputs: fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,)); break; asCur = asInputs; elif sWord == '/': if asCur != asInputs: fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,)); break; asCur = asSelectors; else: asCur.insert(0, sWord); # # Validate and add selectors. # for sCond in asSelectors: sCondExp = TestSelector.kdPredicates.get(sCond, sCond); oSelector = None; for sOp in TestSelector.kasCompareOps: off = sCondExp.find(sOp); if off >= 0: sVariable = sCondExp[:off]; sValue = sCondExp[off + len(sOp):]; if sVariable in TestSelector.kdVariables: if sValue in TestSelector.kdVariables[sVariable]: oSelector = TestSelector(sVariable, sOp, sValue); else: self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)' % ( sTag, sValue, sCond, TestSelector.kdVariables[sVariable].keys(),)); else: self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)' % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),)); break; if oSelector is not None: for oExisting in oTest.aoSelectors: if oExisting.sVariable == oSelector.sVariable: self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)' % ( sTag, oSelector.sVariable, oExisting, oSelector,)); oTest.aoSelectors.append(oSelector); else: fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,)); # # Validate outputs and inputs, adding them to the test as we go along. # for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]: asValidFieldKinds = [ 'both', sDesc, ]; for sItem in asItems: oItem = None; for sOp in TestInOut.kasOperators: off = sItem.find(sOp); if off < 0: continue; sField = sItem[:off]; sValueType = sItem[off + len(sOp):]; if sField in TestInOut.kdFields \ and TestInOut.kdFields[sField][1] in asValidFieldKinds: asSplit = sValueType.split(':', 1); sValue = asSplit[0]; sType = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0]; if sType in TestInOut.kdTypes: oValid = TestInOut.kdTypes[sType].validate(sValue); if oValid is True: if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=': oItem = TestInOut(sField, sOp, sValue, sType); else: self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="' % ( sTag, sDesc, sItem, )); else: self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s' % ( sTag, sDesc, sValue, sItem, sType, oValid, )); else: self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)' % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),)); else: self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s' % ( sTag, sDesc, sField, sItem, ', '.join([sKey for sKey in TestInOut.kdFields.keys() if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),)); break; if oItem is not None: for oExisting in aoDst: if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp: self.errorComment(iTagLine, '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)' % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,)); aoDst.append(oItem); else: fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,)); # # . # if fRc: oInstr.aoTests.append(oTest); else: self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),)); self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s' % (sTag, asSelectors, asInputs, asOutputs,)); _ = iEndLine; return True; def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@optestign | \@optestignore Value: This is a simple trick to ignore a test while debugging another. See also \@oponlytest. """ _ = sTag; _ = aasSections; _ = iTagLine; _ = iEndLine; return True; def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opcopytests Value: [..] Example: \@opcopytests add_Eb_Gb Trick to avoid duplicating tests for different encodings of the same operation. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten, validate and append the copy job to the instruction. We execute # them after parsing all the input so we can handle forward references. asToCopy = self.flattenAllSections(aasSections).split(); if not asToCopy: return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,)); for sToCopy in asToCopy: if sToCopy not in oInstr.asCopyTests: if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy): oInstr.asCopyTests.append(sToCopy); else: self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)' % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern)); else: self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,)); _ = iEndLine; return True; def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@oponlytest Value: none Only test instructions with this tag. This is a trick that is handy for singling out one or two new instructions or tests. See also \@optestignore. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Validate and add instruction to only test dictionary. sValue = self.flattenAllSections(aasSections).strip(); if sValue: return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue)); if oInstr not in g_aoOnlyTestInstructions: g_aoOnlyTestInstructions.append(oInstr); _ = iEndLine; return True; def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opfunction Value: This is for explicitly setting the IEM function name. Normally we pick this up from the FNIEMOP_XXX macro invocation after the description, or generate it from the mnemonic and operands. It it thought it maybe necessary to set it when specifying instructions which implementation isn't following immediately or aren't implemented yet. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sFunction = self.flattenAllSections(aasSections); if not self.oReFunctionName.match(sFunction): return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sFunction, self.oReFunctionName.pattern)); if oInstr.sFunction is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sFunction,)); oInstr.sFunction = sFunction; _ = iEndLine; return True; def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opstats Value: This is for explicitly setting the statistics name. Normally we pick this up from the IEMOP_MNEMONIC macro invocation, or generate it from the mnemonic and operands. It it thought it maybe necessary to set it when specifying instructions which implementation isn't following immediately or aren't implemented yet. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sStats = self.flattenAllSections(aasSections); if not self.oReStatsName.match(sStats): return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sStats, self.oReStatsName.pattern)); if oInstr.sStats is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sStats,)); oInstr.sStats = sStats; _ = iEndLine; return True; def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opdone Value: none Used to explictily flush the instructions that have been specified. """ sFlattened = self.flattenAllSections(aasSections); if sFlattened != '': return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sFlattened,)); _ = sTag; _ = iEndLine; return self.doneInstructions(); ## @} def parseComment(self): """ Parse the current comment (self.sComment). If it's a opcode specifiying comment, we reset the macro stuff. """ # # Reject if comment doesn't seem to contain anything interesting. # if self.sComment.find('Opcode') < 0 \ and self.sComment.find('@') < 0: return False; # # Split the comment into lines, removing leading asterisks and spaces. # Also remove leading and trailing empty lines. # asLines = self.sComment.split('\n'); for iLine, sLine in enumerate(asLines): asLines[iLine] = sLine.lstrip().lstrip('*').lstrip(); while asLines and not asLines[0]: self.iCommentLine += 1; asLines.pop(0); while asLines and not asLines[-1]: asLines.pop(len(asLines) - 1); # # Check for old style: Opcode 0x0f 0x12 # if asLines[0].startswith('Opcode '): self.parseCommentOldOpcode(asLines); # # Look for @op* tagged data. # cOpTags = 0; sFlatDefault = None; sCurTag = '@default'; iCurTagLine = 0; asCurSection = []; aasSections = [ asCurSection, ]; for iLine, sLine in enumerate(asLines): if not sLine.startswith('@'): if sLine: asCurSection.append(sLine); elif asCurSection: asCurSection = []; aasSections.append(asCurSection); else: # # Process the previous tag. # if not asCurSection and len(aasSections) > 1: aasSections.pop(-1); if sCurTag in self.dTagHandlers: self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine); cOpTags += 1; elif sCurTag.startswith('@op'): self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag)); elif sCurTag == '@default': sFlatDefault = self.flattenAllSections(aasSections); elif '@op' + sCurTag[1:] in self.dTagHandlers: self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag)); elif sCurTag in ['@encoding', '@opencoding']: self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,)); # # New tag. # asSplit = sLine.split(None, 1); sCurTag = asSplit[0].lower(); if len(asSplit) > 1: asCurSection = [asSplit[1],]; else: asCurSection = []; aasSections = [asCurSection, ]; iCurTagLine = iLine; # # Process the final tag. # if not asCurSection and len(aasSections) > 1: aasSections.pop(-1); if sCurTag in self.dTagHandlers: self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine); cOpTags += 1; elif sCurTag.startswith('@op'): self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag)); elif sCurTag == '@default': sFlatDefault = self.flattenAllSections(aasSections); # # Don't allow default text in blocks containing @op*. # if cOpTags > 0 and sFlatDefault: self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,)); return True; def parseMacroInvocation(self, sInvocation): """ Parses a macro invocation. Returns a tuple, first element is the offset following the macro invocation. The second element is a list of macro arguments, where the zero'th is the macro name. """ # First the name. offOpen = sInvocation.find('('); if offOpen <= 0: self.raiseError("macro invocation open parenthesis not found"); sName = sInvocation[:offOpen].strip(); if not self.oReMacroName.match(sName): return self.error("invalid macro name '%s'" % (sName,)); asRet = [sName, ]; # Arguments. iLine = self.iLine; cDepth = 1; off = offOpen + 1; offStart = off; while cDepth > 0: if off >= len(sInvocation): if iLine >= len(self.asLines): return self.error('macro invocation beyond end of file'); sInvocation += self.asLines[iLine]; iLine += 1; ch = sInvocation[off]; if ch == ',' or ch == ')': if cDepth == 1: asRet.append(sInvocation[offStart:off].strip()); offStart = off + 1; if ch == ')': cDepth -= 1; elif ch == '(': cDepth += 1; off += 1; return (off, asRet); def findAndParseMacroInvocationEx(self, sCode, sMacro): """ Returns (len(sCode), None) if not found, parseMacroInvocation result if found. """ offHit = sCode.find(sMacro); if offHit >= 0 and sCode[offHit + len(sMacro):].strip()[0] == '(': offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]) return (offHit + offAfter, asRet); return (len(sCode), None); def findAndParseMacroInvocation(self, sCode, sMacro): """ Returns None if not found, arguments as per parseMacroInvocation if found. """ return self.findAndParseMacroInvocationEx(sCode, sMacro)[1]; def findAndParseFirstMacroInvocation(self, sCode, asMacro): """ Returns same as findAndParseMacroInvocation. """ for sMacro in asMacro: asRet = self.findAndParseMacroInvocation(sCode, sMacro); if asRet is not None: return asRet; return None; def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments sDisHints, sIemHints, asOperands): """ Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX, IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros. """ # # Some invocation checks. # if sUpper != sUpper.upper(): self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,)); if sLower != sLower.lower(): self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,)); if sUpper.lower() != sLower: self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,)); if not self.oReMnemonic.match(sLower): self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,)); # # Check if sIemHints tells us to not consider this macro invocation. # if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0: return True; # Apply to the last instruction only for now. if not self.aoCurInstrs: self.addInstruction(); oInstr = self.aoCurInstrs[-1]; if oInstr.iLineMnemonicMacro == -1: oInstr.iLineMnemonicMacro = self.iLine; else: self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction' % (sMacro, oInstr.iLineMnemonicMacro,)); # Mnemonic if oInstr.sMnemonic is None: oInstr.sMnemonic = sLower; elif oInstr.sMnemonic != sLower: self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,)); # Process operands. if len(oInstr.aoOperands) not in [0, len(asOperands)]: self.error('%s: number of operands given by @opN does not match macro: %s vs %s' % (sMacro, len(oInstr.aoOperands), len(asOperands),)); for iOperand, sType in enumerate(asOperands): sWhere = g_kdOpTypes.get(sType, [None, None])[1]; if sWhere is None: self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType)); if iOperand < len(oInstr.aoOperands): # error recovery. sWhere = oInstr.aoOperands[iOperand].sWhere; sType = oInstr.aoOperands[iOperand].sType; else: sWhere = 'reg'; sType = 'Gb'; if iOperand == len(oInstr.aoOperands): oInstr.aoOperands.append(Operand(sWhere, sType)) elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType: self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s' % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, oInstr.aoOperands[iOperand].sType, sWhere, sType,)); # Encoding. if sForm not in g_kdIemForms: self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,)); else: if oInstr.sEncoding is None: oInstr.sEncoding = g_kdIemForms[sForm][0]; elif g_kdIemForms[sForm][0] != oInstr.sEncoding: self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)' % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm)); # Check the parameter locations for the encoding. if g_kdIemForms[sForm][1] is not None: for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]): if oInstr.aoOperands[iOperand].sWhere != sWhere: self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)' % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,)); # Stats. if not self.oReStatsName.match(sStats): self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,)); elif oInstr.sStats is None: oInstr.sStats = sStats; elif oInstr.sStats != sStats: self.error('%s: mismatching @opstats and a_Stats value: %s vs %s' % (sMacro, oInstr.sStats, sStats,)); # Process the hints (simply merge with @ophints w/o checking anything). for sHint in sDisHints.split('|'): sHint = sHint.strip(); if sHint.startswith('DISOPTYPE_'): sShortHint = sHint[len('DISOPTYPE_'):].lower(); if sShortHint in g_kdHints: oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed) else: self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,)); elif sHint != '0': self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,)); for sHint in sIemHints.split('|'): sHint = sHint.strip(); if sHint.startswith('IEMOPHINT_'): sShortHint = sHint[len('IEMOPHINT_'):].lower(); if sShortHint in g_kdHints: oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed) else: self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,)); elif sHint != '0': self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,)); _ = sAsm; return True; def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands): """ Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2, IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros. """ if not asOperands: return self.workerIemOpMnemonicEx(sMacro, sLower, sLower, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands); return self.workerIemOpMnemonicEx(sMacro, sLower + '_' + '_'.join(asOperands), sLower + ' ' + ','.join(asOperands), sForm, sUpper, sLower, sDisHints, sIemHints, asOperands); def checkCodeForMacro(self, sCode): """ Checks code for relevant macro invocation. """ # # Scan macro invocations. # if sCode.find('(') > 0: # Look for instruction decoder function definitions. ASSUME single line. asArgs = self.findAndParseFirstMacroInvocation(sCode, [ 'FNIEMOP_DEF', 'FNIEMOP_STUB', 'FNIEMOP_STUB_1', 'FNIEMOP_UD_STUB', 'FNIEMOP_UD_STUB_1' ]); if asArgs is not None: sFunction = asArgs[1]; if not self.aoCurInstrs: self.addInstruction(); for oInstr in self.aoCurInstrs: if oInstr.iLineFnIemOpMacro == -1: oInstr.iLineFnIemOpMacro = self.iLine; else: self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) ); self.setInstrunctionAttrib('sFunction', sFunction); self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True); self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True); if asArgs[0].find('STUB') > 0: self.doneInstructions(); return True; # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC'); if asArgs is not None: if len(self.aoCurInstrs) == 1: oInstr = self.aoCurInstrs[0]; if oInstr.sStats is None: oInstr.sStats = asArgs[1]; self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]); # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX'); if asArgs is not None: self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6], asArgs[7], []); # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX'); if asArgs is not None: self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7], asArgs[8], [asArgs[6],]); # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX'); if asArgs is not None: self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8], asArgs[9], [asArgs[6], asArgs[7]]); # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX'); if asArgs is not None: self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9], asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]); # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, # a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX'); if asArgs is not None: self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10], asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]); # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0'); if asArgs is not None: self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []); # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1'); if asArgs is not None: self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]); # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2'); if asArgs is not None: self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7], [asArgs[4], asArgs[5],]); # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3'); if asArgs is not None: self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8], [asArgs[4], asArgs[5], asArgs[6],]); # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4'); if asArgs is not None: self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9], [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]); return False; def parse(self): """ Parses the given file. Returns number or errors. Raises exception on fatal trouble. """ self.debug('Parsing %s' % (self.sSrcFile,)); while self.iLine < len(self.asLines): sLine = self.asLines[self.iLine]; self.iLine += 1; # We only look for comments, so only lines with a slash might possibly # influence the parser state. if sLine.find('/') >= 0: #self.debug('line %d: slash' % (self.iLine,)); offLine = 0; while offLine < len(sLine): if self.iState == self.kiCode: offHit = sLine.find('/*', offLine); # only multiline comments for now. if offHit >= 0: self.checkCodeForMacro(sLine[offLine:offHit]); self.sComment = ''; self.iCommentLine = self.iLine; self.iState = self.kiCommentMulti; offLine = offHit + 2; else: self.checkCodeForMacro(sLine[offLine:]); offLine = len(sLine); elif self.iState == self.kiCommentMulti: offHit = sLine.find('*/', offLine); if offHit >= 0: self.sComment += sLine[offLine:offHit]; self.iState = self.kiCode; offLine = offHit + 2; self.parseComment(); else: self.sComment += sLine[offLine:]; offLine = len(sLine); else: assert False; # No slash, but append the line if in multi-line comment. elif self.iState == self.kiCommentMulti: #self.debug('line %d: multi' % (self.iLine,)); self.sComment += sLine; # No slash, but check code line for relevant macro. elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0: #self.debug('line %d: macro' % (self.iLine,)); self.checkCodeForMacro(sLine); # If the line is a '}' in the first position, complete the instructions. elif self.iState == self.kiCode and sLine[0] == '}': #self.debug('line %d: }' % (self.iLine,)); self.doneInstructions(); self.doneInstructions(); self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,)); self.debug('%s instruction stubs' % (self.cTotalStubs,)); return self.printErrors(); def __parseFileByName(sSrcFile, sDefaultMap): """ Parses one source file for instruction specfications. """ # # Read sSrcFile into a line array. # try: oFile = open(sSrcFile, "r"); except Exception as oXcpt: raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,)); try: asLines = oFile.readlines(); except Exception as oXcpt: raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,)); finally: oFile.close(); # # Do the parsing. # try: cErrors = SimpleParser(sSrcFile, asLines, sDefaultMap).parse(); except ParserException as oXcpt: print(str(oXcpt)); raise; except Exception as oXcpt: raise; return cErrors; def __doTestCopying(): """ Executes the asCopyTests instructions. """ asErrors = []; for oDstInstr in g_aoAllInstructions: if oDstInstr.asCopyTests: for sSrcInstr in oDstInstr.asCopyTests: oSrcInstr = g_dAllInstructionsByStat.get(sSrcInstr, None); if oSrcInstr: aoSrcInstrs = [oSrcInstr,]; else: aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []); if aoSrcInstrs: for oSrcInstr in aoSrcInstrs: if oSrcInstr != oDstInstr: oDstInstr.aoTests.extend(oSrcInstr.aoTests); else: asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n' % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr)); else: asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n' % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr)); if asErrors: sys.stderr.write(u''.join(asErrors)); return len(asErrors); def __applyOnlyTest(): """ If g_aoOnlyTestInstructions contains any instructions, drop aoTests from all other instructions so that only these get tested. """ if g_aoOnlyTestInstructions: for oInstr in g_aoAllInstructions: if oInstr.aoTests: if oInstr not in g_aoOnlyTestInstructions: oInstr.aoTests = []; return 0; def __parseAll(): """ Parses all the IEMAllInstruction*.cpp.h files. Raises exception on failure. """ sSrcDir = os.path.dirname(os.path.abspath(__file__)); cErrors = 0; for sDefaultMap, sName in [ ( 'one', 'IEMAllInstructionsOneByte.cpp.h'), ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'), ]: cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap); cErrors += __doTestCopying(); cErrors += __applyOnlyTest(); if cErrors != 0: #raise Exception('%d parse errors' % (cErrors,)); sys.exit(1); return True; __parseAll(); # # Generators (may perhaps move later). # def generateDisassemblerTables(oDstFile = sys.stdout): """ Generates disassembler tables. """ for sName, oMap in sorted(iter(g_dInstructionMaps.items()), key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)): assert oMap.sName == sName; asLines = []; asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */' % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), )); asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),)); asLines.append('{'); aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199]; aoTableOrder = oMap.getInstructionsInTableOrder(); for iInstr, oInstr in enumerate(aoTableOrder): if (iInstr & 0xf) == 0: if iInstr != 0: asLines.append(''); asLines.append(' /* %x */' % (iInstr >> 4,)); if oInstr is None: pass;#asLines.append(' /* %#04x */ None,' % (iInstr)); elif isinstance(oInstr, list): asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr)); else: sMacro = 'OP'; cMaxOperands = 3; if len(oInstr.aoOperands) > 3: sMacro = 'OPVEX' cMaxOperands = 4; assert len(oInstr.aoOperands) <= cMaxOperands; # # Format string. # sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,); for iOperand, oOperand in enumerate(oInstr.aoOperands): sTmp += ' ' if iOperand == 0 else ','; if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later. sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later. else: sTmp += g_kdOpTypes[oOperand.sType][2]; sTmp += '",'; asColumns = [ sTmp, ]; # # Decoders. # iStart = len(asColumns); if oInstr.sEncoding is None: pass; elif oInstr.sEncoding == 'ModR/M': # ASSUME the first operand is using the ModR/M encoding assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(); asColumns.append('IDX_ParseModRM,'); ## @todo IDX_ParseVexDest # Is second operand using ModR/M too? if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM(): asColumns.append('IDX_UseModRM,') elif oInstr.sEncoding in [ 'prefix', ]: for oOperand in oInstr.aoOperands: asColumns.append('0,'); elif oInstr.sEncoding in [ 'fixed' ]: pass; elif oInstr.sEncoding == 'vex2': asColumns.append('IDX_ParseVex2b,') elif oInstr.sEncoding == 'vex3': asColumns.append('IDX_ParseVex3b,') elif oInstr.sEncoding in g_dInstructionMaps: asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ','); else: ## @todo #IDX_ParseTwoByteEsc, #IDX_ParseGrp1, #IDX_ParseShiftGrp2, #IDX_ParseGrp3, #IDX_ParseGrp4, #IDX_ParseGrp5, #IDX_Parse3DNow, #IDX_ParseGrp6, #IDX_ParseGrp7, #IDX_ParseGrp8, #IDX_ParseGrp9, #IDX_ParseGrp10, #IDX_ParseGrp12, #IDX_ParseGrp13, #IDX_ParseGrp14, #IDX_ParseGrp15, #IDX_ParseGrp16, #IDX_ParseThreeByteEsc4, #IDX_ParseThreeByteEsc5, #IDX_ParseModFence, #IDX_ParseEscFP, #IDX_ParseNopPause, #IDX_ParseInvOpModRM, assert False, str(oInstr); # Check for immediates and stuff in the remaining operands. for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]: sIdx = g_kdOpTypes[oOperand.sType][0]; if sIdx != 'IDX_UseModRM': asColumns.append(sIdx + ','); asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart))); # # Opcode and operands. # assert oInstr.sDisEnum, str(oInstr); asColumns.append(oInstr.sDisEnum + ','); iStart = len(asColumns) for oOperand in oInstr.aoOperands: asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ','); asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart))); # # Flags. # sTmp = ''; for sHint in sorted(oInstr.dHints.keys()): sDefine = g_kdHints[sHint]; if sDefine.startswith('DISOPTYPE_'): if sTmp: sTmp += ' | ' + sDefine; else: sTmp += sDefine; if sTmp: sTmp += '),'; else: sTmp += '0),'; asColumns.append(sTmp); # # Format the columns into a line. # sLine = ''; for i, s in enumerate(asColumns): if len(sLine) < aoffColumns[i]: sLine += ' ' * (aoffColumns[i] - len(sLine)); else: sLine += ' '; sLine += s; # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE, # DISOPTYPE_HARMLESS), # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \ # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype } asLines.append(sLine); asLines.append('};'); asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),)); # # Write out the lines. # oDstFile.write('\n'.join(asLines)); oDstFile.write('\n'); break; #for now if __name__ == '__main__': generateDisassemblerTables();