[Pkg-virtualbox-commits] [virtualbox] 01/03: Imported Upstream version 5.1.22-dfsg

Gianfranco Costamagna locutusofborg at moszumanska.debian.org
Sat Apr 29 09:15:29 UTC 2017


This is an automated email from the git hooks/post-receive script.

locutusofborg pushed a commit to branch master
in repository virtualbox.

commit 2089bd5d47ffca3ae52d394222d50af7b43e3f74
Author: Gianfranco Costamagna <costamagnagianfranco at yahoo.it>
Date:   Sat Apr 29 11:03:33 2017 +0200

    Imported Upstream version 5.1.22-dfsg
---
 Config.kmk                                         |    36 +-
 Doxyfile.Core                                      |     9 +-
 doc/manual/en_US/user_Installation.xml             |     2 +-
 doc/manual/en_US/user_Introduction.xml             |     4 +
 doc/manual/user_ChangeLogImpl.xml                  |    55 +-
 include/VBox/cdefs.h                               |     6 +-
 include/VBox/disopcode.h                           |    48 +-
 include/VBox/err.h                                 |     7 +
 include/VBox/err.mac                               |     8 +
 include/VBox/vmm/cpum.h                            |   174 +-
 include/VBox/vmm/dbgf.h                            |    45 +-
 include/VBox/vmm/em.h                              |     1 +
 include/VBox/vmm/hm_svm.h                          |   406 +-
 include/VBox/vmm/iem.h                             |    47 +-
 include/VBox/vmm/mm.h                              |     2 +
 include/iprt/asm-amd64-x86.h                       |    14 +
 include/iprt/cdefs.h                               |   133 +-
 include/iprt/formats/omf.h                         |    28 +
 include/iprt/mangling.h                            |     4 +
 include/iprt/x86.h                                 |   110 +-
 include/iprt/x86.mac                               |    64 +-
 src/VBox/Additions/linux/installer/vboxadd.sh      |     4 +-
 src/VBox/Debugger/DBGCEmulateCodeView.cpp          |     2 +-
 src/VBox/Debugger/VBoxDbgStatsQt4.cpp              |     1 +
 src/VBox/Devices/EFI/FirmwareBin/VBoxEFI32.fd      |   Bin 2097152 -> 2097152 bytes
 src/VBox/Devices/EFI/FirmwareBin/VBoxEFI64.fd      |   Bin 2097152 -> 2097152 bytes
 .../Graphics/BIOS/VBoxVgaBiosAlternative286.asm    |     6 +-
 .../Graphics/BIOS/VBoxVgaBiosAlternative286.md5sum |     2 +-
 .../Graphics/BIOS/VBoxVgaBiosAlternative386.asm    |     6 +-
 .../Graphics/BIOS/VBoxVgaBiosAlternative386.md5sum |     2 +-
 .../Graphics/BIOS/VBoxVgaBiosAlternative8086.asm   |     6 +-
 .../BIOS/VBoxVgaBiosAlternative8086.md5sum         |     2 +-
 src/VBox/Devices/Graphics/DevVGA.cpp               |    16 +-
 src/VBox/Devices/Input/UsbKbd.cpp                  |     1 +
 src/VBox/Devices/Input/UsbMouse.cpp                |     1 +
 src/VBox/Devices/Network/DrvIntNet.cpp             |     2 +
 src/VBox/Devices/Network/DrvUDPTunnel.cpp          |     1 +
 .../Devices/PC/BIOS/VBoxBiosAlternative286.asm     |     4 +-
 .../Devices/PC/BIOS/VBoxBiosAlternative286.md5sum  |     2 +-
 .../Devices/PC/BIOS/VBoxBiosAlternative386.asm     |     4 +-
 .../Devices/PC/BIOS/VBoxBiosAlternative386.md5sum  |     2 +-
 .../Devices/PC/BIOS/VBoxBiosAlternative8086.asm    |     4 +-
 .../Devices/PC/BIOS/VBoxBiosAlternative8086.md5sum |     2 +-
 src/VBox/Devices/Storage/DevBusLogic.cpp           |     2 +
 src/VBox/Devices/Storage/DevFdc.cpp                |     1 +
 src/VBox/Devices/Storage/DrvVD.cpp                 |     1 +
 src/VBox/Devices/Storage/UsbMsd.cpp                |     1 +
 src/VBox/Devices/USB/DevOHCI.cpp                   |     1 +
 src/VBox/Devices/USB/VUSBUrb.cpp                   |     1 +
 src/VBox/Disassembler/DisasmTables.cpp             |   426 +-
 src/VBox/Frontends/VirtualBox/nls/VirtualBox_eu.ts |    24 +-
 src/VBox/Frontends/VirtualBox/nls/VirtualBox_sl.ts |    26 +-
 .../VirtualBox/src/globals/VBoxGlobal.cpp          |     6 +-
 .../src/settings/machine/UIMachineSettingsUSB.cpp  |     5 +
 .../src/settings/machine/UIMachineSettingsUSB.h    |     1 +
 .../UIWizardImportAppPageBasic2.cpp                |     1 +
 src/VBox/GuestHost/OpenGL/packer/pack_program.c    |    44 +-
 src/VBox/GuestHost/OpenGL/packer/packer.py         |     2 +-
 src/VBox/GuestHost/OpenGL/packer/packer_bbox.py    |     2 +-
 src/VBox/HostDrivers/Support/Makefile.kmk          |     7 +-
 src/VBox/HostDrivers/Support/SUPDrv.cpp            |     3 +-
 src/VBox/HostDrivers/Support/SUPLibInternal.h      |     2 +-
 .../HostDrivers/Support/SUPR3HardenedVerify.cpp    |    44 +-
 .../Support/posix/SUPR3HardenedMain-posix.cpp      |   660 +
 .../Support/posix/SUPR3HardenedMainA-posix.asm     |   160 +
 src/VBox/HostDrivers/Support/win/SUPLib-win.cpp    |     4 +-
 .../VBoxNetFlt/win/drv/VBoxNetLwf-win.cpp          |     2 +
 .../HostDrivers/VBoxNetFlt/win/drv/VBoxNetLwf.inf  |     2 +-
 src/VBox/HostServices/SharedFolders/vbsf.cpp       |    27 +-
 src/VBox/Main/src-client/ConsoleImpl2.cpp          |     1 +
 src/VBox/Main/src-client/ConsoleImplTeleporter.cpp |     1 +
 src/VBox/Main/src-server/HostImpl.cpp              |    11 +-
 src/VBox/Main/src-server/HostUSBDeviceImpl.cpp     |     2 +
 src/VBox/Main/src-server/win/NetIf-win.cpp         |     2 +-
 src/VBox/Runtime/Makefile.kmk                      |    10 +
 src/VBox/Runtime/common/asm/ASMCpuId.asm           |   111 +
 src/VBox/Runtime/common/asm/ASMCpuIdExSlow.asm     |    58 +-
 .../common/asm/ASMFxRstor.asm}                     |    48 +-
 .../common/asm/ASMFxSave.asm}                      |    48 +-
 src/VBox/Storage/VD.cpp                            |    20 +-
 src/VBox/Storage/testcase/VDScriptInterp.cpp       |     2 +
 src/VBox/Storage/testcase/tstVDCompact.vd          |     2 +
 src/VBox/Storage/testcase/tstVDCopy.vd             |     4 +
 src/VBox/Storage/testcase/tstVDIo.cpp              |    38 +
 src/VBox/Storage/testcase/tstVDResize.vd           |     1 +
 src/VBox/VMM/Makefile.kmk                          |   123 +-
 src/VBox/VMM/VMMAll/APICAll.cpp                    |     2 +-
 src/VBox/VMM/VMMAll/CPUMAllRegs.cpp                |    41 +-
 src/VBox/VMM/VMMAll/EMAll.cpp                      |    12 +
 src/VBox/VMM/VMMAll/GIMAll.cpp                     |     3 +
 src/VBox/VMM/VMMAll/IEMAll.cpp                     |   770 +-
 src/VBox/VMM/VMMAll/IEMAllAImpl.asm                |    72 +-
 src/VBox/VMM/VMMAll/IEMAllAImplC.cpp               |    36 +
 src/VBox/VMM/VMMAll/IEMAllCImpl.cpp.h              |  1544 +-
 src/VBox/VMM/VMMAll/IEMAllCImplStrInstr.cpp.h      |    68 +
 src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h       | 17678 +------------------
 src/VBox/VMM/VMMAll/IEMAllInstructions3DNow.cpp.h  |   133 +
 ...tions.cpp.h => IEMAllInstructionsOneByte.cpp.h} | 10241 ++---------
 src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py    |  3410 ++++
 .../VMM/VMMAll/IEMAllInstructionsThree0f38.cpp.h   |   779 +
 .../VMM/VMMAll/IEMAllInstructionsThree0f3a.cpp.h   |   502 +
 .../VMM/VMMAll/IEMAllInstructionsTwoByte0f.cpp.h   |  8538 +++++++++
 .../VMM/VMMAll/IEMAllInstructionsVexMap1.cpp.h     |  3065 ++++
 .../VMM/VMMAll/IEMAllInstructionsVexMap2.cpp.h     |   855 +
 .../VMM/VMMAll/IEMAllInstructionsVexMap3.cpp.h     |   557 +
 src/VBox/VMM/VMMAll/IOMAllMMIO.cpp                 |     2 +
 src/VBox/VMM/VMMAll/MMAll.cpp                      |     2 +
 src/VBox/VMM/VMMAll/PGMAllBth.h                    |     3 +-
 src/VBox/VMM/VMMAll/PGMAllPhys.cpp                 |     3 +
 src/VBox/VMM/VMMAll/PGMAllPool.cpp                 |     2 +-
 src/VBox/VMM/VMMAll/TMAll.cpp                      |     1 +
 src/VBox/VMM/VMMAll/TMAllCpu.cpp                   |     1 +
 src/VBox/VMM/VMMAll/TRPMAll.cpp                    |     4 +-
 src/VBox/VMM/VMMR0/HMSVMR0.cpp                     |    21 +-
 src/VBox/VMM/VMMR0/HMVMXR0.cpp                     |     2 +-
 src/VBox/VMM/VMMR3/CFGM.cpp                        |     2 +-
 src/VBox/VMM/VMMR3/CPUM.cpp                        |     6 +-
 src/VBox/VMM/VMMR3/CPUMR3CpuId.cpp                 |    88 +-
 src/VBox/VMM/VMMR3/CPUMR3Db.cpp                    |    15 +-
 src/VBox/VMM/VMMR3/CSAM.cpp                        |     5 +-
 src/VBox/VMM/VMMR3/DBGF.cpp                        |   353 +-
 src/VBox/VMM/VMMR3/DBGFCpu.cpp                     |    38 +
 src/VBox/VMM/VMMR3/DBGFDisas.cpp                   |     1 +
 src/VBox/VMM/VMMR3/EM.cpp                          |     3 +
 src/VBox/VMM/VMMR3/EMRaw.cpp                       |     1 +
 src/VBox/VMM/VMMR3/HM.cpp                          |    24 +-
 src/VBox/VMM/VMMR3/IEMR3.cpp                       |    29 +
 src/VBox/VMM/VMMR3/PATM.cpp                        |    10 +-
 src/VBox/VMM/VMMR3/PATMA.asm                       |    34 +-
 src/VBox/VMM/VMMR3/PATMA.mac                       |     8 +-
 .../VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp     |    20 +-
 src/VBox/VMM/VMMR3/PDMDriver.cpp                   |    26 +-
 src/VBox/VMM/VMMR3/PDMNetShaper.cpp                |     2 +
 src/VBox/VMM/VMMR3/PGM.cpp                         |     6 +
 src/VBox/VMM/VMMR3/PGMPhys.cpp                     |     5 +-
 src/VBox/VMM/VMMR3/PGMPool.cpp                     |     2 +-
 src/VBox/VMM/VMMR3/PGMSavedState.cpp               |     1 +
 src/VBox/VMM/VMMR3/SSM.cpp                         |     4 +
 src/VBox/VMM/VMMR3/STAM.cpp                        |     7 +-
 src/VBox/VMM/VMMR3/TM.cpp                          |     3 +
 src/VBox/VMM/VMMR3/VM.cpp                          |    66 +-
 src/VBox/VMM/VMMR3/VMEmt.cpp                       |    22 +-
 src/VBox/VMM/VMMR3/VMMGuruMeditation.cpp           |     2 +-
 src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_3200.h       |     1 +
 .../VMMR3/cpus/AMD_Athlon_64_X2_Dual_Core_4200.h   |     1 +
 src/VBox/VMM/VMMR3/cpus/AMD_FX_8150_Eight_Core.h   |     1 +
 src/VBox/VMM/VMMR3/cpus/AMD_Phenom_II_X6_1100T.h   |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_80186.h              |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_80286.h              |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_80386.h              |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_8086.h               |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_Atom_330_1_60GHz.h   |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_Core_i5_3570.h       |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_2635QM.h     |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3960X.h      |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_5600U.h      |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_6700K.h      |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_Pentium_4_3_00GHz.h  |     1 +
 .../VMMR3/cpus/Intel_Pentium_M_processor_2_00GHz.h |     1 +
 .../VMM/VMMR3/cpus/Intel_Pentium_N3530_2_16GHz.h   |     1 +
 src/VBox/VMM/VMMR3/cpus/Intel_Xeon_X5482_3_20GHz.h |     1 +
 .../VMM/VMMR3/cpus/Quad_Core_AMD_Opteron_2384.h    |     1 +
 .../VMM/VMMR3/cpus/VIA_QuadCore_L4700_1_2_GHz.h    |     1 +
 src/VBox/VMM/VMMRC/CPUMRCPatchHlp.asm              |    24 +-
 src/VBox/VMM/VMMRC/TRPMRCHandlers.cpp              |     1 +
 src/VBox/VMM/include/CPUMInternal.h                |    12 +-
 src/VBox/VMM/include/CPUMInternal.mac              |     2 +-
 src/VBox/VMM/include/DBGFInternal.h                |    28 +
 src/VBox/VMM/include/EMHandleRCTmpl.h              |     7 +
 src/VBox/VMM/include/IEMInternal.h                 |   234 +-
 .../VMM/include/PDMAsyncCompletionFileInternal.h   |     2 +
 src/VBox/VMM/include/PGMInline.h                   |    18 +-
 src/VBox/VMM/include/PGMInternal.h                 |     9 +-
 .../testcase/Instructions/InstructionTestGen.py    |     6 +-
 src/VBox/VMM/testcase/tstIEMCheckMc.cpp            |    46 +-
 src/VBox/VMM/testcase/tstVMStructDTrace.cpp        |     1 +
 src/VBox/VMM/testcase/tstVMStructRC.cpp            |     1 +
 src/VBox/VMM/testcase/tstVMStructSize.cpp          |     1 +
 src/VBox/VMM/tools/VBoxCpuReport.cpp               |    35 +-
 src/VBox/ValidationKit/bootsectors/Config.kmk      |     9 +-
 src/VBox/ValidationKit/bootsectors/Makefile.kmk    |    40 +-
 .../bootsector2-common-routines-template-1.mac     |     2 +-
 .../bootsector2-cpu-instr-1-template.mac           |     4 +-
 .../{bs3-cpu-instr-2.c => bs3-cpu-basic-2-32.c32}  |    26 +-
 .../bootsectors/bs3-cpu-basic-2-asm.asm            |    51 +
 .../bootsectors/bs3-cpu-basic-2-pf.c32             |  1880 ++
 .../bootsectors/bs3-cpu-basic-2-template.mac       |    82 +
 .../ValidationKit/bootsectors/bs3-cpu-basic-2.c    |    20 +-
 .../bootsectors/bs3-cpu-decoding-1-asm.asm         |     1 +
 .../bootsectors/bs3-cpu-decoding-1-template.mac    |    75 +-
 .../bootsectors/bs3-cpu-decoding-1.c32             |  1657 +-
 ...oding-1-asm.asm => bs3-cpu-generated-1-asm.asm} |    12 +-
 .../bootsectors/bs3-cpu-generated-1-data.py        |   611 +
 .../bootsectors/bs3-cpu-generated-1-template.c     |  3829 ++++
 .../{bs3-cpu-instr-2.c => bs3-cpu-generated-1.c}   |    21 +-
 .../bootsectors/bs3-cpu-generated-1.h              |   657 +
 .../bootsectors/bs3-cpu-instr-2-template.c         |   135 +
 .../bootsectors/bs3-cpu-instr-2-template.mac       |    81 +
 .../ValidationKit/bootsectors/bs3-cpu-instr-2.c    |     2 +
 .../ValidationKit/bootsectors/bs3kit/Makefile.kmk  |    44 +
 .../bootsectors/bs3kit/VBoxBs3ObjConverter.cpp     |   735 +-
 .../bs3kit/bs3-c16-TrapRmV86Generic.asm            |    17 +-
 ...mn-SelFar32ToFlat32.c => bs3-cmn-ExtCtxAlloc.c} |    23 +-
 ...cmn-SelFar32ToFlat32.c => bs3-cmn-ExtCtxCopy.c} |    22 +-
 ...cmn-SelFar32ToFlat32.c => bs3-cmn-ExtCtxFree.c} |    25 +-
 ...3-cmn-RegCtxPrint.c => bs3-cmn-ExtCtxGetSize.c} |    48 +-
 ...{bs3-cmn-RegCtxPrint.c => bs3-cmn-ExtCtxInit.c} |    51 +-
 .../bootsectors/bs3kit/bs3-cmn-ExtCtxRestore.asm   |   124 +
 .../bootsectors/bs3kit/bs3-cmn-ExtCtxSave.asm      |   130 +
 ...n-SelFar32ToFlat32.c => bs3-cmn-GetCpuVendor.c} |    28 +-
 .../bs3kit/bs3-cmn-GetModeNameShortLower.c         |    62 +
 .../bootsectors/bs3kit/bs3-cmn-MemPrintInfo.c      |    85 +
 .../bootsectors/bs3kit/bs3-cmn-PagingAlias.c       |     6 +-
 .../bootsectors/bs3kit/bs3-cmn-PagingProtect.c     |    37 +-
 .../bs3kit/bs3-cmn-PagingQueryAddressInfo.c        |   149 +
 .../bs3kit/bs3-cmn-PagingSetupCanonicalTraps.c     |     4 +-
 .../bootsectors/bs3kit/bs3-cmn-PitIrqHandler.c     |     2 +-
 .../bs3kit/bs3-cmn-RegCtxConvertToRingX.c          |     2 +-
 .../bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c       |    32 +-
 .../bootsectors/bs3kit/bs3-cmn-RegCtxRestore.asm   |     2 +
 .../bootsectors/bs3kit/bs3-cmn-RegCtxSaveEx.asm    |   137 +-
 .../bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c  |     2 +-
 .../bs3kit/bs3-cmn-SelFlatDataToProtFar16.asm      |    10 +-
 .../bs3kit/bs3-cmn-SelProtModeCodeToRealMode.asm   |     1 -
 ...n-RegCtxPrint.c => bs3-cmn-SelSetup16BitCode.c} |    40 +-
 ...n-RegCtxPrint.c => bs3-cmn-SelSetup16BitData.c} |    40 +-
 .../bootsectors/bs3kit/bs3-cmn-StrFormatV.c        |    12 +-
 .../bootsectors/bs3kit/bs3-cmn-SwitchTo16Bit.asm   |     2 +-
 .../bootsectors/bs3kit/bs3-cmn-SwitchTo32Bit.asm   |     3 +-
 .../bs3kit/bs3-cmn-TestDoModesByOneHlp.asm         |    16 +-
 .../bootsectors/bs3kit/bs3-cmn-TestFailed.c        |    12 +-
 .../bootsectors/bs3kit/bs3-cmn-TestPrintf.c        |     2 +-
 .../bootsectors/bs3kit/bs3-cmn-TestSub.c           |     3 +
 .../bs3kit/bs3-cmn-TrapDefaultHandler.c            |     1 +
 .../bootsectors/bs3kit/bs3-cmn-paging.h            |     6 +-
 .../bootsectors/bs3kit/bs3-cmn-test.h              |     2 +-
 .../bootsectors/bs3kit/bs3-first-common.mac        |     8 +-
 .../bootsectors/bs3kit/bs3-mode-CpuDetect.asm      |    11 +-
 .../bs3-mode-NameShortLower.asm}                   |    20 +-
 .../bs3kit/bs3-mode-SwitchTo32BitAndCallC.asm      |   154 +
 .../bootsectors/bs3kit/bs3-mode-SwitchToLM32.asm   |     2 +-
 .../bootsectors/bs3kit/bs3-mode-SwitchToPAE16.asm  |    15 +-
 .../bs3kit/bs3-mode-SwitchToPAE16_32.asm           |     2 +-
 .../bs3kit/bs3-mode-SwitchToPAE16_V86.asm          |     2 +-
 .../bootsectors/bs3kit/bs3-mode-SwitchToPAE32.asm  |     8 +-
 .../bs3kit/bs3-mode-SwitchToPAE32_16.asm           |     6 +-
 .../bootsectors/bs3kit/bs3-mode-SwitchToPE16.asm   |     4 +-
 .../bs3kit/bs3-mode-SwitchToPE16_32.asm            |     2 +-
 .../bs3kit/bs3-mode-SwitchToPE16_V86.asm           |     2 +-
 .../bootsectors/bs3kit/bs3-mode-SwitchToPE32.asm   |     9 +-
 .../bs3kit/bs3-mode-SwitchToPE32_16.asm            |     6 +-
 .../bootsectors/bs3kit/bs3-mode-SwitchToPP16.asm   |    14 +-
 .../bs3kit/bs3-mode-SwitchToPP16_32.asm            |     2 +-
 .../bs3kit/bs3-mode-SwitchToPP16_V86.asm           |     2 +-
 .../bootsectors/bs3kit/bs3-mode-SwitchToPP32.asm   |     8 +-
 .../bs3kit/bs3-mode-SwitchToPP32_16.asm            |     4 +-
 .../bootsectors/bs3kit/bs3-mode-SwitchToRM.asm     |     2 +-
 .../bootsectors/bs3kit/bs3-mode-TestDoModes.c      |    74 +-
 .../bootsectors/bs3kit/bs3-mode-TestDoModes.h      |    86 +
 ...e-TestDoModes.c => bs3-mode-TestDoModesByMax.c} |   194 +-
 ...eStub.asm => bs3-mode-TestDoModesByMaxStub.asm} |    24 +-
 .../bootsectors/bs3kit/bs3-mode-TestDoModesByOne.c |   101 +-
 .../bs3kit/bs3-mode-TestDoModesByOneStub.asm       |     6 +-
 .../bootsectors/bs3kit/bs3-mode-TestDoModesHlp.asm |   154 +-
 ...sByOneStub.asm => bs3-mode-TestDoModesStub.asm} |    23 +-
 .../bootsectors/bs3kit/bs3-rm-InitMemory.c         |     2 +-
 .../bootsectors/bs3kit/bs3kit-autostubs.kmk        |    23 +-
 .../bs3kit/bs3kit-mangling-code-define.h           |    18 +-
 .../bs3kit/bs3kit-mangling-code-undef.h            |    18 +-
 .../bootsectors/bs3kit/bs3kit-template-footer.mac  |     2 +
 .../bootsectors/bs3kit/bs3kit-template-header.mac  |    25 +
 src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit.h |   558 +-
 .../ValidationKit/bootsectors/bs3kit/bs3kit.mac    |    46 +
 src/VBox/ValidationKit/common/utils.py             |     9 +-
 src/VBox/ValidationKit/common/webutils.py          |     6 +-
 src/VBox/ValidationKit/utils/cpu/cidet-core.cpp    |    36 +-
 276 files changed, 37732 insertions(+), 27855 deletions(-)

diff --git a/Config.kmk b/Config.kmk
index 3fcf0de..3ff6865 100644
--- a/Config.kmk
+++ b/Config.kmk
@@ -208,7 +208,7 @@ VBOX_VERSION_MINOR = 1
 # This is the current build number. It should be increased every time we publish a
 # new build. The define is available in every source file. Only even build numbers
 # will be published, odd numbers are set during development.
-VBOX_VERSION_BUILD = 20
+VBOX_VERSION_BUILD = 22
 # The raw version string. This *must not* contain any other information/fields than
 # major, minor and build revision (as it is now) -- also will be used for host/guest version
 # comparison.
@@ -2781,9 +2781,11 @@ VBOX_GCC_NON_PEDANTIC_C   ?= -Wno-sign-compare -Werror-implicit-function-declara
 # Want to enable these:
 #      -wd4242: '=' : conversion from 'uint32_t' to 'uint8_t', possible loss of data
 #      -wd4244: 'conversion' conversion from 'type1' to 'type2', possible loss of data
+#      -wd4200: nonstandard extension used : zero-sized array in struct/union
+#               Cannot generate copy-ctor or copy-assignment operator when UDT contains a zero-sized array
 VBOX_VCC_WARN_ALL ?= -W4 -Wall -wd4065 -wd4996 -wd4127 -wd4706 -wd4201 -wd4214 -wd4510 -wd4512 -wd4610 \
 	-wd4514 -wd4820 -wd4365 -wd4987 -wd4710 -wd4061 -wd4986 -wd4191 -wd4574 -wd4917 -wd4711 -wd4611 -wd4571 -wd4324 -wd4505 \
-	-wd4263 -wd4264 -wd4738
+	-wd4263 -wd4264 -wd4738 -wd4200
 #ifndef VBOX_WITH_MASOCHISTIC_WARNINGS
 # VBOX_VCC_WARN_ALL +=
 #endif
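
For context, warning C4200 is MSVC's complaint about the zero-sized
trailing array idiom used for variable-length payloads. A minimal sketch
of the kind of struct that triggers it; the names are illustrative, not
from the VBox sources:

    #include <stdint.h>

    /* A variable-length payload ends the struct with a zero-sized array.
       MSVC flags this nonstandard extension as C4200 and, in C++, cannot
       generate a copy-ctor/copy-assignment operator for such a UDT. */
    typedef struct PACKET
    {
        uint32_t cbPayload;     /* number of valid bytes in abPayload */
        uint8_t  abPayload[0];  /* zero-sized array: the C4200 trigger */
    } PACKET;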
@@ -3097,15 +3099,14 @@ ifdef VBOX_SIGNING_MODE
    VBOX_CERTIFICATE_SUBJECT_NAME_ARGS ?= /n "$(VBOX_CERTIFICATE_SUBJECT_NAME)" /a
    #VBOX_CERTIFICATE_FINGERPRINT ?= 7e 92 b6 6b e5 1b 79 d8 ce 3f f2 5c 15 c2 df 6a b8 c7 f2 f2
    VBOX_CERTIFICATE_FINGERPRINT ?= 5b de fe 58 0a 81 66 61 cd b5 7a 57 10 7b f4 18 74 86 ef cc
-   # TEMPORARY HACK!
-   if defined(VBOX_ONLY_VALIDATIONKIT)
-    VBOX_CROSS_CERTIFICATE_FILE ?= $(VBOX_PATH_SELFSIGN)/MSCV-VSClass3.cer
-   else
-    VBOX_CROSS_CERTIFICATE_FILE ?= $(VBOX_PATH_SELFSIGN)/VeriSign Class 3 Public Primary Certification Authority - G5.cer
-   endif
-   VBOX_CROSS_CERTIFICATE_FILE_ARGS ?= /ac "$(VBOX_CROSS_CERTIFICATE_FILE)"
+   VBOX_CROSS_CERTIFICATE_FILE ?= $(VBOX_PATH_SELFSIGN)/VeriSign Class 3 Public Primary Certification Authority - G5.cer
    VBOX_TSA_URL ?= http://timestamp.verisign.com/scripts/timestamp.dll
-   VBOX_TSA_URL_ARGS ?= /t "$(VBOX_TSA_URL)"
+  endif
+  if !defined(VBOX_CROSS_CERTIFICATE_FILE_ARGS) && defined(VBOX_CROSS_CERTIFICATE_FILE)
+   VBOX_CROSS_CERTIFICATE_FILE_ARGS = /ac "$(VBOX_CROSS_CERTIFICATE_FILE)"
+  endif
+  if !defined(VBOX_TSA_URL_ARGS) && defined(VBOX_TSA_URL)
+   VBOX_TSA_URL_ARGS = /t "$(VBOX_TSA_URL)"
   endif
   VBOX_CERTIFICATE_STORE ?= my
   VBOX_CERTIFICATE_STORE_ARGS ?= /s "$(VBOX_CERTIFICATE_STORE)"
@@ -3116,16 +3117,18 @@ ifdef VBOX_SIGNING_MODE
    #VBOX_CERTIFICATE_SHA2_SUBJECT_NAME ?= MyTestCertificateSha2
    VBOX_CERTIFICATE_SHA2_SUBJECT_NAME_ARGS ?= /n "$(VBOX_CERTIFICATE_SHA2_SUBJECT_NAME)"
   else # release
-   if !defined(VBOX_ONLY_VALIDATIONKIT) # TEMPORARY HACK for the validation kitty! The build box is too old.
-    VBOX_CERTIFICATE_SHA2_SUBJECT_NAME ?= Oracle Corporation
-   endif
+   VBOX_CERTIFICATE_SHA2_SUBJECT_NAME ?= Oracle Corporation
    VBOX_CERTIFICATE_SHA2_SUBJECT_NAME_ARGS ?= /n "$(VBOX_CERTIFICATE_SHA2_SUBJECT_NAME)" /a
    #VBOX_CERTIFICATE_SHA2_FINGERPRINT ?= 31 31 bb 58 8b 19 9e 6e 85 0f d3 35 82 b0 c5 82 55 e1 6c 49
    VBOX_CERTIFICATE_SHA2_FINGERPRINT ?= 22 05 6a 4d 46 2e 3d 2b b2 c3 2f bf b0 5b 84 c4 65 9c f7 fe
    VBOX_CROSS_CERTIFICATE_SHA2_FILE ?= $(VBOX_PATH_SELFSIGN)/VeriSign Class 3 Public Primary Certification Authority - G5.cer
-   VBOX_CROSS_CERTIFICATE_SHA2_FILE_ARGS ?= /ac "$(VBOX_CROSS_CERTIFICATE_SHA2_FILE)"
    VBOX_TSA_SHA2_URL ?= http://sha256timestamp.ws.symantec.com/sha256/timestamp
-   VBOX_TSA_SHA2_URL_ARGS ?= /tr "$(VBOX_TSA_SHA2_URL)"
+  endif
+  if !defined(VBOX_CROSS_CERTIFICATE_SHA2_FILE_ARGS) && defined(VBOX_CROSS_CERTIFICATE_SHA2_FILE)
+   VBOX_CROSS_CERTIFICATE_SHA2_FILE_ARGS = /ac "$(VBOX_CROSS_CERTIFICATE_SHA2_FILE)"
+  endif
+  if !defined(VBOX_TSA_SHA2_URL_ARGS) && defined(VBOX_TSA_SHA2_URL)
+   VBOX_TSA_SHA2_URL_ARGS = /tr "$(VBOX_TSA_SHA2_URL)"
   endif
   VBOX_CERTIFICATE_SHA2_STORE ?= my
   VBOX_CERTIFICATE_SHA2_STORE_ARGS ?= /s "$(VBOX_CERTIFICATE_SHA2_STORE)"
@@ -6854,7 +6857,7 @@ endif
 SVN                    ?= svn$(HOSTSUFF_EXE)
 VBOX_SVN_REV_KMK        = $(PATH_OUT)/revision.kmk
 ifndef VBOX_SVN_REV
- VBOX_SVN_REV_FALLBACK := $(patsubst %:,,  $Rev: 114628 $  )
+ VBOX_SVN_REV_FALLBACK := $(patsubst %:,,  $Rev: 115126 $  )
  VBOX_SVN_DEP          := $(firstword $(wildcard $(PATH_ROOT)/.svn/wc.db $(abspath $(PATH_ROOT)/../.svn/wc.db) $(abspath $(PATH_ROOT)/../../.svn/wc.db) $(PATH_ROOT)/.svn/entries))
  ifeq ($(which $(SVN)),)
   VBOX_SVN_DEP         :=
@@ -7041,4 +7044,3 @@ st stat status:
 quick:
 	$(MAKE) VBOX_QUICK=1
 
-
diff --git a/Doxyfile.Core b/Doxyfile.Core
index 3039af2..928a538 100644
--- a/Doxyfile.Core
+++ b/Doxyfile.Core
@@ -559,7 +559,12 @@ PREDEFINED += \
     VMMRC_INT_DECL(type)=type \
     VMMRZ_INT_DECL(type)=type \
     GMMR0DECL(type)=type \
-    GMMR3DECL(type)=type 
+    GMMR3DECL(type)=type \
+    VBOX_DND_FN_DECL_LOG(x)=x \
+    DECLSPEC_HIDDEN= \
+    VMSVGA3DCOCOA_DECL(type)=type \
+    VBOX_LISTENER_DECLARE(a)= \
+    VBOX_WITH_RAW_MODE_NOT_R0
 
 # BS3Kit
 PREDEFINED += \
@@ -570,6 +575,7 @@ PREDEFINED += \
     BS3_DECL_NEAR(a_Type)=a_Type \
     BS3_DECL_FAR(a_Type)=a_Type \
     BS3_DECL_CALLBACK(a_Type)=a_Type \
+    BS3_DECL_NEAR_CALLBACK(a_Type)=a_Type \
     BS3_CMN_NM(a_Name)=a_Name \
     BS3_CMN_FAR_NM(a_Name)=a_Name \
     BS3_CMN_FN_NM(a_Name)=a_Name \
@@ -578,6 +584,7 @@ PREDEFINED += \
     TMPL_FAR_NM(a_Name)=a_Name##_mmm \
     "BS3_CMN_PROTO_STUB(a_RetType,a_Name,a_Params)=a_RetType a_Name a_Params" \
     "BS3_CMN_PROTO_NOSB(a_RetType,a_Name,a_Params)=a_RetType a_Name a_Params" \
+    "BS3_CMN_PROTO_FARSTUB(a_cbParam16, a_RetType, a_Name, a_Params)=a_RetType a_Name a_Params" \
     "BS3_CMN_DEF(a_RetType,a_Name,a_Params)=a_RetType a_Name a_Params" \
     "BS3_MODE_PROTO_STUB(a_RetType,a_Name,a_Params)=a_RetType a_Name##_mmm a_Params" \
     "BS3_MODE_PROTO_NOSB(a_RetType,a_Name,a_Params)=a_RetType a_Name##_mmm a_Params" \
diff --git a/doc/manual/en_US/user_Installation.xml b/doc/manual/en_US/user_Installation.xml
index bb708ef..a2784d9 100644
--- a/doc/manual/en_US/user_Installation.xml
+++ b/doc/manual/en_US/user_Installation.xml
@@ -331,7 +331,7 @@
 
       <itemizedlist>
         <listitem>
-          <para>Qt 4.8.0 or higher;</para>
+          <para>Qt 5.3.2 or higher (Qt 5.6.2 or higher recommended);</para>
         </listitem>
 
         <listitem>
diff --git a/doc/manual/en_US/user_Introduction.xml b/doc/manual/en_US/user_Introduction.xml
index 810fd60..88e5f83 100644
--- a/doc/manual/en_US/user_Introduction.xml
+++ b/doc/manual/en_US/user_Introduction.xml
@@ -465,6 +465,10 @@
             <para>10.11 (El Capitan)</para>
           </listitem>
 
+          <listitem>
+            <para>10.12 (Sierra)</para>
+          </listitem>
+
         </itemizedlist>
 
         <para>Intel hardware is required; please see <xref
diff --git a/doc/manual/user_ChangeLogImpl.xml b/doc/manual/user_ChangeLogImpl.xml
index 10628bd..d56b941 100644
--- a/doc/manual/user_ChangeLogImpl.xml
+++ b/doc/manual/user_ChangeLogImpl.xml
@@ -3,6 +3,50 @@
 <chapter> <!-- HACK ALERT! Seems we must have a single top level element for xi:include to work.
                            So, we use chapter and xpointer="xpointer(/chapter/)" with xi:include. -->
   <sect1>
+    <title>Version 5.1.22 (2017-04-28)</title>
+
+    <para>This is a maintenance release. The following items were fixed and/or
+      added:</para>
+
+    <itemizedlist>
+
+      <listitem>
+        <para>VMM: fixed <emphasis>VERR_IEM_INSTR_NOT_IMPLEMENTED</emphasis>
+          Guru Meditation under certain conditions (5.1 regression;
+          mostly Mac OS X hosts; bugs #15693, #15801, #16274, #16569, #16663)</para>
+      </listitem>
+
+      <listitem>
+        <para>VMM: fixed software virtualization on Solaris hosts
+          (5.1.20 regression)</para>
+      </listitem>
+
+      <listitem>
+        <para>Storage: fixed a potential hang under rare circumstances
+          (bug #16677)</para>
+      </listitem>
+
+      <listitem>
+        <para>Storage: fixed a potential crash under rare circumstances
+          (asynchronous I/O disabled or during maintenance file operations
+          like merging snapshots)</para>
+      </listitem>
+
+      <listitem>
+        <para>Linux hosts: make the ALSA backend work again as well as
+          loading the GL libraries on certain hosts (5.1.20 regression;
+          bugs #16667, #16693)</para>
+      </listitem>
+
+      <listitem>
+        <para>Linux Additions: fixed mount.vboxsf symlink problem (5.1.20
+          regression; bug #16670)</para>
+      </listitem>
+
+    </itemizedlist>
+  </sect1>
+
+  <sect1>
     <title>Version 5.1.20 (2017-04-18)</title>
 
     <para>This is a maintenance release. The following items were fixed and/or
@@ -54,7 +98,7 @@
   </sect1>
 
   <sect1>
-    <title>Version 5.1.18 (2017-08-15)</title>
+    <title>Version 5.1.18 (2017-03-15)</title>
 
     <para>This is a maintenance release. The following items were fixed and/or
       added:</para>
@@ -77,8 +121,8 @@
       </listitem>
 
       <listitem>
-        <para>API: make 32-bit Windows guests in raw mode boot again if the
-          ICH9 chipset is used (5.1.16 regression)</para>
+        <para>API: make 32-bit Windows guests boot again with software
+          virtualization if the ICH9 chipset is used (5.1.16 regression)</para>
       </listitem>
 
       <listitem>
@@ -3589,8 +3633,9 @@
 
       <listitem>
         <para>Storage: fixed a Guru Meditation under certain conditions when
-          using the DevLsiLogic controller with VMs running in raw mode
-          (4.3 regression; bugs #12254, #12655, #12709, #12774, #12886)</para>
+          using the DevLsiLogic controller with VMs running in software
+          virtualization mode (4.3 regression; bugs #12254, #12655, #12709,
+          #12774, #12886)</para>
       </listitem>
 
       <listitem>
diff --git a/include/VBox/cdefs.h b/include/VBox/cdefs.h
index fe34760..36a4a72 100644
--- a/include/VBox/cdefs.h
+++ b/include/VBox/cdefs.h
@@ -97,7 +97,11 @@
  * @param   type    The return type of the function declaration.
  */
 #if defined(IN_DIS)
-# define DISDECL(type)      DECLEXPORT(type) VBOXCALL
+# ifdef IN_DIS_STATIC
+#  define DISDECL(type)     DECLHIDDEN(type) VBOXCALL
+# else
+#  define DISDECL(type)     DECLEXPORT(type) VBOXCALL
+# endif
 #else
 # define DISDECL(type)      DECLIMPORT(type) VBOXCALL
 #endif
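
The DISDECL change above is the usual export/import/hidden decoration
pattern: hidden when the disassembler is linked in statically, exported
when building it as a shared library, imported by consumers. A hedged
sketch of the general pattern with generic compiler attributes; the
MYLIB* names are hypothetical, and VBox's actual
DECLEXPORT/DECLIMPORT/DECLHIDDEN definitions may differ in detail:

    #if defined(_MSC_VER)
    # define MYLIB_EXPORT  __declspec(dllexport)
    # define MYLIB_IMPORT  __declspec(dllimport)
    # define MYLIB_HIDDEN  /* no MSVC equivalent required */
    #else
    # define MYLIB_EXPORT  __attribute__((visibility("default")))
    # define MYLIB_IMPORT
    # define MYLIB_HIDDEN  __attribute__((visibility("hidden")))
    #endif

    /* In-library static build: keep symbols local.  In-library shared
       build: export.  Consumers of the library: import. */
    #if defined(IN_MYLIB) && defined(IN_MYLIB_STATIC)
    # define MYLIBDECL(type) MYLIB_HIDDEN type
    #elif defined(IN_MYLIB)
    # define MYLIBDECL(type) MYLIB_EXPORT type
    #else
    # define MYLIBDECL(type) MYLIB_IMPORT type
    #endif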
diff --git a/include/VBox/disopcode.h b/include/VBox/disopcode.h
index 46c3a45..3a8ce66 100644
--- a/include/VBox/disopcode.h
+++ b/include/VBox/disopcode.h
@@ -179,6 +179,7 @@ enum OPCODES
     OP_3DNOW,
     OP_MOVUPS,
     OP_MOVLPS,
+    OP_MOVHLPS = OP_MOVLPS, /**< @todo OP_MOVHLPS */
     OP_UNPCKLPS,
     OP_MOVHPS,
     OP_UNPCKHPS,
@@ -688,6 +689,7 @@ enum OPCODES
     OP_PFSWAPD,
     OP_PAVGUSB,
     OP_PFNACC,
+/** @}  */
     OP_ROL,
     OP_ROR,
     OP_RCL,
@@ -721,6 +723,8 @@ enum OPCODES
     OP_FXRSTOR,
     OP_LDMXCSR,
     OP_STMXCSR,
+    OP_XSAVE,
+    OP_XRSTOR,
     OP_LFENCE,
     OP_MFENCE,
     OP_SFENCE,
@@ -728,12 +732,19 @@ enum OPCODES
     OP_MONITOR,
     OP_MWAIT,
     OP_CLFLUSH,
+    OP_CLFLUSHOPT,
     OP_MOV_DR,
     OP_MOV_TR,
     OP_SWAPGS,
-/** @}  */
+    OP_UD1,
+    OP_UD2,
+/** @name AVX instructions
+ * @{ */
+    OP_VLDMXCSR,
+    OP_VSTMXCSR,
+/** @} */
 /** @name VT-x instructions
-* @{ */
+ * @{ */
     OP_VMREAD,
     OP_VMWRITE,
     OP_VMCALL,
@@ -879,6 +890,28 @@ enum OP_PARM
     OP_PARM_REG_GEN64_END = OP_PARM_REG_R15
 };
 
+
+/* 8-bit GPR aliases (for IEM). */
+#define OP_PARM_AL OP_PARM_REG_AL
+
+/* GPR aliases for op-size specified register sizes (for IEM). */
+#define OP_PARM_rAX OP_PARM_REG_EAX
+#define OP_PARM_rCX OP_PARM_REG_ECX
+#define OP_PARM_rDX OP_PARM_REG_EDX
+#define OP_PARM_rBX OP_PARM_REG_EBX
+#define OP_PARM_rSP OP_PARM_REG_ESP
+#define OP_PARM_rBP OP_PARM_REG_EBP
+#define OP_PARM_rSI OP_PARM_REG_ESI
+#define OP_PARM_rDI OP_PARM_REG_EDI
+
+/* SREG aliases (for IEM). */
+#define OP_PARM_ES  OP_PARM_REG_ES
+#define OP_PARM_CS  OP_PARM_REG_CS
+#define OP_PARM_SS  OP_PARM_REG_SS
+#define OP_PARM_DS  OP_PARM_REG_DS
+#define OP_PARM_FS  OP_PARM_REG_FS
+#define OP_PARM_GS  OP_PARM_REG_GS
+
 /*
 * Note! We don't document anything here if we can help it, because we love
 *       wasting other people's time figuring out cryptic crap.  The new VEX
@@ -1028,6 +1061,17 @@ enum OP_PARM
 #define OP_PARM_Udq             (OP_PARM_U+OP_PARM_dq)
 #define OP_PARM_Lx              (OP_PARM_L+OP_PARM_x)
 
+/* For making IEM / bs3-cpu-generated-1 happy: */
+#define OP_PARM_Uq              (OP_PARM_U+OP_PARM_q)
+#define OP_PARM_UqHi            OP_PARM_Uq
+#define OP_PARM_WqZxReg         OP_PARM_Wq              /**< Annotates that register targets get their upper bits cleared. */
+#define OP_PARM_VssZxReg        OP_PARM_Vss             /**< Annotates that register targets get their upper bits cleared. */
+#define OP_PARM_MbRO            OP_PARM_Mb              /**< Annotates read only memory byte operand. */
+#define OP_PARM_MdRO            OP_PARM_Md              /**< Annotates read only memory dword operand. */
+#define OP_PARM_MdWO            OP_PARM_Md              /**< Annotates write only memory dword operand. */
+#define OP_PARM_MRO             OP_PARM_M               /**< Annotates read only memory of variable operand size (xrstor). */
+#define OP_PARM_MRW             OP_PARM_M               /**< Annotates read-write memory of variable operand size (xsave). */
+
 /** @} */
 
 #endif
diff --git a/include/VBox/err.h b/include/VBox/err.h
index 618c944..48e86c4 100644
--- a/include/VBox/err.h
+++ b/include/VBox/err.h
@@ -2132,6 +2132,10 @@
 #define VERR_SVM_IPE_4                              (-4064)
 /** Internal SVM processing error no 5. */
 #define VERR_SVM_IPE_5                              (-4065)
+/** The nested-guest \#VMEXIT processing failed, initiate shutdown. */
+#define VERR_SVM_VMEXIT_FAILED                      (-4066)
+/** An operation caused a nested-guest SVM \#VMEXIT. */
+#define VINF_SVM_VMEXIT                              4067
 /** @} */
 
 
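
The new codes follow the VBox status code convention visible throughout
err.h: failures (VERR_*) are negative, informational successes (VINF_*)
are positive, and zero is plain success, so a sign test distinguishes
them. A minimal sketch of that convention; real code should use IPRT's
RT_SUCCESS/RT_FAILURE macros:

    /* rc >= 0 is the success path, e.g. VINF_SVM_VMEXIT (4067) signalling
       that an operation caused a nested-guest #VMEXIT; rc < 0 is failure,
       e.g. VERR_SVM_VMEXIT_FAILED (-4066). */
    static inline int mySuccess(int rc) { return rc >= 0; }
    static inline int myFailure(int rc) { return rc < 0;  }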
@@ -2174,6 +2178,9 @@
 #define VERR_HM_INVALID_HM64ON32OP                  (-4116)
 /** Resume guest execution after injecting a double-fault. */
 #define VINF_HM_DOUBLE_FAULT                        4117
+/** The requested nested-guest VM-exit intercept is not active, or the guest
+ *  is not in nested-guest execution mode. */
+#define VINF_HM_INTERCEPT_NOT_ACTIVE                4118
 /** @} */
 
 
diff --git a/include/VBox/err.mac b/include/VBox/err.mac
index 20f60f4..12dbee0 100644
--- a/include/VBox/err.mac
+++ b/include/VBox/err.mac
@@ -717,6 +717,9 @@
 %define VERR_SUPLIB_NT_PROCESS_UNTRUSTED_3    (-3773)
 %define VERR_SUPLIB_NT_PROCESS_UNTRUSTED_4    (-3774)
 %define VERR_SUPLIB_NT_PROCESS_UNTRUSTED_5    (-3775)
+%define VERR_SUPLIB_TEXT_NOT_WRITEABLE    (-3776)
+%define VERR_SUPLIB_TEXT_NOT_SEALED    (-3777)
+%define VERR_SUPLIB_UNEXPECTED_INSTRUCTION    (-3778)
 %define VERR_GMM_SEED_ME    (-3800)
 %define VERR_GMM_OUT_OF_MEMORY    (-3801)
 %define VERR_GMM_HIT_GLOBAL_LIMIT    (-3802)
@@ -804,6 +807,8 @@
 %define VERR_SVM_IPE_3    (-4063)
 %define VERR_SVM_IPE_4    (-4064)
 %define VERR_SVM_IPE_5    (-4065)
+%define VERR_SVM_VMEXIT_FAILED    (-4066)
+%define VINF_SVM_VMEXIT    4067
 %define VERR_HM_UNKNOWN_CPU    (-4100)
 %define VERR_HM_NO_CPUID    (-4101)
 %define VERR_HM_SUSPEND_PENDING    (-4102)
@@ -822,6 +827,7 @@
 %define VERR_HM_IPE_5    (-4115)
 %define VERR_HM_INVALID_HM64ON32OP    (-4116)
 %define VINF_HM_DOUBLE_FAULT    4117
+%define VINF_HM_INTERCEPT_NOT_ACTIVE    4118
 %define VERR_DIS_INVALID_OPCODE    (-4200)
 %define VERR_DIS_GEN_FAILURE    (-4201)
 %define VERR_DIS_NO_READ_CALLBACK    (-4202)
@@ -997,6 +1003,7 @@
 %define VERR_SUP_VP_REPLACE_VIRTUAL_MEMORY_FAILED    (-5673)
 %define VERR_SUP_VP_FILE_MODE_ERROR    (-5674)
 %define VERR_SUP_VP_CREATE_READ_EVT_SEM_FAILED    (-5675)
+%define VERR_SUP_VP_UNDESIRABLE_MODULE    (-5676)
 %define VERR_EXTPACK_UNSUPPORTED_HOST_UNINSTALL    (-6000)
 %define VERR_EXTPACK_VBOX_VERSION_MISMATCH    (-6001)
 %define VERR_GSTCTL_GUEST_ERROR    (-6200)
@@ -1029,6 +1036,7 @@
 %define VERR_AUDIO_NO_FREE_INPUT_STREAMS    (-6601)
 %define VERR_AUDIO_NO_FREE_OUTPUT_STREAMS    (-6602)
 %define VERR_AUDIO_STREAM_PENDING_DISABLE    (-6603)
+%define VINF_AUDIO_MORE_DATA_AVAILABLE    (6604)
 %define VERR_APIC_INTR_NOT_PENDING    (-6700)
 %define VERR_APIC_INTR_MASKED_BY_TPR    (-6701)
 %define VERR_APIC_INTR_DISCARDED    (-6702)
diff --git a/include/VBox/vmm/cpum.h b/include/VBox/vmm/cpum.h
index 2892dd9..9e67597 100644
--- a/include/VBox/vmm/cpum.h
+++ b/include/VBox/vmm/cpum.h
@@ -1002,6 +1002,10 @@ typedef struct CPUMFEATURES
     uint32_t        fMWaitExtensions : 1;
     /** Supports CMPXCHG16B in 64-bit mode. */
     uint32_t        fMovCmpXchg16b : 1;
+    /** Supports CLFLUSH. */
+    uint32_t        fClFlush : 1;
+    /** Supports CLFLUSHOPT. */
+    uint32_t        fClFlushOpt : 1;
 
     /** Supports AMD 3DNow instructions. */
     uint32_t        f3DNow : 1;
@@ -1020,15 +1024,52 @@ typedef struct CPUMFEATURES
     uint32_t        fRdTscP : 1;
     /** AMD64: Supports MOV CR8 in 32-bit code (lock prefix hack). */
     uint32_t        fMovCr8In32Bit : 1;
+    /** AMD64: Supports XOP (similar to VEX3/AVX). */
+    uint32_t        fXop : 1;
 
     /** Indicates that FPU instruction and data pointers may leak.
      * This generally applies to recent AMD CPUs, where the FPU IP and DP pointer
      * is only saved and restored if an exception is pending. */
     uint32_t        fLeakyFxSR : 1;
 
+    /** AMD64: Supports AMD SVM. */
+    uint32_t        fSvm : 1;
+
+    /** Support for Intel VMX. */
+    uint32_t        fVmx : 1;
+
     /** Alignment padding / reserved for future use. */
-    uint32_t        fPadding : 28;
-    uint32_t        auPadding[3];
+    uint32_t        fPadding : 23;
+
+    /** SVM: Supports Nested-paging. */
+    uint32_t        fSvmNestedPaging : 1;
+    /** SVM: Support LBR (Last Branch Record) virtualization. */
+    uint32_t        fSvmLbrVirt : 1;
+    /** SVM: Supports SVM lock. */
+    uint32_t        fSvmSvmLock : 1;
+    /** SVM: Supports Next RIP save. */
+    uint32_t        fSvmNextRipSave : 1;
+    /** SVM: Supports TSC rate MSR. */
+    uint32_t        fSvmTscRateMsr : 1;
+    /** SVM: Supports VMCB clean bits. */
+    uint32_t        fSvmVmcbClean : 1;
+    /** SVM: Supports Flush-by-ASID. */
+    uint32_t        fSvmFlusbByAsid : 1;
+    /** SVM: Supports decode assist. */
+    uint32_t        fSvmDecodeAssist : 1;
+    /** SVM: Supports Pause filter. */
+    uint32_t        fSvmPauseFilter : 1;
+    /** SVM: Supports Pause filter threshold. */
+    uint32_t        fSvmPauseFilterThreshold : 1;
+    /** SVM: Supports AVIC (Advanced Virtual Interrupt Controller). */
+    uint32_t        fSvmAvic : 1;
+    /** SVM: Padding / reserved for future features. */
+    uint32_t        fSvmPadding0 : 21;
+    /** SVM: Maximum supported ASID. */
+    uint32_t        uSvmMaxAsid;
+
+    /** @todo VMX features. */
+    uint32_t        auPadding[1];
 } CPUMFEATURES;
 #ifndef VBOX_FOR_DTRACE_LIB
 AssertCompileSize(CPUMFEATURES, 32);
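
The CPUMFEATURES additions above use the bit-field-plus-padding pattern:
each new 1-bit flag is carved out of an explicit padding field so the
struct size never changes, and AssertCompileSize() enforces that at build
time. A generic sketch of the same pattern; MYFEATURES is hypothetical and
C11 static_assert stands in for the VBox AssertCompileSize macro:

    #include <assert.h>
    #include <stdint.h>

    typedef struct MYFEATURES
    {
        uint32_t fFeatureA : 1;
        uint32_t fFeatureB : 1;
        uint32_t fPadding  : 30;  /* shrink by one bit per flag added */
        uint32_t auPadding[7];    /* reserved; keeps the total at 32 bytes */
    } MYFEATURES;
    static_assert(sizeof(MYFEATURES) == 32, "MYFEATURES must stay 32 bytes");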
@@ -1071,6 +1112,8 @@ VMMDECL(RTSEL)      CPUMGetGuestES(PVMCPU pVCpu);
 VMMDECL(RTSEL)      CPUMGetGuestFS(PVMCPU pVCpu);
 VMMDECL(RTSEL)      CPUMGetGuestGS(PVMCPU pVCpu);
 VMMDECL(RTSEL)      CPUMGetGuestSS(PVMCPU pVCpu);
+VMMDECL(uint64_t)   CPUMGetGuestFlatPC(PVMCPU pVCpu);
+VMMDECL(uint64_t)   CPUMGetGuestFlatSP(PVMCPU pVCpu);
 VMMDECL(uint64_t)   CPUMGetGuestDR0(PVMCPU pVCpu);
 VMMDECL(uint64_t)   CPUMGetGuestDR1(PVMCPU pVCpu);
 VMMDECL(uint64_t)   CPUMGetGuestDR2(PVMCPU pVCpu);
@@ -1158,7 +1201,7 @@ VMM_INT_DECL(bool)  CPUMIsGuestInRawMode(PVMCPU pVCpu);
  * Tests if the guest is running in real mode or not.
  *
  * @returns true if in real mode, otherwise false.
- * @param   pCtx    Current CPU context
+ * @param   pCtx    Current CPU context.
  */
 DECLINLINE(bool) CPUMIsGuestInRealModeEx(PCPUMCTX pCtx)
 {
@@ -1169,7 +1212,7 @@ DECLINLINE(bool) CPUMIsGuestInRealModeEx(PCPUMCTX pCtx)
  * Tests if the guest is running in real or virtual 8086 mode.
  *
  * @returns @c true if it is, @c false if not.
- * @param   pCtx    Current CPU context
+ * @param   pCtx    Current CPU context.
  */
 DECLINLINE(bool) CPUMIsGuestInRealOrV86ModeEx(PCPUMCTX pCtx)
 {
@@ -1181,7 +1224,7 @@ DECLINLINE(bool) CPUMIsGuestInRealOrV86ModeEx(PCPUMCTX pCtx)
  * Tests if the guest is running in virtual 8086 mode.
  *
  * @returns @c true if it is, @c false if not.
- * @param   pCtx    Current CPU context
+ * @param   pCtx    Current CPU context.
  */
 DECLINLINE(bool) CPUMIsGuestInV86ModeEx(PCPUMCTX pCtx)
 {
@@ -1192,7 +1235,7 @@ DECLINLINE(bool) CPUMIsGuestInV86ModeEx(PCPUMCTX pCtx)
  * Tests if the guest is running in paged protected or not.
  *
  * @returns true if in paged protected mode, otherwise false.
- * @param   pCtx    Current CPU context
+ * @param   pCtx    Current CPU context.
  */
 DECLINLINE(bool) CPUMIsGuestInPagedProtectedModeEx(PCPUMCTX pCtx)
 {
@@ -1203,7 +1246,7 @@ DECLINLINE(bool) CPUMIsGuestInPagedProtectedModeEx(PCPUMCTX pCtx)
  * Tests if the guest is running in long mode or not.
  *
  * @returns true if in long mode, otherwise false.
- * @param   pCtx    Current CPU context
+ * @param   pCtx    Current CPU context.
  */
 DECLINLINE(bool) CPUMIsGuestInLongModeEx(PCPUMCTX pCtx)
 {
@@ -1216,7 +1259,7 @@ VMM_INT_DECL(bool) CPUMIsGuestIn64BitCodeSlow(PCPUMCTX pCtx);
  * Tests if the guest is running in 64 bits mode or not.
  *
  * @returns true if in 64 bits protected mode, otherwise false.
- * @param   pCtx    Current CPU context
+ * @param   pCtx    Current CPU context.
  */
 DECLINLINE(bool) CPUMIsGuestIn64BitCodeEx(PCPUMCTX pCtx)
 {
@@ -1231,7 +1274,7 @@ DECLINLINE(bool) CPUMIsGuestIn64BitCodeEx(PCPUMCTX pCtx)
  * Tests if the guest has paging enabled or not.
  *
  * @returns true if paging is enabled, otherwise false.
- * @param   pCtx    Current CPU context
+ * @param   pCtx    Current CPU context.
  */
 DECLINLINE(bool) CPUMIsGuestPagingEnabledEx(PCPUMCTX pCtx)
 {
@@ -1242,7 +1285,7 @@ DECLINLINE(bool) CPUMIsGuestPagingEnabledEx(PCPUMCTX pCtx)
  * Tests if the guest is running in PAE mode or not.
  *
  * @returns true if in PAE mode, otherwise false.
- * @param   pCtx    Current CPU context
+ * @param   pCtx    Current CPU context.
  */
 DECLINLINE(bool) CPUMIsGuestInPAEModeEx(PCPUMCTX pCtx)
 {
@@ -1253,6 +1296,115 @@ DECLINLINE(bool) CPUMIsGuestInPAEModeEx(PCPUMCTX pCtx)
             && !(pCtx->msrEFER & MSR_K6_EFER_LMA));
 }
 
+# if 0 /* part of a too-large nested virtualization commit */
+
+/**
+ * Tests if the guest has AMD SVM enabled or not.
+ *
+ * @returns true if SVM is enabled, otherwise false.
+ * @param   pCtx    Current CPU context.
+ */
+DECLINLINE(bool) CPUMIsGuestSvmEnabled(PCPUMCTX pCtx)
+{
+    return RT_BOOL(pCtx->msrEFER & MSR_K6_EFER_SVME);
+}
+
+/**
+ * Checks if the guest VMCB has the specified ctrl/instruction intercept active.
+ *
+ * @returns @c true if the intercept is set, @c false otherwise.
+ * @param   pCtx          Pointer to the context.
+ * @param   fIntercept    The SVM control/instruction intercept,
+ *                        see SVM_CTRL_INTERCEPT_*.
+ */
+DECLINLINE(bool) CPUMIsGuestSvmCtrlInterceptSet(PCPUMCTX pCtx, uint64_t fIntercept)
+{
+    return RT_BOOL(pCtx->hwvirt.svm.VmcbCtrl.u64InterceptCtrl & fIntercept);
+}
+
+/**
+ * Checks if the guest VMCB has the specified CR read intercept
+ * active.
+ *
+ * @returns @c true if the intercept is set, @c false otherwise.
+ * @param   pCtx          Pointer to the context.
+ * @param   uCr           The CR register number (0 to 15).
+ */
+DECLINLINE(bool) CPUMIsGuestSvmReadCRxInterceptSet(PCPUMCTX pCtx, uint8_t uCr)
+{
+    return RT_BOOL(pCtx->hwvirt.svm.VmcbCtrl.u16InterceptRdCRx & (1 << uCr));
+}
+
+/**
+ * Checks if the guest VMCB has the specified CR write intercept
+ * active.
+ *
+ * @returns @c true if the intercept is set, @c false otherwise.
+ * @param   pCtx          Pointer to the context.
+ * @param   uCr           The CR register number (0 to 15).
+ */
+DECLINLINE(bool) CPUMIsGuestSvmWriteCRxInterceptSet(PCPUMCTX pCtx, uint8_t uCr)
+{
+    return RT_BOOL(pCtx->hwvirt.svm.VmcbCtrl.u16InterceptWrCRx & (1 << uCr));
+}
+
+/**
+ * Checks if the guest VMCB has the specified DR read intercept
+ * active.
+ *
+ * @returns @c true if the intercept is set, @c false otherwise.
+ * @param   pCtx    Pointer to the context.
+ * @param   uDr     The DR register number (0 to 15).
+ */
+DECLINLINE(bool) CPUMIsGuestSvmReadDRxInterceptSet(PCPUMCTX pCtx, uint8_t uDr)
+{
+    return RT_BOOL(pCtx->hwvirt.svm.VmcbCtrl.u16InterceptRdDRx & (1 << uDr));
+}
+
+/**
+ * Checks if the guest VMCB has the specified DR write intercept
+ * active.
+ *
+ * @returns @c true if the intercept is set, @c false otherwise.
+ * @param   pCtx    Pointer to the context.
+ * @param   uDr     The DR register number (0 to 15).
+ */
+DECLINLINE(bool) CPUMIsGuestSvmWriteDRxInterceptSet(PCPUMCTX pCtx, uint8_t uDr)
+{
+    return RT_BOOL(pCtx->hwvirt.svm.VmcbCtrl.u16InterceptWrDRx & (1 << uDr));
+}
+
+/**
+ * Checks if the guest VMCB has the specified exception
+ * intercept active.
+ *
+ * @returns true if the intercept is active, false otherwise.
+ * @param   pCtx        Pointer to the context.
+ * @param   uVector     The exception / interrupt vector.
+ */
+DECLINLINE(bool) CPUMIsGuestSvmXcptInterceptSet(PCCPUMCTX pCtx, uint8_t uVector)
+{
+    Assert(uVector < 32);
+    return RT_BOOL(pCtx->hwvirt.svm.VmcbCtrl.u32InterceptXcpt & (UINT32_C(1) << uVector));
+}
+
+/**
+ * Checks if we are executing inside the nested hardware-virtualized guest.
+ *
+ * @returns true if in nested-guest mode, false otherwise.
+ * @param   pCtx        Pointer to the context.
+ */
+DECLINLINE(bool) CPUMIsGuestInNestedHwVirtMode(PCPUMCTX pCtx)
+{
+    /*
+     * With AMD-V, the VMRUN intercept is a pre-requisite to entering SVM guest-mode.
+     * See AMD spec. 15.5 "VMRUN instruction" subsection "Canonicalization and Consistency Checks".
+     */
+    return RT_BOOL(pCtx->hwvirt.svm.VmcbCtrl.u64InterceptCtrl & SVM_CTRL_INTERCEPT_VMRUN);
+    /** @todo Intel VMX.  */
+}
+
+# endif
 #endif /* VBOX_WITHOUT_UNNAMED_UNIONS */
 
 /** @} */
@@ -1387,6 +1539,7 @@ VMMDECL(uint32_t)       CPUMGetGuestCPL(PVMCPU pVCpu);
 VMMDECL(CPUMMODE)       CPUMGetGuestMode(PVMCPU pVCpu);
 VMMDECL(uint32_t)       CPUMGetGuestCodeBits(PVMCPU pVCpu);
 VMMDECL(DISCPUMODE)     CPUMGetGuestDisMode(PVMCPU pVCpu);
+VMMDECL(uint32_t)       CPUMGetGuestMxCsrMask(PVM pVM);
 VMMDECL(uint64_t)       CPUMGetGuestScalableBusFrequency(PVM pVM);
 
 /** @name Typical scalable bus frequency values.
@@ -1430,6 +1583,7 @@ VMMR3DECL(int)              CPUMR3CpuIdDetectUnknownLeafMethod(PCPUMUNKNOWNCPUID
 VMMR3DECL(const char *)     CPUMR3CpuIdUnknownLeafMethodName(CPUMUNKNOWNCPUID enmUnknownMethod);
 VMMR3DECL(CPUMCPUVENDOR)    CPUMR3CpuIdDetectVendorEx(uint32_t uEAX, uint32_t uEBX, uint32_t uECX, uint32_t uEDX);
 VMMR3DECL(const char *)     CPUMR3CpuVendorName(CPUMCPUVENDOR enmVendor);
+VMMR3DECL(uint32_t)         CPUMR3DeterminHostMxCsrMask(void);
 
 VMMR3DECL(int)              CPUMR3MsrRangesInsert(PVM pVM, PCCPUMMSRRANGE pNewRange);
 
diff --git a/include/VBox/vmm/dbgf.h b/include/VBox/vmm/dbgf.h
index dffd5bc..acc35ae 100644
--- a/include/VBox/vmm/dbgf.h
+++ b/include/VBox/vmm/dbgf.h
@@ -86,7 +86,7 @@ typedef const DBGFADDRESS *PCDBGFADDRESS;
 #define DBGFADDRESS_FLAGS_FLAT          3
 /** A physical address. */
 #define DBGFADDRESS_FLAGS_PHYS          4
-/** A physical address. */
+/** A ring-0 host address (internal use only). */
 #define DBGFADDRESS_FLAGS_RING0         5
 /** The address type mask. */
 #define DBGFADDRESS_FLAGS_TYPE_MASK     7
@@ -108,10 +108,14 @@ typedef const DBGFADDRESS *PCDBGFADDRESS;
 #define DBGFADDRESS_IS_FAR32(pAddress)   ( ((pAddress)->fFlags & DBGFADDRESS_FLAGS_TYPE_MASK) == DBGFADDRESS_FLAGS_FAR32 )
 /** Checks if the mixed address is far 16:64 or not. */
 #define DBGFADDRESS_IS_FAR64(pAddress)   ( ((pAddress)->fFlags & DBGFADDRESS_FLAGS_TYPE_MASK) == DBGFADDRESS_FLAGS_FAR64 )
+/** Checks if the mixed address is host context ring-0 (special). */
+#define DBGFADDRESS_IS_R0_HC(pAddress)   ( ((pAddress)->fFlags & DBGFADDRESS_FLAGS_TYPE_MASK) == DBGFADDRESS_FLAGS_RING0 )
+/** Checks if the mixed address is a virtual guest context address (incl. HMA). */
+#define DBGFADDRESS_IS_VIRT_GC(pAddress) ( ((pAddress)->fFlags & DBGFADDRESS_FLAGS_TYPE_MASK) <= DBGFADDRESS_FLAGS_FLAT )
 /** Checks if the mixed address is valid. */
-#define DBGFADDRESS_IS_VALID(pAddress)   ( !!((pAddress)->fFlags & DBGFADDRESS_FLAGS_VALID) )
+#define DBGFADDRESS_IS_VALID(pAddress)   RT_BOOL((pAddress)->fFlags & DBGFADDRESS_FLAGS_VALID)
 /** Checks if the address is flagged as within the HMA. */
-#define DBGFADDRESS_IS_HMA(pAddress)     ( !!((pAddress)->fFlags & DBGFADDRESS_FLAGS_HMA) )
+#define DBGFADDRESS_IS_HMA(pAddress)     RT_BOOL((pAddress)->fFlags & DBGFADDRESS_FLAGS_HMA)
 /** @} */
 
 VMMR3DECL(int)          DBGFR3AddrFromSelOff(PUVM pUVM, VMCPUID idCpu, PDBGFADDRESS pAddress, RTSEL Sel, RTUINTPTR off);
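
The RT_BOOL() substitutions above are cosmetic: both spellings normalize a
masked flag test to a strict 0/1 value. Roughly, hedging that the exact
IPRT definition may differ:

    /* Double negation collapses any non-zero mask result to 1: */
    #define MY_BOOL(expr)  ( !!(expr) )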
@@ -545,8 +549,40 @@ VMMR3DECL(int)          DBGFR3Halt(PUVM pUVM);
 VMMR3DECL(bool)         DBGFR3IsHalted(PUVM pUVM);
 VMMR3DECL(int)          DBGFR3QueryWaitable(PUVM pUVM);
 VMMR3DECL(int)          DBGFR3Resume(PUVM pUVM);
-VMMR3DECL(int)          DBGFR3Step(PUVM pUVM, VMCPUID idCpu);
 VMMR3DECL(int)          DBGFR3InjectNMI(PUVM pUVM, VMCPUID idCpu);
+VMMR3DECL(int)          DBGFR3Step(PUVM pUVM, VMCPUID idCpu);
+VMMR3DECL(int)          DBGFR3StepEx(PUVM pUVM, VMCPUID idCpu, uint32_t fFlags, PCDBGFADDRESS pStopPcAddr,
+                                     PCDBGFADDRESS pStopPopAddr, RTGCUINTPTR cbStopPop, uint32_t cMaxSteps);
+
+/** @name DBGF_STEP_F_XXX - Flags for DBGFR3StepEx.
+ *
+ * @note The stop filters are not applied to the starting instruction.
+ *
+ * @{ */
+/** Step into CALL, INT, SYSCALL and SYSENTER instructions. */
+#define DBGF_STEP_F_INTO                RT_BIT_32(0)
+/** Step over CALL, INT, SYSCALL and SYSENTER instructions when considering
+ *  what's "next". */
+#define DBGF_STEP_F_OVER                RT_BIT_32(1)
+
+/** Stop on the next CALL, INT, SYSCALL, SYSENTER instruction. */
+#define DBGF_STEP_F_STOP_ON_CALL        RT_BIT_32(8)
+/** Stop on the next RET, IRET, SYSRET, SYSEXIT instruction. */
+#define DBGF_STEP_F_STOP_ON_RET         RT_BIT_32(9)
+/** Stop after the next RET, IRET, SYSRET, SYSEXIT instruction. */
+#define DBGF_STEP_F_STOP_AFTER_RET      RT_BIT_32(10)
+/** Stop on the given address.
+ * The comparison will be made using effective (flat) addresses.  */
+#define DBGF_STEP_F_STOP_ON_ADDRESS     RT_BIT_32(11)
+/** Stop when the stack pointer pops to or past the given address.
+ * The comparison will be made using effective (flat) addresses.  */
+#define DBGF_STEP_F_STOP_ON_STACK_POP   RT_BIT_32(12)
+/** Mask of stop filter flags. */
+#define DBGF_STEP_F_STOP_FILTER_MASK    UINT32_C(0x00001f00)
+
+/** Mask of valid flags. */
+#define DBGF_STEP_F_VALID_MASK          UINT32_C(0x00001f03)
+/** @} */
 
 /**
  * Event configuration array element, see DBGFR3EventConfigEx.
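
Given the DBGFR3StepEx prototype and the DBGF_STEP_F_XXX flags above, a
hedged usage sketch follows; pUVM and idCpu are assumed to come from an
existing debugger session, and error handling is elided:

    /* Step over calls, stopping when the next RET-class instruction is
       about to execute, and give up after at most 1000 steps. */
    int rc = DBGFR3StepEx(pUVM, idCpu,
                          DBGF_STEP_F_OVER | DBGF_STEP_F_STOP_ON_RET,
                          NULL /* pStopPcAddr */,
                          NULL /* pStopPopAddr */,
                          0    /* cbStopPop */,
                          1000 /* cMaxSteps */);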
@@ -852,6 +888,7 @@ VMM_INT_DECL(VBOXSTRICTRC)  DBGFEventGenericWithArg(PVM pVM, PVMCPU pVCpu, DBGFE
 VMMR3DECL(CPUMMODE)         DBGFR3CpuGetMode(PUVM pUVM, VMCPUID idCpu);
 VMMR3DECL(VMCPUID)          DBGFR3CpuGetCount(PUVM pUVM);
 VMMR3DECL(bool)             DBGFR3CpuIsIn64BitCode(PUVM pUVM, VMCPUID idCpu);
+VMMR3DECL(bool)             DBGFR3CpuIsInV86Code(PUVM pUVM, VMCPUID idCpu);
 #endif
 
 
diff --git a/include/VBox/vmm/em.h b/include/VBox/vmm/em.h
index 4cdc014..ac7d280 100644
--- a/include/VBox/vmm/em.h
+++ b/include/VBox/vmm/em.h
@@ -198,6 +198,7 @@ VMM_INT_DECL(int)               EMInterpretWrmsr(PVM pVM, PVMCPU pVCpu, PCPUMCTX
 VMM_INT_DECL(bool)              EMShouldContinueAfterHalt(PVMCPU pVCpu, PCPUMCTX pCtx);
 VMM_INT_DECL(bool)              EMMonitorWaitShouldContinue(PVMCPU pVCpu, PCPUMCTX pCtx);
 VMM_INT_DECL(int)               EMMonitorWaitPrepare(PVMCPU pVCpu, uint64_t rax, uint64_t rcx, uint64_t rdx, RTGCPHYS GCPhys);
+VMM_INT_DECL(bool)              EMMonitorIsArmed(PVMCPU pVCpu);
 VMM_INT_DECL(int)               EMMonitorWaitPerform(PVMCPU pVCpu, uint64_t rax, uint64_t rcx);
 VMM_INT_DECL(int)               EMUnhaltAndWakeUp(PVM pVM, PVMCPU pVCpuDst);
 
diff --git a/include/VBox/vmm/hm_svm.h b/include/VBox/vmm/hm_svm.h
index 5fccece..ff35c37 100644
--- a/include/VBox/vmm/hm_svm.h
+++ b/include/VBox/vmm/hm_svm.h
@@ -66,238 +66,244 @@
 #define AMD_CPUID_SVM_FEATURE_EDX_AVIC                      RT_BIT(13)
 /** @} */
 
-
+/*
+ * Ugly!
+ * When compiling the recompiler, its own svm.h defines clash with
+ * the following defines. Guard just the duplicates here, as we still
+ * require the other definitions and structures in this header.
+ */
+#ifndef IN_REM_R3
 /** @name SVM Basic Exit Reasons.
  * @{
  */
 /** Invalid guest state in VMCB. */
-#define SVM_EXIT_INVALID                (-1)
+# define SVM_EXIT_INVALID                (uint64_t)(-1)
 /** Read from CR0-CR15. */
-#define SVM_EXIT_READ_CR0               0x0
-#define SVM_EXIT_READ_CR1               0x1
-#define SVM_EXIT_READ_CR2               0x2
-#define SVM_EXIT_READ_CR3               0x3
-#define SVM_EXIT_READ_CR4               0x4
-#define SVM_EXIT_READ_CR5               0x5
-#define SVM_EXIT_READ_CR6               0x6
-#define SVM_EXIT_READ_CR7               0x7
-#define SVM_EXIT_READ_CR8               0x8
-#define SVM_EXIT_READ_CR9               0x9
-#define SVM_EXIT_READ_CR10              0xA
-#define SVM_EXIT_READ_CR11              0xB
-#define SVM_EXIT_READ_CR12              0xC
-#define SVM_EXIT_READ_CR13              0xD
-#define SVM_EXIT_READ_CR14              0xE
-#define SVM_EXIT_READ_CR15              0xF
+# define SVM_EXIT_READ_CR0               0x0
+# define SVM_EXIT_READ_CR1               0x1
+# define SVM_EXIT_READ_CR2               0x2
+# define SVM_EXIT_READ_CR3               0x3
+# define SVM_EXIT_READ_CR4               0x4
+# define SVM_EXIT_READ_CR5               0x5
+# define SVM_EXIT_READ_CR6               0x6
+# define SVM_EXIT_READ_CR7               0x7
+# define SVM_EXIT_READ_CR8               0x8
+# define SVM_EXIT_READ_CR9               0x9
+# define SVM_EXIT_READ_CR10              0xA
+# define SVM_EXIT_READ_CR11              0xB
+# define SVM_EXIT_READ_CR12              0xC
+# define SVM_EXIT_READ_CR13              0xD
+# define SVM_EXIT_READ_CR14              0xE
+# define SVM_EXIT_READ_CR15              0xF
 /** Writes to CR0-CR15. */
-#define SVM_EXIT_WRITE_CR0              0x10
-#define SVM_EXIT_WRITE_CR1              0x11
-#define SVM_EXIT_WRITE_CR2              0x12
-#define SVM_EXIT_WRITE_CR3              0x13
-#define SVM_EXIT_WRITE_CR4              0x14
-#define SVM_EXIT_WRITE_CR5              0x15
-#define SVM_EXIT_WRITE_CR6              0x16
-#define SVM_EXIT_WRITE_CR7              0x17
-#define SVM_EXIT_WRITE_CR8              0x18
-#define SVM_EXIT_WRITE_CR9              0x19
-#define SVM_EXIT_WRITE_CR10             0x1A
-#define SVM_EXIT_WRITE_CR11             0x1B
-#define SVM_EXIT_WRITE_CR12             0x1C
-#define SVM_EXIT_WRITE_CR13             0x1D
-#define SVM_EXIT_WRITE_CR14             0x1E
-#define SVM_EXIT_WRITE_CR15             0x1F
+# define SVM_EXIT_WRITE_CR0              0x10
+# define SVM_EXIT_WRITE_CR1              0x11
+# define SVM_EXIT_WRITE_CR2              0x12
+# define SVM_EXIT_WRITE_CR3              0x13
+# define SVM_EXIT_WRITE_CR4              0x14
+# define SVM_EXIT_WRITE_CR5              0x15
+# define SVM_EXIT_WRITE_CR6              0x16
+# define SVM_EXIT_WRITE_CR7              0x17
+# define SVM_EXIT_WRITE_CR8              0x18
+# define SVM_EXIT_WRITE_CR9              0x19
+# define SVM_EXIT_WRITE_CR10             0x1A
+# define SVM_EXIT_WRITE_CR11             0x1B
+# define SVM_EXIT_WRITE_CR12             0x1C
+# define SVM_EXIT_WRITE_CR13             0x1D
+# define SVM_EXIT_WRITE_CR14             0x1E
+# define SVM_EXIT_WRITE_CR15             0x1F
 /** Read from DR0-DR15. */
-#define SVM_EXIT_READ_DR0               0x20
-#define SVM_EXIT_READ_DR1               0x21
-#define SVM_EXIT_READ_DR2               0x22
-#define SVM_EXIT_READ_DR3               0x23
-#define SVM_EXIT_READ_DR4               0x24
-#define SVM_EXIT_READ_DR5               0x25
-#define SVM_EXIT_READ_DR6               0x26
-#define SVM_EXIT_READ_DR7               0x27
-#define SVM_EXIT_READ_DR8               0x28
-#define SVM_EXIT_READ_DR9               0x29
-#define SVM_EXIT_READ_DR10              0x2A
-#define SVM_EXIT_READ_DR11              0x2B
-#define SVM_EXIT_READ_DR12              0x2C
-#define SVM_EXIT_READ_DR13              0x2D
-#define SVM_EXIT_READ_DR14              0x2E
-#define SVM_EXIT_READ_DR15              0x2F
+# define SVM_EXIT_READ_DR0               0x20
+# define SVM_EXIT_READ_DR1               0x21
+# define SVM_EXIT_READ_DR2               0x22
+# define SVM_EXIT_READ_DR3               0x23
+# define SVM_EXIT_READ_DR4               0x24
+# define SVM_EXIT_READ_DR5               0x25
+# define SVM_EXIT_READ_DR6               0x26
+# define SVM_EXIT_READ_DR7               0x27
+# define SVM_EXIT_READ_DR8               0x28
+# define SVM_EXIT_READ_DR9               0x29
+# define SVM_EXIT_READ_DR10              0x2A
+# define SVM_EXIT_READ_DR11              0x2B
+# define SVM_EXIT_READ_DR12              0x2C
+# define SVM_EXIT_READ_DR13              0x2D
+# define SVM_EXIT_READ_DR14              0x2E
+# define SVM_EXIT_READ_DR15              0x2F
 /** Writes to DR0-DR15. */
-#define SVM_EXIT_WRITE_DR0              0x30
-#define SVM_EXIT_WRITE_DR1              0x31
-#define SVM_EXIT_WRITE_DR2              0x32
-#define SVM_EXIT_WRITE_DR3              0x33
-#define SVM_EXIT_WRITE_DR4              0x34
-#define SVM_EXIT_WRITE_DR5              0x35
-#define SVM_EXIT_WRITE_DR6              0x36
-#define SVM_EXIT_WRITE_DR7              0x37
-#define SVM_EXIT_WRITE_DR8              0x38
-#define SVM_EXIT_WRITE_DR9              0x39
-#define SVM_EXIT_WRITE_DR10             0x3A
-#define SVM_EXIT_WRITE_DR11             0x3B
-#define SVM_EXIT_WRITE_DR12             0x3C
-#define SVM_EXIT_WRITE_DR13             0x3D
-#define SVM_EXIT_WRITE_DR14             0x3E
-#define SVM_EXIT_WRITE_DR15             0x3F
+# define SVM_EXIT_WRITE_DR0              0x30
+# define SVM_EXIT_WRITE_DR1              0x31
+# define SVM_EXIT_WRITE_DR2              0x32
+# define SVM_EXIT_WRITE_DR3              0x33
+# define SVM_EXIT_WRITE_DR4              0x34
+# define SVM_EXIT_WRITE_DR5              0x35
+# define SVM_EXIT_WRITE_DR6              0x36
+# define SVM_EXIT_WRITE_DR7              0x37
+# define SVM_EXIT_WRITE_DR8              0x38
+# define SVM_EXIT_WRITE_DR9              0x39
+# define SVM_EXIT_WRITE_DR10             0x3A
+# define SVM_EXIT_WRITE_DR11             0x3B
+# define SVM_EXIT_WRITE_DR12             0x3C
+# define SVM_EXIT_WRITE_DR13             0x3D
+# define SVM_EXIT_WRITE_DR14             0x3E
+# define SVM_EXIT_WRITE_DR15             0x3F
 /** Exceptions 0-31. */
-#define SVM_EXIT_EXCEPTION_0            0x40
-#define SVM_EXIT_EXCEPTION_1            0x41
-#define SVM_EXIT_EXCEPTION_2            0x42
-#define SVM_EXIT_EXCEPTION_3            0x43
-#define SVM_EXIT_EXCEPTION_4            0x44
-#define SVM_EXIT_EXCEPTION_5            0x45
-#define SVM_EXIT_EXCEPTION_6            0x46
-#define SVM_EXIT_EXCEPTION_7            0x47
-#define SVM_EXIT_EXCEPTION_8            0x48
-#define SVM_EXIT_EXCEPTION_9            0x49
-#define SVM_EXIT_EXCEPTION_A            0x4A
-#define SVM_EXIT_EXCEPTION_B            0x4B
-#define SVM_EXIT_EXCEPTION_C            0x4C
-#define SVM_EXIT_EXCEPTION_D            0x4D
-#define SVM_EXIT_EXCEPTION_E            0x4E
-#define SVM_EXIT_EXCEPTION_F            0x4F
-#define SVM_EXIT_EXCEPTION_10           0x50
-#define SVM_EXIT_EXCEPTION_11           0x51
-#define SVM_EXIT_EXCEPTION_12           0x52
-#define SVM_EXIT_EXCEPTION_13           0x53
-#define SVM_EXIT_EXCEPTION_14           0x54
-#define SVM_EXIT_EXCEPTION_15           0x55
-#define SVM_EXIT_EXCEPTION_16           0x56
-#define SVM_EXIT_EXCEPTION_17           0x57
-#define SVM_EXIT_EXCEPTION_18           0x58
-#define SVM_EXIT_EXCEPTION_19           0x59
-#define SVM_EXIT_EXCEPTION_1A           0x5A
-#define SVM_EXIT_EXCEPTION_1B           0x5B
-#define SVM_EXIT_EXCEPTION_1C           0x5C
-#define SVM_EXIT_EXCEPTION_1D           0x5D
-#define SVM_EXIT_EXCEPTION_1E           0x5E
-#define SVM_EXIT_EXCEPTION_1F           0x5F
+# define SVM_EXIT_EXCEPTION_0            0x40
+# define SVM_EXIT_EXCEPTION_1            0x41
+# define SVM_EXIT_EXCEPTION_2            0x42
+# define SVM_EXIT_EXCEPTION_3            0x43
+# define SVM_EXIT_EXCEPTION_4            0x44
+# define SVM_EXIT_EXCEPTION_5            0x45
+# define SVM_EXIT_EXCEPTION_6            0x46
+# define SVM_EXIT_EXCEPTION_7            0x47
+# define SVM_EXIT_EXCEPTION_8            0x48
+# define SVM_EXIT_EXCEPTION_9            0x49
+# define SVM_EXIT_EXCEPTION_A            0x4A
+# define SVM_EXIT_EXCEPTION_B            0x4B
+# define SVM_EXIT_EXCEPTION_C            0x4C
+# define SVM_EXIT_EXCEPTION_D            0x4D
+# define SVM_EXIT_EXCEPTION_E            0x4E
+# define SVM_EXIT_EXCEPTION_F            0x4F
+# define SVM_EXIT_EXCEPTION_10           0x50
+# define SVM_EXIT_EXCEPTION_11           0x51
+# define SVM_EXIT_EXCEPTION_12           0x52
+# define SVM_EXIT_EXCEPTION_13           0x53
+# define SVM_EXIT_EXCEPTION_14           0x54
+# define SVM_EXIT_EXCEPTION_15           0x55
+# define SVM_EXIT_EXCEPTION_16           0x56
+# define SVM_EXIT_EXCEPTION_17           0x57
+# define SVM_EXIT_EXCEPTION_18           0x58
+# define SVM_EXIT_EXCEPTION_19           0x59
+# define SVM_EXIT_EXCEPTION_1A           0x5A
+# define SVM_EXIT_EXCEPTION_1B           0x5B
+# define SVM_EXIT_EXCEPTION_1C           0x5C
+# define SVM_EXIT_EXCEPTION_1D           0x5D
+# define SVM_EXIT_EXCEPTION_1E           0x5E
+# define SVM_EXIT_EXCEPTION_1F           0x5F
 /** Physical maskable interrupt. */
-#define SVM_EXIT_INTR                   0x60
+# define SVM_EXIT_INTR                   0x60
 /** Non-maskable interrupt. */
-#define SVM_EXIT_NMI                    0x61
+# define SVM_EXIT_NMI                    0x61
 /** System Management interrupt. */
-#define SVM_EXIT_SMI                    0x62
+# define SVM_EXIT_SMI                    0x62
 /** Physical INIT signal. */
-#define SVM_EXIT_INIT                   0x63
+# define SVM_EXIT_INIT                   0x63
 /** Virtual interrupt. */
-#define SVM_EXIT_VINTR                  0x64
+# define SVM_EXIT_VINTR                  0x64
 /** Write to CR0 that changed any bits other than CR0.TS or CR0.MP. */
-#define SVM_EXIT_CR0_SEL_WRITE          0x65
+# define SVM_EXIT_CR0_SEL_WRITE          0x65
 /** IDTR read. */
-#define SVM_EXIT_IDTR_READ              0x66
+# define SVM_EXIT_IDTR_READ              0x66
 /** GDTR read. */
-#define SVM_EXIT_GDTR_READ              0x67
+# define SVM_EXIT_GDTR_READ              0x67
 /** LDTR read. */
-#define SVM_EXIT_LDTR_READ              0x68
+# define SVM_EXIT_LDTR_READ              0x68
 /** TR read. */
-#define SVM_EXIT_TR_READ                0x69
+# define SVM_EXIT_TR_READ                0x69
 /** IDTR write. */
-#define SVM_EXIT_IDTR_WRITE             0x6A
+# define SVM_EXIT_IDTR_WRITE             0x6A
 /** GDTR write. */
-#define SVM_EXIT_GDTR_WRITE             0x6B
+# define SVM_EXIT_GDTR_WRITE             0x6B
 /** LDTR write. */
-#define SVM_EXIT_LDTR_WRITE             0x6C
+# define SVM_EXIT_LDTR_WRITE             0x6C
 /** TR write. */
-#define SVM_EXIT_TR_WRITE               0x6D
+# define SVM_EXIT_TR_WRITE               0x6D
 /** RDTSC instruction. */
-#define SVM_EXIT_RDTSC                  0x6E
+# define SVM_EXIT_RDTSC                  0x6E
 /** RDPMC instruction. */
-#define SVM_EXIT_RDPMC                  0x6F
+# define SVM_EXIT_RDPMC                  0x6F
 /** PUSHF instruction. */
-#define SVM_EXIT_PUSHF                  0x70
+# define SVM_EXIT_PUSHF                  0x70
 /** POPF instruction. */
-#define SVM_EXIT_POPF                   0x71
+# define SVM_EXIT_POPF                   0x71
 /** CPUID instruction. */
-#define SVM_EXIT_CPUID                  0x72
+# define SVM_EXIT_CPUID                  0x72
 /** RSM instruction. */
-#define SVM_EXIT_RSM                    0x73
+# define SVM_EXIT_RSM                    0x73
 /** IRET instruction. */
-#define SVM_EXIT_IRET                   0x74
+# define SVM_EXIT_IRET                   0x74
 /** Software interrupt (INTn instructions). */
-#define SVM_EXIT_SWINT                  0x75
+# define SVM_EXIT_SWINT                  0x75
 /** INVD instruction. */
-#define SVM_EXIT_INVD                   0x76
+# define SVM_EXIT_INVD                   0x76
 /** PAUSE instruction. */
-#define SVM_EXIT_PAUSE                  0x77
+# define SVM_EXIT_PAUSE                  0x77
 /** HLT instruction. */
-#define SVM_EXIT_HLT                    0x78
+# define SVM_EXIT_HLT                    0x78
 /** INVLPG instruction. */
-#define SVM_EXIT_INVLPG                 0x79
+# define SVM_EXIT_INVLPG                 0x79
 /** INVLPGA instruction. */
-#define SVM_EXIT_INVLPGA                0x7A
+# define SVM_EXIT_INVLPGA                0x7A
 /** IN or OUT accessing protected port (the EXITINFO1 field provides more information). */
-#define SVM_EXIT_IOIO                   0x7B
+# define SVM_EXIT_IOIO                   0x7B
 /** RDMSR or WRMSR access to protected MSR. */
-#define SVM_EXIT_MSR                    0x7C
+# define SVM_EXIT_MSR                    0x7C
 /** Task switch. */
-#define SVM_EXIT_TASK_SWITCH            0x7D
+# define SVM_EXIT_TASK_SWITCH            0x7D
 /** FP legacy handling enabled, and the processor is frozen in an x87/MMX instruction waiting for an interrupt. */
-#define SVM_EXIT_FERR_FREEZE            0x7E
+# define SVM_EXIT_FERR_FREEZE            0x7E
 /** Shutdown. */
-#define SVM_EXIT_SHUTDOWN               0x7F
+# define SVM_EXIT_SHUTDOWN               0x7F
 /** VMRUN instruction. */
-#define SVM_EXIT_VMRUN                  0x80
+# define SVM_EXIT_VMRUN                  0x80
 /** VMMCALL instruction. */
-#define SVM_EXIT_VMMCALL                0x81
+# define SVM_EXIT_VMMCALL                0x81
 /** VMLOAD instruction. */
-#define SVM_EXIT_VMLOAD                 0x82
+# define SVM_EXIT_VMLOAD                 0x82
 /** VMSAVE instruction. */
-#define SVM_EXIT_VMSAVE                 0x83
+# define SVM_EXIT_VMSAVE                 0x83
 /** STGI instruction. */
-#define SVM_EXIT_STGI                   0x84
+# define SVM_EXIT_STGI                   0x84
 /** CLGI instruction. */
-#define SVM_EXIT_CLGI                   0x85
+# define SVM_EXIT_CLGI                   0x85
 /** SKINIT instruction. */
-#define SVM_EXIT_SKINIT                 0x86
+# define SVM_EXIT_SKINIT                 0x86
 /** RDTSCP instruction. */
-#define SVM_EXIT_RDTSCP                 0x87
+# define SVM_EXIT_RDTSCP                 0x87
 /** ICEBP instruction. */
-#define SVM_EXIT_ICEBP                  0x88
+# define SVM_EXIT_ICEBP                  0x88
 /** WBINVD instruction. */
-#define SVM_EXIT_WBINVD                 0x89
+# define SVM_EXIT_WBINVD                 0x89
 /** MONITOR instruction. */
-#define SVM_EXIT_MONITOR                0x8A
+# define SVM_EXIT_MONITOR                0x8A
 /** MWAIT instruction. */
-#define SVM_EXIT_MWAIT                  0x8B
+# define SVM_EXIT_MWAIT                  0x8B
 /** MWAIT instruction, when armed. */
-#define SVM_EXIT_MWAIT_ARMED            0x8C
+# define SVM_EXIT_MWAIT_ARMED            0x8C
 /** XSETBV instruction. */
-#define SVM_EXIT_XSETBV                 0x8D
+# define SVM_EXIT_XSETBV                 0x8D
 /** Nested paging: host-level page fault occurred (EXITINFO1 contains fault errorcode; EXITINFO2 contains the guest physical address causing the fault). */
-#define SVM_EXIT_NPF                    0x400
+# define SVM_EXIT_NPF                    0x400
 /** AVIC: Virtual IPI delivery not completed. */
-#define SVM_EXIT_AVIC_INCOMPLETE_IPI    0x401
+# define SVM_EXIT_AVIC_INCOMPLETE_IPI    0x401
 /** AVIC: Attempted access by guest to a vAPIC register not handled by AVIC
  *  hardware. */
-#define SVM_EXIT_AVIC_NOACCEL           0x402
-
+# define SVM_EXIT_AVIC_NOACCEL           0x402
 /** The maximum possible exit value. */
-#define SVM_EXIT_MAX                    (SVM_EXIT_AVIC_NOACCEL)
+# define SVM_EXIT_MAX                    (SVM_EXIT_AVIC_NOACCEL)
 /** @} */
+#endif /* !IN_REM_R3 */
 
 
 /** @name SVMVMCB.u64ExitInfo2 for task switches
  * @{
  */
 /** Set to 1 if the task switch was caused by an IRET; else cleared to 0. */
-#define SVM_EXIT2_TASK_SWITCH_IRET            RT_BIT_64(36)
+# define SVM_EXIT2_TASK_SWITCH_IRET            RT_BIT_64(36)
 /** Set to 1 if the task switch was caused by a far jump; else cleared to 0. */
-#define SVM_EXIT2_TASK_SWITCH_JMP             RT_BIT_64(38)
+# define SVM_EXIT2_TASK_SWITCH_JMP             RT_BIT_64(38)
 /** Set to 1 if the task switch has an error code; else cleared to 0. */
-#define SVM_EXIT2_TASK_SWITCH_HAS_ERROR_CODE  RT_BIT_64(44)
+# define SVM_EXIT2_TASK_SWITCH_HAS_ERROR_CODE  RT_BIT_64(44)
 /** The value of EFLAGS.RF that would be saved in the outgoing TSS if the task switch were not intercepted. */
-#define SVM_EXIT2_TASK_SWITCH_EFLAGS_RF       RT_BIT_64(48)
+# define SVM_EXIT2_TASK_SWITCH_EFLAGS_RF       RT_BIT_64(48)
 /** @} */
 
 /** @name SVMVMCB.u64ExitInfo1 for MSR accesses
  * @{
  */
 /** The access was a read MSR. */
-#define SVM_EXIT1_MSR_READ                    0x0
+# define SVM_EXIT1_MSR_READ                    0x0
 /** The access was a write MSR. */
-#define SVM_EXIT1_MSR_WRITE                   0x1
+# define SVM_EXIT1_MSR_WRITE                   0x1
 /** @} */
 
 /** @name SVMVMCB.ctrl.u32InterceptCtrl1
@@ -556,6 +562,37 @@ typedef union
 #define SVM_IOIO_READ                   1
 /** @}*/
 
+/** 8-bit IO transfer. */
+#define SVM_IOIO_8_BIT_OP               RT_BIT_32(4)
+/** 16-bit IO transfer. */
+#define SVM_IOIO_16_BIT_OP              RT_BIT_32(5)
+/** 32-bit IO transfer. */
+#define SVM_IOIO_32_BIT_OP              RT_BIT_32(6)
+/** Mask of all possible IO transfer sizes. */
+#define SVM_IOIO_OP_SIZE_MASK           (SVM_IOIO_8_BIT_OP | SVM_IOIO_16_BIT_OP | SVM_IOIO_32_BIT_OP)
+/** 16-bit address for the IO buffer. */
+#define SVM_IOIO_16_BIT_ADDR            RT_BIT_32(7)
+/** 32-bit address for the IO buffer. */
+#define SVM_IOIO_32_BIT_ADDR            RT_BIT_32(8)
+/** 64-bit address for the IO buffer. */
+#define SVM_IOIO_64_BIT_ADDR            RT_BIT_32(9)
+/** Mask of all the IO address sizes. */
+#define SVM_IOIO_ADDR_SIZE_MASK         (SVM_IOIO_16_BIT_ADDR | SVM_IOIO_32_BIT_ADDR | SVM_IOIO_64_BIT_ADDR)
+/** Number of bits to left shift to get the IO port number. */
+#define SVM_IOIO_PORT_SHIFT             16
+/** IO write. */
+#define SVM_IOIO_WRITE                  0
+/** IO read. */
+#define SVM_IOIO_READ                   1
+/**
+ * SVM IOIO transfer type.
+ */
+typedef enum
+{
+    SVMIOIOTYPE_OUT = SVM_IOIO_WRITE,
+    SVMIOIOTYPE_IN  = SVM_IOIO_READ
+} SVMIOIOTYPE;
+
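
Editor's note: to make the EXITINFO1 layout above concrete, here is a minimal
decoding sketch. The helper name and the reporting are invented, and the
constants are repeated inline (RT_BIT_32(n) spelled as shifts) so it compiles
standalone:

    #include <stdint.h>
    #include <stdio.h>

    #define SVM_IOIO_READ        UINT64_C(1)         /* bit 0: 1 = IN, 0 = OUT */
    #define SVM_IOIO_8_BIT_OP    (UINT64_C(1) << 4)
    #define SVM_IOIO_16_BIT_OP   (UINT64_C(1) << 5)
    #define SVM_IOIO_PORT_SHIFT  16

    /* Decodes the main fields of an SVM_EXIT_IOIO intercept's EXITINFO1 value. */
    static void DecodeIoioExitInfo1(uint64_t uExitInfo1)
    {
        uint16_t const uPort = (uint16_t)(uExitInfo1 >> SVM_IOIO_PORT_SHIFT);
        int      const fIn   = (uExitInfo1 & SVM_IOIO_READ) != 0;
        unsigned const cbOp  =   (uExitInfo1 & SVM_IOIO_8_BIT_OP)  ? 1
                               : (uExitInfo1 & SVM_IOIO_16_BIT_OP) ? 2 : 4;
        printf("%s port %#x, %u byte(s)\n", fIn ? "IN from" : "OUT to", uPort, cbOp);
    }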
 /**
  * SVM nested paging structure.
  */
@@ -774,6 +811,8 @@ typedef struct SVMVMCB
 } SVMVMCB;
 /** Pointer to the SVMVMCB structure. */
 typedef SVMVMCB *PSVMVMCB;
+/** Pointer to a const SVMVMCB structure. */
+typedef const SVMVMCB *PCSVMVMCB;
 AssertCompileMemberOffset(SVMVMCB, ctrl, 0x00);
 AssertCompileMemberOffset(SVMVMCB, ctrl.u16InterceptRdCRx, 0x00);
 AssertCompileMemberOffset(SVMVMCB, ctrl.u16InterceptWrCRx, 0x02);
@@ -860,6 +899,63 @@ AssertCompileSize(SVMVMCB, 0x1000);
 VMMR0DECL(int) SVMR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt);
 #endif /* IN_RING0 */
 
+/**
+ * Segment attribute conversion between CPU and AMD-V VMCB format.
+ *
+ * The CPU format of the segment attribute is described in X86DESCATTRBITS,
+ * which is 16 bits wide (i.e. it includes 4 bits of the segment limit).
+ *
+ * In the AMD-V VMCB format the segment attribute is a compact 12 bits
+ * (strictly only the attribute bits and nothing else); the upper 4 bits
+ * are unused.
+ */
+#define HMSVM_CPU_2_VMCB_SEG_ATTR(a)       ( ((a) & 0xff) | (((a) & 0xf000) >> 4) )
+#define HMSVM_VMCB_2_CPU_SEG_ATTR(a)       ( ((a) & 0xff) | (((a) & 0x0f00) << 4) )
+
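
Editor's note: a quick worked example of the conversion, taking 0xc09b as a
sample CPU attribute value (P=1, S=1, type=0xb, G=1, D=1, limit nibble zero);
the macros are repeated from above so the sketch is standalone:

    #include <assert.h>
    #include <stdint.h>

    #define HMSVM_CPU_2_VMCB_SEG_ATTR(a)   ( ((a) & 0xff) | (((a) & 0xf000) >> 4) )
    #define HMSVM_VMCB_2_CPU_SEG_ATTR(a)   ( ((a) & 0xff) | (((a) & 0x0f00) << 4) )

    int main(void)
    {
        uint16_t const uCpuAttr  = 0xc09b;
        uint16_t const uVmcbAttr = HMSVM_CPU_2_VMCB_SEG_ATTR(uCpuAttr);
        assert(uVmcbAttr == 0xc9b);                               /* compact 12-bit VMCB form */
        assert(HMSVM_VMCB_2_CPU_SEG_ATTR(uVmcbAttr) == uCpuAttr); /* round-trips; limit bits 8-11 were zero */
        return 0;
    }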
+/** @def HMSVM_SEG_REG_COPY_TO_VMCB
+ * Copies the specified segment register to a VMCB from a virtual CPU context.
+ *
+ * @param   a_pCtx      The virtual-CPU context.
+ * @param   a_pVmcb     The VMCB.
+ * @param   REG         The segment register in the VMCB struct (CS, DS, FS,
+ *                      etc.).
+ * @param   reg         The segment register in the virtual CPU struct (cs, ds,
+ *                      fs, etc.).
+ */
+#define HMSVM_SEG_REG_COPY_TO_VMCB(a_pCtx, a_pVmcb, REG, reg) \
+    do \
+    { \
+        Assert((a_pCtx)->reg.fFlags & CPUMSELREG_FLAGS_VALID);  \
+        Assert((a_pCtx)->reg.ValidSel == (a_pCtx)->reg.Sel);    \
+        (a_pVmcb)->guest.REG.u16Sel    = (a_pCtx)->reg.Sel;      \
+        (a_pVmcb)->guest.REG.u32Limit  = (a_pCtx)->reg.u32Limit; \
+        (a_pVmcb)->guest.REG.u64Base   = (a_pCtx)->reg.u64Base;  \
+        (a_pVmcb)->guest.REG.u16Attr   = HMSVM_CPU_2_VMCB_SEG_ATTR((a_pCtx)->reg.Attr.u); \
+    } while (0)
+
+/** @def HMSVM_SEG_REG_COPY_FROM_VMCB
+ * Copies the specified segment register from the VMCB to a virtual CPU
+ * context.
+ *
+ * @param   a_pCtx      The virtual-CPU context.
+ * @param   a_pVmcb     The VMCB.
+ * @param   REG         The segment register in the VMCB struct (CS, DS, FS,
+ *                      etc.).
+ * @param   reg         The segment register in the virtual CPU struct (cs, ds,
+ *                      fs, etc.).
+ */
+#define HMSVM_SEG_REG_COPY_FROM_VMCB(a_pCtx, a_pVmcb, REG, reg) \
+    do \
+    { \
+        (a_pCtx)->reg.Sel       = (a_pVmcb)->guest.REG.u16Sel;   \
+        (a_pCtx)->reg.ValidSel  = (a_pVmcb)->guest.REG.u16Sel;   \
+        (a_pCtx)->reg.fFlags    = CPUMSELREG_FLAGS_VALID;    \
+        (a_pCtx)->reg.u32Limit  = (a_pVmcb)->guest.REG.u32Limit; \
+        (a_pCtx)->reg.u64Base   = (a_pVmcb)->guest.REG.u64Base;  \
+        (a_pCtx)->reg.Attr.u    = HMSVM_VMCB_2_CPU_SEG_ATTR((a_pVmcb)->guest.REG.u16Attr); \
+    } while (0)
+/** @} */
+
+
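
Editor's note: the intended call sites pair the upper-case VMCB member name with
the lower-case CPUMCTX member, e.g. (pCtx and pVmcb as used elsewhere in VBox's
HM code):

    HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, pVmcb, CS, cs);    /* guest context -> VMCB */
    HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcb, CS, cs);  /* VMCB -> guest context */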
 /** @} */
 
 #endif
diff --git a/include/VBox/vmm/iem.h b/include/VBox/vmm/iem.h
index edc76f8..72e24c4 100644
--- a/include/VBox/vmm/iem.h
+++ b/include/VBox/vmm/iem.h
@@ -39,16 +39,33 @@ RT_C_DECLS_BEGIN
  */
 
 
-/**
- * Operand or addressing mode.
- */
-typedef enum IEMMODE
-{
-    IEMMODE_16BIT = 0,
-    IEMMODE_32BIT,
-    IEMMODE_64BIT
-} IEMMODE;
-AssertCompileSize(IEMMODE, 4);
+/** @name Operand or addressing mode.
+ * @{ */
+typedef uint8_t IEMMODE;
+#define IEMMODE_16BIT 0
+#define IEMMODE_32BIT 1
+#define IEMMODE_64BIT 2
+/** @} */
+
+
+/** @name IEM_XCPT_FLAGS_XXX - flags for iemRaiseXcptOrInt.
+ * @{ */
+/** CPU exception. */
+#define IEM_XCPT_FLAGS_T_CPU_XCPT       RT_BIT_32(0)
+/** External interrupt (from PIC, APIC, whatever). */
+#define IEM_XCPT_FLAGS_T_EXT_INT        RT_BIT_32(1)
+/** Software interrupt (int or into, not bound).
+ * Returns to the following instruction. */
+#define IEM_XCPT_FLAGS_T_SOFT_INT       RT_BIT_32(2)
+/** Takes an error code. */
+#define IEM_XCPT_FLAGS_ERR              RT_BIT_32(3)
+/** Takes a CR2. */
+#define IEM_XCPT_FLAGS_CR2              RT_BIT_32(4)
+/** Generated by the breakpoint instruction. */
+#define IEM_XCPT_FLAGS_BP_INSTR         RT_BIT_32(5)
+/** Generated by a DRx instruction breakpoint and RF should be cleared. */
+#define IEM_XCPT_FLAGS_DRx_INSTR_BP     RT_BIT_32(6)
+/** @}  */
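
Editor's note: as a reading aid, a \#PF-style event would combine the type flag
with the error-code and CR2 flags, e.g.:

    uint32_t const fFlags = IEM_XCPT_FLAGS_T_CPU_XCPT | IEM_XCPT_FLAGS_ERR | IEM_XCPT_FLAGS_CR2;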
 
 
 /** @name IEMTARGETCPU_XXX - IEM target CPU specification.
@@ -119,7 +136,8 @@ VMM_INT_DECL(int)           IEMBreakpointClear(PVM pVM, RTGCPTR GCPtrBp);
 VMM_INT_DECL(void)          IEMTlbInvalidateAll(PVMCPU pVCpu, bool fVmm);
 VMM_INT_DECL(void)          IEMTlbInvalidatePage(PVMCPU pVCpu, RTGCPTR GCPtr);
 VMM_INT_DECL(void)          IEMTlbInvalidateAllPhysical(PVMCPU pVCpu);
-
+VMM_INT_DECL(bool)          IEMGetCurrentXcpt(PVMCPU pVCpu, uint8_t *puVector, uint32_t *pfFlags, uint32_t *puErr,
+                                              uint64_t *puCr2);
 
 /** @name Given Instruction Interpreters
  * @{ */
@@ -134,6 +152,13 @@ VMM_INT_DECL(VBOXSTRICTRC)  IEMExecDecodedMovCRxRead(PVMCPU pVCpu, uint8_t cbIns
 VMM_INT_DECL(VBOXSTRICTRC)  IEMExecDecodedClts(PVMCPU pVCpu, uint8_t cbInstr);
 VMM_INT_DECL(VBOXSTRICTRC)  IEMExecDecodedLmsw(PVMCPU pVCpu, uint8_t cbInstr, uint16_t uValue);
 VMM_INT_DECL(VBOXSTRICTRC)  IEMExecDecodedXsetbv(PVMCPU pVCpu, uint8_t cbInstr);
+#ifdef VBOX_WITH_NESTED_HWVIRT
+VMM_INT_DECL(VBOXSTRICTRC)  IEMExecDecodedClgi(PVMCPU pVCpu, uint8_t cbInstr);
+VMM_INT_DECL(VBOXSTRICTRC)  IEMExecDecodedStgi(PVMCPU pVCpu, uint8_t cbInstr);
+VMM_INT_DECL(VBOXSTRICTRC)  IEMExecDecodedVmload(PVMCPU pVCpu, uint8_t cbInstr);
+VMM_INT_DECL(VBOXSTRICTRC)  IEMExecDecodedVmsave(PVMCPU pVCpu, uint8_t cbInstr);
+VMM_INT_DECL(VBOXSTRICTRC)  IEMExecDecodedInvlpga(PVMCPU pVCpu, uint8_t cbInstr);
+#endif
 /** @}  */
 
 #if defined(IEM_VERIFICATION_MODE) && defined(IN_RING3)
diff --git a/include/VBox/vmm/mm.h b/include/VBox/vmm/mm.h
index 4a638ff..f642466 100644
--- a/include/VBox/vmm/mm.h
+++ b/include/VBox/vmm/mm.h
@@ -78,6 +78,8 @@ typedef enum MMTAG
 
     MM_TAG_EM,
 
+    MM_TAG_IEM,
+
     MM_TAG_IOM,
     MM_TAG_IOM_STATS,
 
diff --git a/include/iprt/asm-amd64-x86.h b/include/iprt/asm-amd64-x86.h
index 4f9d647..452a3e3 100644
--- a/include/iprt/asm-amd64-x86.h
+++ b/include/iprt/asm-amd64-x86.h
@@ -1959,6 +1959,20 @@ DECLASM(void) ASMXSave(struct X86XSAVEAREA *pXStateArea, uint64_t fComponents);
 DECLASM(void) ASMXRstor(struct X86XSAVEAREA const *pXStateArea, uint64_t fComponents);
 
 
+struct X86FXSTATE;
+/**
+ * Save FPU and SSE CPU state.
+ * @param   pXStateArea     Where to save the state.
+ */
+DECLASM(void) ASMFxSave(struct X86FXSTATE *pXStateArea);
+
+/**
+ * Load FPU and SSE CPU state.
+ * @param   pXStateArea     Where to load the state from.
+ */
+DECLASM(void) ASMFxRstor(struct X86FXSTATE const *pXStateArea);
+
+
 /**
  * Enables interrupts (EFLAGS.IF).
  */
diff --git a/include/iprt/cdefs.h b/include/iprt/cdefs.h
index b56c167..f901144 100644
--- a/include/iprt/cdefs.h
+++ b/include/iprt/cdefs.h
@@ -1768,6 +1768,54 @@
 /** RT_CONCAT6 helper, don't use.  */
 #define RT_CONCAT6_HLP(a,b,c,d,e,f)   a##b##c##d##e##f
 
+/** @def RT_CONCAT7
+ * Concatenate the expanded arguments without any extra spaces in between.
+ *
+ * @param   a       The 1st part.
+ * @param   b       The 2nd part.
+ * @param   c       The 3rd part.
+ * @param   d       The 4th part.
+ * @param   e       The 5th part.
+ * @param   f       The 6th part.
+ * @param   g       The 7th part.
+ */
+#define RT_CONCAT7(a,b,c,d,e,f,g)       RT_CONCAT7_HLP(a,b,c,d,e,f,g)
+/** RT_CONCAT7 helper, don't use.  */
+#define RT_CONCAT7_HLP(a,b,c,d,e,f,g)   a##b##c##d##e##f##g
+
+/** @def RT_CONCAT8
+ * Concatenate the expanded arguments without any extra spaces in between.
+ *
+ * @param   a       The 1st part.
+ * @param   b       The 2nd part.
+ * @param   c       The 3rd part.
+ * @param   d       The 4th part.
+ * @param   e       The 5th part.
+ * @param   f       The 6th part.
+ * @param   g       The 7th part.
+ * @param   h       The 8th part.
+ */
+#define RT_CONCAT8(a,b,c,d,e,f,g,h)     RT_CONCAT8_HLP(a,b,c,d,e,f,g,h)
+/** RT_CONCAT8 helper, don't use.  */
+#define RT_CONCAT8_HLP(a,b,c,d,e,f,g,h) a##b##c##d##e##f##g##h
+
+/** @def RT_CONCAT9
+ * Concatenate the expanded arguments without any extra spaces in between.
+ *
+ * @param   a       The 1st part.
+ * @param   b       The 2nd part.
+ * @param   c       The 3rd part.
+ * @param   d       The 4th part.
+ * @param   e       The 5th part.
+ * @param   f       The 6th part.
+ * @param   g       The 7th part.
+ * @param   h       The 8th part.
+ * @param   i       The 9th part.
+ */
+#define RT_CONCAT9(a,b,c,d,e,f,g,h,i)   RT_CONCAT9_HLP(a,b,c,d,e,f,g,h,i)
+/** RT_CONCAT9 helper, don't use.  */
+#define RT_CONCAT9_HLP(a,b,c,d,e,f,g,h,i) a##b##c##d##e##f##g##h##i
+
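
Editor's note: like the existing RT_CONCAT..RT_CONCAT6, the _HLP indirection
makes the arguments expand before pasting; a small invented example:

    #define BITS 64
    RT_CONCAT7(My, Svm, Exit, Handler, _, v, BITS)   /* -> MySvmExitHandler_v64 */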
 /**
  * String constant tuple - string constant, strlen(string constant).
  *
@@ -2235,6 +2283,89 @@
  */
 #define RT_ELEMENTS(aArray)                     ( sizeof(aArray) / sizeof((aArray)[0]) )
 
+/** @def RT_FLEXIBLE_ARRAY
+ * What to put inside the square brackets when declaring a structure member
+ * with a flexible size.
+ *
+ * @note    Use RT_UOFFSETOF() to calculate the structure size.
+ *
+ * @note    Never do a sizeof() on the structure or member!
+ *
+ * @note    The member must be the last one.
+ *
+ * @note    GCC does not permit using this in a union.  So, for unions you must
+ *          use RT_FLEXIBLE_ARRAY_IN_UNION instead.
+ *
+ * @note    GCC does not permit using this in nested structures, whereas MSC
+ *          does.  So, use RT_FLEXIBLE_ARRAY_NESTED for that.
+ *
+ * @sa      RT_FLEXIBLE_ARRAY_NESTED, RT_FLEXIBLE_ARRAY_IN_UNION
+ */
+#if RT_MSC_PREREQ(RT_MSC_VER_VS2005) /** @todo Probably much much earlier. */ \
+ || (defined(__cplusplus) && RT_GNUC_PREREQ(6, 1)) \
+ || defined(__WATCOMC__) /* openwatcom 1.9 supports it, we don't care about older atm. */
+# define RT_FLEXIBLE_ARRAY
+# if defined(__cplusplus) && defined(_MSC_VER)
+#  pragma warning(disable:4200) /* -wd4200 does not work with VS2010 */
+# endif
+#elif defined(__STDC_VERSION__)
+# if __STDC_VERSION__ >= 199901L
+#  define RT_FLEXIBLE_ARRAY
+# else
+#  define RT_FLEXIBLE_ARRAY                     1
+# endif
+#else
+# define RT_FLEXIBLE_ARRAY                      1
+#endif
+
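
Editor's note: a usage sketch of the notes above (MYMSG and cbPayload are
invented for the example; RTMemAlloc and RT_UOFFSETOF are existing IPRT
facilities):

    typedef struct MYMSG
    {
        uint32_t cbPayload;
        uint8_t  abPayload[RT_FLEXIBLE_ARRAY];   /* must be the last member */
    } MYMSG;

    /* Never sizeof(MYMSG); size it via the member offset plus the payload: */
    MYMSG *pMsg = (MYMSG *)RTMemAlloc(RT_UOFFSETOF(MYMSG, abPayload) + cbPayload);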
+/** @def RT_FLEXIBLE_ARRAY_NESTED
+ * Variant of RT_FLEXIBLE_ARRAY for use in structures that are nested.
+ *
+ * GCC only allows the use of a flexible array member in the top-level
+ * structure, whereas MSC is less strict and lets you do struct { struct { char szName[]; } s; };
+ *
+ * @note    See notes for RT_FLEXIBLE_ARRAY.
+ *
+ * @note    GCC does not permit using this in a union.  So, for unions you must
+ *          use RT_FLEXIBLE_ARRAY_IN_NESTED_UNION instead.
+ *
+ * @sa      RT_FLEXIBLE_ARRAY, RT_FLEXIBLE_ARRAY_IN_NESTED_UNION
+ */
+#ifdef _MSC_VER
+# define RT_FLEXIBLE_ARRAY_NESTED               RT_FLEXIBLE_ARRAY
+#else
+# define RT_FLEXIBLE_ARRAY_NESTED               1
+#endif
+
+/** @def RT_FLEXIBLE_ARRAY_IN_UNION
+ * The union version of RT_FLEXIBLE_ARRAY.
+ *
+ * @remarks GCC does not support flexible array members in unions; 6.1.x
+ *          actively checks for this.  Visual C++ 2010 seems happy with it.
+ *
+ * @note    See notes for RT_FLEXIBLE_ARRAY.
+ *
+ * @sa      RT_FLEXIBLE_ARRAY, RT_FLEXIBLE_ARRAY_IN_NESTED_UNION
+ */
+#ifdef _MSC_VER
+# define RT_FLEXIBLE_ARRAY_IN_UNION             RT_FLEXIBLE_ARRAY
+#else
+# define RT_FLEXIBLE_ARRAY_IN_UNION             1
+#endif
+
+/** @def RT_FLEXIBLE_ARRAY_IN_NESTED_UNION
+ * The union version of RT_FLEXIBLE_ARRAY_NESTED.
+ *
+ * @note    See notes for RT_FLEXIBLE_ARRAY.
+ *
+ * @sa      RT_FLEXIBLE_ARRAY_NESTED, RT_FLEXIBLE_ARRAY_IN_UNION
+ */
+#ifdef _MSC_VER
+# define RT_FLEXIBLE_ARRAY_IN_NESTED_UNION      RT_FLEXIBLE_ARRAY_NESTED
+#else
+# define RT_FLEXIBLE_ARRAY_IN_NESTED_UNION      1
+#endif
+
 /**
  * Checks if the value is a power of two.
  *
@@ -2830,7 +2961,7 @@
 /** @def RT_NOREF9
  * RT_NOREF_PV shorthand taking nine parameters.  */
 #define RT_NOREF9(var1, var2, var3, var4, var5, var6, var7, var8, var9) \
-    RT_NOREF_PV(var1); RT_NOREF8(var2, var3, var4, var5, var6, var7, var8)
+    RT_NOREF_PV(var1); RT_NOREF8(var2, var3, var4, var5, var6, var7, var8, var9)
 /** @def RT_NOREF10
  * RT_NOREF_PV shorthand taking ten parameters.  */
 #define RT_NOREF10(var1, var2, var3, var4, var5, var6, var7, var8, var9, var10) \
diff --git a/include/iprt/formats/omf.h b/include/iprt/formats/omf.h
index 2654fd7..f59f5ef 100644
--- a/include/iprt/formats/omf.h
+++ b/include/iprt/formats/omf.h
@@ -172,6 +172,34 @@ typedef OMFRECHDR *PCOMFRECHDR;
 #define OMF_CCLS_BORLAND_DEP_FILES  UINT8_C(0xe9)
 /** @} */
 
+/** @name OMF SEGDEF Attrib.
+ * @{ */
+#define OMF_SEG_ATTR_ALIGN_ABS       (UINT8_C(0) << 5)  /**< SEGDEF attrib A: absolute - frame and offset fields present. */
+#define OMF_SEG_ATTR_ALIGN_BYTE      (UINT8_C(1) << 5)  /**< SEGDEF attrib A: 1-byte alignment. */
+#define OMF_SEG_ATTR_ALIGN_WORD      (UINT8_C(2) << 5)  /**< SEGDEF attrib A: 2-byte alignment. */
+#define OMF_SEG_ATTR_ALIGN_PARA      (UINT8_C(3) << 5)  /**< SEGDEF attrib A: 16-byte alignment. */
+#define OMF_SEG_ATTR_ALIGN_PAGE      (UINT8_C(4) << 5)  /**< SEGDEF attrib A: 4096-byte alignment (or 256-byte). */
+#define OMF_SEG_ATTR_ALIGN_DWORD     (UINT8_C(5) << 5)  /**< SEGDEF attrib A: 4-byte alignment. */
+#define OMF_SEG_ATTR_ALIGN_6         (UINT8_C(6) << 5)  /**< SEGDEF attrib A: not supported (load-time locatable, paragraph aligned). */
+#define OMF_SEG_ATTR_ALIGN_7         (UINT8_C(7) << 5)  /**< SEGDEF attrib A: undefined. */
+#define OMF_SEG_ATTR_ALIGN_MASK      (UINT8_C(7) << 5)  /**< SEGDEF attrib A: Mask for the alignment field. */
+#define OMF_SEG_ATTR_ALIGN_SHIFT     5                  /**< SEGDEF attrib A: Shift count for the alignment field. */
+
+#define OMF_SEG_ATTR_COMB_PRIVATE    (UINT8_C(0) << 2)  /**< SEGDEF attrib C: Private - do not combine with anyone. */
+#define OMF_SEG_ATTR_COMB_1          (UINT8_C(1) << 2)  /**< SEGDEF attrib C: Reserved */
+#define OMF_SEG_ATTR_COMB_PUBLIC     (UINT8_C(2) << 2)  /**< SEGDEF attrib C: Public - append at offset meeting alignment. */
+#define OMF_SEG_ATTR_COMB_3          (UINT8_C(3) << 2)  /**< SEGDEF attrib C: Reserved */
+#define OMF_SEG_ATTR_COMB_PUBLIC_4   (UINT8_C(4) << 2)  /**< SEGDEF attrib C: Public - append at offset meeting alignment. */
+#define OMF_SEG_ATTR_COMB_STACK      (UINT8_C(5) << 2)  /**< SEGDEF attrib C: Stack - same as public, but forced byte alignment. */
+#define OMF_SEG_ATTR_COMB_COMMON     (UINT8_C(6) << 2)  /**< SEGDEF attrib C: Common - overlay using maximum size. */
+#define OMF_SEG_ATTR_COMB_PUBLIC_7   (UINT8_C(7) << 2)  /**< SEGDEF attrib C: Public - append at offset meeting alignment. */
+#define OMF_SEG_ATTR_COMB_MASK       (UINT8_C(7) << 2)  /**< SEGDEF attrib C: Mask for the combination field. */
+#define OMF_SEG_ATTR_COMB_SHIFT      2                  /**< SEGDEF attrib C: Shift count for the combination field. */
+#define OMF_SEG_ATTR_BIG             UINT8_C(2)         /**< SEGDEF attrib B: Big segment 64K / 4GB. */
+#define OMF_SEG_ATTR_USE32           UINT8_C(1)         /**< SEGDEF attrib P: Indicates 32-bit data or code. */
+#define OMF_SEG_ATTR_USE16           UINT8_C(0)         /**< SEGDEF attrib ~P: Just for spelling out !USE32. */
+/** @} */
+
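
Editor's note: the A (alignment), C (combination), B and P fields share a single
SEGDEF attribute byte, so decoding uses the masks above; the sample value is
invented:

    uint8_t const bAttr  = 0x68;  /* para aligned (3 << 5), public (2 << 2), USE16 */
    uint8_t const uAlign = (bAttr & OMF_SEG_ATTR_ALIGN_MASK) >> OMF_SEG_ATTR_ALIGN_SHIFT; /* 3 */
    uint8_t const uComb  = (bAttr & OMF_SEG_ATTR_COMB_MASK)  >> OMF_SEG_ATTR_COMB_SHIFT;  /* 2 */
    int     const fUse32 = (bAttr & OMF_SEG_ATTR_USE32) != 0;                             /* 0 */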
 
 /** @name OMF FIXUPP Locations.
  * @{ */
diff --git a/include/iprt/mangling.h b/include/iprt/mangling.h
index aea9632..c1daa8f 100644
--- a/include/iprt/mangling.h
+++ b/include/iprt/mangling.h
@@ -356,6 +356,10 @@
 # define ASMXRstor_EndProc                              RT_MANGLER(ASMXRstor_EndProc)
 # define ASMXSave                                       RT_MANGLER(ASMXSave)
 # define ASMXSave_EndProc                               RT_MANGLER(ASMXSave_EndProc)
+# define ASMFxRstor                                     RT_MANGLER(ASMFxRstor)
+# define ASMFxRstor_EndProc                             RT_MANGLER(ASMFxRstor_EndProc)
+# define ASMFxSave                                      RT_MANGLER(ASMFxSave)
+# define ASMFxSave_EndProc                              RT_MANGLER(ASMFxSave_EndProc)
 
 # define RTAssertAreQuiet                               RT_MANGLER(RTAssertAreQuiet)
 # define RTAssertMayPanic                               RT_MANGLER(RTAssertMayPanic)
diff --git a/include/iprt/x86.h b/include/iprt/x86.h
index 785717e..064be0b 100644
--- a/include/iprt/x86.h
+++ b/include/iprt/x86.h
@@ -741,6 +741,37 @@ typedef const X86CPUIDFEATEDX *PCX86CPUIDFEATEDX;
 /** @} */
 
 
+/** @name CPUID AMD SVM Feature information.
+ * CPUID query with EAX=0x8000000a.
+ * @{
+ */
+/** Bit 0 - NP - Nested Paging supported. */
+#define X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING             RT_BIT(0)
+/** Bit 1 - LbrVirt - Support for saving five debug MSRs. */
+#define X86_CPUID_SVM_FEATURE_EDX_LBR_VIRT                  RT_BIT(1)
+/** Bit 2 - SVML - SVM locking bit supported. */
+#define X86_CPUID_SVM_FEATURE_EDX_SVM_LOCK                  RT_BIT(2)
+/** Bit 3 - NRIPS - Saving the next instruction pointer is supported. */
+#define X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE                 RT_BIT(3)
+/** Bit 4 - TscRateMsr - Support for MSR TSC ratio. */
+#define X86_CPUID_SVM_FEATURE_EDX_TSC_RATE_MSR              RT_BIT(4)
+/** Bit 5 - VmcbClean - Support VMCB clean bits. */
+#define X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN                RT_BIT(5)
+/** Bit 6 - FlushByAsid - Indicates TLB flushing for the current ASID only, and
+ *  that VMCB.TLB_Control is supported. */
+#define X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID             RT_BIT(6)
+/** Bit 7 - DecodeAssist - Indicates that decode assists are supported. */
+#define X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSIST             RT_BIT(7)
+/** Bit 10 - PauseFilter - Indicates support for the PAUSE intercept filter. */
+#define X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER              RT_BIT(10)
+/** Bit 12 - PauseFilterThreshold - Indicates support for the PAUSE
+ *  intercept filter cycle count threshold. */
+#define X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD    RT_BIT(12)
+/** Bit 13 - AVIC - Advanced Virtual Interrupt Controller. */
+#define X86_CPUID_SVM_FEATURE_EDX_AVIC                      RT_BIT(13)
+/** @} */
+
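
Editor's note: these bits are returned in EDX of CPUID leaf 0x8000000a; a probe
sketch using GCC's <cpuid.h> (the helper name is invented):

    #include <cpuid.h>
    #include <stdbool.h>

    static bool HasSvmNestedPaging(void)
    {
        unsigned uEax = 0, uEbx = 0, uEcx = 0, uEdx = 0;
        if (!__get_cpuid(0x8000000a, &uEax, &uEbx, &uEcx, &uEdx))
            return false;                /* extended leaf not supported */
        return (uEdx & 1u /* X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING */) != 0;
    }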
+
 /** @name CR0
 * @remarks The 286 (MSW), 386 and 486 ignore attempts at setting
  *          reserved flags.
@@ -1449,7 +1480,21 @@ AssertCompile(X86_DR7_ANY_RW_IO(UINT32_C(0x00040000)) == 0);
 /** Hypertransport interrupt pending register.
  * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" */
 #define MSR_K8_INT_PENDING                  UINT32_C(0xc0010055)
+
+/** SVM Control. */
 #define MSR_K8_VM_CR                        UINT32_C(0xc0010114)
+/** Disables HDT (Hardware Debug Tool) and certain internal debug
+ *  features. */
+#define MSR_K8_VM_CR_DPD                    RT_BIT_32(0)
+/** If set, non-intercepted INIT signals are converted to \#SX
+ *  exceptions. */
+#define MSR_K8_VM_CR_R_INIT                 RT_BIT_32(1)
+/** Disables A20 masking.  */
+#define MSR_K8_VM_CR_DIS_A20M               RT_BIT_32(2)
+/** Lock bit for this MSR controlling bits 3 (LOCK) and 4 (SVMDIS). */
+#define MSR_K8_VM_CR_LOCK                   RT_BIT_32(3)
+/** SVM disable. When set, writes to EFER.SVME are treated as MBZ. When
+ *  clear, EFER.SVME can be written normally. */
 #define MSR_K8_VM_CR_SVM_DISABLE            RT_BIT_32(4)
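
Editor's note: the usual probing pattern, roughly as in VBox's HM init code,
reads this MSR and treats SVMDIS together with LOCK as "disabled by the BIOS":

    uint64_t const fVmCr = ASMRdMsr(MSR_K8_VM_CR);   /* IPRT RDMSR wrapper */
    if (   (fVmCr & MSR_K8_VM_CR_SVM_DISABLE)
        && (fVmCr & MSR_K8_VM_CR_LOCK))
        return VERR_SVM_DISABLED;  /* locked off; EFER.SVME writes are treated as MBZ */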
 
 #define MSR_K8_IGNNE                        UINT32_C(0xc0010115)
@@ -2540,7 +2585,7 @@ typedef X86FPUREG const *PCX86FPUREG;
  */
 typedef union X86XMMREG
 {
-    /** XMM Register view *. */
+    /** XMM Register view. */
     uint128_t   xmm;
     /** 8-bit view. */
     uint8_t     au8[16];
@@ -2552,6 +2597,8 @@ typedef union X86XMMREG
     uint64_t    au64[2];
     /** 128-bit view. (yeah, very helpful) */
     uint128_t   au128[1];
+    /** Confusing nested 128-bit union view (this is what xmm should've been). */
+    RTUINT128U  uXmm;
 } X86XMMREG;
 #ifndef VBOX_FOR_DTRACE_LIB
 AssertCompileSize(X86XMMREG, 16);
@@ -2802,50 +2849,50 @@ AssertCompileMemberOffset(X86FXSTATE, au32RsrvdForSoftware, X86_OFF_FXSTATE_RSVD
 /** @name SSE MXCSR
  * @{ */
 /** Exception Flag: Invalid operation.  */
-#define X86_MXSCR_IE          RT_BIT_32(0)
+#define X86_MXCSR_IE          RT_BIT_32(0)
 /** Exception Flag: Denormalized operand.  */
-#define X86_MXSCR_DE          RT_BIT_32(1)
+#define X86_MXCSR_DE          RT_BIT_32(1)
 /** Exception Flag: Zero divide.  */
-#define X86_MXSCR_ZE          RT_BIT_32(2)
+#define X86_MXCSR_ZE          RT_BIT_32(2)
 /** Exception Flag: Overflow.  */
-#define X86_MXSCR_OE          RT_BIT_32(3)
+#define X86_MXCSR_OE          RT_BIT_32(3)
 /** Exception Flag: Underflow.  */
-#define X86_MXSCR_UE          RT_BIT_32(4)
+#define X86_MXCSR_UE          RT_BIT_32(4)
 /** Exception Flag: Precision.  */
-#define X86_MXSCR_PE          RT_BIT_32(5)
+#define X86_MXCSR_PE          RT_BIT_32(5)
 
 /** Denormals are zero. */
-#define X86_MXSCR_DAZ         RT_BIT_32(6)
+#define X86_MXCSR_DAZ         RT_BIT_32(6)
 
 /** Exception Mask: Invalid operation. */
-#define X86_MXSCR_IM          RT_BIT_32(7)
+#define X86_MXCSR_IM          RT_BIT_32(7)
 /** Exception Mask: Denormalized operand. */
-#define X86_MXSCR_DM          RT_BIT_32(8)
+#define X86_MXCSR_DM          RT_BIT_32(8)
 /** Exception Mask: Zero divide.  */
-#define X86_MXSCR_ZM          RT_BIT_32(9)
+#define X86_MXCSR_ZM          RT_BIT_32(9)
 /** Exception Mask: Overflow.  */
-#define X86_MXSCR_OM          RT_BIT_32(10)
+#define X86_MXCSR_OM          RT_BIT_32(10)
 /** Exception Mask: Underflow.  */
-#define X86_MXSCR_UM          RT_BIT_32(11)
+#define X86_MXCSR_UM          RT_BIT_32(11)
 /** Exception Mask: Precision.  */
-#define X86_MXSCR_PM          RT_BIT_32(12)
+#define X86_MXCSR_PM          RT_BIT_32(12)
 
 /** Rounding control mask. */
-#define X86_MXSCR_RC_MASK     UINT16_C(0x6000)
+#define X86_MXCSR_RC_MASK     UINT16_C(0x6000)
 /** Rounding control: To nearest. */
-#define X86_MXSCR_RC_NEAREST  UINT16_C(0x0000)
+#define X86_MXCSR_RC_NEAREST  UINT16_C(0x0000)
 /** Rounding control: Down. */
-#define X86_MXSCR_RC_DOWN     UINT16_C(0x2000)
+#define X86_MXCSR_RC_DOWN     UINT16_C(0x2000)
 /** Rounding control: Up. */
-#define X86_MXSCR_RC_UP       UINT16_C(0x4000)
+#define X86_MXCSR_RC_UP       UINT16_C(0x4000)
 /** Rounding control: Towards zero. */
-#define X86_MXSCR_RC_ZERO     UINT16_C(0x6000)
+#define X86_MXCSR_RC_ZERO     UINT16_C(0x6000)
 
 /** Flush-to-zero for masked underflow.  */
-#define X86_MXSCR_FZ          RT_BIT_32(15)
+#define X86_MXCSR_FZ          RT_BIT_32(15)
 
 /** Misaligned Exception Mask (AMD MISALIGNSSE).  */
-#define X86_MXSCR_MM          RT_BIT_32(17)
+#define X86_MXCSR_MM          RT_BIT_32(17)
 /** @} */
 
 /**
@@ -3049,7 +3096,7 @@ typedef X86XSAVEAREA *PX86XSAVEAREA;
 typedef X86XSAVEAREA const *PCX86XSAVEAREA;
 
 
-/** @name XSAVE_C_XXX - XSAVE State Components Bits.
+/** @name XSAVE_C_XXX - XSAVE State Components Bits (XCR0).
  * @{ */
 /** Bit 0 - x87 - Legacy FPU state (bit number) */
 #define XSAVE_C_X87_BIT         0
@@ -3091,6 +3138,10 @@ typedef X86XSAVEAREA const *PCX86XSAVEAREA;
 #define XSAVE_C_LWP_BIT         62
 /** Bit 62 - LWP - Lightweight Profiling (AMD). */
 #define XSAVE_C_LWP             RT_BIT_64(XSAVE_C_LWP_BIT)
+/** Bit 63 - X - Reserved (MBZ) for extending XCR0 (bit number). */
+#define XSAVE_C_X_BIT           63
+/** Bit 63 - X - Reserved (MBZ) for extending XCR0 (AMD). */
+#define XSAVE_C_X               RT_BIT_64(XSAVE_C_X_BIT)
 /** @} */
 
 
@@ -3970,14 +4021,14 @@ typedef enum X86XCPT
     /** \#VE - Virtualization Exception. */
     X86_XCPT_VE = 0x14,
     /** \#SX - Security Exception. */
-    X86_XCPT_SX = 0x1f
+    X86_XCPT_SX = 0x1e
 } X86XCPT;
 /** Pointer to a x86 exception code. */
 typedef X86XCPT *PX86XCPT;
 /** Pointer to a const x86 exception code. */
 typedef const X86XCPT *PCX86XCPT;
-/** The maximum exception value. */
-#define X86_XCPT_MAX                (X86_XCPT_SX)
+/** The last valid (currently reserved) exception value. */
+#define X86_XCPT_LAST               0x1f
 
 
 /** @name Trap Error Codes
@@ -4074,6 +4125,11 @@ typedef struct X86XDTR64
 AssertCompile((X86_MODRM_RM_MASK | X86_MODRM_REG_MASK | X86_MODRM_MOD_MASK) == 0xff);
 AssertCompile((X86_MODRM_REG_MASK >> X86_MODRM_REG_SHIFT) == X86_MODRM_REG_SMASK);
 AssertCompile((X86_MODRM_MOD_MASK >> X86_MODRM_MOD_SHIFT) == X86_MODRM_MOD_SMASK);
+/** @def X86_MODRM_MAKE
+ * @param   a_Mod       The mod value (0..3).
+ * @param   a_Reg       The register value (0..7).
+ * @param   a_RegMem    The register or memory value (0..7). */
+# define X86_MODRM_MAKE(a_Mod, a_Reg, a_RegMem) (((a_Mod) << X86_MODRM_MOD_SHIFT) | ((a_Reg) << X86_MODRM_REG_SHIFT) | (a_RegMem))
 #endif
 /** @} */
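
Editor's note: a one-line sanity check of the new helper; mod=3 is
register-direct, so reg=2 (edx) with r/m=1 (ecx) encodes as 11 010 001b:

    AssertCompile(X86_MODRM_MAKE(3, 2, 1) == 0xd1);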
 
@@ -4137,8 +4193,8 @@ AssertCompile((X86_SIB_SCALE_MASK >> X86_SIB_SCALE_SHIFT) == X86_SIB_SCALE_SMASK
 #define X86_OP_PRF_SIZE_OP      UINT8_C(0x66)
 #define X86_OP_PRF_SIZE_ADDR    UINT8_C(0x67)
 #define X86_OP_PRF_LOCK         UINT8_C(0xf0)
-#define X86_OP_PRF_REPZ         UINT8_C(0xf2)
-#define X86_OP_PRF_REPNZ        UINT8_C(0xf3)
+#define X86_OP_PRF_REPZ         UINT8_C(0xf3)
+#define X86_OP_PRF_REPNZ        UINT8_C(0xf2)
 #define X86_OP_REX_B            UINT8_C(0x41)
 #define X86_OP_REX_X            UINT8_C(0x42)
 #define X86_OP_REX_R            UINT8_C(0x44)
diff --git a/include/iprt/x86.mac b/include/iprt/x86.mac
index 6b98030..752ecfb 100644
--- a/include/iprt/x86.mac
+++ b/include/iprt/x86.mac
@@ -208,6 +208,17 @@
 %define X86_CPUID_AMD_ADVPOWER_EDX_EFRO      RT_BIT_32(10)
 %define X86_CPUID_AMD_ADVPOWER_EDX_PFI       RT_BIT_32(11)
 %define X86_CPUID_AMD_ADVPOWER_EDX_PA        RT_BIT_32(12)
+%define X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING             RT_BIT(0)
+%define X86_CPUID_SVM_FEATURE_EDX_LBR_VIRT                  RT_BIT(1)
+%define X86_CPUID_SVM_FEATURE_EDX_SVM_LOCK                  RT_BIT(2)
+%define X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE                 RT_BIT(3)
+%define X86_CPUID_SVM_FEATURE_EDX_TSC_RATE_MSR              RT_BIT(4)
+%define X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN                RT_BIT(5)
+%define X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID             RT_BIT(6)
+%define X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSIST             RT_BIT(7)
+%define X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER              RT_BIT(10)
+%define X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD    RT_BIT(12)
+%define X86_CPUID_SVM_FEATURE_EDX_AVIC                      RT_BIT(13)
 %define X86_CR0_PE                          RT_BIT_32(0)
 %define X86_CR0_PROTECTION_ENABLE           RT_BIT_32(0)
 %define X86_CR0_MP                          RT_BIT_32(1)
@@ -532,6 +543,10 @@
 %define MSR_K8_NB_CFG                       0xc001001f
 %define MSR_K8_INT_PENDING                  0xc0010055
 %define MSR_K8_VM_CR                        0xc0010114
+%define MSR_K8_VM_CR_DPD                    RT_BIT_32(0)
+%define MSR_K8_VM_CR_R_INIT                 RT_BIT_32(1)
+%define MSR_K8_VM_CR_DIS_A20M               RT_BIT_32(2)
+%define MSR_K8_VM_CR_LOCK                   RT_BIT_32(3)
 %define MSR_K8_VM_CR_SVM_DISABLE            RT_BIT_32(4)
 %define MSR_K8_IGNNE                        0xc0010115
 %define MSR_K8_SMM_CTL                      0xc0010116
@@ -762,26 +777,26 @@
 %define X86_FCW_RC_UP       0x0800
 %define X86_FCW_RC_ZERO     0x0c00
 %define X86_FCW_ZERO_MASK   0xf080
-%define X86_MXSCR_IE          RT_BIT_32(0)
-%define X86_MXSCR_DE          RT_BIT_32(1)
-%define X86_MXSCR_ZE          RT_BIT_32(2)
-%define X86_MXSCR_OE          RT_BIT_32(3)
-%define X86_MXSCR_UE          RT_BIT_32(4)
-%define X86_MXSCR_PE          RT_BIT_32(5)
-%define X86_MXSCR_DAZ         RT_BIT_32(6)
-%define X86_MXSCR_IM          RT_BIT_32(7)
-%define X86_MXSCR_DM          RT_BIT_32(8)
-%define X86_MXSCR_ZM          RT_BIT_32(9)
-%define X86_MXSCR_OM          RT_BIT_32(10)
-%define X86_MXSCR_UM          RT_BIT_32(11)
-%define X86_MXSCR_PM          RT_BIT_32(12)
-%define X86_MXSCR_RC_MASK     0x6000
-%define X86_MXSCR_RC_NEAREST  0x0000
-%define X86_MXSCR_RC_DOWN     0x2000
-%define X86_MXSCR_RC_UP       0x4000
-%define X86_MXSCR_RC_ZERO     0x6000
-%define X86_MXSCR_FZ          RT_BIT_32(15)
-%define X86_MXSCR_MM          RT_BIT_32(17)
+%define X86_MXCSR_IE          RT_BIT_32(0)
+%define X86_MXCSR_DE          RT_BIT_32(1)
+%define X86_MXCSR_ZE          RT_BIT_32(2)
+%define X86_MXCSR_OE          RT_BIT_32(3)
+%define X86_MXCSR_UE          RT_BIT_32(4)
+%define X86_MXCSR_PE          RT_BIT_32(5)
+%define X86_MXCSR_DAZ         RT_BIT_32(6)
+%define X86_MXCSR_IM          RT_BIT_32(7)
+%define X86_MXCSR_DM          RT_BIT_32(8)
+%define X86_MXCSR_ZM          RT_BIT_32(9)
+%define X86_MXCSR_OM          RT_BIT_32(10)
+%define X86_MXCSR_UM          RT_BIT_32(11)
+%define X86_MXCSR_PM          RT_BIT_32(12)
+%define X86_MXCSR_RC_MASK     0x6000
+%define X86_MXCSR_RC_NEAREST  0x0000
+%define X86_MXCSR_RC_DOWN     0x2000
+%define X86_MXCSR_RC_UP       0x4000
+%define X86_MXCSR_RC_ZERO     0x6000
+%define X86_MXCSR_FZ          RT_BIT_32(15)
+%define X86_MXCSR_MM          RT_BIT_32(17)
 %ifndef VBOX_FOR_DTRACE_LIB
 %endif
 %ifndef VBOX_FOR_DTRACE_LIB
@@ -820,6 +835,8 @@
 %define XSAVE_C_PKRU            RT_BIT_64(XSAVE_C_PKRU_BIT)
 %define XSAVE_C_LWP_BIT         62
 %define XSAVE_C_LWP             RT_BIT_64(XSAVE_C_LWP_BIT)
+%define XSAVE_C_X_BIT           63
+%define XSAVE_C_X               RT_BIT_64(XSAVE_C_X_BIT)
 %ifndef VBOX_FOR_DTRACE_LIB
 %endif
 %define X86DESCATTR_TYPE            0x0000000f
@@ -946,7 +963,7 @@
 %define X86_SEL_LDT             0x0004
 %define X86_SEL_RPL             0x0003
 %define X86_SEL_RPL_LDT         0x0007
-%define X86_XCPT_MAX                (X86_XCPT_SX)
+%define X86_XCPT_LAST               0x1f
 %define X86_TRAP_ERR_EXTERNAL       1
 %define X86_TRAP_ERR_IDT            2
 %define X86_TRAP_ERR_TI             4
@@ -972,6 +989,7 @@
 %define X86_MODRM_MOD_SMASK     0x03
 %define X86_MODRM_MOD_SHIFT     6
 %ifndef VBOX_FOR_DTRACE_LIB
+ %define X86_MODRM_MAKE(a_Mod, a_Reg, a_RegMem) (((a_Mod) << X86_MODRM_MOD_SHIFT) | ((a_Reg) << X86_MODRM_REG_SHIFT) | (a_RegMem))
 %endif
 %define X86_SIB_BASE_MASK     0x07
 %define X86_SIB_INDEX_MASK    0x38
@@ -1014,8 +1032,8 @@
 %define X86_OP_PRF_SIZE_OP      0x66
 %define X86_OP_PRF_SIZE_ADDR    0x67
 %define X86_OP_PRF_LOCK         0xf0
-%define X86_OP_PRF_REPZ         0xf2
-%define X86_OP_PRF_REPNZ        0xf3
+%define X86_OP_PRF_REPZ         0xf3
+%define X86_OP_PRF_REPNZ        0xf2
 %define X86_OP_REX_B            0x41
 %define X86_OP_REX_X            0x42
 %define X86_OP_REX_R            0x44
diff --git a/src/VBox/Additions/linux/installer/vboxadd.sh b/src/VBox/Additions/linux/installer/vboxadd.sh
index 5402083..a5260cb 100755
--- a/src/VBox/Additions/linux/installer/vboxadd.sh
+++ b/src/VBox/Additions/linux/installer/vboxadd.sh
@@ -1,6 +1,6 @@
 #! /bin/sh
 #
-# Linux Additions kernel module init script ($Revision: 114389 $)
+# Linux Additions kernel module init script ($Revision: 114649 $)
 #
 
 #
@@ -426,7 +426,7 @@ shared_folder_setup()
     # Put the mount.vboxsf mount helper in the right place.
     ## @todo It would be nicer if the kernel module just parsed parameters
     # itself instead of needing a separate binary to do that.
-    ln -sf "${INSTALL_DIR}/other/mount.vboxsf" /sbin
+    ln -sf "$lib_path/$PACKAGE/mount.vboxsf" /sbin
     # SELinux security context for the mount helper.
     if test -e /etc/selinux/config; then
         # This is correct.  semanage maps this to the real path, and it aborts
diff --git a/src/VBox/Debugger/DBGCEmulateCodeView.cpp b/src/VBox/Debugger/DBGCEmulateCodeView.cpp
index 1101829..e3c89f1 100644
--- a/src/VBox/Debugger/DBGCEmulateCodeView.cpp
+++ b/src/VBox/Debugger/DBGCEmulateCodeView.cpp
@@ -1167,7 +1167,7 @@ static DECLCALLBACK(int) dbgcCmdUnassemble(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp,
     unsigned fFlags = DBGF_DISAS_FLAGS_NO_ADDRESS | DBGF_DISAS_FLAGS_UNPATCHED_BYTES | DBGF_DISAS_FLAGS_ANNOTATE_PATCHED;
     switch (pCmd->pszCmd[1])
     {
-        default: AssertFailed();
+        default: AssertFailed(); /* fall thru */
         case '\0':  fFlags |= DBGF_DISAS_FLAGS_DEFAULT_MODE;    break;
         case '6':   fFlags |= DBGF_DISAS_FLAGS_64BIT_MODE;      break;
         case '3':   fFlags |= DBGF_DISAS_FLAGS_32BIT_MODE;      break;
diff --git a/src/VBox/Debugger/VBoxDbgStatsQt4.cpp b/src/VBox/Debugger/VBoxDbgStatsQt4.cpp
index 05f3582..decfea8 100644
--- a/src/VBox/Debugger/VBoxDbgStatsQt4.cpp
+++ b/src/VBox/Debugger/VBoxDbgStatsQt4.cpp
@@ -2223,6 +2223,7 @@ VBoxDbgStatsModel::strValueTimes(PCDBGGUISTATSNODE pNode)
 
         default:
             AssertMsgFailed(("%d\n", pNode->enmType));
+            /* fall thru */
         case STAMTYPE_INVALID:
             return "";
     }
diff --git a/src/VBox/Devices/EFI/FirmwareBin/VBoxEFI32.fd b/src/VBox/Devices/EFI/FirmwareBin/VBoxEFI32.fd
index 22feefc..073d562 100644
Binary files a/src/VBox/Devices/EFI/FirmwareBin/VBoxEFI32.fd and b/src/VBox/Devices/EFI/FirmwareBin/VBoxEFI32.fd differ
diff --git a/src/VBox/Devices/EFI/FirmwareBin/VBoxEFI64.fd b/src/VBox/Devices/EFI/FirmwareBin/VBoxEFI64.fd
index 043062a..065ffde 100644
Binary files a/src/VBox/Devices/EFI/FirmwareBin/VBoxEFI64.fd and b/src/VBox/Devices/EFI/FirmwareBin/VBoxEFI64.fd differ
diff --git a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative286.asm b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative286.asm
index 56a8395..6a8dbd4 100644
--- a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative286.asm
+++ b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative286.asm
@@ -7293,7 +7293,7 @@ vesa_pm_end:                                 ; 0xc4514 LB 0x1
 
 section _DATA progbits vstart=0x4600 align=1 ; size=0x371f class=DATA group=DGROUP
 _msg_vga_init:                               ; 0xc4600 LB 0x2f
-    db  'Oracle VM VirtualBox Version 5.1.20 VGA BIOS', 00dh, 00ah, 000h
+    db  'Oracle VM VirtualBox Version 5.1.22 VGA BIOS', 00dh, 00ah, 000h
 _vga_modes:                                  ; 0xc462f LB 0x80
     db  000h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h, 001h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h
     db  002h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h, 003h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h
@@ -8188,7 +8188,7 @@ _vbebios_vendor_name:                        ; 0xc7c73 LB 0x13
 _vbebios_product_name:                       ; 0xc7c86 LB 0x21
     db  'Oracle VM VirtualBox VBE Adapter', 000h
 _vbebios_product_revision:                   ; 0xc7ca7 LB 0x24
-    db  'Oracle VM VirtualBox Version 5.1.20', 000h
+    db  'Oracle VM VirtualBox Version 5.1.22', 000h
 _vbebios_info_string:                        ; 0xc7ccb LB 0x2b
     db  'VirtualBox VBE Display Adapter enabled', 00dh, 00ah, 00dh, 00ah, 000h
 _no_vbebios_info_string:                     ; 0xc7cf6 LB 0x29
@@ -8247,4 +8247,4 @@ section CONST2 progbits vstart=0x7d20 align=1 ; size=0x0 class=DATA group=DGROUP
     db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h
     db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h
     db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h
-    db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 0c6h
+    db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 0c2h
diff --git a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative286.md5sum b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative286.md5sum
index e841572..1c01dbb 100644
--- a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative286.md5sum
+++ b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative286.md5sum
@@ -1 +1 @@
-6891c4f7799156e60a44ed274afed440 *VBoxVgaBios286.rom
+bea8c51c86610c7dd6cb0d7bca5099cd *VBoxVgaBios286.rom
diff --git a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative386.asm b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative386.asm
index 43e78ca..55e747b 100644
--- a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative386.asm
+++ b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative386.asm
@@ -6769,7 +6769,7 @@ vesa_pm_end:                                 ; 0xc4514 LB 0x1
 
 section _DATA progbits vstart=0x4600 align=1 ; size=0x371f class=DATA group=DGROUP
 _msg_vga_init:                               ; 0xc4600 LB 0x2f
-    db  'Oracle VM VirtualBox Version 5.1.20 VGA BIOS', 00dh, 00ah, 000h
+    db  'Oracle VM VirtualBox Version 5.1.22 VGA BIOS', 00dh, 00ah, 000h
 _vga_modes:                                  ; 0xc462f LB 0x80
     db  000h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h, 001h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h
     db  002h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h, 003h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h
@@ -7664,7 +7664,7 @@ _vbebios_vendor_name:                        ; 0xc7c73 LB 0x13
 _vbebios_product_name:                       ; 0xc7c86 LB 0x21
     db  'Oracle VM VirtualBox VBE Adapter', 000h
 _vbebios_product_revision:                   ; 0xc7ca7 LB 0x24
-    db  'Oracle VM VirtualBox Version 5.1.20', 000h
+    db  'Oracle VM VirtualBox Version 5.1.22', 000h
 _vbebios_info_string:                        ; 0xc7ccb LB 0x2b
     db  'VirtualBox VBE Display Adapter enabled', 00dh, 00ah, 00dh, 00ah, 000h
 _no_vbebios_info_string:                     ; 0xc7cf6 LB 0x29
@@ -7723,4 +7723,4 @@ section CONST2 progbits vstart=0x7d20 align=1 ; size=0x0 class=DATA group=DGROUP
     db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h
     db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h
     db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h
-    db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 0bbh
+    db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 0b7h
diff --git a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative386.md5sum b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative386.md5sum
index e044b71..4b18acf 100644
--- a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative386.md5sum
+++ b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative386.md5sum
@@ -1 +1 @@
-e605a5a32b0f6d7f6ac8604817567aad *VBoxVgaBios386.rom
+fb0f61a5bd7fdceb5c41b3a5dca0cd69 *VBoxVgaBios386.rom
diff --git a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative8086.asm b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative8086.asm
index f082654..27ac782 100644
--- a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative8086.asm
+++ b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative8086.asm
@@ -7434,7 +7434,7 @@ vesa_pm_end:                                 ; 0xc4514 LB 0x1
 
 section _DATA progbits vstart=0x4600 align=1 ; size=0x371f class=DATA group=DGROUP
 _msg_vga_init:                               ; 0xc4600 LB 0x2f
-    db  'Oracle VM VirtualBox Version 5.1.20 VGA BIOS', 00dh, 00ah, 000h
+    db  'Oracle VM VirtualBox Version 5.1.22 VGA BIOS', 00dh, 00ah, 000h
 _vga_modes:                                  ; 0xc462f LB 0x80
     db  000h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h, 001h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h
     db  002h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h, 003h, 000h, 000h, 004h, 000h, 0b8h, 0ffh, 002h
@@ -8329,7 +8329,7 @@ _vbebios_vendor_name:                        ; 0xc7c73 LB 0x13
 _vbebios_product_name:                       ; 0xc7c86 LB 0x21
     db  'Oracle VM VirtualBox VBE Adapter', 000h
 _vbebios_product_revision:                   ; 0xc7ca7 LB 0x24
-    db  'Oracle VM VirtualBox Version 5.1.20', 000h
+    db  'Oracle VM VirtualBox Version 5.1.22', 000h
 _vbebios_info_string:                        ; 0xc7ccb LB 0x2b
     db  'VirtualBox VBE Display Adapter enabled', 00dh, 00ah, 00dh, 00ah, 000h
 _no_vbebios_info_string:                     ; 0xc7cf6 LB 0x29
@@ -8388,4 +8388,4 @@ section CONST2 progbits vstart=0x7d20 align=1 ; size=0x0 class=DATA group=DGROUP
     db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h
     db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h
     db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h
-    db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 04dh
+    db  000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 000h, 049h
diff --git a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative8086.md5sum b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative8086.md5sum
index a6ccdeb..f663267 100644
--- a/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative8086.md5sum
+++ b/src/VBox/Devices/Graphics/BIOS/VBoxVgaBiosAlternative8086.md5sum
@@ -1 +1 @@
-653da9f9138edd9e217342f36fd30d82 *VBoxVgaBios8086.rom
+e4f13f730d0128276421351a5c6030fd *VBoxVgaBios8086.rom
diff --git a/src/VBox/Devices/Graphics/DevVGA.cpp b/src/VBox/Devices/Graphics/DevVGA.cpp
index 530f620..3f6b74b 100644
--- a/src/VBox/Devices/Graphics/DevVGA.cpp
+++ b/src/VBox/Devices/Graphics/DevVGA.cpp
@@ -5947,18 +5947,22 @@ static DECLCALLBACK(int) vgaR3Destruct(PPDMDEVINS pDevIns)
         vmsvgaDestruct(pDevIns);
 #endif
 
+#ifdef VBOX_WITH_HGSMI
+    VBVADestroy(pThis);
+#endif
+
     /*
      * Free MM heap pointers.
      */
     if (pThis->pbVBEExtraData)
     {
-        MMR3HeapFree(pThis->pbVBEExtraData);
+        PDMDevHlpMMHeapFree(pDevIns, pThis->pbVBEExtraData);
         pThis->pbVBEExtraData = NULL;
     }
 #endif /* VBE_NEW_DYN_LIST */
     if (pThis->pbVgaBios)
     {
-        MMR3HeapFree(pThis->pbVgaBios);
+        PDMDevHlpMMHeapFree(pDevIns, pThis->pbVgaBios);
         pThis->pbVgaBios = NULL;
     }
 
@@ -5974,6 +5978,12 @@ static DECLCALLBACK(int) vgaR3Destruct(PPDMDEVINS pDevIns)
         pThis->pszLogoFile = NULL;
     }
 
+    if (pThis->pbLogo)
+    {
+        PDMDevHlpMMHeapFree(pDevIns, pThis->pbLogo);
+        pThis->pbLogo = NULL;
+    }
+
     PDMR3CritSectDelete(&pThis->CritSectIRQ);
     PDMR3CritSectDelete(&pThis->CritSect);
     return VINF_SUCCESS;
@@ -6509,7 +6519,7 @@ static DECLCALLBACK(int)   vgaR3Construct(PPDMDEVINS pDevIns, int iInstance, PCF
             if (RT_FAILURE(rc))
             {
                 AssertMsgFailed(("RTFileRead(,,%d,NULL) -> %Rrc\n", pThis->cbVgaBios, rc));
-                MMR3HeapFree(pThis->pbVgaBios);
+                PDMDevHlpMMHeapFree(pDevIns, pThis->pbVgaBios);
                 pThis->pbVgaBios = NULL;
             }
             rc = VINF_SUCCESS;
diff --git a/src/VBox/Devices/Input/UsbKbd.cpp b/src/VBox/Devices/Input/UsbKbd.cpp
index dee79b5..b290218 100644
--- a/src/VBox/Devices/Input/UsbKbd.cpp
+++ b/src/VBox/Devices/Input/UsbKbd.cpp
@@ -1114,6 +1114,7 @@ static DECLCALLBACK(int) usbHidQueue(PPDMUSBINS pUsbIns, PVUSBURB pUrb)
 
         case 0x81:
             AssertFailed();
+            /* fall thru */
         case 0x01:
             rc = usbHidHandleIntrDevToHost(pThis, &pThis->aEps[1], pUrb);
             break;
diff --git a/src/VBox/Devices/Input/UsbMouse.cpp b/src/VBox/Devices/Input/UsbMouse.cpp
index 1430d8e..190739e 100644
--- a/src/VBox/Devices/Input/UsbMouse.cpp
+++ b/src/VBox/Devices/Input/UsbMouse.cpp
@@ -2169,6 +2169,7 @@ static DECLCALLBACK(int) usbHidQueue(PPDMUSBINS pUsbIns, PVUSBURB pUrb)
 
         case 0x81:
             AssertFailed();
+            /* fall thru */
         case 0x01:
             rc = usbHidHandleIntrDevToHost(pThis, &pThis->aEps[1], pUrb);
             break;
diff --git a/src/VBox/Devices/Network/DrvIntNet.cpp b/src/VBox/Devices/Network/DrvIntNet.cpp
index 5f08394..c5e71a1 100644
--- a/src/VBox/Devices/Network/DrvIntNet.cpp
+++ b/src/VBox/Devices/Network/DrvIntNet.cpp
@@ -574,6 +574,7 @@ static DECLCALLBACK(void) drvR3IntNetUp_NotifyLinkChanged(PPDMINETWORKUP pInterf
             break;
         default:
             AssertMsgFailed(("enmLinkState=%d\n", enmLinkState));
+            /* fall thru */
         case PDMNETWORKLINKSTATE_UP:
             fLinkDown = false;
             break;
@@ -918,6 +919,7 @@ static DECLCALLBACK(int) drvR3IntNetRecvThread(RTTHREAD hThreadSelf, void *pvUse
 
             default:
                 AssertMsgFailed(("Invalid state %d\n", enmRecvState));
+                /* fall thru */
             case RECVSTATE_TERMINATE:
                 LogFlow(("drvR3IntNetRecvThread: returns VINF_SUCCESS\n"));
                 return VINF_SUCCESS;
diff --git a/src/VBox/Devices/Network/DrvUDPTunnel.cpp b/src/VBox/Devices/Network/DrvUDPTunnel.cpp
index 3048e01..bac9e8b 100644
--- a/src/VBox/Devices/Network/DrvUDPTunnel.cpp
+++ b/src/VBox/Devices/Network/DrvUDPTunnel.cpp
@@ -292,6 +292,7 @@ static DECLCALLBACK(void) drvUDPTunnelUp_NotifyLinkChanged(PPDMINETWORKUP pInter
             break;
         default:
             AssertMsgFailed(("enmLinkState=%d\n", enmLinkState));
+            /* fall thru */
         case PDMNETWORKLINKSTATE_UP:
             fLinkDown = false;
             break;
diff --git a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative286.asm b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative286.asm
index d2d5353..b252fff 100644
--- a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative286.asm
+++ b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative286.asm
@@ -1115,7 +1115,7 @@ section CONST progbits vstart=0xb0 align=1 ; size=0xcde class=DATA group=DGROUP
 
 section CONST2 progbits vstart=0xd8e align=1 ; size=0x3fa class=DATA group=DGROUP
 _bios_cvs_version_string:                    ; 0xf0d8e LB 0x12
-    db  'VirtualBox 5.1.20', 000h
+    db  'VirtualBox 5.1.22', 000h
 _bios_prefix_string:                         ; 0xf0da0 LB 0x8
     db  'BIOS: ', 000h, 000h
 _isotag:                                     ; 0xf0da8 LB 0x6
@@ -17807,4 +17807,4 @@ biosorg_check_before_or_at_0FFEEh:           ; 0xfff80 LB 0x70
     db  'XM'
 cpu_reset:                                   ; 0xffff0 LB 0x10
     jmp far 0f000h:0e05bh                     ; ea 5b e0 00 f0
-    db  030h, 036h, 02fh, 032h, 033h, 02fh, 039h, 039h, 000h, 0fch, 00ah
+    db  030h, 036h, 02fh, 032h, 033h, 02fh, 039h, 039h, 000h, 0fch, 008h
diff --git a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative286.md5sum b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative286.md5sum
index d258d6e..96963e2 100644
--- a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative286.md5sum
+++ b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative286.md5sum
@@ -1 +1 @@
-8b2d1b43924d849f5dcb014fe3e3d3cf *VBoxPcBios286.rom
+9ebf26bea54c23402ea94fc80c406a72 *VBoxPcBios286.rom
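
In the VBoxBiosAlternative*.asm hunks the only source change is the version string bump from 'VirtualBox 5.1.20' to 'VirtualBox 5.1.22'. The final db byte of each image moves by the opposite amount ('0' -> '2' adds 2; 0ah -> 08h, 95h -> 93h and 0a3h -> 0a1h each subtract 2), which is consistent with a trailing byte that keeps the 8-bit sum of the ROM constant; the .md5sum files are regenerated to match. A sketch of how such a compensation byte could be computed, assuming a fixed target sum of 0 (both the helper and the target value are assumptions, not taken from the diff):

    #include <stddef.h>
    #include <stdint.h>

    /* Return the value for the last ROM byte so that the 8-bit sum
     * of the whole image equals the (assumed) target of zero. */
    static uint8_t checksum_fixup(const uint8_t *rom, size_t cbRomMinusOne)
    {
        uint8_t sum = 0;
        for (size_t i = 0; i < cbRomMinusOne; i++)
            sum += rom[i];
        return (uint8_t)(0 - sum); /* assumed target sum: 0 */
    }
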
diff --git a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative386.asm b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative386.asm
index b23684e..945672e 100644
--- a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative386.asm
+++ b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative386.asm
@@ -1078,7 +1078,7 @@ section CONST progbits vstart=0xb0 align=1 ; size=0xcde class=DATA group=DGROUP
 
 section CONST2 progbits vstart=0xd8e align=1 ; size=0x3fa class=DATA group=DGROUP
 _bios_cvs_version_string:                    ; 0xf0d8e LB 0x12
-    db  'VirtualBox 5.1.20', 000h
+    db  'VirtualBox 5.1.22', 000h
 _bios_prefix_string:                         ; 0xf0da0 LB 0x8
     db  'BIOS: ', 000h, 000h
 _isotag:                                     ; 0xf0da8 LB 0x6
@@ -17198,4 +17198,4 @@ biosorg_check_before_or_at_0FFEEh:           ; 0xfff80 LB 0x70
     db  'XM'
 cpu_reset:                                   ; 0xffff0 LB 0x10
     jmp far 0f000h:0e05bh                     ; ea 5b e0 00 f0
-    db  030h, 036h, 02fh, 032h, 033h, 02fh, 039h, 039h, 000h, 0fch, 095h
+    db  030h, 036h, 02fh, 032h, 033h, 02fh, 039h, 039h, 000h, 0fch, 093h
diff --git a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative386.md5sum b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative386.md5sum
index 26f4bd5..8e5c896 100644
--- a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative386.md5sum
+++ b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative386.md5sum
@@ -1 +1 @@
-28775258585bacb3bb0b986ed3a422c3 *VBoxPcBios386.rom
+c94600ef288bf0ea835c5f7a6e2d1a6d *VBoxPcBios386.rom
diff --git a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative8086.asm b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative8086.asm
index f15e31f..caa5a81 100644
--- a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative8086.asm
+++ b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative8086.asm
@@ -1115,7 +1115,7 @@ section CONST progbits vstart=0xb0 align=1 ; size=0xcde class=DATA group=DGROUP
 
 section CONST2 progbits vstart=0xd8e align=1 ; size=0x3fa class=DATA group=DGROUP
 _bios_cvs_version_string:                    ; 0xf0d8e LB 0x12
-    db  'VirtualBox 5.1.20', 000h
+    db  'VirtualBox 5.1.22', 000h
 _bios_prefix_string:                         ; 0xf0da0 LB 0x8
     db  'BIOS: ', 000h, 000h
 _isotag:                                     ; 0xf0da8 LB 0x6
@@ -18276,4 +18276,4 @@ biosorg_check_before_or_at_0FFEEh:           ; 0xfff80 LB 0x70
     db  'XM'
 cpu_reset:                                   ; 0xffff0 LB 0x10
     jmp far 0f000h:0e05bh                     ; ea 5b e0 00 f0
-    db  030h, 036h, 02fh, 032h, 033h, 02fh, 039h, 039h, 000h, 0fbh, 0a3h
+    db  030h, 036h, 02fh, 032h, 033h, 02fh, 039h, 039h, 000h, 0fbh, 0a1h
diff --git a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative8086.md5sum b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative8086.md5sum
index 582dd0f..ef0b631 100644
--- a/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative8086.md5sum
+++ b/src/VBox/Devices/PC/BIOS/VBoxBiosAlternative8086.md5sum
@@ -1 +1 @@
-5c70bcfd78c9a888eecad7ab74e791ad *VBoxPcBios8086.rom
+377af5e9f379e0964bea3ae962495fb5 *VBoxPcBios8086.rom
diff --git a/src/VBox/Devices/Storage/DevBusLogic.cpp b/src/VBox/Devices/Storage/DevBusLogic.cpp
index 2856f63..53bc5b0 100644
--- a/src/VBox/Devices/Storage/DevBusLogic.cpp
+++ b/src/VBox/Devices/Storage/DevBusLogic.cpp
@@ -1745,6 +1745,7 @@ static int buslogicProcessCommand(PBUSLOGIC pBusLogic)
             break;
 #else
             AssertMsgFailed(("Must never get here!\n"));
+            break;
 #endif
         }
         case BUSLOGICCOMMAND_INQUIRE_BOARD_ID:
@@ -2094,6 +2095,7 @@ static int buslogicProcessCommand(PBUSLOGIC pBusLogic)
         }
         default:
             AssertMsgFailed(("Invalid command %#x\n", pBusLogic->uOperationCode));
+            /* fall thru */
         case BUSLOGICCOMMAND_EXT_BIOS_INFO:
         case BUSLOGICCOMMAND_UNLOCK_MAILBOX:
             /* Commands valid for Adaptec 154xC which we don't handle since
diff --git a/src/VBox/Devices/Storage/DevFdc.cpp b/src/VBox/Devices/Storage/DevFdc.cpp
index 7972fd9..2efa6d2 100644
--- a/src/VBox/Devices/Storage/DevFdc.cpp
+++ b/src/VBox/Devices/Storage/DevFdc.cpp
@@ -199,6 +199,7 @@ static void fd_init(fdrive_t *drv, bool fInit)
                     break;
                 default:
                     AssertFailed();
+                    /* fall thru */
                 case PDMMEDIATYPE_FLOPPY_2_88:
                     drv->drive = FDRIVE_DRV_288;
                     break;
diff --git a/src/VBox/Devices/Storage/DrvVD.cpp b/src/VBox/Devices/Storage/DrvVD.cpp
index b9f284d..7a6f781 100644
--- a/src/VBox/Devices/Storage/DrvVD.cpp
+++ b/src/VBox/Devices/Storage/DrvVD.cpp
@@ -5144,6 +5144,7 @@ static DECLCALLBACK(int) drvvdConstruct(PPDMDRVINS pDrvIns, PCFGMNODE pCfg, uint
             {
                 default:
                     AssertFailed();
+                    /* fall thru */
                 case PDMMEDIATYPE_FLOPPY_360:
                     if (cbFloppyImg > 40 * 2 * 9 * 512)
                         pThis->enmType = PDMMEDIATYPE_FLOPPY_720;
diff --git a/src/VBox/Devices/Storage/UsbMsd.cpp b/src/VBox/Devices/Storage/UsbMsd.cpp
index b07d097..da0c021 100644
--- a/src/VBox/Devices/Storage/UsbMsd.cpp
+++ b/src/VBox/Devices/Storage/UsbMsd.cpp
@@ -2018,6 +2018,7 @@ static DECLCALLBACK(int) usbMsdQueue(PPDMUSBINS pUsbIns, PVUSBURB pUrb)
 
         case 0x81:
             AssertFailed();
+            /* fall thru */
         case 0x01:
             rc = usbMsdHandleBulkDevToHost(pThis, &pThis->aEps[1], pUrb);
             break;
diff --git a/src/VBox/Devices/USB/DevOHCI.cpp b/src/VBox/Devices/USB/DevOHCI.cpp
index d437987..e687745 100644
--- a/src/VBox/Devices/USB/DevOHCI.cpp
+++ b/src/VBox/Devices/USB/DevOHCI.cpp
@@ -2664,6 +2664,7 @@ static void ohciRhXferCompleteGeneralURB(POHCI pThis, PVUSBURB pUrb, POHCIED pEd
                     break;
                 default: /* what the hell */
                     Log(("pUrb->enmStatus=%#x!!!\n", pUrb->enmStatus));
+                    /* fall thru */
                 case VUSBSTATUS_DNR:
                     pTd->hwinfo |= OHCI_CC_DNR;
                     break;
diff --git a/src/VBox/Devices/USB/VUSBUrb.cpp b/src/VBox/Devices/USB/VUSBUrb.cpp
index 0d57e35..86ed754 100644
--- a/src/VBox/Devices/USB/VUSBUrb.cpp
+++ b/src/VBox/Devices/USB/VUSBUrb.cpp
@@ -1388,6 +1388,7 @@ DECLHIDDEN(int) vusbUrbCancelWorker(PVUSBURB pUrb, CANCELMODE enmMode)
         {
             default:
                 AssertMsgFailed(("Invalid cancel mode\n"));
+                /* fall thru */
             case CANCELMODE_FAIL:
                 pUrb->enmStatus = VUSBSTATUS_CRC;
                 break;
diff --git a/src/VBox/Disassembler/DisasmTables.cpp b/src/VBox/Disassembler/DisasmTables.cpp
index 2c871b4..c8406de 100644
--- a/src/VBox/Disassembler/DisasmTables.cpp
+++ b/src/VBox/Disassembler/DisasmTables.cpp
@@ -85,56 +85,56 @@ const DISOPCODE g_InvalidOpcode[1] =
 const DISOPCODE g_aOneByteMapX86[256] =
 {
     /* 0 */
-    OP("add %Eb,%Gb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADD,     OP_PARM_Eb,         OP_PARM_Gb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("add %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADD,     OP_PARM_Ev,         OP_PARM_Gv ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("add %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADD,     OP_PARM_Gb,         OP_PARM_Eb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("add %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADD,     OP_PARM_Gv,         OP_PARM_Ev ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("add AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte,0,         OP_ADD,     OP_PARM_REG_AL,     OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("add %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_ADD,     OP_PARM_REG_EAX,    OP_PARM_Iz ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("push ES",            IDX_ParseFixedReg,  0,          0,              OP_PUSH,    OP_PARM_REG_ES,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
-    OP("pop ES",             IDX_ParseFixedReg,  0,          0,              OP_POP,     OP_PARM_REG_ES,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
-    OP("or %Eb,%Gb",         IDX_ParseModRM,     IDX_UseModRM,   0,          OP_OR,      OP_PARM_Eb,         OP_PARM_Gb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("or %Ev,%Gv",         IDX_ParseModRM,     IDX_UseModRM,   0,          OP_OR,      OP_PARM_Ev,         OP_PARM_Gv ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("or %Gb,%Eb",         IDX_ParseModRM,     IDX_UseModRM,   0,          OP_OR,      OP_PARM_Gb,         OP_PARM_Eb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("or %Gv,%Ev",         IDX_ParseModRM,     IDX_UseModRM,   0,          OP_OR,      OP_PARM_Gv,         OP_PARM_Ev ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("or AL,%Ib",          IDX_ParseFixedReg,  IDX_ParseImmByte, 0,        OP_OR,      OP_PARM_REG_AL,     OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("or %eAX,%Iz",        IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_OR,      OP_PARM_REG_EAX,    OP_PARM_Iz ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("push CS",            IDX_ParseFixedReg,  0,          0,              OP_PUSH,    OP_PARM_REG_CS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_POTENTIALLY_DANGEROUS | DISOPTYPE_INVALID_64),
-    OP("2-BYTE ESCAPE",      IDX_ParseTwoByteEsc,0,          0,              OP_2B_ESC,  OP_PARM_NONE,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("add %Eb,%Gb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADD,     OP_PARM_Eb,         OP_PARM_Gb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("add %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADD,     OP_PARM_Ev,         OP_PARM_Gv,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("add %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADD,     OP_PARM_Gb,         OP_PARM_Eb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("add %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADD,     OP_PARM_Gv,         OP_PARM_Ev,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("add AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte, 0,        OP_ADD,     OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("add %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_ADD,     OP_PARM_REG_EAX,    OP_PARM_Iz,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("push ES",            IDX_ParseFixedReg,  0,              0,          OP_PUSH,    OP_PARM_REG_ES,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
+    OP("pop ES",             IDX_ParseFixedReg,  0,              0,          OP_POP,     OP_PARM_REG_ES,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
+    OP("or %Eb,%Gb",         IDX_ParseModRM,     IDX_UseModRM,   0,          OP_OR,      OP_PARM_Eb,         OP_PARM_Gb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("or %Ev,%Gv",         IDX_ParseModRM,     IDX_UseModRM,   0,          OP_OR,      OP_PARM_Ev,         OP_PARM_Gv,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("or %Gb,%Eb",         IDX_ParseModRM,     IDX_UseModRM,   0,          OP_OR,      OP_PARM_Gb,         OP_PARM_Eb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("or %Gv,%Ev",         IDX_ParseModRM,     IDX_UseModRM,   0,          OP_OR,      OP_PARM_Gv,         OP_PARM_Ev,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("or AL,%Ib",          IDX_ParseFixedReg,  IDX_ParseImmByte, 0,        OP_OR,      OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("or %eAX,%Iz",        IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_OR,      OP_PARM_REG_EAX,    OP_PARM_Iz,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("push CS",            IDX_ParseFixedReg,  0,              0,          OP_PUSH,    OP_PARM_REG_CS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64 | DISOPTYPE_POTENTIALLY_DANGEROUS),
+    OP("EscTwo0f",           IDX_ParseTwoByteEsc, 0,             0,          OP_2B_ESC,  OP_PARM_NONE,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* 1 */
-    OP("adc %Eb,%Gb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADC,     OP_PARM_Eb,         OP_PARM_Gb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("adc %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADC,     OP_PARM_Ev,         OP_PARM_Gv ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("adc %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADC,     OP_PARM_Gb,         OP_PARM_Eb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("adc %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADC,     OP_PARM_Gv,         OP_PARM_Ev ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("adc AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte,0,         OP_ADC,     OP_PARM_REG_AL,     OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("adc %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_ADC,     OP_PARM_REG_EAX,    OP_PARM_Iz ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("push SS",            IDX_ParseFixedReg,  0,          0,              OP_PUSH,    OP_PARM_REG_SS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_RRM_DANGEROUS | DISOPTYPE_INVALID_64),
-    OP("pop SS",             IDX_ParseFixedReg,  0,          0,              OP_POP,     OP_PARM_REG_SS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_RRM_DANGEROUS | DISOPTYPE_INHIBIT_IRQS | DISOPTYPE_INVALID_64),
-    OP("sbb %Eb,%Gb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SBB,     OP_PARM_Eb,         OP_PARM_Gb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sbb %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SBB,     OP_PARM_Ev,         OP_PARM_Gv ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sbb %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SBB,     OP_PARM_Gb,         OP_PARM_Eb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sbb %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SBB,     OP_PARM_Gv,         OP_PARM_Ev ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sbb AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte,0,         OP_SBB,     OP_PARM_REG_AL,     OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sbb %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_SBB,     OP_PARM_REG_EAX,    OP_PARM_Iz ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("push DS",            IDX_ParseFixedReg,  0,          0,              OP_PUSH,    OP_PARM_REG_DS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
-    OP("pop DS",             IDX_ParseFixedReg,  0,          0,              OP_POP,     OP_PARM_REG_DS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_RRM_DANGEROUS | DISOPTYPE_INVALID_64),
+    OP("adc %Eb,%Gb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADC,     OP_PARM_Eb,         OP_PARM_Gb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("adc %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADC,     OP_PARM_Ev,         OP_PARM_Gv,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("adc %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADC,     OP_PARM_Gb,         OP_PARM_Eb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("adc %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_ADC,     OP_PARM_Gv,         OP_PARM_Ev,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("adc AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte, 0,        OP_ADC,     OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("adc %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_ADC,     OP_PARM_REG_EAX,    OP_PARM_Iz,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("push SS",            IDX_ParseFixedReg,  0,              0,          OP_PUSH,    OP_PARM_REG_SS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64 | DISOPTYPE_RRM_DANGEROUS),
+    OP("pop SS",             IDX_ParseFixedReg,  0,              0,          OP_POP,     OP_PARM_REG_SS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INHIBIT_IRQS | DISOPTYPE_INVALID_64 | DISOPTYPE_RRM_DANGEROUS),
+    OP("sbb %Eb,%Gb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SBB,     OP_PARM_Eb,         OP_PARM_Gb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sbb %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SBB,     OP_PARM_Ev,         OP_PARM_Gv,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sbb %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SBB,     OP_PARM_Gb,         OP_PARM_Eb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sbb %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SBB,     OP_PARM_Gv,         OP_PARM_Ev,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sbb AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte, 0,        OP_SBB,     OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sbb %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_SBB,     OP_PARM_REG_EAX,    OP_PARM_Iz,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("push DS",            IDX_ParseFixedReg,  0,              0,          OP_PUSH,    OP_PARM_REG_DS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
+    OP("pop DS",             IDX_ParseFixedReg,  0,              0,          OP_POP,     OP_PARM_REG_DS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64 | DISOPTYPE_RRM_DANGEROUS),
 
     /* 2 */
-    OP("and %Eb,%Gb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_AND,     OP_PARM_Eb,         OP_PARM_Gb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("and %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_AND,     OP_PARM_Ev,         OP_PARM_Gv ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("and %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_AND,     OP_PARM_Gb,         OP_PARM_Eb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("and %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_AND,     OP_PARM_Gv,         OP_PARM_Ev ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("and AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte,0,         OP_AND,     OP_PARM_REG_AL,     OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("and %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_AND,     OP_PARM_REG_EAX,    OP_PARM_Iz ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("and %Eb,%Gb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_AND,     OP_PARM_Eb,         OP_PARM_Gb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("and %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_AND,     OP_PARM_Ev,         OP_PARM_Gv,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("and %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_AND,     OP_PARM_Gb,         OP_PARM_Eb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("and %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_AND,     OP_PARM_Gv,         OP_PARM_Ev,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("and AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte, 0,        OP_AND,     OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("and %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_AND,     OP_PARM_REG_EAX,    OP_PARM_Iz,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("SEG ES",             0,                  0,              0,          OP_SEG,     OP_PARM_REG_ES,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("daa",                0,                  0,              0,          OP_DAA,     OP_PARM_NONE,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
-    OP("sub %Eb,%Gb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SUB,     OP_PARM_Eb,         OP_PARM_Gb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sub %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SUB,     OP_PARM_Ev,         OP_PARM_Gv ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sub %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SUB,     OP_PARM_Gb,         OP_PARM_Eb ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sub %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SUB,     OP_PARM_Gv,         OP_PARM_Ev ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sub AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte,0,         OP_SUB,     OP_PARM_REG_AL,     OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sub %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_SUB,     OP_PARM_REG_EAX,    OP_PARM_Iz ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sub %Eb,%Gb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SUB,     OP_PARM_Eb,         OP_PARM_Gb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sub %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SUB,     OP_PARM_Ev,         OP_PARM_Gv,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sub %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SUB,     OP_PARM_Gb,         OP_PARM_Eb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sub %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_SUB,     OP_PARM_Gv,         OP_PARM_Ev,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sub AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte, 0,        OP_SUB,     OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sub %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_SUB,     OP_PARM_REG_EAX,    OP_PARM_Iz,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     /* Branch not taken hint prefix for branches on a Pentium 4 or Xeon CPU (or higher)! */
     OP("SEG CS",             0,                  0,              0,          OP_SEG,     OP_PARM_REG_CS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("das",                0,                  0,              0,          OP_DAS,     OP_PARM_NONE,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
@@ -144,7 +144,7 @@ const DISOPCODE g_aOneByteMapX86[256] =
     OP("xor %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_XOR,     OP_PARM_Ev,         OP_PARM_Gv,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("xor %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_XOR,     OP_PARM_Gb,         OP_PARM_Eb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("xor %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_XOR,     OP_PARM_Gv,         OP_PARM_Ev,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("xor AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte,0,         OP_XOR,     OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("xor AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte, 0,        OP_XOR,     OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("xor %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_XOR,     OP_PARM_REG_EAX,    OP_PARM_Iz,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("SEG SS",             0,                  0,              0,          OP_SEG,     OP_PARM_REG_SS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("aaa",                0,                  0,              0,          OP_AAA,     OP_PARM_NONE,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
@@ -152,7 +152,7 @@ const DISOPCODE g_aOneByteMapX86[256] =
     OP("cmp %Ev,%Gv",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_CMP,     OP_PARM_Ev,         OP_PARM_Gv,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("cmp %Gb,%Eb",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_CMP,     OP_PARM_Gb,         OP_PARM_Eb,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("cmp %Gv,%Ev",        IDX_ParseModRM,     IDX_UseModRM,   0,          OP_CMP,     OP_PARM_Gv,         OP_PARM_Ev,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("cmp AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte,0,         OP_CMP,     OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("cmp AL,%Ib",         IDX_ParseFixedReg,  IDX_ParseImmByte, 0,        OP_CMP,     OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("cmp %eAX,%Iz",       IDX_ParseFixedReg,  IDX_ParseImmZ,  0,          OP_CMP,     OP_PARM_REG_EAX,    OP_PARM_Iz,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     /* Branch not taken hint prefix for branches on a Pentium 4 or Xeon CPU (or higher)! */
     OP("SEG DS",             0,                  0,              0,          OP_SEG,     OP_PARM_REG_DS,     OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
@@ -177,22 +177,22 @@ const DISOPCODE g_aOneByteMapX86[256] =
     OP("dec %eDI",           IDX_ParseFixedReg,  0,          0,          OP_DEC,     OP_PARM_REG_EDI,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* 5 */
-    OP("push %eAX",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_EAX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("push %eCX",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_ECX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("push %eDX",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_EDX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("push %eBX",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_EBX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("push %eSP",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_ESP,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("push %eBP",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_EBP,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("push %eSI",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_ESI,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("push %eDI",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_EDI,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("pop %eAX",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_EAX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("pop %eCX",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_ECX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("pop %eDX",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_EDX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("pop %eBX",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_EBX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("pop %eSP",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_ESP,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("pop %eBP",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_EBP,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("pop %eSI",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_ESI,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
-    OP("pop %eDI",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_EDI,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("push %eAX",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_EAX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("push %eCX",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_ECX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("push %eDX",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_EDX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("push %eBX",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_EBX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("push %eSP",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_ESP,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("push %eBP",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_EBP,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("push %eSI",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_ESI,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("push %eDI",          IDX_ParseFixedReg,  0,          0,          OP_PUSH,    OP_PARM_REG_EDI,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("pop %eAX",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_EAX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("pop %eCX",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_ECX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("pop %eDX",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_EDX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("pop %eBX",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_EBX,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("pop %eSP",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_ESP,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("pop %eBP",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_EBP,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("pop %eSI",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_ESI,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
+    OP("pop %eDI",           IDX_ParseFixedReg,  0,          0,          OP_POP,     OP_PARM_REG_EDI,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_HARMLESS | DISOPTYPE_REXB_EXTENDS_OPREG),
 
     /* 6 */
     OP("pusha",              0,                  0,                 0,                  OP_PUSHA,   OP_PARM_NONE,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
@@ -214,22 +214,22 @@ const DISOPCODE g_aOneByteMapX86[256] =
 
 
     /* 7 */
-    OP("jo %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JO,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jno %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNO,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jc %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JC,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jnc %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNC,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("je %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JE,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jne %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNE,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jbe %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JBE,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jnbe %Jb",           IDX_ParseImmBRel,   0,          0,          OP_JNBE,    OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("js %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JS,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jns %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNS,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jp %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JP,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jnp %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNP,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jl %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JL,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jnl %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNL,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jle %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JLE,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("jnle %Jb",           IDX_ParseImmBRel,   0,          0,          OP_JNLE,    OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_RELATIVE_CONTROLFLOW | DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
+    OP("jo %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JO,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jno %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNO,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jc %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JC,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jnc %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNC,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("je %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JE,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jne %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNE,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jbe %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JBE,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jnbe %Jb",           IDX_ParseImmBRel,   0,          0,          OP_JNBE,    OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("js %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JS,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jns %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNS,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jp %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JP,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jnp %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNP,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jl %Jb",             IDX_ParseImmBRel,   0,          0,          OP_JL,      OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jnl %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JNL,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jle %Jb",            IDX_ParseImmBRel,   0,          0,          OP_JLE,     OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("jnle %Jb",           IDX_ParseImmBRel,   0,          0,          OP_JNLE,    OP_PARM_Jb  ,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
 
     /* 8 */
     OP("Imm Grp1 %Eb,%Ib",   IDX_ParseImmGrpl,   0,             0,          OP_IMM_GRP1,OP_PARM_Eb,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
@@ -262,8 +262,8 @@ const DISOPCODE g_aOneByteMapX86[256] =
     OP("cwd",                       0,                  0,                  0,      OP_CWD,     OP_PARM_NONE,       OP_PARM_NONE,       OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("call %Ap",                  IDX_ParseImmAddrF,  0,                  0,      OP_CALL,    OP_PARM_Ap,         OP_PARM_NONE,       OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_INVALID_64),
     OP("wait",                      0,                  0,                  0,      OP_WAIT,    OP_PARM_NONE,       OP_PARM_NONE,       OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("pushf %Fv",                 0,                  0,                  0,      OP_PUSHF,   OP_PARM_Fv,         OP_PARM_NONE,       OP_PARM_NONE,   DISOPTYPE_POTENTIALLY_DANGEROUS | DISOPTYPE_DEFAULT_64_OP_SIZE),
-    OP("popf %Fv",                  0,                  0,                  0,      OP_POPF,    OP_PARM_Fv,         OP_PARM_NONE,       OP_PARM_NONE,   DISOPTYPE_POTENTIALLY_DANGEROUS | DISOPTYPE_DEFAULT_64_OP_SIZE),
+    OP("pushf %Fv",                 0,                  0,                  0,      OP_PUSHF,   OP_PARM_Fv,         OP_PARM_NONE,       OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_POTENTIALLY_DANGEROUS),
+    OP("popf %Fv",                  0,                  0,                  0,      OP_POPF,    OP_PARM_Fv,         OP_PARM_NONE,       OP_PARM_NONE,   DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_POTENTIALLY_DANGEROUS),
     OP("sahf",                      0,                  0,                  0,      OP_SAHF,    OP_PARM_NONE,       OP_PARM_NONE,       OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("lahf",                      0,                  0,                  0,      OP_LAHF,    OP_PARM_NONE,       OP_PARM_NONE,       OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
@@ -311,7 +311,7 @@ const DISOPCODE g_aOneByteMapX86[256] =
     OP("retn %Iw",           IDX_ParseImmUshort, 0,                 0,          OP_RETN,        OP_PARM_Iw,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_UNCOND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
     OP("retn",               0,                  0,                 0,          OP_RETN,        OP_PARM_NONE,    OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_UNCOND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
     OP("les %Gv,%Mp",        IDX_ParseModRM,     IDX_UseModRM,      0,          OP_LES,         OP_PARM_Gv,      OP_PARM_Mp,     OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64),
-    OP("lds %Gv,%Mp",        IDX_ParseModRM,     IDX_UseModRM,      0,          OP_LDS,         OP_PARM_Gv,      OP_PARM_Mp,     OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_RRM_DANGEROUS | DISOPTYPE_INVALID_64),
+    OP("lds %Gv,%Mp",        IDX_ParseModRM,     IDX_UseModRM,      0,          OP_LDS,         OP_PARM_Gv,      OP_PARM_Mp,     OP_PARM_NONE,   DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64 | DISOPTYPE_RRM_DANGEROUS),
     /** @todo these two are actually group11 */
     OP("mov %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,  0,          OP_MOV,         OP_PARM_Eb,      OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("mov %Ev,%Iz",        IDX_ParseModRM,     IDX_ParseImmZ,     0,          OP_MOV,         OP_PARM_Ev,      OP_PARM_Iz,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
@@ -345,10 +345,10 @@ const DISOPCODE g_aOneByteMapX86[256] =
 
 
     /* E */
-    OP("loopne %Jb",         IDX_ParseImmBRel,   0,                 0,          OP_LOOPNE,  OP_PARM_Jb,         OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW|DISOPTYPE_RELATIVE_CONTROLFLOW|DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("loope %Jb",          IDX_ParseImmBRel,   0,                 0,          OP_LOOPE,   OP_PARM_Jb,         OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW|DISOPTYPE_RELATIVE_CONTROLFLOW|DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("loop %Jb",           IDX_ParseImmBRel,   0,                 0,          OP_LOOP,    OP_PARM_Jb,         OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW|DISOPTYPE_RELATIVE_CONTROLFLOW|DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
-    OP("j(e)cxz %Jb",        IDX_ParseImmBRel,   0,                 0,          OP_JECXZ,   OP_PARM_Jb,         OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW|DISOPTYPE_RELATIVE_CONTROLFLOW|DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE),
+    OP("loopne %Jb",         IDX_ParseImmBRel,   0,                 0,          OP_LOOPNE,  OP_PARM_Jb,         OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("loope %Jb",          IDX_ParseImmBRel,   0,                 0,          OP_LOOPE,   OP_PARM_Jb,         OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("loop %Jb",           IDX_ParseImmBRel,   0,                 0,          OP_LOOP,    OP_PARM_Jb,         OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
+    OP("j(e)cxz %Jb",        IDX_ParseImmBRel,   0,                 0,          OP_JECXZ,   OP_PARM_Jb,         OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_COND_CONTROLFLOW | DISOPTYPE_CONTROLFLOW | DISOPTYPE_FORCED_64_OP_SIZE | DISOPTYPE_RELATIVE_CONTROLFLOW),
     OP("in AL,%Ib",          IDX_ParseFixedReg,  IDX_ParseImmByte,  0,          OP_IN,      OP_PARM_REG_AL,     OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_PORTIO | DISOPTYPE_PRIVILEGED | DISOPTYPE_PORTIO_READ),
     OP("in %eAX,%Ib",        IDX_ParseFixedReg,  IDX_ParseImmByte,  0,          OP_IN,      OP_PARM_REG_EAX,    OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_PORTIO | DISOPTYPE_PRIVILEGED | DISOPTYPE_PORTIO_READ),
     OP("out %Ib,AL",         IDX_ParseImmByte,   IDX_ParseFixedReg, 0,          OP_OUT,     OP_PARM_Ib,         OP_PARM_REG_AL, OP_PARM_NONE,   DISOPTYPE_PORTIO | DISOPTYPE_PRIVILEGED | DISOPTYPE_PORTIO_WRITE),
@@ -458,9 +458,9 @@ const DISOPCODE g_aTwoByteMapX86[256] =
     OP("sysexit",            0,              0,          0,          OP_SYSEXIT, OP_PARM_NONE,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_CONTROLFLOW | DISOPTYPE_UNCOND_CONTROLFLOW),
     INVALID_OPCODE,
     OP("getsec",             0,              0,          0,          OP_GETSEC,  OP_PARM_NONE,       OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("3 byte escape A4",   IDX_ParseThreeByteEsc4,0,              0,        OP_3B_ESC4,  OP_PARM_NONE,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("3 byte escape A4",   IDX_ParseThreeByteEsc4, 0,             0,        OP_3B_ESC4,  OP_PARM_NONE,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE,
-    OP("3 byte escape A5",   IDX_ParseThreeByteEsc5,0,              0,        OP_3B_ESC5,  OP_PARM_NONE,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("3 byte escape A5",   IDX_ParseThreeByteEsc5, 0,             0,        OP_3B_ESC5,  OP_PARM_NONE,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE,
     /* SSE2 */
     INVALID_OPCODE,
@@ -737,9 +737,9 @@ const DISOPCODE g_aTwoByteMapX86_PF66[256] =
     INVALID_OPCODE,
     INVALID_OPCODE,
     INVALID_OPCODE,
-    OP("3 byte escape A4",   IDX_ParseThreeByteEsc4,0,              0,        OP_3B_ESC4,  OP_PARM_NONE,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("3 byte escape A4",   IDX_ParseThreeByteEsc4, 0,             0,        OP_3B_ESC4,  OP_PARM_NONE,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE,
-    OP("3 byte escape A5",   IDX_ParseThreeByteEsc5,0,              0,        OP_3B_ESC5,  OP_PARM_NONE,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("3 byte escape A5",   IDX_ParseThreeByteEsc5, 0,             0,        OP_3B_ESC5,  OP_PARM_NONE,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE,
     INVALID_OPCODE,
     INVALID_OPCODE,
@@ -939,7 +939,7 @@ const DISOPCODE g_aTwoByteMapX86_PFF2[256] =
     INVALID_OPCODE,
     INVALID_OPCODE,
     INVALID_OPCODE,
-    OP("3 byte escape A4",   IDX_ParseThreeByteEsc4,0,              0,        OP_3B_ESC4,  OP_PARM_NONE,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("3 byte escape A4",   IDX_ParseThreeByteEsc4, 0,             0,        OP_3B_ESC4,  OP_PARM_NONE,      OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE,
     INVALID_OPCODE,
     INVALID_OPCODE,
@@ -2340,14 +2340,14 @@ const DISOPCODE g_aMapX86_EscF2_High[16*4] =
     OP("fcmove ST(0),ST(7)", 0,              0,          0,          OP_FCMOVE,  OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* d */
-    OP("fcmovbe ST(0),ST(0)",0,              0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovbe ST(0),ST(1)",0,              0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovbe ST(0),ST(2)",0,              0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovbe ST(0),ST(3)",0,              0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovbe ST(0),ST(4)",0,              0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovbe ST(0),ST(5)",0,              0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovbe ST(0),ST(6)",0,              0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovbe ST(0),ST(7)",0,              0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovbe ST(0),ST(0)", 0,             0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovbe ST(0),ST(1)", 0,             0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovbe ST(0),ST(2)", 0,             0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovbe ST(0),ST(3)", 0,             0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovbe ST(0),ST(4)", 0,             0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovbe ST(0),ST(5)", 0,             0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovbe ST(0),ST(6)", 0,             0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovbe ST(0),ST(7)", 0,             0,          0,          OP_FCMOVBE, OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("fcmovu ST(0),ST(0)", 0,              0,          0,          OP_FCMOVU,  OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("fcmovu ST(0),ST(1)", 0,              0,          0,          OP_FCMOVU,  OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     OP("fcmovu ST(0),ST(2)", 0,              0,          0,          OP_FCMOVU,  OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
@@ -2399,40 +2399,40 @@ const DISOPCODE g_aMapX86_EscF3_Low[8] =
 const DISOPCODE g_aMapX86_EscF3_High[16*4] =
 {
     /* c */
-    OP("fcmovnb ST(0),ST(0)",0,              0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnb ST(0),ST(1)",0,              0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnb ST(0),ST(2)",0,              0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnb ST(0),ST(3)",0,              0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnb ST(0),ST(4)",0,              0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnb ST(0),ST(5)",0,              0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnb ST(0),ST(6)",0,              0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnb ST(0),ST(7)",0,              0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovne ST(0),ST(0)",0,              0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovne ST(0),ST(1)",0,              0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovne ST(0),ST(2)",0,              0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovne ST(0),ST(3)",0,              0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovne ST(0),ST(4)",0,              0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovne ST(0),ST(5)",0,              0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovne ST(0),ST(6)",0,              0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovne ST(0),ST(7)",0,              0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnb ST(0),ST(0)", 0,             0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnb ST(0),ST(1)", 0,             0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnb ST(0),ST(2)", 0,             0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnb ST(0),ST(3)", 0,             0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnb ST(0),ST(4)", 0,             0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnb ST(0),ST(5)", 0,             0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnb ST(0),ST(6)", 0,             0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnb ST(0),ST(7)", 0,             0,          0,          OP_FCMOVNB, OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovne ST(0),ST(0)", 0,             0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovne ST(0),ST(1)", 0,             0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovne ST(0),ST(2)", 0,             0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovne ST(0),ST(3)", 0,             0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovne ST(0),ST(4)", 0,             0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovne ST(0),ST(5)", 0,             0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovne ST(0),ST(6)", 0,             0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovne ST(0),ST(7)", 0,             0,          0,          OP_FCMOVNE, OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* d */
-    OP("fcmovnbe ST(0),ST(0)",0,             0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnbe ST(0),ST(1)",0,             0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnbe ST(0),ST(2)",0,             0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnbe ST(0),ST(3)",0,             0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnbe ST(0),ST(4)",0,             0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnbe ST(0),ST(5)",0,             0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnbe ST(0),ST(6)",0,             0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnbe ST(0),ST(7)",0,             0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnu ST(0),ST(0)",0,              0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnu ST(0),ST(1)",0,              0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnu ST(0),ST(2)",0,              0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnu ST(0),ST(3)",0,              0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnu ST(0),ST(4)",0,              0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnu ST(0),ST(5)",0,              0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnu ST(0),ST(6)",0,              0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcmovnu ST(0),ST(7)",0,              0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnbe ST(0),ST(0)", 0,            0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnbe ST(0),ST(1)", 0,            0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnbe ST(0),ST(2)", 0,            0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnbe ST(0),ST(3)", 0,            0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnbe ST(0),ST(4)", 0,            0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnbe ST(0),ST(5)", 0,            0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnbe ST(0),ST(6)", 0,            0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnbe ST(0),ST(7)", 0,            0,          0,          OP_FCMOVNBE,OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnu ST(0),ST(0)", 0,             0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnu ST(0),ST(1)", 0,             0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnu ST(0),ST(2)", 0,             0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnu ST(0),ST(3)", 0,             0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnu ST(0),ST(4)", 0,             0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnu ST(0),ST(5)", 0,             0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnu ST(0),ST(6)", 0,             0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcmovnu ST(0),ST(7)", 0,             0,          0,          OP_FCMOVNU, OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* e */
     INVALID_OPCODE,
@@ -2443,25 +2443,25 @@ const DISOPCODE g_aMapX86_EscF3_High[16*4] =
     INVALID_OPCODE,
     INVALID_OPCODE,
     INVALID_OPCODE,
-    OP("fucomi ST(0),ST(0)",0,               0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomi ST(0),ST(1)",0,               0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomi ST(0),ST(2)",0,               0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomi ST(0),ST(3)",0,               0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomi ST(0),ST(4)",0,               0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomi ST(0),ST(5)",0,               0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomi ST(0),ST(6)",0,               0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomi ST(0),ST(7)",0,               0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomi ST(0),ST(0)", 0,              0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomi ST(0),ST(1)", 0,              0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomi ST(0),ST(2)", 0,              0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomi ST(0),ST(3)", 0,              0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomi ST(0),ST(4)", 0,              0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomi ST(0),ST(5)", 0,              0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomi ST(0),ST(6)", 0,              0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomi ST(0),ST(7)", 0,              0,          0,          OP_FUCOMI,  OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
 
     /* e */
-    OP("fcomi ST(0),ST(0)",0,                0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcomi ST(0),ST(1)",0,                0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcomi ST(0),ST(2)",0,                0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcomi ST(0),ST(3)",0,                0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcomi ST(0),ST(4)",0,                0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcomi ST(0),ST(5)",0,                0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcomi ST(0),ST(6)",0,                0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fcomi ST(0),ST(7)",0,                0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcomi ST(0),ST(0)", 0,               0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcomi ST(0),ST(1)", 0,               0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcomi ST(0),ST(2)", 0,               0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcomi ST(0),ST(3)", 0,               0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcomi ST(0),ST(4)", 0,               0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcomi ST(0),ST(5)", 0,               0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcomi ST(0),ST(6)", 0,               0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fcomi ST(0),ST(7)", 0,               0,          0,          OP_FCOMI,   OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE,
     INVALID_OPCODE,
     INVALID_OPCODE,
@@ -2755,14 +2755,14 @@ const DISOPCODE g_aMapX86_EscF7_High[16*4] =
     INVALID_OPCODE,
     INVALID_OPCODE,
     INVALID_OPCODE,
-    OP("fucomip ST(0),ST(0)",0,              0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomip ST(0),ST(1)",0,              0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomip ST(0),ST(2)",0,              0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomip ST(0),ST(3)",0,              0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomip ST(0),ST(4)",0,              0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomip ST(0),ST(5)",0,              0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomip ST(0),ST(6)",0,              0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("fucomip ST(0),ST(7)",0,              0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomip ST(0),ST(0)", 0,             0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomip ST(0),ST(1)", 0,             0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_1,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomip ST(0),ST(2)", 0,             0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_2,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomip ST(0),ST(3)", 0,             0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_3,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomip ST(0),ST(4)", 0,             0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_4,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomip ST(0),ST(5)", 0,             0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_5,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomip ST(0),ST(6)", 0,             0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_6,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("fucomip ST(0),ST(7)", 0,             0,          0,          OP_FUCOMIP, OP_PARM_REGFP_0,    OP_PARM_REGFP_7,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* f */
     OP("fcomip ST(0),ST(0)", 0,              0,          0,          OP_FCOMIP,  OP_PARM_REGFP_0,    OP_PARM_REGFP_0,OP_PARM_NONE,   DISOPTYPE_HARMLESS),
@@ -2812,14 +2812,14 @@ const PCDISOPCODE g_apMapX86_FP_High[8] =
 const DISOPCODE g_aMapX86_Group1[8*4] =
 {
     /* 80 */
-    OP("add %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_ADD, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("or %Eb,%Ib",         IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_OR,  OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("adc %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_ADC, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sbb %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SBB, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("and %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_AND, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sub %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SUB, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("xor %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_XOR, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("cmp %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_CMP, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("add %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_ADD, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("or %Eb,%Ib",         IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_OR,  OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("adc %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_ADC, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sbb %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SBB, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("and %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_AND, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sub %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SUB, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("xor %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_XOR, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("cmp %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_CMP, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* 81 */
     OP("add %Ev,%Iz",        IDX_ParseModRM,     IDX_ParseImmZ,  0,          OP_ADD, OP_PARM_Ev,         OP_PARM_Iz ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
@@ -2832,47 +2832,47 @@ const DISOPCODE g_aMapX86_Group1[8*4] =
     OP("cmp %Ev,%Iz",        IDX_ParseModRM,     IDX_ParseImmZ,  0,          OP_CMP, OP_PARM_Ev,         OP_PARM_Iz ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* 82 */
-    OP("add %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_ADD, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("or %Eb,%Ib",         IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_OR,  OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("adc %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_ADC, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sbb %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SBB, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("and %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_AND, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sub %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SUB, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("xor %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_XOR, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("cmp %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_CMP, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("add %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_ADD, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("or %Eb,%Ib",         IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_OR,  OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("adc %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_ADC, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sbb %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SBB, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("and %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_AND, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sub %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SUB, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("xor %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_XOR, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("cmp %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_CMP, OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* 83 */
-    OP("add %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX,0,         OP_ADD, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("or %Ev,%Ib",         IDX_ParseModRM,     IDX_ParseImmByteSX,0,         OP_OR,  OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("adc %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX,0,         OP_ADC, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sbb %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX,0,         OP_SBB, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("and %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX,0,         OP_AND, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sub %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX,0,         OP_SUB, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("xor %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX,0,         OP_XOR, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("cmp %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX,0,         OP_CMP, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("add %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX, 0,        OP_ADD, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("or %Ev,%Ib",         IDX_ParseModRM,     IDX_ParseImmByteSX, 0,        OP_OR,  OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("adc %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX, 0,        OP_ADC, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sbb %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX, 0,        OP_SBB, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("and %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX, 0,        OP_AND, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sub %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX, 0,        OP_SUB, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("xor %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX, 0,        OP_XOR, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("cmp %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByteSX, 0,        OP_CMP, OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 };
 
 const DISOPCODE g_aMapX86_Group2[8*6] =
 {
     /* C0 */
-    OP("rol %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_ROL,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("ror %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_ROR,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("rcl %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_RCL,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("rcr %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_RCR,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("shl/sal %Eb,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SHL,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("shr %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SHR,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("shl/sal %Eb,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SHL,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sar %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SAR,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("rol %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_ROL,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("ror %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_ROR,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("rcl %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_RCL,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("rcr %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_RCR,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("shl/sal %Eb,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SHL,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("shr %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SHR,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("shl/sal %Eb,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SHL,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sar %Eb,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SAR,     OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* C1 */
-    OP("rol %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_ROL,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("ror %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_ROR,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("rcl %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_RCL,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("rcr %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_RCR,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("shl/sal %Ev,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SHL,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("shr %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SHR,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("shl/sal %Ev,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SHL,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("sar %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte,0,         OP_SAR,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("rol %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_ROL,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("ror %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_ROR,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("rcl %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_RCL,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("rcr %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_RCR,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("shl/sal %Ev,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SHL,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("shr %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SHR,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("shl/sal %Ev,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SHL,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("sar %Ev,%Ib",        IDX_ParseModRM,     IDX_ParseImmByte, 0,        OP_SAR,     OP_PARM_Ev,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 
     /* D0 */
     OP("rol %Eb,1",          IDX_ParseModRM,     0,          0,          OP_ROL,     OP_PARM_Eb,         OP_PARM_1 ,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
@@ -2920,7 +2920,7 @@ const DISOPCODE g_aMapX86_Group2[8*6] =
 const DISOPCODE g_aMapX86_Group3[8*2] =
 {
     /* F6 */
-    OP("test %Eb,%Ib",       IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_TEST,   OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("test %Eb,%Ib",       IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_TEST,   OP_PARM_Eb,         OP_PARM_Ib ,    OP_PARM_NONE,   DISOPTYPE_HARMLESS),
    //The AMD manual claims this encoding is TEST?
     INVALID_OPCODE_MOD_RM(0xf601),
     OP("not %Eb",            IDX_ParseModRM,     0,          0,          OP_NOT,     OP_PARM_Eb,         OP_PARM_NONE,   OP_PARM_NONE,   DISOPTYPE_HARMLESS),
@@ -3092,21 +3092,21 @@ const DISOPCODE g_aMapX86_Group12[8*2] =
 {
     INVALID_OPCODE_MOD_RM(0x),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psrlw %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRLW,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psrlw %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRLW,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psraw %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRAW,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psraw %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRAW,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psllw %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSLLW,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psllw %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSLLW,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
 
     /* Group 12 with prefix 0x66 */
     INVALID_OPCODE_MOD_RM(0x),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psrlw %Pdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRLW,  OP_PARM_Pdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psrlw %Pdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRLW,  OP_PARM_Pdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psraw %Pdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRAW,  OP_PARM_Pdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psraw %Pdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRAW,  OP_PARM_Pdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psllw %Pdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSLLW,  OP_PARM_Pdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psllw %Pdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSLLW,  OP_PARM_Pdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
 };
 
@@ -3115,21 +3115,21 @@ const DISOPCODE g_aMapX86_Group13[8*2] =
 {
     INVALID_OPCODE_MOD_RM(0x),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psrld %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRLD,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psrld %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRLD,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psrad %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRAD,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psrad %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRAD,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("pslld %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSLLD,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("pslld %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSLLD,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
 
     /* Group 13 with prefix 0x66 */
     INVALID_OPCODE_MOD_RM(0x),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psrld %Wdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRLD,  OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psrld %Wdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRLD,  OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psrad %Wdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRAD,  OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psrad %Wdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRAD,  OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("pslld %Wdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSLLD,  OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("pslld %Wdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSLLD,  OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
 };
 
@@ -3138,22 +3138,22 @@ const DISOPCODE g_aMapX86_Group14[8*2] =
 {
     INVALID_OPCODE_MOD_RM(0x),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psrlq %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRLQ,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psrlq %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRLQ,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
     INVALID_OPCODE_MOD_RM(0x),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psllq %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSLLQ,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psllq %Pq,%Ib",      IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSLLQ,  OP_PARM_Pq,         OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
 
     /* Group 14 with prefix 0x66 */
     INVALID_OPCODE_MOD_RM(0x),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psrlq %Wdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRLD,  OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("psrldq %Wdq,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSRLDQ, OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psrlq %Wdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRLD,  OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psrldq %Wdq,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSRLDQ, OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
     INVALID_OPCODE_MOD_RM(0x),
     INVALID_OPCODE_MOD_RM(0x),
-    OP("psllq %Wdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSLLD,  OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
-    OP("pslldq %Wdq,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte,0,          OP_PSLLDQ, OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("psllq %Wdq,%Ib",     IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSLLD,  OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
+    OP("pslldq %Wdq,%Ib",    IDX_ParseModRM,     IDX_ParseImmByte, 0,         OP_PSLLDQ, OP_PARM_Wdq,        OP_PARM_Ib,     OP_PARM_NONE,   DISOPTYPE_HARMLESS),
 };
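
A note on the tables above: each OP(...) row packs a mnemonic format string, parser-routine indexes, an opcode enum value, up to three parameter descriptors, and a flags word into one DISOPCODE entry; the hunks in this file only retab the columns, so no entry changes meaning. For readers unfamiliar with the scheme, a minimal sketch of how such a group table is typically selected, using a simplified row type and hypothetical field names rather than the real DISOPCODE layout:

    // Illustrative only: a simplified opcode-group lookup in the spirit of
    // g_aMapX86_Group1. Field names and enum values here are hypothetical.
    #include <cstdint>
    #include <cstdio>

    struct SimpleOpcode
    {
        const char *pszFormat;  // mnemonic format string, e.g. "add %Eb,%Ib"
        uint16_t    uOpcode;    // stand-in for OP_ADD, OP_OR, ...
        uint32_t    fFlags;     // stand-in for DISOPTYPE_* flags
    };

    static const SimpleOpcode g_aGroup1_80[8] =
    {
        { "add %Eb,%Ib", 1, 0 }, { "or %Eb,%Ib",  2, 0 },
        { "adc %Eb,%Ib", 3, 0 }, { "sbb %Eb,%Ib", 4, 0 },
        { "and %Eb,%Ib", 5, 0 }, { "sub %Eb,%Ib", 6, 0 },
        { "xor %Eb,%Ib", 7, 0 }, { "cmp %Eb,%Ib", 8, 0 },
    };

    // For group opcodes the reg field of the ModRM byte (bits 3..5)
    // selects one of the eight table entries.
    static const SimpleOpcode *LookupGroup1(uint8_t bModRm)
    {
        return &g_aGroup1_80[(bModRm >> 3) & 7];
    }

    int main()
    {
        // ModRM 0xC0 has reg == 0, so it selects the "add" entry.
        printf("%s\n", LookupGroup1(0xC0)->pszFormat);
        return 0;
    }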
 
 
diff --git a/src/VBox/Frontends/VirtualBox/nls/VirtualBox_eu.ts b/src/VBox/Frontends/VirtualBox/nls/VirtualBox_eu.ts
index 21f0f5b..37ae536 100644
--- a/src/VBox/Frontends/VirtualBox/nls/VirtualBox_eu.ts
+++ b/src/VBox/Frontends/VirtualBox/nls/VirtualBox_eu.ts
@@ -4,7 +4,7 @@
 <context>
     <name>@@@</name>
     <message>
-        <location filename="../src/globals/VBoxGlobal.cpp" line="+2064"/>
+        <location filename="../src/globals/VBoxGlobal.cpp" line="+2065"/>
         <source>English</source>
         <comment>Native language name</comment>
         <translation>Euskara</translation>
@@ -1775,7 +1775,7 @@
 <context>
     <name>UIApplianceUnverifiedCertificateViewer</name>
     <message>
-        <location filename="../src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp" line="+260"/>
+        <location filename="../src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp" line="+261"/>
         <source>Unverifiable Certificate! Continue?</source>
         <translation type="unfinished"></translation>
     </message>
@@ -7346,7 +7346,7 @@
         <translation type="obsolete">Hautaturiko USB iragazkia behera mugitzen du.</translation>
     </message>
     <message>
-        <location filename="../src/settings/machine/UIMachineSettingsUSB.cpp" line="+603"/>
+        <location filename="../src/settings/machine/UIMachineSettingsUSB.cpp" line="+608"/>
         <source>New Filter %1</source>
         <comment>usb</comment>
         <translation>Iragazki Berria %1</translation>
@@ -9404,7 +9404,7 @@
 <context>
     <name>UIMiniToolBar</name>
     <message>
-        <location filename="../src/widgets/UIMiniToolBar.cpp" line="+294"/>
+        <location filename="../src/widgets/UIMiniToolBar.cpp" line="+298"/>
         <source>Always show the toolbar</source>
         <translation>Betik erakutsi tresnabarra</translation>
     </message>
@@ -9799,7 +9799,7 @@
         <translation>Erakutsi Egoerabarra</translation>
     </message>
     <message>
-        <location line="+342"/>
+        <location line="+347"/>
         <source>Select a virtual machine file</source>
         <translation>Hautatu makina birtual agiria</translation>
     </message>
@@ -9809,12 +9809,12 @@
         <translation>Makina birtual agiriak (%1)</translation>
     </message>
     <message>
-        <location line="-235"/>
+        <location line="-240"/>
         <source><h3>Welcome to VirtualBox!</h3><p>The left part of this window is  a list of all virtual machines on your computer. The list is empty now because you haven't created any virtual machines yet.<img src=:/welcome.png align=right/></p><p>In order to create a new virtual machine, press the <b>New</b> button in the main tool bar located at the top of the window.</p><p>You can press the <b>%1</b> key to [...]
         <translation><h3>Ongi etorri VirtualBox-era!</h3><p>Leiho honetako ezker aldea zure ordenagailuko makina birtualen zerrenda bat da. Zerrenda orain hutsik dago oraindik ez duzulako makina birtualik sortu.<img src=:/welcome.png align=right/></p><p>Makina birtual berri bat sortzeko, sakatu <b>Berria</b> botoia leihoaren goialdeko tresna barra nagusian.</p><p><b>%1</b> tekla sakatu dezakezu berehalako laguntza lo [...]
     </message>
     <message>
-        <location line="+785"/>
+        <location line="+790"/>
         <source>Manager</source>
         <comment>Note: main window title which is prepended by the product name.</comment>
         <translation>Kudeatzailea</translation>
@@ -9823,7 +9823,7 @@
 <context>
     <name>UISession</name>
     <message>
-        <location filename="../src/runtime/UISession.cpp" line="+548"/>
+        <location filename="../src/runtime/UISession.cpp" line="+552"/>
         <source>Updating Guest Additions</source>
         <translation>Gonbidatu Gehigarriak Eguneratzen</translation>
     </message>
@@ -11196,7 +11196,7 @@
         <translation>Ireki Birtualizazio Heuskarria (%1)</translation>
     </message>
     <message>
-        <location filename="../src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp" line="-200"/>
+        <location filename="../src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp" line="-201"/>
         <location filename="../src/wizards/importappliance/UIWizardImportAppPageExpert.cpp" line="+1"/>
         <source>Appliance settings</source>
         <translation>Gailu ezarpenak</translation>
@@ -11416,7 +11416,7 @@
         <translation>Sortu</translation>
     </message>
     <message>
-        <location filename="../src/wizards/newvm/UIWizardNewVMPageBasic1.cpp" line="+308"/>
+        <location filename="../src/wizards/newvm/UIWizardNewVMPageBasic1.cpp" line="+309"/>
         <location filename="../src/wizards/newvm/UIWizardNewVMPageExpert.cpp" line="+227"/>
         <source>Name and operating system</source>
         <translation>Izena eta sistema eragilea</translation>
@@ -12782,7 +12782,7 @@
         <translation>Ohinarriko diska gogor hau zeharka erantsita dago hurrengo ezberdinketa diska gogorra erabiliz:</translation>
     </message>
     <message numerus="yes">
-        <location filename="../src/globals/VBoxGlobal.h" line="+237"/>
+        <location filename="../src/globals/VBoxGlobal.h" line="+242"/>
         <source>%n year(s)</source>
         <translation>
             <numerusform>%n urte</numerusform>
@@ -14129,7 +14129,7 @@
 <context>
     <name>VBoxUSBMenu</name>
     <message>
-        <location filename="../src/settings/machine/UIMachineSettingsUSB.cpp" line="-800"/>
+        <location filename="../src/settings/machine/UIMachineSettingsUSB.cpp" line="-805"/>
         <source><no devices available></source>
         <comment>USB devices</comment>
         <translation><ez dago gailurik eskuragarri></translation>
diff --git a/src/VBox/Frontends/VirtualBox/nls/VirtualBox_sl.ts b/src/VBox/Frontends/VirtualBox/nls/VirtualBox_sl.ts
index 0c35b0a..3a8597d 100644
--- a/src/VBox/Frontends/VirtualBox/nls/VirtualBox_sl.ts
+++ b/src/VBox/Frontends/VirtualBox/nls/VirtualBox_sl.ts
@@ -4,7 +4,7 @@
 <context>
     <name>@@@</name>
     <message>
-        <location filename="../src/globals/VBoxGlobal.cpp" line="+2064"/>
+        <location filename="../src/globals/VBoxGlobal.cpp" line="+2065"/>
         <source>English</source>
         <comment>Native language name</comment>
         <translation>Slovenščina</translation>
@@ -1134,7 +1134,7 @@
     <message>
         <location line="+2"/>
         <source>Cr&eate Shortcut on Desktop</source>
-        <translation>&Ustvari bližnjco na namizju</translation>
+        <translation>&Ustvari bližnjico na namizju</translation>
     </message>
     <message>
         <location line="+1"/>
@@ -1411,7 +1411,7 @@
 <context>
     <name>UIApplianceUnverifiedCertificateViewer</name>
     <message>
-        <location filename="../src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp" line="+260"/>
+        <location filename="../src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp" line="+261"/>
         <source>Unverifiable Certificate! Continue?</source>
         <translation>Potrdila ni mogoče preveriti! Nadaljuj?</translation>
     </message>
@@ -5704,7 +5704,7 @@
 <context>
     <name>UIMachineSettingsUSB</name>
     <message>
-        <location filename="../src/settings/machine/UIMachineSettingsUSB.cpp" line="+603"/>
+        <location filename="../src/settings/machine/UIMachineSettingsUSB.cpp" line="+608"/>
         <source>New Filter %1</source>
         <comment>usb</comment>
         <translation>Nov filter %1</translation>
@@ -7503,7 +7503,7 @@
 <context>
     <name>UIMiniToolBar</name>
     <message>
-        <location filename="../src/widgets/UIMiniToolBar.cpp" line="+294"/>
+        <location filename="../src/widgets/UIMiniToolBar.cpp" line="+298"/>
         <source>Always show the toolbar</source>
         <translation>Vedno prikaži orodno vrstico</translation>
     </message>
@@ -7851,7 +7851,7 @@
         <translation>Prikaži vrstico stanja</translation>
     </message>
     <message>
-        <location line="+342"/>
+        <location line="+347"/>
         <source>Select a virtual machine file</source>
         <translation>Izberite datoteko navideznega računalnika</translation>
     </message>
@@ -7867,15 +7867,15 @@
         <translation>Upravljalnik</translation>
     </message>
     <message>
-        <location line="-785"/>
+        <location line="-790"/>
         <source><h3>Welcome to VirtualBox!</h3><p>The left part of this window is  a list of all virtual machines on your computer. The list is empty now because you haven't created any virtual machines yet.<img src=:/welcome.png align=right/></p><p>In order to create a new virtual machine, press the <b>New</b> button in the main tool bar located at the top of the window.</p><p>You can press the <b>%1</b> key to [...]
-        <translation><h3>Dobrodošli v VirtulBox!</h3><p>Levi del tega okna je seznam vseh navideznih računalnikov na vašem računalniku. Seznam je sedaj prazen, ker še niste ustvarili nobenega navideznega računalnika.<img src=:/welcome.png align=right/></p><p>Da ustvarite nov navidezni računalnik, pritisnite gumb <b>Nov</b> v glavni orodni vrstici na vrhu okna.</p><p>Lahko pritisnete tipko <b>%1</b> za takojšnjo pomoč [...]
+        <translation><h3>Dobrodošli v VirtualBoxu!</h3><p>Levi del tega okna je seznam vseh navideznih računalnikov na vašem računalniku. Seznam je sedaj prazen, ker še niste ustvarili nobenega navideznega računalnika.<img src=:/welcome.png align=right/></p><p>Da ustvarite nov navidezni računalnik, pritisnite gumb <b>Nov</b> v glavni orodni vrstici na vrhu okna.</p><p>Lahko pritisnete tipko <b>%1</b> za takojšnjo pom [...]
     </message>
 </context>
 <context>
     <name>UISession</name>
     <message>
-        <location filename="../src/runtime/UISession.cpp" line="+548"/>
+        <location filename="../src/runtime/UISession.cpp" line="+552"/>
         <source>Updating Guest Additions</source>
         <translation>Posodabljanje Dodatkov za gosta</translation>
     </message>
@@ -9093,7 +9093,7 @@
         <translation>Odprta oblika navideznosti (%1)</translation>
     </message>
     <message>
-        <location filename="../src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp" line="-204"/>
+        <location filename="../src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp" line="-205"/>
         <location filename="../src/wizards/importappliance/UIWizardImportAppPageExpert.cpp" line="+1"/>
         <source>Appliance settings</source>
         <translation>Nastavitve namenske naprave</translation>
@@ -9277,7 +9277,7 @@
         <translation>Ustvari</translation>
     </message>
     <message>
-        <location filename="../src/wizards/newvm/UIWizardNewVMPageBasic1.cpp" line="+308"/>
+        <location filename="../src/wizards/newvm/UIWizardNewVMPageBasic1.cpp" line="+309"/>
         <location filename="../src/wizards/newvm/UIWizardNewVMPageExpert.cpp" line="+227"/>
         <source>Name and operating system</source>
         <translation>Ime in operacijski sistem</translation>
@@ -10389,7 +10389,7 @@
         <translation><b>Nobena datoteka odtisa diska ni na voljo</b></translation>
     </message>
     <message numerus="yes">
-        <location filename="../src/globals/VBoxGlobal.h" line="+237"/>
+        <location filename="../src/globals/VBoxGlobal.h" line="+242"/>
         <source>%n year(s)</source>
         <translation>
             <numerusform>%n leto</numerusform>
@@ -11829,7 +11829,7 @@
 <context>
     <name>VBoxUSBMenu</name>
     <message>
-        <location filename="../src/settings/machine/UIMachineSettingsUSB.cpp" line="-495"/>
+        <location filename="../src/settings/machine/UIMachineSettingsUSB.cpp" line="-500"/>
         <source><no devices available></source>
         <comment>USB devices</comment>
         <translation><na voljo ni nobena naprava></translation>
diff --git a/src/VBox/Frontends/VirtualBox/src/globals/VBoxGlobal.cpp b/src/VBox/Frontends/VirtualBox/src/globals/VBoxGlobal.cpp
index 5bfa61d..870514d 100644
--- a/src/VBox/Frontends/VirtualBox/src/globals/VBoxGlobal.cpp
+++ b/src/VBox/Frontends/VirtualBox/src/globals/VBoxGlobal.cpp
@@ -3949,6 +3949,9 @@ void VBoxGlobal::prepare()
     m_osRelease = determineOsRelease();
 #endif /* VBOX_WS_MAC */
 
+    /* Prepare converter: */
+    UIConverter::prepare();
+
     /* Create desktop-widget watchdog: */
     UIDesktopWidgetWatchdog::create();
 
@@ -4330,9 +4333,6 @@ void VBoxGlobal::prepare()
 
     mValid = true;
 
-    /* Prepare converter: */
-    UIConverter::prepare();
-
     /* Create medium-enumerator but don't do any immediate caching: */
     m_pMediumEnumerator = new UIMediumEnumerator;
     {
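
The two VBoxGlobal::prepare() hunks above move UIConverter::prepare() from near the end of startup to before the desktop-widget watchdog is created, presumably so the converter registry exists before anything in the intervening initialization asks it to translate a type. A minimal sketch of that prepare-before-use ordering, with hypothetical names standing in for the real UIConverter API:

    // Illustrative only: a prepare-before-use singleton; names are hypothetical.
    #include <cassert>
    #include <string>

    class Converter
    {
    public:
        static void prepare() { s_pInstance = new Converter; }
        static void cleanup() { delete s_pInstance; s_pInstance = nullptr; }
        static std::string toString(int iValue)
        {
            // The failure mode the reordering guards against: use before prepare().
            assert(s_pInstance && "Converter used before prepare()");
            return std::to_string(iValue);
        }
    private:
        static Converter *s_pInstance;
    };
    Converter *Converter::s_pInstance = nullptr;

    int main()
    {
        Converter::prepare();                  // must precede the first toString()
        std::string str = Converter::toString(42);
        Converter::cleanup();
        return str == "42" ? 0 : 1;
    }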
diff --git a/src/VBox/Frontends/VirtualBox/src/settings/machine/UIMachineSettingsUSB.cpp b/src/VBox/Frontends/VirtualBox/src/settings/machine/UIMachineSettingsUSB.cpp
index a777076..b509ec6 100644
--- a/src/VBox/Frontends/VirtualBox/src/settings/machine/UIMachineSettingsUSB.cpp
+++ b/src/VBox/Frontends/VirtualBox/src/settings/machine/UIMachineSettingsUSB.cpp
@@ -242,6 +242,11 @@ UIMachineSettingsUSB::UIMachineSettingsUSB()
 #endif /* VBOX_WITH_EHCI */
 }
 
+UIMachineSettingsUSB::~UIMachineSettingsUSB()
+{
+    delete mUSBDevicesMenu;
+}
+
 bool UIMachineSettingsUSB::isUSBEnabled() const
 {
     return mGbUSB->isChecked();
diff --git a/src/VBox/Frontends/VirtualBox/src/settings/machine/UIMachineSettingsUSB.h b/src/VBox/Frontends/VirtualBox/src/settings/machine/UIMachineSettingsUSB.h
index 0d24abf..0659d76 100644
--- a/src/VBox/Frontends/VirtualBox/src/settings/machine/UIMachineSettingsUSB.h
+++ b/src/VBox/Frontends/VirtualBox/src/settings/machine/UIMachineSettingsUSB.h
@@ -119,6 +119,7 @@ public:
     };
 
     UIMachineSettingsUSB();
+    ~UIMachineSettingsUSB();
 
     bool isUSBEnabled() const;
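
The new destructor above (and its declaration in this header) pairs a delete with mUSBDevicesMenu, a member the class evidently allocates itself, so the menu no longer leaks when the settings page is destroyed. A minimal sketch of the raw-owning-pointer pattern, with hypothetical names; in Qt code, parenting the menu to the page or holding it in a smart pointer are common alternatives that make the explicit destructor unnecessary:

    // Illustrative only: explicit ownership of a raw member pointer.
    // Class names here are hypothetical.
    #include <memory>

    class WidgetsMenu { /* ... */ };

    class SettingsPage
    {
    public:
        SettingsPage() : m_pMenu(new WidgetsMenu) {}
        ~SettingsPage() { delete m_pMenu; }   // without this, the menu leaks

        // Owning a raw pointer makes default copying unsafe (rule of three).
        SettingsPage(const SettingsPage &) = delete;
        SettingsPage &operator=(const SettingsPage &) = delete;

    private:
        WidgetsMenu *m_pMenu;
        // Alternative needing no explicit destructor:
        //   std::unique_ptr<WidgetsMenu> m_pMenu;
    };

    int main() { SettingsPage page; return 0; }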
 
diff --git a/src/VBox/Frontends/VirtualBox/src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp b/src/VBox/Frontends/VirtualBox/src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp
index 6e03ebd..b75b8c1 100644
--- a/src/VBox/Frontends/VirtualBox/src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp
+++ b/src/VBox/Frontends/VirtualBox/src/wizards/importappliance/UIWizardImportAppPageBasic2.cpp
@@ -110,6 +110,7 @@ void UIWizardImportAppPageBasic2::retranslateUi()
             break;
         default:
             AssertFailed();
+            /* fall thru */
         case kCertText_Uninitialized:
             m_pCertLabel->setText("<uninitialized page>");
             break;
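
The /* fall thru */ comment added above records that, once the assertion has flagged an unexpected enum value, control is intended to continue into the kCertText_Uninitialized case rather than leave the switch, which matters in builds where assertions typically do not stop execution. A minimal sketch of the idiom with hypothetical names; C++17 code can state the same intent to the compiler with the [[fallthrough]] attribute:

    // Illustrative only: deliberate switch fall-through; names are hypothetical.
    #include <cassert>
    #include <cstdio>

    enum CertText { kCertText_Uninitialized, kCertText_Ok };

    static const char *certLabel(CertText enmText)
    {
        switch (enmText)
        {
            case kCertText_Ok:
                return "verified";
            default:
                assert(!"unexpected CertText value");
                [[fallthrough]];   /* fall thru: treat unknown as uninitialized */
            case kCertText_Uninitialized:
                return "<uninitialized page>";
        }
    }

    int main()
    {
        printf("%s\n", certLabel(kCertText_Uninitialized));
        return 0;
    }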
diff --git a/src/VBox/GuestHost/OpenGL/packer/pack_program.c b/src/VBox/GuestHost/OpenGL/packer/pack_program.c
index 3cd0621..8a62e01 100644
--- a/src/VBox/GuestHost/OpenGL/packer/pack_program.c
+++ b/src/VBox/GuestHost/OpenGL/packer/pack_program.c
@@ -17,7 +17,7 @@
 void PACK_APIENTRY crPackProgramParameters4dvNV(GLenum target, GLuint index, GLuint num, const GLdouble * params)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     int packet_length = sizeof(int) + sizeof(target) + sizeof(index) + sizeof(num) + num * 4 * sizeof(GLdouble);
 
     CR_GET_BUFFERED_POINTER(pc, packet_length);
@@ -35,7 +35,7 @@ void PACK_APIENTRY crPackProgramParameters4dvNV(GLenum target, GLuint index, GLu
 void PACK_APIENTRY crPackProgramParameters4fvNV(GLenum target, GLuint index, GLuint num, const GLfloat * params)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     int packet_length = sizeof(int) + sizeof(target) + sizeof(index) + sizeof(num) + num * 4 * sizeof(GLfloat);
 
     CR_GET_BUFFERED_POINTER(pc, packet_length);
@@ -161,7 +161,7 @@ void PACK_APIENTRY crPackVertexAttribs4ubvNV(GLuint index, GLsizei n, const GLub
 void PACK_APIENTRY crPackExecuteProgramNV(GLenum target, GLuint id, const GLfloat *params)
 {
     const int packet_length = 32;
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_PACKER_CONTEXT(pc);
 
     CR_GET_BUFFERED_POINTER(pc, packet_length);
@@ -180,7 +180,7 @@ void PACK_APIENTRY crPackExecuteProgramNV(GLenum target, GLuint id, const GLfloa
 void PACK_APIENTRY crPackLoadProgramNV(GLenum target, GLuint id, GLsizei len, const GLubyte *program)
 {
     const int packet_length = 20 + len;
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_PACKER_CONTEXT(pc);
 
     CR_GET_BUFFERED_POINTER(pc, packet_length);
@@ -207,7 +207,7 @@ void PACK_APIENTRY crPackRequestResidentProgramsNV(GLsizei n, const GLuint *ids)
 void PACK_APIENTRY crPackProgramNamedParameter4fNV (GLuint id, GLsizei len, const GLubyte * name, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     int packet_length = 32 + len;
 
     CR_GET_BUFFERED_POINTER(pc, packet_length);
@@ -227,7 +227,7 @@ void PACK_APIENTRY crPackProgramNamedParameter4fNV (GLuint id, GLsizei len, cons
 void PACK_APIENTRY crPackProgramNamedParameter4dNV (GLuint id, GLsizei len, const GLubyte * name, GLdouble x, GLdouble y, GLdouble z, GLdouble w)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     int packet_length = 48 + len;
 
     CR_GET_BUFFERED_POINTER(pc, packet_length);
@@ -262,7 +262,7 @@ crPackAreProgramsResidentNV(GLsizei n, const GLuint * programs,
                                                         int *writeback)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     int packet_length;
 
     (void) return_val; /* Caller must compute this from residences!!! */
@@ -290,7 +290,7 @@ void PACK_APIENTRY crPackGetProgramNamedParameterfvNV(GLuint id, GLsizei len, co
 {
     int packet_length = 32 + len;
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, packet_length);
     WRITE_DATA(0, GLint, packet_length);
     WRITE_DATA(4, GLenum, CR_GETPROGRAMNAMEDPARAMETERFVNV_EXTEND_OPCODE);
@@ -308,7 +308,7 @@ void PACK_APIENTRY crPackGetProgramNamedParameterdvNV(GLuint id, GLsizei len, co
 {
     int packet_length = 32 + len;
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, packet_length);
     WRITE_DATA(0, GLint, packet_length);
     WRITE_DATA(4, GLenum, CR_GETPROGRAMNAMEDPARAMETERDVNV_EXTEND_OPCODE);
@@ -325,7 +325,7 @@ void PACK_APIENTRY crPackGetProgramNamedParameterdvNV(GLuint id, GLsizei len, co
 
 void PACK_APIENTRY crPackDeleteProgramsARB(GLsizei n, const GLuint *ids)
 {
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     int packet_length = sizeof(GLenum) + sizeof(n) + n * sizeof(*ids);
 
     if (!ids)
@@ -343,7 +343,7 @@ void PACK_APIENTRY crPackDeleteProgramsARB(GLsizei n, const GLuint *ids)
 void PACK_APIENTRY  crPackProgramStringARB(GLenum target, GLenum format, GLsizei len, const void *string)
 {
     const int packet_length = 20 + len;
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_PACKER_CONTEXT(pc);
 
     CR_GET_BUFFERED_POINTER(pc, packet_length);
@@ -366,7 +366,7 @@ void PACK_APIENTRY  crPackProgramStringARB(GLenum target, GLenum format, GLsizei
 void PACK_APIENTRY crPackVertexAttrib4NbvARB(GLuint index, const GLbyte *v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 8);
     pc->current.c.vertexAttrib.b4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
@@ -382,7 +382,7 @@ void PACK_APIENTRY crPackVertexAttrib4NbvARB(GLuint index, const GLbyte *v)
 void PACK_APIENTRY crPackVertexAttrib4NivARB(GLuint index, const GLint *v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 20);
     pc->current.c.vertexAttrib.i4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
@@ -398,7 +398,7 @@ void PACK_APIENTRY crPackVertexAttrib4NivARB(GLuint index, const GLint *v)
 void PACK_APIENTRY crPackVertexAttrib4NsvARB(GLuint index, const GLshort *v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 12);
     pc->current.c.vertexAttrib.s4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
@@ -414,7 +414,7 @@ void PACK_APIENTRY crPackVertexAttrib4NsvARB(GLuint index, const GLshort *v)
 void PACK_APIENTRY crPackVertexAttrib4NubvARB(GLuint index, const GLubyte * v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 8);
     pc->current.c.vertexAttrib.ub4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
@@ -430,7 +430,7 @@ void PACK_APIENTRY crPackVertexAttrib4NubvARB(GLuint index, const GLubyte * v)
 void PACK_APIENTRY crPackVertexAttrib4NuivARB(GLuint index, const GLuint * v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 20);
     pc->current.c.vertexAttrib.ui4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
@@ -446,7 +446,7 @@ void PACK_APIENTRY crPackVertexAttrib4NuivARB(GLuint index, const GLuint * v)
 void PACK_APIENTRY crPackVertexAttrib4NusvARB(GLuint index, const GLushort * v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 12);
     pc->current.c.vertexAttrib.s4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
@@ -462,7 +462,7 @@ void PACK_APIENTRY crPackVertexAttrib4NusvARB(GLuint index, const GLushort * v)
 void PACK_APIENTRY crPackVertexAttrib4bvARB(GLuint index, const GLbyte * v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 8);
     pc->current.c.vertexAttrib.b4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
@@ -478,7 +478,7 @@ void PACK_APIENTRY crPackVertexAttrib4bvARB(GLuint index, const GLbyte * v)
 void PACK_APIENTRY crPackVertexAttrib4ivARB(GLuint index, const GLint * v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 20);
     pc->current.c.vertexAttrib.i4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
@@ -494,7 +494,7 @@ void PACK_APIENTRY crPackVertexAttrib4ivARB(GLuint index, const GLint * v)
 void PACK_APIENTRY crPackVertexAttrib4uivARB(GLuint index, const GLuint * v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 20);
     pc->current.c.vertexAttrib.ui4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
@@ -510,7 +510,7 @@ void PACK_APIENTRY crPackVertexAttrib4uivARB(GLuint index, const GLuint * v)
 void PACK_APIENTRY crPackVertexAttrib4usvARB(GLuint index, const GLushort * v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 12);
     pc->current.c.vertexAttrib.s4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
@@ -527,7 +527,7 @@ void PACK_APIENTRY crPackVertexAttrib4usvARB(GLuint index, const GLushort * v)
 void PACK_APIENTRY crPackVertexAttrib4ubvARB(GLuint index, const GLubyte * v)
 {
     CR_GET_PACKER_CONTEXT(pc);
-    unsigned char *data_ptr;
+    unsigned char *data_ptr = NULL;
     CR_GET_BUFFERED_POINTER(pc, 8);
     pc->current.c.vertexAttrib.ub4[index] = data_ptr + 4;
     pc->current.attribsUsedMask |= (1 << index);
diff --git a/src/VBox/GuestHost/OpenGL/packer/packer.py b/src/VBox/GuestHost/OpenGL/packer/packer.py
index 9e97920..b0b5ad6 100755
--- a/src/VBox/GuestHost/OpenGL/packer/packer.py
+++ b/src/VBox/GuestHost/OpenGL/packer/packer.py
@@ -151,7 +151,7 @@ def PrintFunc( func_name, params, is_swapped, can_have_pointers ):
         is_extended = 0
 
 
-    print("\tunsigned char *data_ptr;")
+    print("\tunsigned char *data_ptr = NULL;")
     print('\t(void) pc;')
     #if func_name == "Enable" or func_name == "Disable":
     #   print "\tCRASSERT(!pc->buffer.geometry_only); /* sanity check */"
diff --git a/src/VBox/GuestHost/OpenGL/packer/packer_bbox.py b/src/VBox/GuestHost/OpenGL/packer/packer_bbox.py
index 355962d..5abb6e2 100755
--- a/src/VBox/GuestHost/OpenGL/packer/packer_bbox.py
+++ b/src/VBox/GuestHost/OpenGL/packer/packer_bbox.py
@@ -167,7 +167,7 @@ def PrintFunction( func_name, extSuffix, num_coords, argtype,
 
 
 	print("\tCR_GET_PACKER_CONTEXT(pc);")
-	print("\tunsigned char *data_ptr;")
+	print("\tunsigned char *data_ptr = NULL;")
 
 	if normalized:
 		if argtype == "Nb":
diff --git a/src/VBox/HostDrivers/Support/Makefile.kmk b/src/VBox/HostDrivers/Support/Makefile.kmk
index 8c32589..9fb6450 100644
--- a/src/VBox/HostDrivers/Support/Makefile.kmk
+++ b/src/VBox/HostDrivers/Support/Makefile.kmk
@@ -206,8 +206,8 @@ SUPR3HardenedStatic_SOURCES.win = \
 	win/SUPR3HardenedMain-win.cpp \
 	win/SUPR3HardenedMainA-win.asm \
 	win/SUPR3HardenedMainImports-win.cpp \
-       win/SUPHardenedVerifyProcess-win.cpp \
-       win/SUPHardenedVerifyImage-win.cpp \
+	win/SUPHardenedVerifyProcess-win.cpp \
+	win/SUPHardenedVerifyImage-win.cpp \
 	$(VBOX_SUP_WIN_CERTS_FILE)
 SUPR3HardenedStatic_SOURCES.x86 += \
 	$(VBOX_PATH_RUNTIME_SRC)/common/asm/ASMMemFirstMismatchingU8.asm
@@ -217,10 +217,10 @@ SUPR3HardenedStatic_SOURCES.amd64 += \
 
 if "$(KBUILD_TARGET)" == "win" && defined(VBOX_WITH_HARDENING) ## @todo some of this move up.
  SUPR3HardenedStatic_DEFS += \
-  	IN_RT \
  	IN_RT_R3 \
  	IN_RT_STATIC \
  	IN_DIS \
+	IN_DIS_STATIC \
  	DIS_CORE_ONLY \
   	IPRT_NO_CRT \
  	RT_WITH_NOCRT_ALIASES \
@@ -419,6 +419,7 @@ endif
 if1of ($(KBUILD_TARGET), linux darwin solaris)
  SUPR3HardenedStatic_DEFS += \
  	IN_DIS \
+	IN_DIS_STATIC \
  	DIS_CORE_ONLY \
  	LOG_DISABLED
  SUPR3HardenedStatic_DEFS.linux += \
diff --git a/src/VBox/HostDrivers/Support/SUPDrv.cpp b/src/VBox/HostDrivers/Support/SUPDrv.cpp
index 1fb0e8e..001a10d 100644
--- a/src/VBox/HostDrivers/Support/SUPDrv.cpp
+++ b/src/VBox/HostDrivers/Support/SUPDrv.cpp
@@ -64,7 +64,6 @@
 #include <VBox/param.h>
 #include <VBox/log.h>
 #include <VBox/err.h>
-#include <VBox/vmm/hm_svm.h>
 #include <VBox/vmm/hm_vmx.h>
 
 #if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
@@ -4144,7 +4143,7 @@ int VBOXCALL supdrvQueryVTCapsInternal(uint32_t *pfCaps)
 
                     /* Query AMD-V features. */
                     ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
-                    if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
+                    if (fSvmFeatures & X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
                         *pfCaps |= SUPVTCAPS_NESTED_PAGING;
                 }
             }
diff --git a/src/VBox/HostDrivers/Support/SUPLibInternal.h b/src/VBox/HostDrivers/Support/SUPLibInternal.h
index 970b2d8..4d446f5 100644
--- a/src/VBox/HostDrivers/Support/SUPLibInternal.h
+++ b/src/VBox/HostDrivers/Support/SUPLibInternal.h
@@ -448,7 +448,7 @@ DECLHIDDEN(int)     supR3HardenedVerifyFixedFile(const char *pszFilename, bool f
 DECLHIDDEN(int)     supR3HardenedVerifyDir(const char *pszDirPath, bool fRecursive, bool fCheckFiles, PRTERRINFO pErrInfo);
 DECLHIDDEN(int)     supR3HardenedVerifyFile(const char *pszFilename, RTHCUINTPTR hNativeFile, bool fMaybe3rdParty,
                                             PRTERRINFO pErrInfo);
-#ifdef RT_OS_DARWIN
+#if defined(RT_OS_DARWIN) || defined(RT_OS_LINUX)
 DECLHIDDEN(int)     supR3HardenedVerifyFileFollowSymlinks(const char *pszFilename, RTHCUINTPTR hNativeFile, bool fMaybe3rdParty,
                                                           PRTERRINFO pErrInfo);
 #endif
diff --git a/src/VBox/HostDrivers/Support/SUPR3HardenedVerify.cpp b/src/VBox/HostDrivers/Support/SUPR3HardenedVerify.cpp
index c309593..ce0f288 100644
--- a/src/VBox/HostDrivers/Support/SUPR3HardenedVerify.cpp
+++ b/src/VBox/HostDrivers/Support/SUPR3HardenedVerify.cpp
@@ -1035,7 +1035,8 @@ static int supR3HardenedSetErrorN(int rc, PRTERRINFO pErrInfo, unsigned cMsgs, .
     return rc;
 }
 
-#ifdef RT_OS_DARWIN
+
+#if defined(RT_OS_DARWIN) || defined(RT_OS_LINUX)
 /**
  * Copies the four messages into the error buffer and returns @a rc.
  *
@@ -1052,7 +1053,7 @@ static int supR3HardenedSetError4(int rc, PRTERRINFO pErrInfo, const char *pszMs
 {
     return supR3HardenedSetErrorN(rc, pErrInfo, 4, pszMsg1, pszMsg2, pszMsg3, pszMsg4);
 }
-#endif /* RT_OS_DARWIN */
+#endif
 
 
 /**
@@ -1071,8 +1072,8 @@ static int supR3HardenedSetError3(int rc, PRTERRINFO pErrInfo, const char *pszMs
     return supR3HardenedSetErrorN(rc, pErrInfo, 3, pszMsg1, pszMsg2, pszMsg3);
 }
 
-#ifdef SOME_UNUSED_FUNCTION
 
+#ifdef SOME_UNUSED_FUNCTION
 /**
  * Copies the two messages into the error buffer and returns @a rc.
  *
@@ -1087,11 +1088,10 @@ static int supR3HardenedSetError2(int rc, PRTERRINFO pErrInfo, const char *pszMs
 {
     return supR3HardenedSetErrorN(rc, pErrInfo, 2, pszMsg1, pszMsg2);
 }
+#endif
 
-#endif /* SOME_UNUSED_FUNCTION */
-
-#ifdef RT_OS_DARWIN
 
+#if defined(RT_OS_DARWIN) || defined(RT_OS_LINUX)
 /**
  * Copies the error message to the error buffer and returns @a rc.
  *
@@ -1104,7 +1104,6 @@ static int supR3HardenedSetError(int rc, PRTERRINFO pErrInfo, const char *pszMsg
 {
     return supR3HardenedSetErrorN(rc, pErrInfo, 1, pszMsg);
 }
-
 #endif
 
 
@@ -1187,16 +1186,19 @@ static int supR3HardenedVerifyPathSanity(const char *pszPath, PRTERRINFO pErrInf
         return supR3HardenedSetError3(VERR_SUPLIB_PATH_TOO_SHORT, pErrInfo, "The path is too short: '", pszPath, "'");
 
     /*
-     * Check each component.  No parent references or double slashes.
+     * The root slash should be alone to avoid UNC confusion.
+     */
+    if (RTPATH_IS_SLASH(pszSrc[0]))
+        return supR3HardenedSetError3(VERR_SUPLIB_PATH_NOT_CLEAN, pErrInfo,
+                                      "The path is not clean of leading double slashes: '", pszPath, "'");
+    /*
+     * Check each component.  No parent references.
      */
     pInfo->cComponents = 0;
     pInfo->fDirSlash   = false;
     while (pszSrc[0])
     {
         /* Sanity checks. */
-        if (RTPATH_IS_SLASH(pszSrc[0])) /* can be relaxed if we care. */
-            return supR3HardenedSetError3(VERR_SUPLIB_PATH_NOT_CLEAN, pErrInfo,
-                                          "The path is not clean of double slashes: '", pszPath, "'");
         if (   pszSrc[0] == '.'
             && pszSrc[1] == '.'
             && RTPATH_IS_SLASH(pszSrc[2]))
@@ -1226,6 +1228,10 @@ static int supR3HardenedVerifyPathSanity(const char *pszPath, PRTERRINFO pErrInf
                 return supR3HardenedSetError3(VERR_SUPLIB_PATH_TOO_LONG, pErrInfo,
                                               "The path is too long: '", pszPath, "'");
         }
+
+        /* Skip double slashes. */
+        while (RTPATH_IS_SLASH(*pszSrc))
+            pszSrc++;
     }
 
     /* Terminate the string and enter its length. */
@@ -1493,8 +1499,18 @@ static int supR3HardenedVerifyFsObject(PCSUPR3HARDENEDFSOBJSTATE pFsObjState, bo
 
     /*
      * World must not have write access.  There is no relaxing this rule.
+     * Linux exception: Symbolic links are always given permission 0777; there
+     *                  are no lchmod or lchown APIs.  The permissions on the
+     *                  parent directory containing the symbolic link decide
+     *                  whether it may be modified.  (The caller is expected not
+     *                  to allow symbolic links in the first path component.)
      */
-    if (pFsObjState->Stat.st_mode & S_IWOTH)
+    if (   (pFsObjState->Stat.st_mode & S_IWOTH)
+# ifdef RT_OS_LINUX
+        && (   !S_ISLNK(pFsObjState->Stat.st_mode)
+            || !fSymlinksAllowed /* paranoia */)
+# endif
+       )
         return supR3HardenedSetError3(VERR_SUPLIB_WORLD_WRITABLE, pErrInfo,
                                       "World writable: '", pszPath, "'");
 
@@ -1838,7 +1854,7 @@ DECLHIDDEN(int) supR3HardenedVerifyFile(const char *pszFilename, RTHCUINTPTR hNa
 }
 
 
-#ifdef RT_OS_DARWIN
+#if defined(RT_OS_DARWIN) || defined(RT_OS_LINUX)
 /**
+ * Verifies a file following symlinks.
  *
@@ -1993,7 +2009,7 @@ DECLHIDDEN(int) supR3HardenedVerifyFileFollowSymlinks(const char *pszFilename, R
 
     return VINF_SUCCESS;
 }
-#endif /* RT_OS_DARWIN */
+#endif /* RT_OS_DARWIN || RT_OS_LINUX */
 
 
 /**
diff --git a/src/VBox/HostDrivers/Support/posix/SUPR3HardenedMain-posix.cpp b/src/VBox/HostDrivers/Support/posix/SUPR3HardenedMain-posix.cpp
new file mode 100644
index 0000000..f8f4187
--- /dev/null
+++ b/src/VBox/HostDrivers/Support/posix/SUPR3HardenedMain-posix.cpp
@@ -0,0 +1,660 @@
+/* $Id: SUPR3HardenedMain-posix.cpp $ */
+/** @file
+ * VirtualBox Support Library - Hardened main(), posix bits.
+ */
+
+/*
+ * Copyright (C) 2017 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <VBox/err.h>
+#include <VBox/dis.h>
+#include <VBox/sup.h>
+
+#include <iprt/path.h>
+#include <iprt/string.h>
+#include <iprt/x86.h>
+
+#include <dlfcn.h>
+#include <sys/mman.h>
+#ifdef RT_OS_DARWIN
+# include <errno.h>
+# include <fcntl.h>
+# include <sys/stat.h> /* fstat() */
+# include <unistd.h>   /* readlink() */
+# include <stdlib.h>
+#elif defined(RT_OS_SOLARIS)
+# include <link.h>
+#endif
+#include <stdio.h>
+#include <stdint.h>
+
+#include "SUPLibInternal.h"
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+
+/** For OS X. */
+#ifndef MAP_ANONYMOUS
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+/**
+ * Memory for code patching.
+ */
+#define DLOPEN_PATCH_MEMORY_SIZE   _4K
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+/**
+ * Callback (SUPHARDENEDPOSIXHOOK::pfnResolve) for triggering the lazy GOT resolver.
+ *
+ * This generally just calls the API in a harmless manner and triggers the lazy
+ * resolving of the symbol, ensuring a proper address in the GOT/PLT entry.
+ *
+ * On Solaris dlsym() will return the value in the GOT/PLT entry.  We don't wish
+ * to patch the lazy loader trampoline function, but rather the real function!
+ */
+typedef DECLCALLBACK(void) FNSUPHARDENEDSYMRESOLVE(void);
+/** Pointer to FNSUPHARDENEDSYMRESOLVE. */
+typedef FNSUPHARDENEDSYMRESOLVE *PFNSUPHARDENEDSYMRESOLVE;
+
+/**
+ * A hook descriptor.
+ */
+typedef struct SUPHARDENEDPOSIXHOOK
+{
+    /** The symbol to hook. */
+    const char              *pszSymbol;
+    /** The intercepting wrapper doing additional checks. */
+    PFNRT                    pfnHook;
+    /** Where to store the pointer to the code in patch memory
+     * which resumes the original call. */
+    PFNRT                   *ppfnRealResume;
+    /** Pointer to the resolver method used on Solaris. */
+    PFNSUPHARDENEDSYMRESOLVE pfnResolve;
+} SUPHARDENEDPOSIXHOOK;
+/** Pointer to a hook descriptor. */
+typedef SUPHARDENEDPOSIXHOOK *PSUPHARDENEDPOSIXHOOK;
+/** Pointer to a const hook descriptor. */
+typedef const SUPHARDENEDPOSIXHOOK *PCSUPHARDENEDPOSIXHOOK;
+
+/** dlopen() declaration. */
+typedef void *FNDLOPEN(const char *pszFilename, int fFlags);
+/** Pointer to dlopen. */
+typedef FNDLOPEN *PFNDLOPEN;
+
+#ifdef SUP_HARDENED_WITH_DLMOPEN
+/** dlmopen() declaration */
+typedef void *FNDLMOPEN(Lmid_t idLm, const char *pszFilename, int fFlags);
+/** Pointer to dlmopen. */
+typedef FNDLMOPEN *PFNDLMOPEN;
+#endif
+
+
+/*********************************************************************************************************************************
+*   Internal Functions                                                                                                           *
+*********************************************************************************************************************************/
+static FNSUPHARDENEDSYMRESOLVE supR3HardenedPosixMonitorDlopenResolve;
+#ifdef SUP_HARDENED_WITH_DLMOPEN
+static FNSUPHARDENEDSYMRESOLVE supR3HardenedPosixMonitorDlmopenResolve;
+#endif
+
+/* SUPR3HardenedMainA-posix.asm: */
+DECLASM(void) supR3HardenedPosixMonitor_Dlopen(const char *pszFilename, int fFlags);
+#ifdef SUP_HARDENED_WITH_DLMOPEN
+DECLASM(void) supR3HardenedPosixMonitor_Dlmopen(Lmid_t idLm, const char *pszFilename, int fFlags);
+#endif
+
+
+/*********************************************************************************************************************************
+*   Global Variables                                                                                                             *
+*********************************************************************************************************************************/
+RT_C_DECLS_BEGIN
+/** Resume patch for dlopen(), jumped to from the assembly stub. */
+DECLHIDDEN(PFNDLOPEN)  g_pfnDlopenReal  = NULL;
+#ifdef SUP_HARDENED_WITH_DLMOPEN
+/** Resume patch for dlmopen(), jumped to from the assembly stub. */
+DECLHIDDEN(PFNDLMOPEN) g_pfnDlmopenReal = NULL;
+#endif
+RT_C_DECLS_END
+
+/** Memory allocated for the patches. */
+static uint8_t *g_pbExecMemory = NULL;
+/** Offset into the patch memory which is not used. */
+static uint32_t g_offExecMemory = 0;
+
+/**
+ * Array of hooks to install.
+ */
+static SUPHARDENEDPOSIXHOOK const g_aHooks[] =
+{
+    /* pszSymbol,       pfnHook,                                     ppfnRealResume,   pfnResolve */
+    { "dlopen",  (PFNRT)supR3HardenedPosixMonitor_Dlopen,  (PFNRT *)&g_pfnDlopenReal,  supR3HardenedPosixMonitorDlopenResolve  },
+#ifdef SUP_HARDENED_WITH_DLMOPEN
+    { "dlmopen", (PFNRT)supR3HardenedPosixMonitor_Dlmopen, (PFNRT *)&g_pfnDlmopenReal, supR3HardenedPosixMonitorDlmopenResolve }
+#endif
+};
+
+
+
+/**
+ * Verifies the given library for proper access rights for further loading
+ * into the process.
+ *
+ * @returns Whether the access rights of the library look sane and loading it
+ *          is not considered a security risk: true if the library looks sane,
+ *          false otherwise.
+ * @param   pszFilename         The library to load, this can be an absolute or relative path
+ *                              or just the filename of the library when the default paths should
+ *                              be searched. NULL is allowed too to indicate opening the main
+ *                              binary.
+ */
+DECLASM(bool) supR3HardenedPosixMonitor_VerifyLibrary(const char *pszFilename)
+{
+    /*
+     * Giving NULL as the filename indicates opening the main program, which is
+     * fine.  We are already loaded and executing, after all.
+     *
+     * Filenames without any path component (whether absolute or relative) are allowed
+     * unconditionally too as the loader will only search the default paths configured by root.
+     */
+    bool fAllow = true;
+
+    if (   pszFilename
+        && strchr(pszFilename, '/') != NULL)
+    {
+#if defined(RT_OS_DARWIN) || defined(RT_OS_LINUX)
+        int rc = supR3HardenedVerifyFileFollowSymlinks(pszFilename, RTHCUINTPTR_MAX, true /* fMaybe3rdParty */,
+                                                       NULL /* pErrInfo */);
+#else
+        int rc = supR3HardenedVerifyFile(pszFilename, RTHCUINTPTR_MAX, true /* fMaybe3rdParty */,
+                                         NULL /* pErrInfo */);
+#endif
+
+        if (RT_FAILURE(rc))
+            fAllow = false;
+    }
+
+    return fAllow;
+}
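+
+#if 0 /* Illustrative only: a C level sketch of the control flow the assembly
+         wrappers in SUPR3HardenedMainA-posix.asm implement around this check;
+         the wrapper name here is hypothetical. */
+static void *dlopenWrapperSketch(const char *pszFilename, int fFlags)
+{
+    if (!supR3HardenedPosixMonitor_VerifyLibrary(pszFilename))
+        return NULL;                                 /* deny: pretend the load failed */
+    return g_pfnDlopenReal(pszFilename, fFlags);     /* resume the patched original */
+}
+#endif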
+
+
+/**
+ * Returns the start address of the given symbol if found or NULL otherwise.
+ *
+ * @returns Start address of the symbol or NULL if not found.
+ * @param   pszSymbol           The symbol name.
+ * @param   pfnResolve          The resolver to call before trying to query the start address.
+ */
+static void *supR3HardenedMainPosixGetStartBySymbol(const char *pszSymbol, PFNSUPHARDENEDSYMRESOLVE pfnResolve)
+{
+#ifndef RT_OS_SOLARIS
+    RT_NOREF(pfnResolve);
+    return dlsym(RTLD_DEFAULT, pszSymbol);
+
+#else  /* RT_OS_SOLARIS */
+    /*
+     * Solaris is tricky as dlsym doesn't return the actual start address of
+     * the symbol but the start of the trampoline in the PLT of the caller.
+     *
+     * Disassemble the first jmp instruction to get at the entry in the global
+     * offset table where the actual address is stored.
+     *
+     * To counter lazy symbol resolving, we first have to call the API before
+     * trying to resolve and disassemble it.
+     */
+    pfnResolve();
+
+    uint8_t *pbSym = (uint8_t *)dlsym(RTLD_DEFAULT, pszSymbol);
+
+# ifdef RT_ARCH_AMD64
+    DISSTATE Dis;
+    uint32_t cbInstr = 1;
+    int rc = DISInstr(pbSym, DISCPUMODE_64BIT, &Dis, &cbInstr);
+    if (   RT_FAILURE(rc)
+        || Dis.pCurInstr->uOpcode != OP_JMP
+        || !(Dis.ModRM.Bits.Mod == 0 && Dis.ModRM.Bits.Rm == 5 /* wrt RIP */))
+        return NULL;
+
+    /* Extract start address. */
+    pbSym = (pbSym + cbInstr + Dis.Param1.uDisp.i32);
+    pbSym = (uint8_t *)*((uintptr_t *)pbSym);
+# else
+#  error "Unsupported architecture"
+# endif
+
+    return pbSym;
+#endif /* RT_OS_SOLARIS */
+}
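+
+/* Illustration (assumed Solaris/amd64 PLT shape): the trampoline dlsym hands
+ * back is a RIP relative indirect jump,
+ *
+ *      ff 25 xx xx xx xx       jmp qword [rip+disp32]   ; disp32 -> GOT slot
+ *
+ * so pbSym + 6 + disp32 addresses the GOT slot, and dereferencing it yields
+ * the real entry point returned above. */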
+
+
+/**
+ * Allocates executable patch memory with the given constraints.
+ *
+ * @returns VBox status code.
+ * @param   cb                  Size of the patch memory in bytes.
+ * @param   pvHint              Where to try allocating nearby.
+ * @param   fRipRelAddr         Flag whether the executable memory must be within
+ *                              2GB before or after the hint as it will contain
+ *                              instructions using RIP relative addressing
+ */
+static uint8_t *supR3HardenedMainPosixExecMemAlloc(size_t cb, void *pvHint, bool fRipRelAddr)
+{
+    AssertReturn(cb < _1K, NULL);
+
+    /* Lazy allocation of executable memory. */
+    if (!g_pbExecMemory)
+    {
+        g_pbExecMemory = (uint8_t *)mmap(pvHint, DLOPEN_PATCH_MEMORY_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+                                         MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+        g_offExecMemory = 0;
+        if (g_pbExecMemory == MAP_FAILED)
+            return NULL;
+
+        memset(g_pbExecMemory, 0xcc, DLOPEN_PATCH_MEMORY_SIZE);
+    }
+
+    if (g_offExecMemory + cb >= DLOPEN_PATCH_MEMORY_SIZE)
+        return NULL;
+
+    uint8_t *pb = &g_pbExecMemory[g_offExecMemory];
+
+    if (fRipRelAddr)
+    {
+        /* Check that we allocated within 2GB of the hint. */
+        uintptr_t uPtrHint     = (uintptr_t)pvHint;
+        uintptr_t uPtrPatchMem = (uintptr_t)pb;
+        uintptr_t cbDistance   = uPtrHint < uPtrPatchMem
+                               ? uPtrPatchMem - uPtrHint
+                               : uPtrHint - uPtrPatchMem;
+
+        if (cbDistance >= _2G - _4K)
+            return NULL;
+    }
+
+    g_offExecMemory = RT_ALIGN_32(g_offExecMemory + cb, 16);
+    return pb;
+}
+
+
+/**
+ * Hooks the given method to execute the given one first.
+ *
+ * @returns VBox status code.
+ * @param   pszSymbol           The symbol to hook.
+ * @param   pfnHook             The hook to install.
+ * @param   ppfnReal            Where to store the pointer to entry point of the real method
+ *                              (somewhere in patch memory).
+ * @param   pfnResolve          The resolver to call before trying to query the start address.
+ */
+static int supR3HardenedMainPosixHookOne(const char *pszSymbol, PFNRT pfnHook, PFNRT *ppfnReal,
+                                         PFNSUPHARDENEDSYMRESOLVE pfnResolve)
+{
+    void *pfnTarget = supR3HardenedMainPosixGetStartBySymbol(pszSymbol, pfnResolve);
+    if (!pfnTarget)
+        return VERR_NOT_FOUND;
+
+    /*
+     * Make the target memory writeable to be able to insert the patch.
+     * Unprotect two pages in case the code crosses a page boundary.
+     */
+    void *pvTargetBase = (void *)(((uintptr_t)pfnTarget) & ~(uintptr_t)(_4K - 1));
+    int rcPsx = mprotect(pvTargetBase, 2 * _4K, PROT_WRITE | PROT_READ | PROT_EXEC);
+    if (rcPsx == -1)
+        return VERR_SUPLIB_TEXT_NOT_WRITEABLE;
+
+    uint8_t * const pbTarget = (uint8_t *)(uintptr_t)pfnTarget;
+
+    DISSTATE Dis;
+    uint32_t cbInstr;
+    uint32_t offJmpBack = 0;
+    uint32_t cbPatchMem = 0;
+
+#ifdef RT_ARCH_AMD64
+    /*
+     * Patch 64-bit hosts.
+     */
+    uint32_t cRipRelMovs = 0;
+
+    /* Just use the disassembler to skip 12 bytes or more; we might need to
+       rewrite mov instructions using RIP relative addressing. */
+    while (offJmpBack < 12)
+    {
+        cbInstr = 1;
+        int rc = DISInstr(pbTarget + offJmpBack, DISCPUMODE_64BIT, &Dis, &cbInstr);
+        if (   RT_FAILURE(rc)
+            || (Dis.pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW)
+            || (   Dis.ModRM.Bits.Mod == 0
+                && Dis.ModRM.Bits.Rm  == 5 /* wrt RIP */
+                && Dis.pCurInstr->uOpcode != OP_MOV))
+            return VERR_SUPLIB_UNEXPECTED_INSTRUCTION;
+
+        if (Dis.ModRM.Bits.Mod == 0 && Dis.ModRM.Bits.Rm == 5 /* wrt RIP */)
+            cRipRelMovs++;
+
+        offJmpBack += cbInstr;
+        cbPatchMem += cbInstr;
+    }
+
+    cbPatchMem += 14; /* jmp qword [$+8 wrt RIP] + 8 byte address to jump to. */
+    cbPatchMem = RT_ALIGN_32(cbPatchMem, 8);
+
+    /* Allocate suitable executable memory. */
+    bool fConvRipRelMovs = false;
+    uint8_t *pbPatchMem = supR3HardenedMainPosixExecMemAlloc(cbPatchMem, pbTarget, cRipRelMovs > 0);
+    if (!pbPatchMem)
+    {
+        /*
+         * Try to allocate memory again without the RIP relative mov addressing constraint.
+         * This makes it a bit more difficult for us later on, but there is no way around it.
+         * We need to increase the patch memory because we create two instructions for one
+         * (7 bytes for the RIP relative mov vs. 13 bytes for the two instructions replacing it ->
+         * need to allocate 6 bytes more per RIP relative mov).
+         */
+        fConvRipRelMovs = true;
+        if (cRipRelMovs > 0)
+            pbPatchMem = supR3HardenedMainPosixExecMemAlloc(cbPatchMem + cRipRelMovs * 6,
+                                                            pbTarget, false /*fRipRelAddr*/);
+
+        if (!pbPatchMem)
+            return VERR_NO_MEMORY;
+    }
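+
+    /* Byte count sketch of that conversion, assuming the common REX.W forms:
+     *      48 8b 05 dd dd dd dd    mov rax, [rip+disp32]  ;  7 bytes, original
+     *      48 b8 <8 byte addr>     mov rax, imm64         ; 10 bytes \ 13 byte
+     *      48 8b 00                mov rax, [rax]         ;  3 bytes / replacement
+     * hence the extra 6 bytes reserved per RIP relative mov above. */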
+
+    /* Assemble the code for resuming the call. */
+    *ppfnReal = (PFNRT)(uintptr_t)pbPatchMem;
+
+    /* Go through the instructions to patch and fixup any rip relative mov instructions. */
+    uint32_t offInsn = 0;
+    while (offInsn < offJmpBack)
+    {
+        cbInstr = 1;
+        int rc = DISInstr(pbTarget + offInsn, DISCPUMODE_64BIT, &Dis, &cbInstr);
+        if (   RT_FAILURE(rc)
+            || (Dis.pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW))
+            return VERR_SUPLIB_UNEXPECTED_INSTRUCTION;
+
+        if (   Dis.ModRM.Bits.Mod == 0
+            && Dis.ModRM.Bits.Rm  == 5 /* wrt RIP */
+            && Dis.pCurInstr->uOpcode == OP_MOV)
+        {
+            /* Deduce destination register and write out new instruction. */
+            if (RT_UNLIKELY(!(   (Dis.Param1.fUse & (DISUSE_BASE | DISUSE_REG_GEN64))
+                              && (Dis.Param2.fUse & DISUSE_RIPDISPLACEMENT32))))
+                return VERR_SUPLIB_UNEXPECTED_INSTRUCTION;
+
+            uintptr_t uAddr = (uintptr_t)&pbTarget[offInsn + cbInstr] + (intptr_t)Dis.Param2.uDisp.i32;
+
+            if (fConvRipRelMovs)
+            {
+                /*
+                 * Create two instructions: the first one moves the address as a constant into the
+                 * destination register, and the second one loads the data from memory into it.
+                 */
+
+                *pbPatchMem++ = 0x48;
+                *pbPatchMem++ = 0xb8 + Dis.Param1.Base.idxGenReg;
+                *(uintptr_t *)pbPatchMem = uAddr;
+                pbPatchMem   += sizeof(uintptr_t);
+
+                *pbPatchMem++ = 0x48;
+                *pbPatchMem++ = 0x8b;
+                *pbPatchMem++ = (Dis.Param1.Base.idxGenReg << X86_MODRM_REG_SHIFT) | Dis.Param1.Base.idxGenReg;
+            }
+            else
+            {
+                intptr_t  iDispNew   = uAddr - (uintptr_t)&pbPatchMem[3 + sizeof(int32_t)];
+                Assert(iDispNew == (int32_t)iDispNew);
+
+                /* Assemble the mov to register instruction with the updated rip relative displacement. */
+                *pbPatchMem++ = 0x48;
+                *pbPatchMem++ = 0x8b;
+                *pbPatchMem++ = (Dis.Param1.Base.idxGenReg << X86_MODRM_REG_SHIFT) | 5;
+                *(int32_t *)pbPatchMem = (int32_t)iDispNew;
+                pbPatchMem   += sizeof(int32_t);
+            }
+        }
+        else
+        {
+            memcpy(pbPatchMem, pbTarget + offInsn, cbInstr);
+            pbPatchMem += cbInstr;
+        }
+
+        offInsn += cbInstr;
+    }
+
+    *pbPatchMem++ = 0xff; /* jmp qword [$+8 wrt RIP] */
+    *pbPatchMem++ = 0x25;
+    *(uint32_t *)pbPatchMem = (uint32_t)(RT_ALIGN_PT(pbPatchMem + 4, 8, uint8_t *) - (pbPatchMem + 4));
+    pbPatchMem = RT_ALIGN_PT(pbPatchMem + 4, 8, uint8_t *);
+    *(uint64_t *)pbPatchMem = (uintptr_t)&pbTarget[offJmpBack];
+
+    /* Assemble the patch. */
+    Assert(offJmpBack >= 12);
+    pbTarget[0]  = 0x48; /* mov rax, qword */
+    pbTarget[1]  = 0xb8;
+    *(uintptr_t *)&pbTarget[2] = (uintptr_t)pfnHook;
+    pbTarget[10] = 0xff; /* jmp rax */
+    pbTarget[11] = 0xe0;
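+
+    /* The detour just written, byte for byte:
+     *      48 b8 <8 byte addr>     mov rax, pfnHook
+     *      ff e0                   jmp rax
+     * 12 bytes in total, which is why at least 12 prologue bytes were relocated. */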
+
+#else  /* !RT_ARCH_AMD64 */
+    /*
+     * Patch 32-bit hosts.
+     */
+    /* Just use the disassembler to skip 5 bytes or more. */
+    while (offJmpBack < 5)
+    {
+        cbInstr = 1;
+        int rc = DISInstr(pbTarget + offJmpBack, DISCPUMODE_32BIT, &Dis, &cbInstr);
+        if (   RT_FAILURE(rc)
+            || (   (Dis.pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW)
+                && Dis.pCurInstr->uOpcode != OP_CALL))
+            return VERR_SUPLIB_UNEXPECTED_INSTRUCTION;
+
+        if (   Dis.pCurInstr->uOpcode == OP_CALL
+            && (Dis.pCurInstr->fOpType & DISOPTYPE_RELATIVE_CONTROLFLOW))
+            cbPatchMem += 10; /* push imm32 + jmp rel32 */
+        else
+            cbPatchMem += cbInstr;
+
+        offJmpBack += cbInstr;
+    }
+
+    /* Allocate suitable executable memory. */
+    uint8_t *pbPatchMem = supR3HardenedMainPosixExecMemAlloc(cbPatchMem, pbTarget, false /* fRipRelAddr */);
+    if (!pbPatchMem)
+        return VERR_NO_MEMORY;
+
+    /* Assemble the code for resuming the call. */
+    *ppfnReal = (PFNRT)pbPatchMem;
+
+    /* Go through the instructions to patch and fixup any relative call instructions. */
+    uint32_t offInsn = 0;
+    while (offInsn < offJmpBack)
+    {
+        cbInstr = 1;
+        int rc = DISInstr(pbTarget + offInsn, DISCPUMODE_32BIT, &Dis, &cbInstr);
+        if (   RT_FAILURE(rc)
+            || (   (Dis.pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW)
+                && Dis.pCurInstr->uOpcode != OP_CALL))
+            return VERR_SUPLIB_UNEXPECTED_INSTRUCTION;
+
+        if (   Dis.pCurInstr->uOpcode == OP_CALL
+            && (Dis.pCurInstr->fOpType & DISOPTYPE_RELATIVE_CONTROLFLOW))
+        {
+            /*
+             * Don't use a call instruction directly but push the original return address
+             * onto the stack and use a relative jump to the call target.
+             * The reason here is that on Linux the called method saves the return
+             * address from the stack which will be different from the original because
+             * the code is executed from our patch memory.
+             *
+             * Luckily the call instruction is 5 bytes long, which means it is always the
+             * last instruction to patch, and we don't need to return from the call
+             * to patch memory anyway but can use this method to resume the original call.
+             */
+            AssertReturn(offInsn + cbInstr >= offJmpBack, VERR_SUPLIB_UNEXPECTED_INSTRUCTION); /* Must be last instruction! */
+
+            /* push return address */
+            uint32_t const uAddrReturn = (uintptr_t)&pbTarget[offInsn + cbInstr]; /* The return address to push to the stack. */
+
+            *pbPatchMem++           = 0x68; /* push dword */
+            *(uint32_t *)pbPatchMem = uAddrReturn;
+            pbPatchMem             += sizeof(uint32_t);
+
+            /* jmp rel32 to the call target */
+            uintptr_t const uAddr      = uAddrReturn + (int32_t)Dis.Param1.uValue;
+            int32_t   const i32DispNew = uAddr - (uintptr_t)&pbPatchMem[5];
+
+            *pbPatchMem++          = 0xe9; /* jmp rel32 */
+            *(int32_t *)pbPatchMem = i32DispNew;
+            pbPatchMem            += sizeof(int32_t);
+        }
+        else
+        {
+            memcpy(pbPatchMem, pbTarget + offInsn, cbInstr);
+            pbPatchMem += cbInstr;
+        }
+
+        offInsn += cbInstr;
+    }
+
+    *pbPatchMem++ = 0xe9; /* jmp rel32 */
+    *(uint32_t *)pbPatchMem = (uintptr_t)&pbTarget[offJmpBack] - ((uintptr_t)pbPatchMem + 4);
+
+    /* Assemble the patch. */
+    Assert(offJmpBack >= 5);
+    pbTarget[0] = 0xe9;
+    *(uint32_t *)&pbTarget[1] = (uintptr_t)pfnHook - (uintptr_t)&pbTarget[1+4];
+#endif /* !RT_ARCH_AMD64 */
+
+    /*
+     * Re-seal the target (ASSUMING that the shared object either has page-aligned
+     * sections or that the patch target is far enough from the writable parts).
+     */
+    rcPsx = mprotect(pvTargetBase, 2 * _4K, PROT_READ | PROT_EXEC);
+    if (rcPsx == -1)
+        return VERR_SUPLIB_TEXT_NOT_SEALED;
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * @callback_method_impl{FNSUPHARDENEDSYMRESOLVE, dlopen}
+ */
+static DECLCALLBACK(void) supR3HardenedPosixMonitorDlopenResolve(void)
+{
+    /* Make harmless dlopen call. */
+    void *pv = dlopen(NULL, RTLD_LAZY);
+    if (pv)
+        dlclose(pv);
+}
+
+
+#ifdef SUP_HARDENED_WITH_DLMOPEN
+/**
+ * @callback_method_impl{FNSUPHARDENEDSYMRESOLVE, dlmopen}
+ */
+static DECLCALLBACK(void) supR3HardenedPosixMonitorDlmopenResolve(void)
+{
+    /* Make harmless dlmopen call. */
+    void *pv = dlmopen(LM_ID_BASE, NULL, RTLD_LAZY);
+    if (pv)
+        dlclose(pv);
+}
+#endif
+
+
+/**
+ * Hardening initialization for POSIX compatible hosts.
+ *
+ * @returns nothing.
+ *
+ * @note Doesn't return on error.
+ */
+DECLHIDDEN(void) supR3HardenedPosixInit(void)
+{
+    for (unsigned i = 0; i < RT_ELEMENTS(g_aHooks); i++)
+    {
+        PCSUPHARDENEDPOSIXHOOK pHook = &g_aHooks[i];
+        int rc = supR3HardenedMainPosixHookOne(pHook->pszSymbol, pHook->pfnHook, pHook->ppfnRealResume, pHook->pfnResolve);
+        if (RT_FAILURE(rc))
+            supR3HardenedFatalMsg("supR3HardenedPosixInit", kSupInitOp_Integrity, rc,
+                                  "Failed to hook the %s interface", pHook->pszSymbol);
+    }
+}
+
+
+
+/*
+ * assert.cpp
+ *
+ * ASSUMES working DECLHIDDEN or there will be symbol confusion!
+ */
+
+RTDATADECL(char)                     g_szRTAssertMsg1[1024];
+RTDATADECL(char)                     g_szRTAssertMsg2[4096];
+RTDATADECL(const char * volatile)    g_pszRTAssertExpr;
+RTDATADECL(const char * volatile)    g_pszRTAssertFile;
+RTDATADECL(uint32_t volatile)        g_u32RTAssertLine;
+RTDATADECL(const char * volatile)    g_pszRTAssertFunction;
+
+RTDECL(bool) RTAssertMayPanic(void)
+{
+    return true;
+}
+
+
+RTDECL(void) RTAssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
+{
+    /*
+     * Fill in the globals.
+     */
+    g_pszRTAssertExpr       = pszExpr;
+    g_pszRTAssertFile       = pszFile;
+    g_pszRTAssertFunction   = pszFunction;
+    g_u32RTAssertLine       = uLine;
+    snprintf(g_szRTAssertMsg1, sizeof(g_szRTAssertMsg1),
+             "\n!!Assertion Failed!!\n"
+             "Expression: %s\n"
+             "Location  : %s(%d) %s\n",
+             pszExpr, pszFile, uLine, pszFunction);
+}
+
+
+RTDECL(void) RTAssertMsg2V(const char *pszFormat, va_list va)
+{
+    vsnprintf(g_szRTAssertMsg2, sizeof(g_szRTAssertMsg2), pszFormat, va);
+    if (g_enmSupR3HardenedMainState < SUPR3HARDENEDMAINSTATE_CALLED_TRUSTED_MAIN)
+        supR3HardenedFatalMsg(g_pszRTAssertExpr, kSupInitOp_Misc, VERR_INTERNAL_ERROR,
+                              "%s%s", g_szRTAssertMsg1,  g_szRTAssertMsg2);
+    else
+        supR3HardenedError(VERR_INTERNAL_ERROR, false/*fFatal*/, "%s%s", g_szRTAssertMsg1,  g_szRTAssertMsg2);
+}
+
diff --git a/src/VBox/HostDrivers/Support/posix/SUPR3HardenedMainA-posix.asm b/src/VBox/HostDrivers/Support/posix/SUPR3HardenedMainA-posix.asm
new file mode 100644
index 0000000..7a6d369
--- /dev/null
+++ b/src/VBox/HostDrivers/Support/posix/SUPR3HardenedMainA-posix.asm
@@ -0,0 +1,160 @@
+; $Id: SUPR3HardenedMainA-posix.asm $
+;; @file
+; VirtualBox Support Library - Hardened main(), Posix assembly bits.
+;
+
+;
+; Copyright (C) 2017 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+; VirtualBox OSE distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+
+
+;*******************************************************************************
+;* Header Files                                                                *
+;*******************************************************************************
+%include "iprt/asmdefs.mac"
+
+
+;*********************************************************************************************************************************
+;*  External Symbols                                                                                                             *
+;*********************************************************************************************************************************
+; External code.
+BEGINCODE
+extern NAME(supR3HardenedPosixMonitor_VerifyLibrary)
+
+; External data
+BEGINDATA
+extern NAME(g_pfnDlopenReal)
+%ifdef SUP_HARDENED_WITH_DLMOPEN
+extern NAME(g_pfnDlmopenReal)
+%endif
+
+
+
+BEGINCODE
+
+;;
+; Wrapper for dlopen() handing the call over to the file verification code
+; and resuming the call if we get a green light to load the library.
+;
+align 16
+BEGINPROC supR3HardenedPosixMonitor_Dlopen
+        push    xBP
+        mov     xBP, xSP
+
+%ifdef RT_ARCH_AMD64
+        ; Save parameters on the stack
+        push    rdi
+        push    rsi
+%else
+        sub     esp, 4                  ; 16-byte stack alignment before call.
+        push    dword [xBP + 08h]       ; first parameter.
+%endif
+
+        ;
+        ; Call the verification method.
+        ;
+        call    NAME(supR3HardenedPosixMonitor_VerifyLibrary)
+
+        ;
+        ; Restore parameters for the next call and get the stack back to the
+        ; original state.
+        ;
+%ifdef RT_ARCH_AMD64
+        pop     rsi
+        pop     rdi
+%endif
+        leave
+
+        ; Check the result and resume the call if the result is positive,
+        ; otherwise clean up and return NULL
+        test    al, al
+        je short .failed
+
+        ; Resume the original dlopen call by jumping into the saved code.
+        jmp     [NAME(g_pfnDlopenReal) xWrtRIP]
+
+.failed:
+        ;
+        ; Don't use leave here as we didn't use the enter instruction. Just clear
+        ; xAX and return
+        ;
+        xor     xAX, xAX
+        ret
+ENDPROC   supR3HardenedPosixMonitor_Dlopen
+
+
+%ifdef SUP_HARDENED_WITH_DLMOPEN
+;;
+; Wrapper for dlmopen() handing the call over to the file verification code
+; and resuming the call if we get a green light to load the library.
+;
+align 16
+BEGINPROC supR3HardenedPosixMonitor_Dlmopen
+        push    xBP
+        mov     xBP, xSP
+
+%ifdef RT_ARCH_AMD64
+        sub     rsp, 8                  ; 16-byte stack alignment before call.
+
+        ; Save parameters on the stack
+        push    rdi
+        push    rsi
+        push    rdx
+
+        mov     rdi, rsi                ; Move the second parameter to the front
+%else
+        sub     esp, 4                  ; 16-byte stack alignment before call.
+        push    dword [xBP + 0ch]       ; Move the second parameter to the front
+%endif
+
+        ;
+        ; Call the verification method.
+        ;
+        call    NAME(supR3HardenedPosixMonitor_VerifyLibrary)
+
+        ;
+        ; Restore parameters for the next call and get the stack back to the
+        ; original state.
+        ;
+%ifdef RT_ARCH_AMD64
+        pop     rdx
+        pop     rsi
+        pop     rdi
+%endif
+        leave
+
+        ; Check the result and resume the call if the result is positive,
+        ; otherwise clean up and return NULL
+        test    al, al
+        je short .failed
+
+        ; Resume the original dlmopen call by jumping into the saved code.
+        jmp     [NAME(g_pfnDlmopenReal) xWrtRIP]
+
+.failed:
+        ;
+        ; Don't use leave here as we didn't use the enter instruction. Just clear
+        ; xAX and return
+        ;
+        xor     xAX, xAX
+        ret
+ENDPROC   supR3HardenedPosixMonitor_Dlmopen
+%endif
+
diff --git a/src/VBox/HostDrivers/Support/win/SUPLib-win.cpp b/src/VBox/HostDrivers/Support/win/SUPLib-win.cpp
index f10881e..4e0cc32 100644
--- a/src/VBox/HostDrivers/Support/win/SUPLib-win.cpp
+++ b/src/VBox/HostDrivers/Support/win/SUPLib-win.cpp
@@ -645,8 +645,8 @@ int suplibOsTerm(PSUPLIBDATA pThis)
      */
     if (pThis->hDevice != NULL)
     {
-        if (!NtClose((HANDLE)pThis->hDevice))
-            AssertFailed();
+        NTSTATUS rcNt = NtClose((HANDLE)pThis->hDevice);
+        Assert(NT_SUCCESS(rcNt)); RT_NOREF(rcNt);
         pThis->hDevice = NIL_RTFILE; /* yes, that's right */
     }
 
diff --git a/src/VBox/HostDrivers/VBoxNetFlt/win/drv/VBoxNetLwf-win.cpp b/src/VBox/HostDrivers/VBoxNetFlt/win/drv/VBoxNetLwf-win.cpp
index 67a469c..2abfbc6 100644
--- a/src/VBox/HostDrivers/VBoxNetFlt/win/drv/VBoxNetLwf-win.cpp
+++ b/src/VBox/HostDrivers/VBoxNetFlt/win/drv/VBoxNetLwf-win.cpp
@@ -1103,6 +1103,7 @@ static NDIS_STATUS vboxNetLwfWinAttach(IN NDIS_HANDLE hFilter, IN NDIS_HANDLE hD
             vboxNetLwfWinFreePools(pModuleCtx, i);
             NdisFreeIoWorkItem(pModuleCtx->hWorkItem);
             NdisFreeMemory(pModuleCtx, 0, 0);
+            vboxNetLwfLogErrorEvent(IO_ERR_INSUFFICIENT_RESOURCES, NDIS_STATUS_RESOURCES, 7);
             return NDIS_STATUS_RESOURCES;
         }
         Log4(("vboxNetLwfWinAttach: allocated NBL+NB pool (data size=%u) 0x%p\n",
@@ -1125,6 +1126,7 @@ static NDIS_STATUS vboxNetLwfWinAttach(IN NDIS_HANDLE hFilter, IN NDIS_HANDLE hD
         LogError(("vboxNetLwfWinAttach: NdisAllocateNetBufferListPool failed\n"));
         NdisFreeIoWorkItem(pModuleCtx->hWorkItem);
         NdisFreeMemory(pModuleCtx, 0, 0);
+        vboxNetLwfLogErrorEvent(IO_ERR_INSUFFICIENT_RESOURCES, NDIS_STATUS_RESOURCES, 7);
         return NDIS_STATUS_RESOURCES;
     }
     Log4(("vboxNetLwfWinAttach: allocated NBL+NB pool 0x%p\n", pModuleCtx->hPool));
diff --git a/src/VBox/HostDrivers/VBoxNetFlt/win/drv/VBoxNetLwf.inf b/src/VBox/HostDrivers/VBoxNetFlt/win/drv/VBoxNetLwf.inf
index 3c75903..dc396ed 100644
--- a/src/VBox/HostDrivers/VBoxNetFlt/win/drv/VBoxNetLwf.inf
+++ b/src/VBox/HostDrivers/VBoxNetFlt/win/drv/VBoxNetLwf.inf
@@ -84,7 +84,7 @@ VBoxNetLwf.sys,,,2
 
 [VBoxNetLwf.ndi.AddReg]
 HKR, Ndi, HelpText, , %VBoxNetLwf_HELP%
-HKR, Ndi,            ClsID,              0, {f374d1a0-bf08-4bdc-9cb2-c15ddaeef955}
+;HKR, Ndi,            ClsID,              0, {f374d1a0-bf08-4bdc-9cb2-c15ddaeef955}
 ;HKR, Ndi,            ComponentDll,        , VBoxNetLwfNobj.dll
 HKR, Ndi,            FilterClass,         , compression
 HKR, Ndi,            FilterType,   0x10001, 0x2
diff --git a/src/VBox/HostServices/SharedFolders/vbsf.cpp b/src/VBox/HostServices/SharedFolders/vbsf.cpp
index 5d3863b..4dc0e76 100644
--- a/src/VBox/HostServices/SharedFolders/vbsf.cpp
+++ b/src/VBox/HostServices/SharedFolders/vbsf.cpp
@@ -428,14 +428,14 @@ static int vbsfConvertFileOpenFlags(bool fWritable, unsigned fShflFlags, RTFMODE
  * @param  pClient               Data structure describing the client accessing the shared folder
  * @param  root                  The index of the shared folder in the table of mappings.
  * @param  pszPath               Path to the file or folder on the host.
- * @param  pParms->CreateFlags   Creation or open parameters, see include/VBox/shflsvc.h
- * @param  pParms->Info          When a new file is created this specifies the initial parameters.
+ * @param  pParms @a CreateFlags Creation or open parameters, see include/VBox/shflsvc.h
+ * @param  pParms @a Info        When a new file is created this specifies the initial parameters.
  *                               When a file is created or overwritten, it also specifies the
  *                               initial size.
- * @retval pParms->Result        Shared folder status code, see include/VBox/shflsvc.h
- * @retval pParms->Handle        On success the (shared folder) handle of the file opened or
+ * @retval pParms @a Result      Shared folder status code, see include/VBox/shflsvc.h
+ * @retval pParms @a Handle      On success the (shared folder) handle of the file opened or
  *                               created
- * @retval pParms->Info          On success the parameters of the file opened or created
+ * @retval pParms @a Info        On success the parameters of the file opened or created
  */
 static int vbsfOpenFile(SHFLCLIENTDATA *pClient, SHFLROOT root, const char *pszPath, SHFLCREATEPARMS *pParms)
 {
@@ -608,13 +608,14 @@ static int vbsfOpenFile(SHFLCLIENTDATA *pClient, SHFLROOT root, const char *pszP
  * Open a folder or create and open a new one.
  *
  * @returns IPRT status code
+ * @param  pClient               Data structure describing the client accessing the shared folder
  * @param  root                  The index of the shared folder in the table of mappings.
  * @param  pszPath               Path to the file or folder on the host.
- * @param  pParms->CreateFlags   Creation or open parameters, see include/VBox/shflsvc.h
- * @retval pParms->Result        Shared folder status code, see include/VBox/shflsvc.h
- * @retval pParms->Handle        On success the (shared folder) handle of the folder opened or
+ * @param  pParms @a CreateFlags Creation or open parameters, see include/VBox/shflsvc.h
+ * @retval pParms @a Result      Shared folder status code, see include/VBox/shflsvc.h
+ * @retval pParms @a Handle      On success the (shared folder) handle of the folder opened or
  *                               created
- * @retval pParms->Info          On success the parameters of the folder opened or created
+ * @retval pParms @a Info        On success the parameters of the folder opened or created
  *
  * @note folders are created with fMode = 0777
  */
@@ -830,11 +831,11 @@ void testCreate(RTTEST hTest)
  *                         indexed by root.
  * @param   cbPath         Presumably the length of the path in pPath.  Actually
  *                         ignored, as pPath contains a length parameter.
- * @param   pParms->Info   If a new file is created or an old one overwritten, set
+ * @param   pParms @a Info If a new file is created or an old one overwritten, set
  *                         these attributes
- * @retval  pParms->Result Shared folder result code, see include/VBox/shflsvc.h
- * @retval  pParms->Handle Shared folder handle to the newly opened file
- * @retval  pParms->Info   Attributes of the file or folder opened
+ * @retval  pParms @a Result Shared folder result code, see include/VBox/shflsvc.h
+ * @retval  pParms @a Handle Shared folder handle to the newly opened file
+ * @retval  pParms @a Info Attributes of the file or folder opened
  *
  * @note This function returns success if a "non-exceptional" error occurred,
  *       such as "no such file".  In this case, the caller should check the
diff --git a/src/VBox/Main/src-client/ConsoleImpl2.cpp b/src/VBox/Main/src-client/ConsoleImpl2.cpp
index 2bea66c..393064a 100644
--- a/src/VBox/Main/src-client/ConsoleImpl2.cpp
+++ b/src/VBox/Main/src-client/ConsoleImpl2.cpp
@@ -1448,6 +1448,7 @@ int Console::i_configConstructorInner(PUVM pUVM, PVM pVM, AutoWriteLock *pAlock)
         {
             default:
                 Assert(false);
+                /* fall thru */
             case ChipsetType_PIIX3:
                 InsertConfigNode(pDevices, "pci", &pDev);
                 uHbcPCIAddress = (0x0 << 16) | 0;
diff --git a/src/VBox/Main/src-client/ConsoleImplTeleporter.cpp b/src/VBox/Main/src-client/ConsoleImplTeleporter.cpp
index 0e9c9d3..b296b38 100644
--- a/src/VBox/Main/src-client/ConsoleImplTeleporter.cpp
+++ b/src/VBox/Main/src-client/ConsoleImplTeleporter.cpp
@@ -888,6 +888,7 @@ Console::i_teleporterSrcThreadWrapper(RTTHREAD hThreadSelf, void *pvUser)
 
                 default:
                     AssertMsgFailed(("%s\n", VMR3GetStateName(enmVMState)));
+                    /* fall thru */
                 case VMSTATE_SUSPENDED:
                 case VMSTATE_SUSPENDED_LS:
                 case VMSTATE_SUSPENDING:
diff --git a/src/VBox/Main/src-server/HostImpl.cpp b/src/VBox/Main/src-server/HostImpl.cpp
index 3c72427..b93a9d1 100644
--- a/src/VBox/Main/src-server/HostImpl.cpp
+++ b/src/VBox/Main/src-server/HostImpl.cpp
@@ -155,16 +155,7 @@ typedef SOLARISDVD *PSOLARISDVD;
 # include "netif.h"
 #endif
 
-/* XXX Solaris: definitions in /usr/include/sys/regset.h clash with hm_svm.h */
-#undef DS
-#undef ES
-#undef CS
-#undef SS
-#undef FS
-#undef GS
-
 #include <VBox/usb.h>
-#include <VBox/vmm/hm_svm.h>
 #include <VBox/err.h>
 #include <VBox/settings.h>
 #include <VBox/sup.h>
@@ -364,7 +355,7 @@ HRESULT Host::init(VirtualBox *aParent)
                     {
                         uint32_t fSVMFeaturesEdx;
                         ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSVMFeaturesEdx);
-                        if (fSVMFeaturesEdx & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
+                        if (fSVMFeaturesEdx & X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
                             m->fNestedPagingSupported = true;
                     }
                 }
diff --git a/src/VBox/Main/src-server/HostUSBDeviceImpl.cpp b/src/VBox/Main/src-server/HostUSBDeviceImpl.cpp
index e6fe8a9..2815ab5 100644
--- a/src/VBox/Main/src-server/HostUSBDeviceImpl.cpp
+++ b/src/VBox/Main/src-server/HostUSBDeviceImpl.cpp
@@ -1859,6 +1859,7 @@ bool HostUSBDevice::i_setState(HostUSBDeviceState aNewState,
                 case kHostUSBDeviceState_Capturable:
                 case kHostUSBDeviceState_Unused:
                     fFilters = true;
+                    /* fall thru */
                 case kHostUSBDeviceState_PhysDetached:
                     Assert(aNewPendingState == kHostUSBDeviceState_Invalid);
                     Assert(aNewSubState == kHostUSBDeviceSubState_Default);
@@ -1878,6 +1879,7 @@ bool HostUSBDevice::i_setState(HostUSBDeviceState aNewState,
                 /* Host changes. */
                 case kHostUSBDeviceState_Unused:
                     fFilters = true; /* Wildcard only... */
+                    /* fall thru */
                 case kHostUSBDeviceState_UsedByHost:
                 case kHostUSBDeviceState_PhysDetached:
                     Assert(aNewPendingState == kHostUSBDeviceState_Invalid);
diff --git a/src/VBox/Main/src-server/win/NetIf-win.cpp b/src/VBox/Main/src-server/win/NetIf-win.cpp
index 97846d0..8588cc3 100644
--- a/src/VBox/Main/src-server/win/NetIf-win.cpp
+++ b/src/VBox/Main/src-server/win/NetIf-win.cpp
@@ -1804,7 +1804,7 @@ int NetIfList(std::list<ComObjPtr<HostNetworkInterface> > &list)
     for (int tries = 0; tries < 3 && dwRc == ERROR_BUFFER_OVERFLOW; ++tries)
     {
         /* Get more memory and try again. */
-        free(pAddresses);
+        RTMemFree(pAddresses);
         pAddresses = (PIP_ADAPTER_ADDRESSES)RTMemAlloc(uBufLen);
         if (!pAddresses)
             return HRESULT_FROM_WIN32(ERROR_NOT_ENOUGH_MEMORY);
diff --git a/src/VBox/Runtime/Makefile.kmk b/src/VBox/Runtime/Makefile.kmk
index c52fef7..b010045 100644
--- a/src/VBox/Runtime/Makefile.kmk
+++ b/src/VBox/Runtime/Makefile.kmk
@@ -707,6 +707,8 @@ RuntimeR3_SOURCES.x86 += \
 	common/asm/ASMSetXcr0.asm \
 	common/asm/ASMXSave.asm \
 	common/asm/ASMXRstor.asm \
+	common/asm/ASMFxSave.asm \
+	common/asm/ASMFxRstor.asm \
 	common/asm/ASMSerializeInstruction-cpuid.asm \
 	common/asm/ASMSerializeInstruction-iret.asm \
 	common/asm/ASMSerializeInstruction-rdtscp.asm \
@@ -726,6 +728,8 @@ RuntimeR3_SOURCES.amd64 += \
 	common/asm/ASMSetXcr0.asm \
 	common/asm/ASMXSave.asm \
 	common/asm/ASMXRstor.asm \
+	common/asm/ASMFxSave.asm \
+	common/asm/ASMFxRstor.asm \
 	common/asm/ASMSerializeInstruction-cpuid.asm \
 	common/asm/ASMSerializeInstruction-iret.asm \
 	common/asm/ASMSerializeInstruction-rdtscp.asm \
@@ -1942,6 +1946,8 @@ RuntimeR0_SOURCES.x86 += \
 	common/asm/ASMSetXcr0.asm \
 	common/asm/ASMXSave.asm \
 	common/asm/ASMXRstor.asm \
+	common/asm/ASMFxSave.asm \
+	common/asm/ASMFxRstor.asm \
 	common/asm/ASMRdMsrEx.asm \
 	common/asm/ASMWrMsrEx.asm
 RuntimeR0_SOURCES.amd64 += \
@@ -1957,6 +1963,8 @@ RuntimeR0_SOURCES.amd64 += \
 	common/asm/ASMSetXcr0.asm \
 	common/asm/ASMXSave.asm \
 	common/asm/ASMXRstor.asm \
+	common/asm/ASMFxSave.asm \
+	common/asm/ASMFxRstor.asm \
 	common/asm/ASMRdMsrEx.asm \
 	common/asm/ASMWrMsrEx.asm
 
@@ -2565,6 +2573,8 @@ ifdef VBOX_WITH_RAW_MODE
 	common/asm/ASMSetXcr0.asm \
 	common/asm/ASMXSave.asm \
 	common/asm/ASMXRstor.asm \
+	common/asm/ASMFxSave.asm \
+	common/asm/ASMFxRstor.asm \
 	common/checksum/alt-md5.cpp \
 	common/checksum/crc32.cpp \
 	common/checksum/crc64.cpp \
diff --git a/src/VBox/Runtime/common/asm/ASMCpuId.asm b/src/VBox/Runtime/common/asm/ASMCpuId.asm
new file mode 100644
index 0000000..70f9395
--- /dev/null
+++ b/src/VBox/Runtime/common/asm/ASMCpuId.asm
@@ -0,0 +1,111 @@
+; $Id: ASMCpuId.asm $
+;; @file
+; IPRT - ASMCpuId().
+;
+
+;
+; Copyright (C) 2012-2016 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+; VirtualBox OSE distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+
+;*******************************************************************************
+;* Header Files                                                                *
+;*******************************************************************************
+%include "iprt/asmdefs.mac"
+
+BEGINCODE
+
+;;
+; CPUID with EAX input, returning ALL output registers (no NULL checking).
+;
+; @param    uOperator   8086:bp+4   x86:ebp+8   gcc:rdi  msc:rcx
+; @param    pvEAX       8086:bp+8   x86:ebp+0c  gcc:rsi  msc:rdx
+; @param    pvEBX       8086:bp+0c  x86:ebp+10  gcc:rdx  msc:r8
+; @param    pvECX       8086:bp+10  x86:ebp+14  gcc:rcx  msc:r9
+; @param    pvEDX       8086:bp+14  x86:ebp+18  gcc:r8   msc:rbp+30h
+;
+; DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
+;
+BEGINPROC_EXPORTED ASMCpuId
+        push    xBP
+        mov     xBP, xSP
+        push    xBX
+
+%ifdef ASM_CALL64_MSC
+ %if ARCH_BITS != 64
+  %error ARCH_BITS mismatch?
+ %endif
+        mov     eax, ecx
+        mov     r10, rdx
+        cpuid
+        mov     [r10], eax
+        mov     [r8], ebx
+        mov     [r9], ecx
+        mov     r10, [rbp+30h]
+        mov     [r10], edx
+
+%elifdef ASM_CALL64_GCC
+        mov     eax, edi
+        mov     r10, rdx
+        mov     r11, rcx
+        cpuid
+        mov     [rsi], eax
+        mov     [r10], ebx
+        mov     [r11], ecx
+        mov     [r8], edx
+
+%elif ARCH_BITS == 32
+        mov     eax, [xBP + 08h]
+        cpuid
+        push    edx
+        mov     edx, [xBP + 0ch]
+        mov     [edx], eax
+        mov     edx, [xBP + 10h]
+        mov     [edx], ebx
+        mov     edx, [xBP + 14h]
+        mov     [edx], ecx
+        mov     edx, [xBP + 18h]
+        pop     dword [edx]
+
+%elif ARCH_BITS == 16
+        push    es
+        push    di
+
+        mov     eax, [xBP + 04h]
+        cpuid
+        les     di, [xBP + 08h]
+        mov     [di], eax
+        les     di, [xBP + 0ch]
+        mov     [di], ebx
+        les     di, [xBP + 10h]
+        mov     [di], ecx
+        les     di, [xBP + 14h]
+        mov     [di], edx
+
+        pop     di
+        pop     es
+%else
+ %error unsupported arch
+%endif
+
+        pop     xBX
+        leave
+        ret
+ENDPROC ASMCpuId
+
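
A usage sketch for the routine above, following the DECLASM prototype given in its header comment; it assumes the IPRT headers are on the include path and uses leaf 0 (the vendor string) as the example:

    #include <iprt/asm-amd64-x86.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        uint32_t uEax, uEbx, uEcx, uEdx;
        ASMCpuId(0, &uEax, &uEbx, &uEcx, &uEdx);  /* leaf 0: max leaf + vendor */
        char szVendor[13];
        memcpy(&szVendor[0], &uEbx, 4);           /* vendor string order: EBX, EDX, ECX */
        memcpy(&szVendor[4], &uEdx, 4);
        memcpy(&szVendor[8], &uEcx, 4);
        szVendor[12] = '\0';
        printf("CPU vendor: %s\n", szVendor);
        return 0;
    }
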
diff --git a/src/VBox/Runtime/common/asm/ASMCpuIdExSlow.asm b/src/VBox/Runtime/common/asm/ASMCpuIdExSlow.asm
index 8ce8f7e..58ce8c3 100644
--- a/src/VBox/Runtime/common/asm/ASMCpuIdExSlow.asm
+++ b/src/VBox/Runtime/common/asm/ASMCpuIdExSlow.asm
@@ -49,11 +49,17 @@ BEGINPROC_EXPORTED ASMCpuIdExSlow
         push    xBP
         mov     xBP, xSP
         push    xBX
-%ifdef RT_ARCH_X86
+%if ARCH_BITS == 32
         push    edi
+%elif ARCH_BITS == 16
+        push    di
+        push    es
 %endif
 
 %ifdef ASM_CALL64_MSC
+ %if ARCH_BITS != 64
+  %error ARCH_BITS mismatch?
+ %endif
         mov     eax, ecx
         mov     ebx, edx
         mov     ecx, r8d
@@ -68,12 +74,17 @@ BEGINPROC_EXPORTED ASMCpuIdExSlow
         xchg    ecx, edx
         mov     r10, [rbp + 10h]
         mov     r11, [rbp + 18h]
-%elifdef RT_ARCH_X86
-        mov     eax, [ebp + 08h]
-        mov     ebx, [ebp + 0ch]
-        mov     ecx, [ebp + 10h]
-        mov     edx, [ebp + 14h]
-        mov     edi, [ebp + 18h]
+%elif ARCH_BITS == 32
+        mov     eax, [xBP + 08h]
+        mov     ebx, [xBP + 0ch]
+        mov     ecx, [xBP + 10h]
+        mov     edx, [xBP + 14h]
+        mov     edi, [xBP + 18h]
+%elif ARCH_BITS == 16
+        mov     eax, [xBP + 08h - 4]
+        mov     ebx, [xBP + 0ch - 4]
+        mov     ecx, [xBP + 10h - 4]
+        mov     edx, [xBP + 14h - 4]
 %else
  %error unsupported arch
 %endif
@@ -84,10 +95,15 @@ BEGINPROC_EXPORTED ASMCpuIdExSlow
         test    r8, r8
         jz      .store_ebx
         mov     [r8], eax
-%else
+%elif ARCH_BITS == 32
         test    edi, edi
         jz      .store_ebx
         mov     [edi], eax
+%else
+        cmp     dword [bp + 18h - 4], 0
+        je      .store_ebx
+        les     di, [bp + 18h - 4]
+        mov     [es:di], eax
 %endif
 .store_ebx:
 
@@ -95,11 +111,16 @@ BEGINPROC_EXPORTED ASMCpuIdExSlow
         test    r9, r9
         jz      .store_ecx
         mov     [r9], ebx
-%else
+%elif ARCH_BITS == 32
         mov     edi, [ebp + 1ch]
         test    edi, edi
         jz      .store_ecx
         mov     [edi], ebx
+%else
+        cmp     dword [bp + 1ch - 4], 0
+        je      .store_ecx
+        les     di, [bp + 1ch - 4]
+        mov     [es:di], ebx
 %endif
 .store_ecx:
 
@@ -107,11 +128,16 @@ BEGINPROC_EXPORTED ASMCpuIdExSlow
         test    r10, r10
         jz      .store_edx
         mov     [r10], ecx
-%else
+%elif ARCH_BITS == 32
         mov     edi, [ebp + 20h]
         test    edi, edi
         jz      .store_edx
         mov     [edi], ecx
+%else
+        cmp     dword [bp + 20h - 4], 0
+        je      .store_edx
+        les     di, [bp + 20h - 4]
+        mov     [es:di], ecx
 %endif
 .store_edx:
 
@@ -119,16 +145,24 @@ BEGINPROC_EXPORTED ASMCpuIdExSlow
         test    r11, r11
         jz      .done
         mov     [r11], edx
-%else
+%elif ARCH_BITS == 32
         mov     edi, [ebp + 24h]
         test    edi, edi
         jz      .done
         mov     [edi], edx
+%else
+        cmp     dword [bp + 24h - 4], 0
+        je      .done
+        les     di, [bp + 24h - 4]
+        mov     [es:di], edx
 %endif
 .done:
 
-%ifdef RT_ARCH_X86
+%if ARCH_BITS == 32
         pop     edi
+%elif ARCH_BITS == 16
+        pop     es
+        pop     di
 %endif
         pop     xBX
         leave
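
The parameter layout of ASMCpuIdExSlow() is inferred here from the [xBP + ...] stack offsets in the hunk: operator, initial EBX/ECX/EDX seed values, then four output pointers that the .store_* logic skips when NULL. Treat the exact prototype as an assumption; the authoritative declaration is in include/iprt/asm-amd64-x86.h:

    #include <stddef.h>
    #include <stdint.h>
    #include <iprt/asm-amd64-x86.h>

    void probeCacheSubLeaf(void)
    {
        uint32_t uEax = 0, uEcx = 0;
        /* Leaf 4 sub-leaves are selected via ECX, which is what the
           uInitECX seed parameter is for; NULL outputs are skipped. */
        ASMCpuIdExSlow(0x00000004, 0 /*uInitEBX*/, 2 /*uInitECX*/, 0 /*uInitEDX*/,
                       &uEax, NULL /*pvEBX*/, &uEcx, NULL /*pvEDX*/);
    }
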
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm b/src/VBox/Runtime/common/asm/ASMFxRstor.asm
similarity index 53%
copy from src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
copy to src/VBox/Runtime/common/asm/ASMFxRstor.asm
index d9ea987..602a26b 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
+++ b/src/VBox/Runtime/common/asm/ASMFxRstor.asm
@@ -1,10 +1,10 @@
-; $Id: bs3-cpu-decoding-1-asm.asm $
+; $Id: ASMFxRstor.asm $
 ;; @file
-; BS3Kit - bs3-cpu-decoding-1, assembly helpers and template instantiation.
+; IPRT - ASMFxRstor().
 ;
 
 ;
-; Copyright (C) 2007-2016 Oracle Corporation
+; Copyright (C) 2006-2017 Oracle Corporation
 ;
 ; This file is part of VirtualBox Open Source Edition (OSE), as
 ; available from http://www.virtualbox.org. This file is free software;
@@ -24,15 +24,41 @@
 ; terms and conditions of either the GPL or the CDDL or both.
 ;
 
+;*******************************************************************************
+;* Header Files                                                                *
+;*******************************************************************************
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
 
-;*********************************************************************************************************************************
-;*  Header Files                                                                                                                 *
-;*********************************************************************************************************************************
-%include "bs3kit.mac"
+BEGINCODE
 
-
-;
-; Instantiate code templates.
+;;
+; Loads extended CPU state.
+; @param    pFxState    Pointer to the FXRSTOR state area.
+;                       msc=rcx, gcc=rdi, x86=[esp+4]
 ;
-BS3_INSTANTIATE_TEMPLATE_ESSENTIALS      "bs3-cpu-decoding-1-template.mac"
+BEGINPROC_EXPORTED ASMFxRstor
+SEH64_END_PROLOGUE
+%ifdef ASM_CALL64_MSC
+        o64 fxrstor [rcx]
+%elifdef ASM_CALL64_GCC
+        o64 fxrstor [rdi]
+%elif ARCH_BITS == 32
+        mov     ecx, [esp + 4]
+        fxrstor [ecx]
+%elif ARCH_BITS == 16
+        push    bp
+        mov     bp, sp
+        push    es
+        push    bx
+        les     bx, [bp + 4]
+        fxrstor [es:bx]
+        pop     bx
+        pop     es
+        pop     bp
+%else
+ %error "Undefined arch?"
+%endif
+        ret
+ENDPROC ASMFxRstor
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm b/src/VBox/Runtime/common/asm/ASMFxSave.asm
similarity index 54%
copy from src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
copy to src/VBox/Runtime/common/asm/ASMFxSave.asm
index d9ea987..83048e9 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
+++ b/src/VBox/Runtime/common/asm/ASMFxSave.asm
@@ -1,10 +1,10 @@
-; $Id: bs3-cpu-decoding-1-asm.asm $
+; $Id: ASMFxSave.asm $
 ;; @file
-; BS3Kit - bs3-cpu-decoding-1, assembly helpers and template instantiation.
+; IPRT - ASMFxSave().
 ;
 
 ;
-; Copyright (C) 2007-2016 Oracle Corporation
+; Copyright (C) 2006-2017 Oracle Corporation
 ;
 ; This file is part of VirtualBox Open Source Edition (OSE), as
 ; available from http://www.virtualbox.org. This file is free software;
@@ -24,15 +24,41 @@
 ; terms and conditions of either the GPL or the CDDL or both.
 ;
 
+;*******************************************************************************
+;* Header Files                                                                *
+;*******************************************************************************
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
 
-;*********************************************************************************************************************************
-;*  Header Files                                                                                                                 *
-;*********************************************************************************************************************************
-%include "bs3kit.mac"
+BEGINCODE
 
-
-;
-; Instantiate code templates.
+;;
+; Saves extended CPU state.
+; @param    pFxState    Pointer to the FXSAVE state area.
+;                       msc=rcx, gcc=rdi, x86=[esp+4]
 ;
-BS3_INSTANTIATE_TEMPLATE_ESSENTIALS      "bs3-cpu-decoding-1-template.mac"
+BEGINPROC_EXPORTED ASMFxSave
+SEH64_END_PROLOGUE
+%ifdef ASM_CALL64_MSC
+        o64 fxsave [rcx]
+%elifdef ASM_CALL64_GCC
+        o64 fxsave [rdi]
+%elif ARCH_BITS == 32
+        mov     ecx, [esp + 4]
+        fxsave  [ecx]
+%elif ARCH_BITS == 16
+        push    bp
+        mov     bp, sp
+        push    es
+        push    bx
+        les     bx, [bp + 4]
+        fxsave  [es:bx]
+        pop     bx
+        pop     es
+        pop     bp
+%else
+ %error "Undefined arch?"
+%endif
+        ret
+ENDPROC ASMFxSave
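
Both new routines operate on the 512-byte FXSAVE image, which the CPU requires to be 16-byte aligned. A minimal pairing sketch; the prototypes are reduced to void pointers here as an assumption (the real declarations in include/iprt/asm-amd64-x86.h take a structured state pointer):

    #include <stdalign.h>
    #include <stdint.h>

    extern void ASMFxSave(void *pFxState);   /* assumed simplified prototypes */
    extern void ASMFxRstor(void *pFxState);

    int main(void)
    {
        alignas(16) uint8_t abFxArea[512];   /* FXSAVE image: 512 bytes, 16-byte aligned */
        ASMFxSave(abFxArea);                 /* snapshot x87/SSE state */
        ASMFxRstor(abFxArea);                /* restore it unchanged */
        return 0;
    }
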
 
diff --git a/src/VBox/Storage/VD.cpp b/src/VBox/Storage/VD.cpp
index df2d514..61cf2a4 100644
--- a/src/VBox/Storage/VD.cpp
+++ b/src/VBox/Storage/VD.cpp
@@ -1808,8 +1808,12 @@ static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)
         {
             LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
             vdThreadFinishWrite(pDisk);
+
+            bool fFreeCtx = RT_BOOL(!(pTmp->fFlags & VDIOCTX_FLAGS_DONT_FREE));
             vdIoCtxRootComplete(pDisk, pTmp);
-            vdIoCtxFree(pDisk, pTmp);
+
+            if (fFreeCtx)
+                vdIoCtxFree(pDisk, pTmp);
         }
     }
 
@@ -1863,8 +1867,11 @@ static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)
         {
             LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
             vdThreadFinishWrite(pDisk);
+
+            bool fFreeCtx = RT_BOOL(!(pTmp->fFlags & VDIOCTX_FLAGS_DONT_FREE));
             vdIoCtxRootComplete(pDisk, pTmp);
-            vdIoCtxFree(pDisk, pTmp);
+            if (fFreeCtx)
+                vdIoCtxFree(pDisk, pTmp);
         }
     }
 
@@ -4128,6 +4135,7 @@ static int vdIoCtxContinue(PVDIOCTX pIoCtx, int rcReq)
             && ASMAtomicCmpXchgBool(&pIoCtx->fComplete, true, false))
         {
             LogFlowFunc(("I/O context completed pIoCtx=%#p\n", pIoCtx));
+            bool fFreeCtx = RT_BOOL(!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE));
             if (pIoCtx->pIoCtxParent)
             {
                 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;
@@ -4165,9 +4173,12 @@ static int vdIoCtxContinue(PVDIOCTX pIoCtx, int rcReq)
                     && ASMAtomicCmpXchgBool(&pIoCtxParent->fComplete, true, false))
                 {
                     LogFlowFunc(("Parent I/O context completed pIoCtxParent=%#p rcReq=%Rrc\n", pIoCtxParent, pIoCtxParent->rcReq));
+                    bool fFreeParentCtx = RT_BOOL(!(pIoCtxParent->fFlags & VDIOCTX_FLAGS_DONT_FREE));
                     vdIoCtxRootComplete(pDisk, pIoCtxParent);
                     vdThreadFinishWrite(pDisk);
-                    vdIoCtxFree(pDisk, pIoCtxParent);
+
+                    if (fFreeParentCtx)
+                        vdIoCtxFree(pDisk, pIoCtxParent);
                     vdDiskProcessBlockedIoCtx(pDisk);
                 }
                 else if (!vdIoCtxIsDiskLockOwner(pDisk, pIoCtx))
@@ -4196,7 +4207,8 @@ static int vdIoCtxContinue(PVDIOCTX pIoCtx, int rcReq)
                 vdIoCtxRootComplete(pDisk, pIoCtx);
             }
 
-            vdIoCtxFree(pDisk, pIoCtx);
+            if (fFreeCtx)
+                vdIoCtxFree(pDisk, pIoCtx);
         }
     }
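
The VD.cpp changes all follow one pattern: whether the context may be freed is decided before vdIoCtxRootComplete() runs, because the completion callback can hand a VDIOCTX_FLAGS_DONT_FREE context back to its owner, after which reading pTmp->fFlags would be a use-after-free. A generic sketch of that ordering, with illustrative types:

    #include <stdlib.h>

    typedef struct IOCTX IOCTX;
    struct IOCTX
    {
        unsigned fFlags;
        void   (*pfnComplete)(IOCTX *pCtx);  /* may hand pCtx back to its owner */
    };
    #define IOCTX_DONT_FREE 1u

    static void completeAndMaybeFree(IOCTX *pCtx)
    {
        int fFree = !(pCtx->fFlags & IOCTX_DONT_FREE); /* snapshot before callback */
        pCtx->pfnComplete(pCtx);                       /* pCtx may be recycled here */
        if (fFree)
            free(pCtx);                                /* safe: decided beforehand */
    }
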
 
diff --git a/src/VBox/Storage/testcase/VDScriptInterp.cpp b/src/VBox/Storage/testcase/VDScriptInterp.cpp
index 43fe436..927b1c1 100644
--- a/src/VBox/Storage/testcase/VDScriptInterp.cpp
+++ b/src/VBox/Storage/testcase/VDScriptInterp.cpp
@@ -504,6 +504,7 @@ static int vdScriptInterpreterEvaluateExpression(PVDSCRIPTINTERPCTX pThis, PVDSC
         case VDSCRIPTEXPRTYPE_POSTFIX_INCREMENT:
         case VDSCRIPTEXPRTYPE_POSTFIX_DECREMENT:
             AssertMsgFailed(("TODO\n"));
+            /* fall thru */
         case VDSCRIPTEXPRTYPE_POSTFIX_FNCALL:
         {
             PVDSCRIPTFN pFn = (PVDSCRIPTFN)RTStrSpaceGet(&pThis->pScriptCtx->hStrSpaceFn, pExpr->FnCall.pFnIde->pIde->aszIde);
@@ -570,6 +571,7 @@ static int vdScriptInterpreterEvaluateExpression(PVDSCRIPTINTERPCTX pThis, PVDSC
         case VDSCRIPTEXPRTYPE_ASSIGN_OR:
         case VDSCRIPTEXPRTYPE_ASSIGNMENT_LIST:
             AssertMsgFailed(("TODO\n"));
+            /* fall thru */
         default:
             AssertMsgFailed(("Invalid expression type: %d\n", pExpr->enmType));
     }
diff --git a/src/VBox/Storage/testcase/tstVDCompact.vd b/src/VBox/Storage/testcase/tstVDCompact.vd
index 0289978..15c796c 100644
--- a/src/VBox/Storage/testcase/tstVDCompact.vd
+++ b/src/VBox/Storage/testcase/tstVDCompact.vd
@@ -67,6 +67,8 @@ void tstSnapshotCompact(string strMsg, string strBackend)
     compact("disk", 1);
 
     close("disk", "single", true);
+    close("disk", "single", true);
+    close("disk", "single", true);
     destroydisk("disk");
 }
 
diff --git a/src/VBox/Storage/testcase/tstVDCopy.vd b/src/VBox/Storage/testcase/tstVDCopy.vd
index d34f27d..eba1e6d 100644
--- a/src/VBox/Storage/testcase/tstVDCopy.vd
+++ b/src/VBox/Storage/testcase/tstVDCopy.vd
@@ -75,10 +75,14 @@ void main()
     close("dest", "single", true);
     close("dest", "single", true);
     close("dest", "single", true);
+    close("dest", "single", true);
+    close("dest", "single", true);
 
     close("source", "single", true);
     close("source", "single", true);
     close("source", "single", true);
+    close("source", "single", true);
+    close("source", "single", true);
     destroydisk("source");
     destroydisk("dest");
 
diff --git a/src/VBox/Storage/testcase/tstVDIo.cpp b/src/VBox/Storage/testcase/tstVDIo.cpp
index a75aaa6..d435056 100644
--- a/src/VBox/Storage/testcase/tstVDIo.cpp
+++ b/src/VBox/Storage/testcase/tstVDIo.cpp
@@ -2756,6 +2756,43 @@ static void tstVDIoScriptExec(const char *pszName, const char *pszScript)
                     rc = VDScriptCtxCallFn(hScriptCtx, "main", NULL, 0);
                 VDScriptCtxDestroy(hScriptCtx);
             }
+
+            /* Clean up all leftover resources. */
+            PVDPATTERN pPatternIt, pPatternItNext;
+            RTListForEachSafe(&GlobTest.ListPatterns, pPatternIt, pPatternItNext, VDPATTERN, ListNode)
+            {
+                RTPrintf("Cleanup: Leftover pattern \"%s\", deleting...\n", pPatternIt->pszName);
+                RTListNodeRemove(&pPatternIt->ListNode);
+                RTMemFree(pPatternIt->pvPattern);
+                RTStrFree(pPatternIt->pszName);
+                RTMemFree(pPatternIt);
+            }
+
+            PVDDISK pDiskIt, pDiskItNext;
+            RTListForEachSafe(&GlobTest.ListDisks, pDiskIt, pDiskItNext, VDDISK, ListNode)
+            {
+                RTPrintf("Cleanup: Leftover disk \"%s\", deleting...\n", pDiskIt->pszName);
+                RTListNodeRemove(&pDiskIt->ListNode);
+                VDDestroy(pDiskIt->pVD);
+                if (pDiskIt->pMemDiskVerify)
+                {
+                    VDMemDiskDestroy(pDiskIt->pMemDiskVerify);
+                    RTCritSectDelete(&pDiskIt->CritSectVerify);
+                }
+                RTStrFree(pDiskIt->pszName);
+                RTMemFree(pDiskIt);
+            }
+
+            PVDFILE pFileIt, pFileItNext;
+            RTListForEachSafe(&GlobTest.ListFiles, pFileIt, pFileItNext, VDFILE, Node)
+            {
+                RTPrintf("Cleanup: Leftover file \"%s\", deleting...\n", pFileIt->pszName);
+                RTListNodeRemove(&pFileIt->Node);
+                VDIoBackendStorageDestroy(pFileIt->pIoStorage);
+                RTStrFree(pFileIt->pszName);
+                RTMemFree(pFileIt);
+            }
+
             VDIoBackendDestroy(GlobTest.pIoBackend);
         }
         else
@@ -2829,6 +2866,7 @@ static void tstVDIoRunBuiltinTests(void)
 
         AssertPtr(pszScript);
         tstVDIoScriptExec(g_aVDIoTests[i].pszName, pszScript);
+        RTStrFree(pszScript);
     }
 #endif
 }
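
The cleanup loops above rely on RTListForEachSafe, which caches the successor node before the body runs so the current node can be unlinked and freed mid-walk. The same shape in plain C, for illustration:

    #include <stdlib.h>

    typedef struct NODE { struct NODE *pNext; char *pszName; } NODE;

    static void destroyAll(NODE **ppHead)
    {
        NODE *pIt = *ppHead;
        while (pIt)
        {
            NODE *pNext = pIt->pNext;  /* grab successor before freeing */
            free(pIt->pszName);
            free(pIt);
            pIt = pNext;
        }
        *ppHead = NULL;
    }
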
diff --git a/src/VBox/Storage/testcase/tstVDResize.vd b/src/VBox/Storage/testcase/tstVDResize.vd
index 8e74d35..c57e3eb 100644
--- a/src/VBox/Storage/testcase/tstVDResize.vd
+++ b/src/VBox/Storage/testcase/tstVDResize.vd
@@ -26,6 +26,7 @@ void main()
     io("test", false, 1, "seq", 64K, 255G, 257G, 2G, 100, "none");
     resize("test", 1331200M);
     io("test", false, 1, "seq", 64K, 255G, 257G, 2G,   0, "none");
+    close("test", "single", true /* fDelete */);
     destroydisk("test");
 
     iorngdestroy();
diff --git a/src/VBox/VMM/Makefile.kmk b/src/VBox/VMM/Makefile.kmk
index 8be9099..d3b00ff 100644
--- a/src/VBox/VMM/Makefile.kmk
+++ b/src/VBox/VMM/Makefile.kmk
@@ -48,6 +48,9 @@ endif
 ifdef VBOX_WITH_3RD_IEM_STEP
  VMM_COMMON_DEFS += VBOX_WITH_3RD_IEM_STEP
 endif
+ifdef VBOX_WITH_NESTED_HWVIRT
+ VMM_COMMON_DEFS += VBOX_WITH_NESTED_HWVIRT
+endif
 #ifdef VBOX_WITH_IEM
 # VMM_COMMON_DEFS += VBOX_WITH_IEM
 #endif
@@ -138,7 +141,8 @@ VBoxVMM_DEFS.darwin = VMM_R0_SWITCH_STACK
 
 VBoxVMM_INCS     = \
 	include \
-	$(if-expr defined(VBOX_WITH_RAW_MODE),PATM,)
+	$(if-expr defined(VBOX_WITH_RAW_MODE),PATM,) \
+	$(VBoxVMM_0_OUTDIR)/CommonGenIncs
 VBoxVMM_ASINCS   = .
 
 VBoxVMM_SOURCES  = \
@@ -335,11 +339,69 @@ VMMR3/SSM.cpp_DEFS +=	\
 
 if "$(USERNAME)" == "bird" && "$(KBUILD_TARGET)" == "win"
  VBoxVMM_VMMAll/IEMAll.cpp_CXXFLAGS = /FAcs /Fa$(subst /,\\,$(outbase).cod)
+ VBoxVMM_VMMAll/IEMAllAImplC.cpp_CXXFLAGS = /FAcs /Fa$(subst /,\\,$(outbase).cod)
+ VBoxVMM_VMMAll/PGMAll.cpp_CXXFLAGS = /FAcs /Fa$(subst /,\\,$(outbase).cod)
  VBoxVMM_VMMAll/PDMAllCritSect.cpp_CXXFLAGS = /FAcs /Fa$(subst /,\\,$(outbase).cod)
 endif
 
 $(call VBOX_SET_VER_INFO_DLL,VBoxVMM,VirtualBox VMM) # Version info / description.
 
+
+#
+# Generate macro template for IEM instruction statistics.
+#
+VBoxVMM_INTERMEDIATES += $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMInstructionStatisticsTmpl.h
+VBoxVMM_CLEAN         += \
+	$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMInstructionStatisticsTmpl.h.ts \
+	$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMInstructionStatisticsTmpl.h
+$$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMInstructionStatisticsTmpl.h.ts \
++| $$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMInstructionStatisticsTmpl.h: \
+		$(PATH_SUB_CURRENT)/VMMAll/IEMAllInstructions.cpp.h \
+		$(PATH_SUB_CURRENT)/VMMAll/IEMAllInstructionsOneByte.cpp.h \
+		$(PATH_SUB_CURRENT)/VMMAll/IEMAllInstructionsTwoByte0f.cpp.h \
+		$(PATH_SUB_CURRENT)/VMMAll/IEMAllInstructionsVexMap1.cpp.h
+	$(QUIET)$(call MSG_GENERATE,VBoxVMM,$@,$<)
+	$(QUIET)$(RM) -f -- "$@.tmp" "$@.sorted"
+	$(QUIET)$(MKDIR) -p -- "$(dir $@)"
+	$(QUIET)$(SED) \
+		-e '/IEMOP_MNEMONIC\(\|[01234]\|[01234]EX\)(/!d' \
+               -e 's/^.*IEMOP_MNEMONIC\(\|[01234]\|[01234]EX\)(/IEM_DO_INSTR_STAT\1(/' \
+               -e 's/;.*$(DOLLAR)//' \
+		--output "$@.tmp" $(filter %.cpp.h,$^)
+# Windows sort does some kind of seeking. So, we must use a temporary file and kmk_cat to define and undefine our macros.
+	$(QUIET)$(REDIRECT) -wto "$@.sorted" -- sort "$@.tmp"
+	$(QUIET)$(APPEND) -nt "$@" \
+       	'/* Warning autogenerated by VMM/Makefile.kmk. */ ' \
+		'#define IEM_DO_INSTR_STAT0(f,u,l,fd,fi)                   IEM_DO_INSTR_STAT(l,  #l)' \
+		'#define IEM_DO_INSTR_STAT1(f,u,l,o1,fd,fi)                IEM_DO_INSTR_STAT(l ## _ ## o1,                                   #l " " #o1)' \
+		'#define IEM_DO_INSTR_STAT2(f,u,l,o1,o2,fd,fi)             IEM_DO_INSTR_STAT(l ## _ ## o1 ## _ ## o2,                        #l " " #o1 "," #o2)' \
+		'#define IEM_DO_INSTR_STAT3(f,u,l,o1,o2,o3,fd,fi)          IEM_DO_INSTR_STAT(l ## _ ## o1 ## _ ## o2 ## _ ## o3,             #l " " #o1 "," #o2 "," #o3)' \
+		'#define IEM_DO_INSTR_STAT4(f,u,l,o1,o2,o3,o4,fd,fi)       IEM_DO_INSTR_STAT(l ## _ ## o1 ## _ ## o2 ## _ ## o3 ## _ ## o4,  #l " " #o1 "," #o2 "," #o3 "," #o4)' \
+		'#define IEM_DO_INSTR_STAT0EX(s,m,f,u,l,fd,fi)             IEM_DO_INSTR_STAT(s,m)' \
+		'#define IEM_DO_INSTR_STAT1EX(s,m,f,u,l,o1,fd,fi)          IEM_DO_INSTR_STAT(s,m)' \
+		'#define IEM_DO_INSTR_STAT2EX(s,m,f,u,l,o1,o2,fd,fi)       IEM_DO_INSTR_STAT(s,m)' \
+		'#define IEM_DO_INSTR_STAT3EX(s,m,f,u,l,o1,o2,o3,fd,fi)    IEM_DO_INSTR_STAT(s,m)' \
+		'#define IEM_DO_INSTR_STAT4EX(s,m,f,u,l,o1,o2,o3,o4,fd,fi) IEM_DO_INSTR_STAT(s,m)' \
+               ''
+	$(QUIET)$(REDIRECT) -ato "$@" -- $(CAT_EXT) "$@.sorted"
+	$(QUIET)$(APPEND) -n "$@" \
+               '' \
+		'#undef IEM_DO_INSTR_STAT0' \
+		'#undef IEM_DO_INSTR_STAT1' \
+		'#undef IEM_DO_INSTR_STAT2' \
+		'#undef IEM_DO_INSTR_STAT3' \
+		'#undef IEM_DO_INSTR_STAT4' \
+		'#undef IEM_DO_INSTR_STAT0EX' \
+		'#undef IEM_DO_INSTR_STAT1EX' \
+		'#undef IEM_DO_INSTR_STAT2EX' \
+		'#undef IEM_DO_INSTR_STAT3EX' \
+		'#undef IEM_DO_INSTR_STAT4EX' \
+               ''
+	$(QUIET)$(RM) -f -- "$@.tmp" "$@.sorted"
+	$(QUIET)$(CP) -v -f --changed -- "$@" "$(patsubst %.ts,%,$@)"
+
+foobar: $$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMInstructionStatisticsTmpl.h
+
 if "$(KBUILD_TARGET)" == "win" && !defined(VBOX_ONLY_EXTPACKS_USE_IMPLIBS)
  #
  # Debug type info hack for VMCPU, VM and similar.
@@ -359,6 +421,7 @@ if "$(KBUILD_TARGET)" == "win" && !defined(VBOX_ONLY_EXTPACKS_USE_IMPLIBS)
  VBoxVMMPdbTypeHack_DEFS.win.x86   = $(VBoxVMM_DEFS.win.x86)
  VBoxVMMPdbTypeHack_DEFS.win.amd64 = $(VBoxVMM_DEFS.win.amd64)
  VBoxVMMPdbTypeHack_INCS           = $(VBoxVMM_INCS)
+ VBoxVMMPdbTypeHack_INTERMEDIATES  = $(VBoxVMM_INTERMEDIATES)
 endif
 
 
@@ -468,14 +531,16 @@ if defined(VBOX_WITH_RAW_MODE) && !defined(VBOX_ONLY_EXTPACKS)
 
  VMMRC_DEFS      = IN_VMM_RC IN_RT_RC IN_DIS DIS_CORE_ONLY VBOX_WITH_RAW_MODE VBOX_WITH_RAW_MODE_NOT_R0 IN_SUP_RC \
  	$(VMM_COMMON_DEFS)
+ VMMRC_DEFS := $(filter-out VBOX_WITH_NESTED_HWVIRT,$(VMMRC_DEFS))
  ifdef VBOX_WITH_VMM_R0_SWITCH_STACK
   VMMRC_DEFS    += VMM_R0_SWITCH_STACK
  endif
 
- VMMRC_INCS     := \
+ VMMRC_INCS      = \
  	include \
  	VMMRC \
- 	$(if-expr defined(VBOX_WITH_RAW_MODE),PATM,)
+ 	$(if-expr defined(VBOX_WITH_RAW_MODE),PATM,) \
+ 	$(VBoxVMM_0_OUTDIR)/CommonGenIncs
 
  VMMRC_LIBS      = \
  	$(PATH_STAGE_LIB)/DisasmRC$(VBOX_SUFF_LIB) \
@@ -578,8 +643,12 @@ if defined(VBOX_WITH_RAW_MODE) && !defined(VBOX_ONLY_EXTPACKS)
 
  if "$(USERNAME)" == "bird" && "$(KBUILD_TARGET)" == "win"
   VMMRC_VMMAll/IEMAll.cpp_CXXFLAGS = /FAcs /Fa$(subst /,\\,$(outbase).cod)
+  VMMRC_VMMAll/IEMAllAImplC.cpp_CXXFLAGS = /FAcs /Fa$(subst /,\\,$(outbase).cod)
+  VMMRC_VMMAll/PGMAll.cpp_CXXFLAGS = /FAcs /Fa$(subst /,\\,$(outbase).cod)
  endif
 
+ VMMRC_INTERMEDIATES += $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMInstructionStatisticsTmpl.h
+
  if "$(KBUILD_TARGET)" == "win"
   # Debug type info hack for VMCPU, VM and similar.  See VBoxVMM for details.
   VMMRC_LIBS    += $(VMMRCPdbTypeHack_1_TARGET)
@@ -593,6 +662,7 @@ if defined(VBOX_WITH_RAW_MODE) && !defined(VBOX_ONLY_EXTPACKS)
   VMMRCPdbTypeHack_DEFS.win.x86   = $(VMMRC_DEFS.win.x86)
   VMMRCPdbTypeHack_DEFS.win.amd64 = $(VMMRC_DEFS.win.amd64)
   VMMRCPdbTypeHack_INCS           = $(VMMRC_INCS)
+  VMMRCPdbTypeHack_INTERMEDIATES  = $(VMMRC_INTERMEDIATES)
  endif
 
 endif # VBOX_WITH_RAW_MODE && !VBOX_ONLY_EXTPACKS
@@ -633,7 +703,8 @@ ifndef VBOX_ONLY_EXTPACKS
 
  VMMR0_INCS      = \
  	include \
- 	$(if-expr defined(VBOX_WITH_RAW_MODE),PATM,)
+ 	$(if-expr defined(VBOX_WITH_RAW_MODE),PATM,) \
+ 	$(VBoxVMM_0_OUTDIR)/CommonGenIncs
 
  VMMR0_SOURCES   = \
  	VBoxVMM.d \
@@ -735,8 +806,12 @@ ifndef VBOX_ONLY_EXTPACKS
 
  if "$(USERNAME)" == "bird" && "$(KBUILD_TARGET)" == "win"
   VMMR0_VMMAll/IEMAll.cpp_CXXFLAGS = /FAcs /Fa$(subst /,\\,$(outbase).cod)
+  VMMR0_VMMAll/IEMAllAImplC.cpp_CXXFLAGS = /FAcs /Fa$(subst /,\\,$(outbase).cod)
+  VMMR0_VMMAll/PGMAll.cpp_CXXFLAGS = /FAcs /Fa$(subst /,\\,$(outbase).cod)
  endif
 
+ VMMR0_INTERMEDIATES += $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMInstructionStatisticsTmpl.h
+
  if "$(KBUILD_TARGET)" == "win"
   # Debug type info hack for VMCPU, VM and similar.  See VBoxVMM for details.
   VMMR0_LIBS    += $(VMMR0PdbTypeHack_1_TARGET)
@@ -750,6 +825,7 @@ ifndef VBOX_ONLY_EXTPACKS
   VMMR0PdbTypeHack_DEFS.win.x86   = $(VMMR0_DEFS.win.x86)
   VMMR0PdbTypeHack_DEFS.win.amd64 = $(VMMR0_DEFS.win.amd64)
   VMMR0PdbTypeHack_INCS           = $(VMMR0_INCS)
+  VMMR0PdbTypeHack_INTERMEDIATES  = $(VMMR0_INTERMEDIATES)
  endif
 
 endif # !VBOX_ONLY_EXTPACKS
@@ -867,6 +943,31 @@ $(foreach base,$(notdir $(basename $(wildcard $(PATH_SUB_CURRENT)/VMMR3/cpus/*.h
 
 
 #
+# Process python source(s).
+#
+BLDDIRS += $(PATH_TARGET)/pylint
+
+define def_vbox_vmm_py_check
+$(eval name:=$(basename $(notdir $(py))))
+
+pylint::   $(name)-py-phony.o
+$(name).o: $(name)-py-phony.o
+$(PATH_TARGET)/pylint/$(name).o $(name)-py-phony.o:: $(py) | $(PATH_TARGET)/pylint/
+ifdef VBOX_WITH_PYLINT
+	$(QUIET2)$(call MSG_L1,Subjecting $(py) to pylint...)
+	$(QUIET)$(REDIRECT) -C "$(dir $(py))" -E LC_ALL=C -- \
+		$(VBOX_PYLINT) --rcfile=$(PATH_TARGET)/no-such-pylintrc \
+			$$(VBOX_PYLINT_FLAGS) $$($(py)_VBOX_PYLINT_FLAGS) ./$(notdir $(py))
+endif
+	$(QUIET)$(APPEND) -t "$(PATH_TARGET)/pylint/$(name).o"
+
+TESTING += $(name)-py-phony.o
+endef # def_vbox_vmm_py_check
+
+$(foreach py, $(addprefix $(PATH_SUB_CURRENT)/VMMAll/, IEMAllInstructionsPython.py ) , $(eval $(def_vbox_vmm_py_check)))
+
+
+#
 # Test for undefined symbols.
 #
 if1of ($(SYSMODS),VMMRC)
@@ -920,9 +1021,17 @@ PGMInline.o 	PGMInline.obj: 			PGMDbg.o
 
 # Alias the IEM templates to the object in which they are instantiated.
 IEMInternal.o \
-IEMAllInstructions.cpp.o  IEMAllInstructions.cpp.obj \
-IEMAllCImpl.cpp.o         IEMAllCImpl.cpp.obj \
-IEMAllCImplStrInstr.cpp.o IEMAllCImplStrInstr.cpp.obj: IEMAll.o
+IEMAllInstructions.cpp.o           IEMAllInstructions.cpp.obj \
+IEMAllInstructionsOneByte.cpp.o    IEMAllInstructionsOneByte.cpp.obj \
+IEMAllInstructionsTwoByte0f.cpp.o  IEMAllInstructionsTwoByte0f.cpp.obj \
+IEMAllInstructionsThree0f38.cpp.o  IEMAllInstructionsThree0f38.cpp.obj \
+IEMAllInstructionsThree0f3a.cpp.o  IEMAllInstructionsThree0f3a.cpp.obj \
+IEMAllInstructionsVexMap1.cpp.o    IEMAllInstructionsVexMap1.cpp.obj \
+IEMAllInstructionsVexMap2.cpp.o    IEMAllInstructionsVexMap2.cpp.obj \
+IEMAllInstructionsVexMap3.cpp.o    IEMAllInstructionsVexMap3.cpp.obj \
+IEMAllInstructions3DNow.cpp.o      IEMAllInstructions3DNow.cpp.obj \
+IEMAllCImpl.cpp.o                  IEMAllCImpl.cpp.obj \
+IEMAllCImplStrInstr.cpp.o          IEMAllCImplStrInstr.cpp.obj: IEMAll.o
 
 # Alias the switcher templates.
 PAEand32Bit.o PAEand32Bit.obj:                 PAETo32Bit.o   PAEToPAE.o   32BitTo32Bit.o PAETo32Bit.o
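
The generated IEMInstructionStatisticsTmpl.h is an X-macro template: the rule above rewrites every IEMOP_MNEMONIC* occurrence into an IEM_DO_INSTR_STAT(name, mnemonic-string) line, and each consumer defines that macro before including the header. A hypothetical consumer, for illustration only:

    #include <stdint.h>

    /* One counter member per generated IEM_DO_INSTR_STAT line. */
    #define IEM_DO_INSTR_STAT(a_Name, a_szMnemonic) uint64_t cHits##a_Name;
    typedef struct IEMINSTRSTATS
    {
    #include "IEMInstructionStatisticsTmpl.h"   /* the generated template */
    } IEMINSTRSTATS;
    #undef IEM_DO_INSTR_STAT
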
diff --git a/src/VBox/VMM/VMMAll/APICAll.cpp b/src/VBox/VMM/VMMAll/APICAll.cpp
index ed3332e..3abf295 100644
--- a/src/VBox/VMM/VMMAll/APICAll.cpp
+++ b/src/VBox/VMM/VMMAll/APICAll.cpp
@@ -2369,8 +2369,8 @@ APICBOTHCBDECL(VBOXSTRICTRC) apicLocalInterrupt(PPDMDEVINS pDevIns, PVMCPU pVCpu
                 {
                     /** @todo won't work in R0/RC because callers don't care about rcRZ. */
                     AssertMsgFailed(("INIT through LINT0/LINT1 is not yet supported\n"));
-                    /* fallthru */
                 }
+                /* fall thru */
                 case XAPICDELIVERYMODE_FIXED:
                 {
                     PAPICCPU       pApicCpu = VMCPU_TO_APICCPU(pVCpu);
diff --git a/src/VBox/VMM/VMMAll/CPUMAllRegs.cpp b/src/VBox/VMM/VMMAll/CPUMAllRegs.cpp
index 98c334f..4c81025 100644
--- a/src/VBox/VMM/VMMAll/CPUMAllRegs.cpp
+++ b/src/VBox/VMM/VMMAll/CPUMAllRegs.cpp
@@ -152,7 +152,8 @@ VMM_INT_DECL(void) CPUMGuestLazyLoadHiddenCsAndSs(PVMCPU pVCpu)
 /**
  * Loads the hidden parts of a selector register.
  *
- * @param   pVCpu               The cross context virtual CPU structure of the calling EMT.
+ * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
+ * @param   pSReg       The selector register to lazily load hidden parts of.
  */
 VMM_INT_DECL(void) CPUMGuestLazyLoadHiddenSelectorReg(PVMCPU pVCpu, PCPUMSELREG pSReg)
 {
@@ -915,6 +916,26 @@ VMMDECL(RTSEL) CPUMGetGuestSS(PVMCPU pVCpu)
 }
 
 
+VMMDECL(uint64_t)   CPUMGetGuestFlatPC(PVMCPU pVCpu)
+{
+    CPUMSELREG_LAZY_LOAD_HIDDEN_PARTS(pVCpu, &pVCpu->cpum.s.Guest.cs);
+    if (   !CPUMIsGuestInLongMode(pVCpu)
+        || pVCpu->cpum.s.Guest.cs.Attr.n.u1Long)
+        return pVCpu->cpum.s.Guest.eip + (uint32_t)pVCpu->cpum.s.Guest.cs.u64Base;
+    return pVCpu->cpum.s.Guest.rip + pVCpu->cpum.s.Guest.cs.u64Base;
+}
+
+
+VMMDECL(uint64_t)   CPUMGetGuestFlatSP(PVMCPU pVCpu)
+{
+    CPUMSELREG_LAZY_LOAD_HIDDEN_PARTS(pVCpu, &pVCpu->cpum.s.Guest.ss);
+    if (   !CPUMIsGuestInLongMode(pVCpu)
+        || pVCpu->cpum.s.Guest.ss.Attr.n.u1Long)
+        return pVCpu->cpum.s.Guest.eip + (uint32_t)pVCpu->cpum.s.Guest.ss.u64Base;
+    return pVCpu->cpum.s.Guest.rip + pVCpu->cpum.s.Guest.ss.u64Base;
+}
+
+
 VMMDECL(RTSEL) CPUMGetGuestLDTR(PVMCPU pVCpu)
 {
     return pVCpu->cpum.s.Guest.ldtr.Sel;
@@ -1345,6 +1366,7 @@ VMMDECL(void) CPUMGetGuestCpuId(PVMCPU pVCpu, uint32_t uLeaf, uint32_t uSubLeaf,
         {
             default:
                 AssertFailed();
+                /* fall thru */
             case CPUMUNKNOWNCPUID_DEFAULTS:
             case CPUMUNKNOWNCPUID_LAST_STD_LEAF: /* ASSUME this is executed */
             case CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX: /** @todo Implement CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX */
@@ -2370,7 +2392,7 @@ VMMDECL(uint32_t) CPUMGetGuestCPL(PVMCPU pVCpu)
      * CPL can reliably be found in SS.DPL (hidden regs valid) or SS if not.
      *
      * Note! We used to check CS.DPL here, assuming it was always equal to
-     * CPL even if a conforming segment was loaded.  But this truned out to
+     * CPL even if a conforming segment was loaded.  But this turned out to
      * only apply to older AMD-V.  With VT-x we had an ACP2 regression
      * during install after a far call to ring 2 with VT-x.  Then on newer
      * AMD-V CPUs we have to move the VMCB.guest.u8CPL into cs.Attr.n.u2Dpl
@@ -2505,3 +2527,18 @@ VMMDECL(DISCPUMODE)     CPUMGetGuestDisMode(PVMCPU pVCpu)
     return DISCPUMODE_16BIT;
 }
 
+
+/**
+ * Gets the guest MXCSR_MASK value.
+ *
+ * This does not access the x87 state, but the value we determined at VM
+ * initialization.
+ *
+ * @returns MXCSR mask.
+ * @param   pVM                 The cross context VM structure.
+ */
+VMMDECL(uint32_t) CPUMGetGuestMxCsrMask(PVM pVM)
+{
+    return pVM->cpum.s.GuestInfo.fMxCsrMask;
+}
+
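
CPUMGetGuestFlatPC/FlatSP both compute "segment base + offset", with the special case that a 64-bit code segment makes the offset itself the flat address. A simplified model, leaving out the lazy hidden-register loading the real code performs:

    #include <stdint.h>

    uint64_t flatAddr(uint64_t uSegBase, uint64_t offReg, int f64BitMode)
    {
        if (f64BitMode)
            return offReg;                   /* base architecturally treated as 0 */
        return (uint32_t)offReg + uSegBase;  /* legacy: 32-bit offset plus base */
    }
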
diff --git a/src/VBox/VMM/VMMAll/EMAll.cpp b/src/VBox/VMM/VMMAll/EMAll.cpp
index c7bfc0a..155bef8 100644
--- a/src/VBox/VMM/VMMAll/EMAll.cpp
+++ b/src/VBox/VMM/VMMAll/EMAll.cpp
@@ -196,6 +196,18 @@ VMM_INT_DECL(int) EMMonitorWaitPrepare(PVMCPU pVCpu, uint64_t rax, uint64_t rcx,
 
 
 /**
+ * Checks if the monitor hardware is armed / active.
+ *
+ * @returns true if armed, false otherwise.
+ * @param   pVCpu               The cross context virtual CPU structure of the calling EMT.
+ */
+VMM_INT_DECL(bool) EMMonitorIsArmed(PVMCPU pVCpu)
+{
+    return RT_BOOL(pVCpu->em.s.MWait.fWait & EMMWAIT_FLAG_MONITOR_ACTIVE);
+}
+
+
+/**
  * Performs an MWAIT.
  *
  * @returns VINF_SUCCESS
diff --git a/src/VBox/VMM/VMMAll/GIMAll.cpp b/src/VBox/VMM/VMMAll/GIMAll.cpp
index ab1ec8d..f82bfe7 100644
--- a/src/VBox/VMM/VMMAll/GIMAll.cpp
+++ b/src/VBox/VMM/VMMAll/GIMAll.cpp
@@ -91,6 +91,8 @@ VMM_INT_DECL(bool) GIMAreHypercallsEnabled(PVMCPU pVCpu)
  * @returns Strict VBox status code.
  * @retval  VINF_SUCCESS if the hypercall succeeded (even if its operation
  *          failed).
+ * @retval  VINF_GIM_HYPERCALL_CONTINUING continue hypercall without updating
+ *          RIP.
  * @retval  VINF_GIM_R3_HYPERCALL re-start the hypercall from ring-3.
  * @retval  VERR_GIM_HYPERCALL_ACCESS_DENIED CPL is insufficient.
  * @retval  VERR_GIM_HYPERCALLS_NOT_AVAILABLE hypercalls unavailable.
@@ -103,6 +105,7 @@ VMM_INT_DECL(bool) GIMAreHypercallsEnabled(PVMCPU pVCpu)
  * @param   pVCpu       The cross context virtual CPU structure.
  * @param   pCtx        Pointer to the guest-CPU context.
  *
+ * @remarks The caller of this function needs to advance RIP as required.
  * @thread  EMT.
  */
 VMM_INT_DECL(VBOXSTRICTRC) GIMHypercall(PVMCPU pVCpu, PCPUMCTX pCtx)
diff --git a/src/VBox/VMM/VMMAll/IEMAll.cpp b/src/VBox/VMM/VMMAll/IEMAll.cpp
index b8d1e37..7372774 100644
--- a/src/VBox/VMM/VMMAll/IEMAll.cpp
+++ b/src/VBox/VMM/VMMAll/IEMAll.cpp
@@ -100,6 +100,11 @@
 #include <VBox/vmm/iom.h>
 #include <VBox/vmm/em.h>
 #include <VBox/vmm/hm.h>
+#ifdef VBOX_WITH_NESTED_HWVIRT
+# include <VBox/vmm/hm_svm.h>
+#else
+# include <VBox/vmm/hm_svm.h> /* For SVMIOIOTYPE */
+#endif
 #include <VBox/vmm/tm.h>
 #include <VBox/vmm/dbgf.h>
 #include <VBox/vmm/dbgftrace.h>
@@ -361,6 +366,112 @@ typedef IEMSELDESC *PIEMSELDESC;
 # define IEM_USE_UNALIGNED_DATA_ACCESS
 #endif
 
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Check the common SVM instruction preconditions.
+ */
+# define IEM_SVM_INSTR_COMMON_CHECKS(a_pVCpu, a_Instr) \
+    do { \
+        if (!IEM_IS_SVM_ENABLED(a_pVCpu)) \
+        { \
+            Log((RT_STR(a_Instr) ": EFER.SVME not enabled -> #UD\n")); \
+            return iemRaiseUndefinedOpcode(pVCpu); \
+        } \
+        if (IEM_IS_REAL_OR_V86_MODE(pVCpu)) \
+        { \
+            Log((RT_STR(a_Instr) ": Real or v8086 mode -> #UD\n")); \
+            return iemRaiseUndefinedOpcode(pVCpu); \
+        } \
+        if (pVCpu->iem.s.uCpl != 0) \
+        { \
+            Log((RT_STR(a_Instr) ": CPL != 0 -> #GP(0)\n")); \
+            return iemRaiseGeneralProtectionFault0(pVCpu); \
+        } \
+    } while (0)
+
+/**
+ * Check if SVM is enabled.
+ */
+# define IEM_IS_SVM_ENABLED(a_pVCpu)                         (CPUMIsGuestSvmEnabled(IEM_GET_CTX(a_pVCpu)))
+
+/**
+ * Check if an SVM control/instruction intercept is set.
+ */
+# define IEM_IS_SVM_CTRL_INTERCEPT_SET(a_pVCpu, a_Intercept) (CPUMIsGuestSvmCtrlInterceptSet(IEM_GET_CTX(a_pVCpu), (a_Intercept)))
+
+/**
+ * Check if an SVM read CRx intercept is set.
+ */
+# define IEM_IS_SVM_READ_CR_INTERCEPT_SET(a_pVCpu, a_uCr)    (CPUMIsGuestSvmReadCRxInterceptSet(IEM_GET_CTX(a_pVCpu), (a_uCr)))
+
+/**
+ * Check if an SVM write CRx intercept is set.
+ */
+# define IEM_IS_SVM_WRITE_CR_INTERCEPT_SET(a_pVCpu, a_uCr)   (CPUMIsGuestSvmWriteCRxInterceptSet(IEM_GET_CTX(a_pVCpu), (a_uCr)))
+
+/**
+ * Check if an SVM read DRx intercept is set.
+ */
+# define IEM_IS_SVM_READ_DR_INTERCEPT_SET(a_pVCpu, a_uDr)    (CPUMIsGuestSvmReadDRxInterceptSet(IEM_GET_CTX(a_pVCpu), (a_uDr)))
+
+/**
+ * Check if an SVM write DRx intercept is set.
+ */
+# define IEM_IS_SVM_WRITE_DR_INTERCEPT_SET(a_pVCpu, a_uDr)   (CPUMIsGuestSvmWriteDRxInterceptSet(IEM_GET_CTX(a_pVCpu), (a_uDr)))
+
+/**
+ * Check if an SVM exception intercept is set.
+ */
+# define IEM_IS_SVM_XCPT_INTERCEPT_SET(a_pVCpu, a_uVector)   (CPUMIsGuestSvmXcptInterceptSet(IEM_GET_CTX(a_pVCpu), (a_uVector)))
+
+/**
+ * Invokes the SVM \#VMEXIT handler for the nested-guest.
+ */
+# define IEM_RETURN_SVM_NST_GST_VMEXIT(a_pVCpu, a_uExitCode, a_uExitInfo1, a_uExitInfo2) \
+    do \
+    { \
+        VBOXSTRICTRC rcStrictVmExit = HMSvmNstGstVmExit((a_pVCpu), IEM_GET_CTX(a_pVCpu), (a_uExitCode), (a_uExitInfo1), \
+                                                        (a_uExitInfo2)); \
+        return rcStrictVmExit == VINF_SVM_VMEXIT ? VINF_SUCCESS : rcStrictVmExit; \
+    } while (0)
+
+/**
+ * Invokes the 'MOV CRx' SVM \#VMEXIT handler after constructing the
+ * corresponding decode assist information.
+ */
+# define IEM_RETURN_SVM_NST_GST_CRX_VMEXIT(a_pVCpu, a_uExitCode, a_enmAccessCrX, a_iGReg) \
+    do \
+    { \
+        uint64_t uExitInfo1; \
+        if (   IEM_GET_GUEST_CPU_FEATURES(a_pVCpu)->fSvmDecodeAssist \
+            && (a_enmAccessCrX) == IEMACCESSCRX_MOV_CRX) \
+            uExitInfo1 = SVM_EXIT1_MOV_CRX_MASK | ((a_iGReg) & 7); \
+        else \
+            uExitInfo1 = 0; \
+        IEM_RETURN_SVM_NST_GST_VMEXIT(a_pVCpu, a_uExitCode, uExitInfo1, 0); \
+    } while (0)
+
+/**
+ * Checks and handles an SVM MSR intercept.
+ */
+# define IEM_SVM_NST_GST_MSR_INTERCEPT(a_pVCpu, a_idMsr, a_fWrite) \
+    HMSvmNstGstHandleMsrIntercept((a_pVCpu), IEM_GET_CTX(a_pVCpu), (a_idMsr), (a_fWrite))
+
+#else
+# define IEM_SVM_INSTR_COMMON_CHECKS(a_pVCpu, a_Instr)                                    do { } while (0)
+# define IEM_IS_SVM_ENABLED(a_pVCpu)                                                      (false)
+# define IEM_IS_SVM_CTRL_INTERCEPT_SET(a_pVCpu, a_Intercept)                              (false)
+# define IEM_IS_SVM_READ_CR_INTERCEPT_SET(a_pVCpu, a_uCr)                                 (false)
+# define IEM_IS_SVM_WRITE_CR_INTERCEPT_SET(a_pVCpu, a_uCr)                                (false)
+# define IEM_IS_SVM_READ_DR_INTERCEPT_SET(a_pVCpu, a_uDr)                                 (false)
+# define IEM_IS_SVM_WRITE_DR_INTERCEPT_SET(a_pVCpu, a_uDr)                                (false)
+# define IEM_IS_SVM_XCPT_INTERCEPT_SET(a_pVCpu, a_uVector)                                (false)
+# define IEM_RETURN_SVM_NST_GST_VMEXIT(a_pVCpu, a_uExitCode, a_uExitInfo1, a_uExitInfo2)  do { return VERR_SVM_IPE_1; } while (0)
+# define IEM_RETURN_SVM_NST_GST_CRX_VMEXIT(a_pVCpu, a_uExitCode, a_enmAccessCrX, a_iGReg) do { return VERR_SVM_IPE_1; } while (0)
+# define IEM_SVM_NST_GST_MSR_INTERCEPT(a_pVCpu, a_idMsr, a_fWrite)                        (VERR_SVM_IPE_1)
+
+#endif /* VBOX_WITH_NESTED_HWVIRT */
+
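
The #else branch above is the usual compile-time stub pattern: with VBOX_WITH_NESTED_HWVIRT off, the query macros collapse to (false), so call sites stay unconditional and the compiler discards the dead branches. In miniature, with illustrative names standing in for the CPUMIsGuestSvm*() queries:

    #include <stdbool.h>
    #include <stdio.h>

    static bool featureQuery(void *pvCtx)  { return pvCtx != NULL; }
    static void handleFeature(void *pvCtx) { (void)pvCtx; printf("feature path\n"); }

    #ifdef WITH_FEATURE
    # define FEATURE_ENABLED(a_pvCtx)  featureQuery(a_pvCtx)
    #else
    # define FEATURE_ENABLED(a_pvCtx)  (false)  /* compiled out: constant-folds */
    #endif

    void doWork(void *pvCtx)
    {
        if (FEATURE_ENABLED(pvCtx))  /* dead branch eliminated when stubbed */
            handleFeature(pvCtx);
    }
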
 
 /*********************************************************************************************************************************
 *   Global Variables                                                                                                             *
@@ -770,6 +881,56 @@ IEM_STATIC PIEMVERIFYEVTREC iemVerifyAllocRecord(PVMCPU pVCpu);
 IEM_STATIC VBOXSTRICTRC     iemVerifyFakeIOPortRead(PVMCPU pVCpu, RTIOPORT Port, uint32_t *pu32Value, size_t cbValue);
 IEM_STATIC VBOXSTRICTRC     iemVerifyFakeIOPortWrite(PVMCPU pVCpu, RTIOPORT Port, uint32_t u32Value, size_t cbValue);
 
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Checks if the intercepted IO instruction causes a \#VMEXIT and handles it
+ * accordingly.
+ *
+ * @returns VBox strict status code.
+ * @param   pVCpu           The cross context virtual CPU structure of the calling thread.
+ * @param   u16Port         The IO port being accessed.
+ * @param   enmIoType       The type of IO access.
+ * @param   cbReg           The IO operand size in bytes.
+ * @param   cAddrSizeBits   The address size in bits (16, 32 or 64).
+ * @param   iEffSeg         The effective segment number.
+ * @param   fRep            Whether this is a repeating IO instruction (REP prefix).
+ * @param   fStrIo          Whether this is a string IO instruction.
+ * @param   cbInstr         The length of the IO instruction in bytes.
+ *
+ * @remarks This must be called only when IO instructions are intercepted by the
+ *          nested-guest hypervisor.
+ */
+IEM_STATIC VBOXSTRICTRC iemSvmHandleIOIntercept(PVMCPU pVCpu, uint16_t u16Port, SVMIOIOTYPE enmIoType, uint8_t cbReg,
+                                                uint8_t cAddrSizeBits, uint8_t iEffSeg, bool fRep, bool fStrIo, uint8_t cbInstr)
+{
+    Assert(IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_IOIO_PROT));
+    Assert(cAddrSizeBits == 16 || cAddrSizeBits == 32 || cAddrSizeBits == 64);
+    Assert(cbReg == 1 || cbReg == 2 || cbReg == 4 || cbReg == 8);
+
+    static const uint32_t s_auIoOpSize[]   = { SVM_IOIO_32_BIT_OP, SVM_IOIO_8_BIT_OP, SVM_IOIO_16_BIT_OP, 0, SVM_IOIO_32_BIT_OP, 0, 0, 0 };
+    static const uint32_t s_auIoAddrSize[] = { 0, SVM_IOIO_16_BIT_ADDR, SVM_IOIO_32_BIT_ADDR, 0, SVM_IOIO_64_BIT_ADDR, 0, 0, 0 };
+
+    SVMIOIOEXITINFO IoExitInfo;
+    IoExitInfo.u         = s_auIoOpSize[cbReg & 7];
+    IoExitInfo.u        |= s_auIoAddrSize[(cAddrSizeBits >> 4) & 7];
+    IoExitInfo.n.u1STR   = fStrIo;
+    IoExitInfo.n.u1REP   = fRep;
+    IoExitInfo.n.u3SEG   = iEffSeg & 0x7;
+    IoExitInfo.n.u1Type  = enmIoType;
+    IoExitInfo.n.u16Port = u16Port;
+
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+    return HMSvmNstGstHandleIOIntercept(pVCpu, pCtx, &IoExitInfo, pCtx->rip + cbInstr);
+}
+
+#else
+IEM_STATIC VBOXSTRICTRC iemSvmHandleIOIntercept(PVMCPU pVCpu, uint16_t u16Port, SVMIOIOTYPE enmIoType, uint8_t cbReg,
+                                                uint8_t cAddrSizeBits, uint8_t iEffSeg, bool fRep, bool fStrIo, uint8_t cbInstr)
+{
+    RT_NOREF(pVCpu, u16Port, enmIoType, cbReg, cAddrSizeBits, iEffSeg, fRep, fStrIo, cbInstr);
+    return VERR_IEM_IPE_9;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT */
 
 
 /**
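
The IoExitInfo encoding above avoids branches by indexing two small lookup tables with the masked operand size (1/2/4/8 bytes) and the shifted address width (16/32/64 bits). The sketch below has the same shape; the bit positions are hypothetical, and the real SVM_IOIO_* values live in include/VBox/vmm/hm_svm.h:

    #include <stdint.h>

    uint32_t encodeIoInfo(uint8_t cbReg, uint8_t cAddrSizeBits)
    {
        static const uint32_t s_aOpSize[8]   = { 0, 1u << 4, 1u << 5, 0, 1u << 6, 0, 0, 0 };
        static const uint32_t s_aAddrSize[8] = { 0, 1u << 7, 1u << 8, 0, 1u << 9, 0, 0, 0 };
        /* cbReg in {1,2,4,8} and cAddrSizeBits in {16,32,64} index directly
           after the same masking/shifting as the code above. */
        return s_aOpSize[cbReg & 7] | s_aAddrSize[(cAddrSizeBits >> 4) & 7];
    }
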
@@ -864,15 +1025,19 @@ DECLINLINE(void) iemInitExec(PVMCPU pVCpu, bool fBypassHandlers)
     pVCpu->iem.s.uCpl               = CPUMGetGuestCPL(pVCpu);
     pVCpu->iem.s.enmCpuMode         = iemCalcCpuMode(pCtx);
 #ifdef VBOX_STRICT
-    pVCpu->iem.s.enmDefAddrMode     = (IEMMODE)0xc0fe;
-    pVCpu->iem.s.enmEffAddrMode     = (IEMMODE)0xc0fe;
-    pVCpu->iem.s.enmDefOpSize       = (IEMMODE)0xc0fe;
-    pVCpu->iem.s.enmEffOpSize       = (IEMMODE)0xc0fe;
+    pVCpu->iem.s.enmDefAddrMode     = (IEMMODE)0xfe;
+    pVCpu->iem.s.enmEffAddrMode     = (IEMMODE)0xfe;
+    pVCpu->iem.s.enmDefOpSize       = (IEMMODE)0xfe;
+    pVCpu->iem.s.enmEffOpSize       = (IEMMODE)0xfe;
     pVCpu->iem.s.fPrefixes          = 0xfeedbeef;
     pVCpu->iem.s.uRexReg            = 127;
     pVCpu->iem.s.uRexB              = 127;
     pVCpu->iem.s.uRexIndex          = 127;
     pVCpu->iem.s.iEffSeg            = 127;
+    pVCpu->iem.s.idxPrefix          = 127;
+    pVCpu->iem.s.uVex3rdReg         = 127;
+    pVCpu->iem.s.uVexLength         = 127;
+    pVCpu->iem.s.fEvexStuff         = 127;
     pVCpu->iem.s.uFpuOpcode         = UINT16_MAX;
 # ifdef IEM_WITH_CODE_TLB
     pVCpu->iem.s.offInstrNextByte   = UINT16_MAX;
@@ -983,6 +1148,10 @@ DECLINLINE(void) iemInitDecoder(PVMCPU pVCpu, bool fBypassHandlers)
     pVCpu->iem.s.uRexReg            = 0;
     pVCpu->iem.s.uRexB              = 0;
     pVCpu->iem.s.uRexIndex          = 0;
+    pVCpu->iem.s.idxPrefix          = 0;
+    pVCpu->iem.s.uVex3rdReg         = 0;
+    pVCpu->iem.s.uVexLength         = 0;
+    pVCpu->iem.s.fEvexStuff         = 0;
     pVCpu->iem.s.iEffSeg            = X86_SREG_DS;
 #ifdef IEM_WITH_CODE_TLB
     pVCpu->iem.s.pbInstrBuf         = NULL;
@@ -1074,6 +1243,10 @@ DECLINLINE(void) iemReInitDecoder(PVMCPU pVCpu)
     pVCpu->iem.s.uRexReg            = 0;
     pVCpu->iem.s.uRexB              = 0;
     pVCpu->iem.s.uRexIndex          = 0;
+    pVCpu->iem.s.idxPrefix          = 0;
+    pVCpu->iem.s.uVex3rdReg         = 0;
+    pVCpu->iem.s.uVexLength         = 0;
+    pVCpu->iem.s.fEvexStuff         = 0;
     pVCpu->iem.s.iEffSeg            = X86_SREG_DS;
 #ifdef IEM_WITH_CODE_TLB
     if (pVCpu->iem.s.pbInstrBuf)
@@ -1177,18 +1350,21 @@ IEM_STATIC VBOXSTRICTRC iemInitDecoderAndPrefetchOpcodes(PVMCPU pVCpu, bool fByp
     {
         cbToTryRead = PAGE_SIZE;
         GCPtrPC     = pCtx->rip;
-        if (!IEM_IS_CANONICAL(GCPtrPC))
+        if (IEM_IS_CANONICAL(GCPtrPC))
+            cbToTryRead = PAGE_SIZE - (GCPtrPC & PAGE_OFFSET_MASK);
+        else
             return iemRaiseGeneralProtectionFault0(pVCpu);
-        cbToTryRead = PAGE_SIZE - (GCPtrPC & PAGE_OFFSET_MASK);
     }
     else
     {
         uint32_t GCPtrPC32 = pCtx->eip;
         AssertMsg(!(GCPtrPC32 & ~(uint32_t)UINT16_MAX) || pVCpu->iem.s.enmCpuMode == IEMMODE_32BIT, ("%04x:%RX64\n", pCtx->cs.Sel, pCtx->rip));
-        if (GCPtrPC32 > pCtx->cs.u32Limit)
+        if (GCPtrPC32 <= pCtx->cs.u32Limit)
+            cbToTryRead = pCtx->cs.u32Limit - GCPtrPC32 + 1;
+        else
             return iemRaiseSelectorBounds(pVCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
-        cbToTryRead = pCtx->cs.u32Limit - GCPtrPC32 + 1;
-        if (!cbToTryRead) /* overflowed */
+        if (cbToTryRead) { /* likely */ }
+        else /* overflowed */
         {
             Assert(GCPtrPC32 == 0); Assert(pCtx->cs.u32Limit == UINT32_MAX);
             cbToTryRead = UINT32_MAX;
@@ -1213,17 +1389,20 @@ IEM_STATIC VBOXSTRICTRC iemInitDecoderAndPrefetchOpcodes(PVMCPU pVCpu, bool fByp
     RTGCPHYS    GCPhys;
     uint64_t    fFlags;
     int rc = PGMGstGetPage(pVCpu, GCPtrPC, &fFlags, &GCPhys);
-    if (RT_FAILURE(rc))
+    if (RT_SUCCESS(rc)) { /* probable */ }
+    else
     {
         Log(("iemInitDecoderAndPrefetchOpcodes: %RGv - rc=%Rrc\n", GCPtrPC, rc));
         return iemRaisePageFault(pVCpu, GCPtrPC, IEM_ACCESS_INSTRUCTION, rc);
     }
-    if (!(fFlags & X86_PTE_US) && pVCpu->iem.s.uCpl == 3)
+    if ((fFlags & X86_PTE_US) || pVCpu->iem.s.uCpl != 3) { /* likely */ }
+    else
     {
         Log(("iemInitDecoderAndPrefetchOpcodes: %RGv - supervisor page\n", GCPtrPC));
         return iemRaisePageFault(pVCpu, GCPtrPC, IEM_ACCESS_INSTRUCTION, VERR_ACCESS_DENIED);
     }
-    if ((fFlags & X86_PTE_PAE_NX) && (pCtx->msrEFER & MSR_K6_EFER_NXE))
+    if (!(fFlags & X86_PTE_PAE_NX) || !(pCtx->msrEFER & MSR_K6_EFER_NXE)) { /* likely */ }
+    else
     {
         Log(("iemInitDecoderAndPrefetchOpcodes: %RGv - NX\n", GCPtrPC));
         return iemRaisePageFault(pVCpu, GCPtrPC, IEM_ACCESS_INSTRUCTION, VERR_ACCESS_DENIED);
@@ -3033,6 +3212,29 @@ DECLINLINE(uint64_t) iemOpcodeGetNextU64Jmp(PVMCPU pVCpu)
  * @{
  */
 
+/* Currently used only with nested hw.virt. */
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Initiates a CPU shutdown sequence.
+ *
+ * @returns Strict VBox status code.
+ * @param   pVCpu           The cross context virtual CPU structure of the
+ *                          calling thread.
+ */
+IEM_STATIC VBOXSTRICTRC iemInitiateCpuShutdown(PVMCPU pVCpu)
+{
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_SHUTDOWN))
+    {
+        Log2(("shutdown: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_SHUTDOWN, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    RT_NOREF_PV(pVCpu);
+    /** @todo Probably need a separate error code and handling for this to
+     *        distinguish it from the regular triple fault. */
+    return VINF_EM_TRIPLE_FAULT;
+}
+#endif
 
 /**
  * Validates a new SS segment.
@@ -3151,25 +3353,6 @@ IEM_STATIC VBOXSTRICTRC iemMiscValidateNewSS(PVMCPU pVCpu, PCCPUMCTX pCtx, RTSEL
  * @{
  */
 
-/** @name IEM_XCPT_FLAGS_XXX - flags for iemRaiseXcptOrInt.
- * @{ */
-/** CPU exception. */
-#define IEM_XCPT_FLAGS_T_CPU_XCPT       RT_BIT_32(0)
-/** External interrupt (from PIC, APIC, whatever). */
-#define IEM_XCPT_FLAGS_T_EXT_INT        RT_BIT_32(1)
-/** Software interrupt (int or into, not bound).
- * Returns to the following instruction */
-#define IEM_XCPT_FLAGS_T_SOFT_INT       RT_BIT_32(2)
-/** Takes an error code. */
-#define IEM_XCPT_FLAGS_ERR              RT_BIT_32(3)
-/** Takes a CR2. */
-#define IEM_XCPT_FLAGS_CR2              RT_BIT_32(4)
-/** Generated by the breakpoint instruction. */
-#define IEM_XCPT_FLAGS_BP_INSTR         RT_BIT_32(5)
-/** Generated by a DRx instruction breakpoint and RF should be cleared. */
-#define IEM_XCPT_FLAGS_DRx_INSTR_BP     RT_BIT_32(6)
-/** @}  */
-
 
 /**
  * Loads the specified stack far pointer from the TSS.
@@ -3192,7 +3375,7 @@ IEM_STATIC VBOXSTRICTRC iemRaiseLoadStackFromTss32Or16(PVMCPU pVCpu, PCCPUMCTX p
         /*
          * 16-bit TSS (X86TSS16).
          */
-        case X86_SEL_TYPE_SYS_286_TSS_AVAIL: AssertFailed();
+        case X86_SEL_TYPE_SYS_286_TSS_AVAIL: AssertFailed(); /* fall thru */
         case X86_SEL_TYPE_SYS_286_TSS_BUSY:
         {
             uint32_t off = uCpl * 4 + 2;
@@ -3219,7 +3402,7 @@ IEM_STATIC VBOXSTRICTRC iemRaiseLoadStackFromTss32Or16(PVMCPU pVCpu, PCCPUMCTX p
         /*
          * 32-bit TSS (X86TSS32).
          */
-        case X86_SEL_TYPE_SYS_386_TSS_AVAIL: AssertFailed();
+        case X86_SEL_TYPE_SYS_386_TSS_AVAIL: AssertFailed(); /* fall thru */
         case X86_SEL_TYPE_SYS_386_TSS_BUSY:
         {
             uint32_t off = uCpl * 8 + 4;
@@ -4380,6 +4563,7 @@ iemRaiseXcptOrIntInProtMode(PVMCPU      pVCpu,
 
         case X86_SEL_TYPE_SYS_286_INT_GATE:
             f32BitGate = false;
+            /* fall thru */
         case X86_SEL_TYPE_SYS_386_INT_GATE:
             fEflToClear |= X86_EFL_IF;
             break;
@@ -4520,6 +4704,7 @@ iemRaiseXcptOrIntInProtMode(PVMCPU      pVCpu,
              u8Vector, uNewEip, cbLimitCS, NewCS));
         return iemRaiseGeneralProtectionFault(pVCpu, 0);
     }
+    Log7(("iemRaiseXcptOrIntInProtMode: new EIP=%#x CS=%#x\n", uNewEip, NewCS));
 
     /* Calc the flag image to push. */
     uint32_t        fEfl    = IEMMISC_GET_EFL(pVCpu, pCtx);
@@ -4614,6 +4799,7 @@ iemRaiseXcptOrIntInProtMode(PVMCPU      pVCpu,
             uStackFrame.pu32[2] = fEfl;
             uStackFrame.pu32[3] = pCtx->esp;
             uStackFrame.pu32[4] = pCtx->ss.Sel;
+            Log7(("iemRaiseXcptOrIntInProtMode: 32-bit push SS=%#x ESP=%#x\n", pCtx->ss.Sel, pCtx->esp));
             if (fEfl & X86_EFL_VM)
             {
                 uStackFrame.pu32[1] = pCtx->cs.Sel;
@@ -4632,6 +4818,7 @@ iemRaiseXcptOrIntInProtMode(PVMCPU      pVCpu,
             uStackFrame.pu16[2] = fEfl;
             uStackFrame.pu16[3] = pCtx->sp;
             uStackFrame.pu16[4] = pCtx->ss.Sel;
+            Log7(("iemRaiseXcptOrIntInProtMode: 16-bit push SS=%#x SP=%#x\n", pCtx->ss.Sel, pCtx->sp));
             if (fEfl & X86_EFL_VM)
             {
                 uStackFrame.pu16[1] = pCtx->cs.Sel;
@@ -5063,6 +5250,67 @@ iemRaiseXcptOrInt(PVMCPU      pVCpu,
                       pCtx->cs.Sel, pCtx->rip, pCtx->ss.Sel, pCtx->rsp);
 #endif
 
+#ifdef VBOX_WITH_NESTED_HWVIRT
+    if (IEM_IS_SVM_ENABLED(pVCpu))
+    {
+        /*
+         * Handle nested-guest SVM exception and software interrupt intercepts,
+         * see AMD spec. 15.12 "Exception Intercepts".
+         *
+         *   - NMI intercepts have their own exit code and do not cause SVM_EXIT_EXCEPTION_2 #VMEXITs.
+         *   - External interrupts and software interrupts (INTn instruction) do not check the exception intercepts
+         *     even when they use a vector in the range 0 to 31.
+         *   - ICEBP should not trigger #DB intercept, but its own intercept, so we catch it early in iemOp_int1.
+         *   - For #PF exceptions, its intercept is checked before CR2 is written by the exception.
+         */
+        /* Check NMI intercept */
+        if (   u8Vector == X86_XCPT_NMI
+            && IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_NMI))
+        {
+            Log(("iemRaiseXcptOrInt: NMI intercept -> #VMEXIT\n"));
+            IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_NMI, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+        }
+
+        /* Check CPU exception intercepts. */
+        if (   IEM_IS_SVM_XCPT_INTERCEPT_SET(pVCpu, u8Vector)
+            && (fFlags & IEM_XCPT_FLAGS_T_CPU_XCPT))
+        {
+            Assert(u8Vector <= X86_XCPT_LAST);
+            uint64_t const uExitInfo1 = fFlags & IEM_XCPT_FLAGS_ERR ? uErr : 0;
+            uint64_t const uExitInfo2 = fFlags & IEM_XCPT_FLAGS_CR2 ? uCr2 : 0;
+            if (   IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSvmDecodeAssist
+                && u8Vector == X86_XCPT_PF
+                && !(uErr & X86_TRAP_PF_ID))
+            {
+                /** @todo Nested-guest SVM - figure out fetching op-code bytes from IEM. */
+#ifdef IEM_WITH_CODE_TLB
+#else
+                uint8_t const offOpCode = pVCpu->iem.s.offOpcode;
+                uint8_t const cbCurrent = pVCpu->iem.s.cbOpcode - pVCpu->iem.s.offOpcode;
+                if (   cbCurrent > 0
+                    && cbCurrent < sizeof(pCtx->hwvirt.svm.VmcbCtrl.abInstr))
+                {
+                    Assert(cbCurrent <= sizeof(pVCpu->iem.s.abOpcode));
+                    memcpy(&pCtx->hwvirt.svm.VmcbCtrl.abInstr[0], &pVCpu->iem.s.abOpcode[offOpCode], cbCurrent);
+                }
+#endif
+            }
+            Log(("iemRaiseXcptOrInt: Xcpt intercept (u8Vector=%#x uExitInfo1=%#RX64, uExitInfo2=%#RX64 -> #VMEXIT\n", u8Vector,
+                 uExitInfo1, uExitInfo2));
+            IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_EXCEPTION_0 + u8Vector, uExitInfo1, uExitInfo2);
+        }
+
+        /* Check software interrupt (INTn) intercepts. */
+        if (   IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_INTN)
+            && (fFlags & IEM_XCPT_FLAGS_T_SOFT_INT))
+        {
+            uint64_t const uExitInfo1 = IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSvmDecodeAssist ? u8Vector : 0;
+            Log(("iemRaiseXcptOrInt: Software INT intercept (u8Vector=%#x) -> #VMEXIT\n", u8Vector));
+            IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_SWINT, uExitInfo1, 0 /* uExitInfo2 */);
+        }
+    }
+#endif /* VBOX_WITH_NESTED_HWVIRT */
+
     /*
      * Do recursion accounting.
      */
@@ -5077,6 +5325,8 @@ iemRaiseXcptOrInt(PVMCPU      pVCpu,
              u8Vector, pCtx->cs.Sel, pCtx->rip, cbInstr, fFlags, uErr, uCr2, pVCpu->iem.s.uCurXcpt, pVCpu->iem.s.cXcptRecursions + 1, fPrevXcpt));
 
         /** @todo double and triple faults. */
+        /** @todo When implementing \#DF, the SVM nested-guest \#DF intercept needs
+         *        some care. See AMD spec. 15.12 "Exception Intercepts". */
         if (pVCpu->iem.s.cXcptRecursions >= 3)
         {
 #ifdef DEBUG_bird
@@ -5092,8 +5342,10 @@ iemRaiseXcptOrInt(PVMCPU      pVCpu,
         } */
     }
     pVCpu->iem.s.cXcptRecursions++;
-    pVCpu->iem.s.uCurXcpt = u8Vector;
-    pVCpu->iem.s.fCurXcpt = fFlags;
+    pVCpu->iem.s.uCurXcpt    = u8Vector;
+    pVCpu->iem.s.fCurXcpt    = fFlags;
+    pVCpu->iem.s.uCurXcptErr = uErr;
+    pVCpu->iem.s.uCurXcptCr2 = uCr2;
 
     /*
      * Extensive logging.
@@ -5202,6 +5454,13 @@ DECL_NO_INLINE(IEM_STATIC, VBOXSTRICTRC) iemRaiseDebugException(PVMCPU pVCpu)
 }
 
 
+/** \#BR - 05.  */
+DECL_NO_INLINE(IEM_STATIC, VBOXSTRICTRC) iemRaiseBoundRangeExceeded(PVMCPU pVCpu)
+{
+    return iemRaiseXcptOrInt(pVCpu, 0, X86_XCPT_BR, IEM_XCPT_FLAGS_T_CPU_XCPT, 0, 0);
+}
+
+
 /** \#UD - 06.  */
 DECL_NO_INLINE(IEM_STATIC, VBOXSTRICTRC) iemRaiseUndefinedOpcode(PVMCPU pVCpu)
 {
@@ -5382,6 +5641,7 @@ DECL_NO_INLINE(IEM_STATIC, VBOXSTRICTRC) iemRaisePageFault(PVMCPU pVCpu, RTGCPTR
 
         default:
             AssertMsgFailed(("%Rrc\n", rc));
+            /* fall thru */
         case VERR_ACCESS_DENIED:
             uErr = X86_TRAP_PF_P;
             break;
@@ -6454,7 +6714,8 @@ DECLINLINE(void) iemFpuActualizeStateForChange(PVMCPU pVCpu)
 
 
 /**
- * Hook for actualizing the guest XMM0..15 register state for read only.
+ * Hook for actualizing the guest XMM0..15 and MXCSR register state for read
+ * only.
  *
  * This is necessary in ring-0 and raw-mode context (nop in ring-3).
  *
@@ -6471,7 +6732,8 @@ DECLINLINE(void) iemFpuActualizeSseStateForRead(PVMCPU pVCpu)
 
 
 /**
- * Hook for actualizing the guest XMM0..15 register state for read+write.
+ * Hook for actualizing the guest XMM0..15 and MXCSR register state for
+ * read+write.
  *
  * This is necessary in ring-0 and raw-mode context (nop in ring-3).
  *
@@ -8336,7 +8598,7 @@ iemMemMap(PVMCPU pVCpu, void **ppvMem, size_t cbMem, uint8_t iSegReg, RTGCPTR GC
     /*
      * Check the input and figure out which mapping entry to use.
      */
-    Assert(cbMem <= 64 || cbMem == 512 || cbMem == 108 || cbMem == 104 || cbMem == 94); /* 512 is the max! */
+    Assert(cbMem <= 64 || cbMem == 512 || cbMem == 256 || cbMem == 108 || cbMem == 104 || cbMem == 94); /* 512 is the max! */
     Assert(!(fAccess & ~(IEM_ACCESS_TYPE_MASK | IEM_ACCESS_WHAT_MASK)));
     Assert(pVCpu->iem.s.cActiveMappings < RT_ELEMENTS(pVCpu->iem.s.aMemMappings));
 
@@ -9085,14 +9347,15 @@ DECL_NO_INLINE(IEM_STATIC, void) iemMemFetchDataR80Jmp(PVMCPU pVCpu, PRTFLOAT80U
  *                              this access.  The base and limits are checked.
  * @param   GCPtrMem            The address of the guest memory.
  */
-IEM_STATIC VBOXSTRICTRC iemMemFetchDataU128(PVMCPU pVCpu, uint128_t *pu128Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
+IEM_STATIC VBOXSTRICTRC iemMemFetchDataU128(PVMCPU pVCpu, PRTUINT128U pu128Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
 {
     /* The lazy approach for now... */
-    uint128_t const *pu128Src;
+    PCRTUINT128U pu128Src;
     VBOXSTRICTRC rc = iemMemMap(pVCpu, (void **)&pu128Src, sizeof(*pu128Src), iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
     if (rc == VINF_SUCCESS)
     {
-        *pu128Dst = *pu128Src;
+        pu128Dst->au64[0] = pu128Src->au64[0];
+        pu128Dst->au64[1] = pu128Src->au64[1];
         rc = iemMemCommitAndUnmap(pVCpu, (void *)pu128Src, IEM_ACCESS_DATA_R);
     }
     return rc;
@@ -9109,11 +9372,12 @@ IEM_STATIC VBOXSTRICTRC iemMemFetchDataU128(PVMCPU pVCpu, uint128_t *pu128Dst, u
  *                              this access.  The base and limits are checked.
  * @param   GCPtrMem            The address of the guest memory.
  */
-IEM_STATIC void iemMemFetchDataU128Jmp(PVMCPU pVCpu, uint128_t *pu128Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
+IEM_STATIC void iemMemFetchDataU128Jmp(PVMCPU pVCpu, PRTUINT128U pu128Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
 {
     /* The lazy approach for now... */
-    uint128_t const *pu128Src = (uint128_t const *)iemMemMapJmp(pVCpu, sizeof(*pu128Src), iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
-    *pu128Dst = *pu128Src;
+    PCRTUINT128U pu128Src = (PCRTUINT128U)iemMemMapJmp(pVCpu, sizeof(*pu128Src), iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
+    pu128Dst->au64[0] = pu128Src->au64[0];
+    pu128Dst->au64[1] = pu128Src->au64[1];
     iemMemCommitAndUnmapJmp(pVCpu, (void *)pu128Src, IEM_ACCESS_DATA_R);
 }
 #endif
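The switch from the compiler-specific uint128_t to RTUINT128U in these fetchers relies only on the union exposing the value as two 64-bit halves; a simplified sketch of the assumed shape (the authoritative definition lives in iprt/types.h):

    /* Simplified sketch of the RTUINT128U view used by the au64[] copies above;
       the real IPRT union in iprt/types.h has further members (s.Lo/s.Hi etc.). */
    typedef union SKETCHUINT128U
    {
        uint64_t au64[2];   /* au64[0] = low qword, au64[1] = high qword */
        uint32_t au32[4];
        uint8_t  au8[16];
    } SKETCHUINT128U;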
@@ -9132,19 +9396,20 @@ IEM_STATIC void iemMemFetchDataU128Jmp(PVMCPU pVCpu, uint128_t *pu128Dst, uint8_
  *                              this access.  The base and limits are checked.
  * @param   GCPtrMem            The address of the guest memory.
  */
-IEM_STATIC VBOXSTRICTRC iemMemFetchDataU128AlignedSse(PVMCPU pVCpu, uint128_t *pu128Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
+IEM_STATIC VBOXSTRICTRC iemMemFetchDataU128AlignedSse(PVMCPU pVCpu, PRTUINT128U pu128Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
 {
     /* The lazy approach for now... */
     /** @todo testcase: Ordering of \#SS(0) vs \#GP() vs \#PF on SSE stuff. */
     if (   (GCPtrMem & 15)
-        && !(IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.MXCSR & X86_MXSCR_MM)) /** @todo should probably check this *after* applying seg.u64Base... Check real HW. */
+        && !(IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.MXCSR & X86_MXCSR_MM)) /** @todo should probably check this *after* applying seg.u64Base... Check real HW. */
         return iemRaiseGeneralProtectionFault0(pVCpu);
 
-    uint128_t const *pu128Src;
+    PCRTUINT128U pu128Src;
     VBOXSTRICTRC rc = iemMemMap(pVCpu, (void **)&pu128Src, sizeof(*pu128Src), iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
     if (rc == VINF_SUCCESS)
     {
-        *pu128Dst = *pu128Src;
+        pu128Dst->au64[0] = pu128Src->au64[0];
+        pu128Dst->au64[1] = pu128Src->au64[1];
         rc = iemMemCommitAndUnmap(pVCpu, (void *)pu128Src, IEM_ACCESS_DATA_R);
     }
     return rc;
@@ -9164,16 +9429,16 @@ IEM_STATIC VBOXSTRICTRC iemMemFetchDataU128AlignedSse(PVMCPU pVCpu, uint128_t *p
  *                              this access.  The base and limits are checked.
  * @param   GCPtrMem            The address of the guest memory.
  */
-DECL_NO_INLINE(IEM_STATIC, void) iemMemFetchDataU128AlignedSseJmp(PVMCPU pVCpu, uint128_t *pu128Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
+DECL_NO_INLINE(IEM_STATIC, void) iemMemFetchDataU128AlignedSseJmp(PVMCPU pVCpu, PRTUINT128U pu128Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
 {
     /* The lazy approach for now... */
     /** @todo testcase: Ordering of \#SS(0) vs \#GP() vs \#PF on SSE stuff. */
     if (   (GCPtrMem & 15) == 0
-        || (IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.MXCSR & X86_MXSCR_MM)) /** @todo should probably check this *after* applying seg.u64Base... Check real HW. */
+        || (IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.MXCSR & X86_MXCSR_MM)) /** @todo should probably check this *after* applying seg.u64Base... Check real HW. */
     {
-        uint128_t const *pu128Src = (uint128_t const *)iemMemMapJmp(pVCpu, sizeof(*pu128Src), iSegReg, GCPtrMem,
-                                                                    IEM_ACCESS_DATA_R);
-        *pu128Dst = *pu128Src;
+        PCRTUINT128U pu128Src = (PCRTUINT128U)iemMemMapJmp(pVCpu, sizeof(*pu128Src), iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
+        pu128Dst->au64[0] = pu128Src->au64[0];
+        pu128Dst->au64[1] = pu128Src->au64[1];
         iemMemCommitAndUnmapJmp(pVCpu, (void *)pu128Src, IEM_ACCESS_DATA_R);
         return;
     }
@@ -9444,14 +9709,15 @@ IEM_STATIC void iemMemStoreDataU64Jmp(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GCP
  * @param   GCPtrMem            The address of the guest memory.
  * @param   u128Value            The value to store.
  */
-IEM_STATIC VBOXSTRICTRC iemMemStoreDataU128(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint128_t u128Value)
+IEM_STATIC VBOXSTRICTRC iemMemStoreDataU128(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, RTUINT128U u128Value)
 {
     /* The lazy approach for now... */
-    uint128_t *pu128Dst;
+    PRTUINT128U pu128Dst;
     VBOXSTRICTRC rc = iemMemMap(pVCpu, (void **)&pu128Dst, sizeof(*pu128Dst), iSegReg, GCPtrMem, IEM_ACCESS_DATA_W);
     if (rc == VINF_SUCCESS)
     {
-        *pu128Dst = u128Value;
+        pu128Dst->au64[0] = u128Value.au64[0];
+        pu128Dst->au64[1] = u128Value.au64[1];
         rc = iemMemCommitAndUnmap(pVCpu, pu128Dst, IEM_ACCESS_DATA_W);
     }
     return rc;
@@ -9468,11 +9734,12 @@ IEM_STATIC VBOXSTRICTRC iemMemStoreDataU128(PVMCPU pVCpu, uint8_t iSegReg, RTGCP
  * @param   GCPtrMem            The address of the guest memory.
  * @param   u128Value            The value to store.
  */
-IEM_STATIC void iemMemStoreDataU128Jmp(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint128_t u128Value)
+IEM_STATIC void iemMemStoreDataU128Jmp(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, RTUINT128U u128Value)
 {
     /* The lazy approach for now... */
-    uint128_t *pu128Dst = (uint128_t *)iemMemMapJmp(pVCpu, sizeof(*pu128Dst), iSegReg, GCPtrMem, IEM_ACCESS_DATA_W);
-    *pu128Dst = u128Value;
+    PRTUINT128U pu128Dst = (PRTUINT128U)iemMemMapJmp(pVCpu, sizeof(*pu128Dst), iSegReg, GCPtrMem, IEM_ACCESS_DATA_W);
+    pu128Dst->au64[0] = u128Value.au64[0];
+    pu128Dst->au64[1] = u128Value.au64[1];
     iemMemCommitAndUnmapJmp(pVCpu, pu128Dst, IEM_ACCESS_DATA_W);
 }
 #endif
@@ -9488,18 +9755,19 @@ IEM_STATIC void iemMemStoreDataU128Jmp(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GC
  * @param   GCPtrMem            The address of the guest memory.
  * @param   u128Value           The value to store.
  */
-IEM_STATIC VBOXSTRICTRC iemMemStoreDataU128AlignedSse(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint128_t u128Value)
+IEM_STATIC VBOXSTRICTRC iemMemStoreDataU128AlignedSse(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, RTUINT128U u128Value)
 {
     /* The lazy approach for now... */
     if (   (GCPtrMem & 15)
-        && !(IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.MXCSR & X86_MXSCR_MM)) /** @todo should probably check this *after* applying seg.u64Base... Check real HW. */
+        && !(IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.MXCSR & X86_MXCSR_MM)) /** @todo should probably check this *after* applying seg.u64Base... Check real HW. */
         return iemRaiseGeneralProtectionFault0(pVCpu);
 
-    uint128_t *pu128Dst;
+    PRTUINT128U pu128Dst;
     VBOXSTRICTRC rc = iemMemMap(pVCpu, (void **)&pu128Dst, sizeof(*pu128Dst), iSegReg, GCPtrMem, IEM_ACCESS_DATA_W);
     if (rc == VINF_SUCCESS)
     {
-        *pu128Dst = u128Value;
+        pu128Dst->au64[0] = u128Value.au64[0];
+        pu128Dst->au64[1] = u128Value.au64[1];
         rc = iemMemCommitAndUnmap(pVCpu, pu128Dst, IEM_ACCESS_DATA_W);
     }
     return rc;
@@ -9518,14 +9786,15 @@ IEM_STATIC VBOXSTRICTRC iemMemStoreDataU128AlignedSse(PVMCPU pVCpu, uint8_t iSeg
  * @param   u128Value           The value to store.
  */
 DECL_NO_INLINE(IEM_STATIC, void)
-iemMemStoreDataU128AlignedSseJmp(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint128_t u128Value)
+iemMemStoreDataU128AlignedSseJmp(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, RTUINT128U u128Value)
 {
     /* The lazy approach for now... */
     if (   (GCPtrMem & 15) == 0
-        || (IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.MXCSR & X86_MXSCR_MM)) /** @todo should probably check this *after* applying seg.u64Base... Check real HW. */
+        || (IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.MXCSR & X86_MXCSR_MM)) /** @todo should probably check this *after* applying seg.u64Base... Check real HW. */
     {
-        uint128_t *pu128Dst = (uint128_t *)iemMemMapJmp(pVCpu, sizeof(*pu128Dst), iSegReg, GCPtrMem, IEM_ACCESS_DATA_W);
-        *pu128Dst = u128Value;
+        PRTUINT128U pu128Dst = (PRTUINT128U)iemMemMapJmp(pVCpu, sizeof(*pu128Dst), iSegReg, GCPtrMem, IEM_ACCESS_DATA_W);
+        pu128Dst->au64[0] = u128Value.au64[0];
+        pu128Dst->au64[1] = u128Value.au64[1];
         iemMemCommitAndUnmapJmp(pVCpu, pu128Dst, IEM_ACCESS_DATA_W);
         return;
     }
@@ -9550,11 +9819,18 @@ iemMemStoreDataU128AlignedSseJmp(PVMCPU pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem
 IEM_STATIC VBOXSTRICTRC
 iemMemStoreDataXdtr(PVMCPU pVCpu, uint16_t cbLimit, RTGCPTR GCPtrBase, uint8_t iSegReg, RTGCPTR GCPtrMem)
 {
+    VBOXSTRICTRC rcStrict;
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_IDTR_READS))
+    {
+        Log(("sidt/sgdt: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_IDTR_READ, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
     /*
      * The SIDT and SGDT instructions actually store the data using two
      * independent writes.  The instructions do not respond to opsize prefixes.
      */
-    VBOXSTRICTRC rcStrict = iemMemStoreDataU16(pVCpu, iSegReg, GCPtrMem, cbLimit);
+    rcStrict = iemMemStoreDataU16(pVCpu, iSegReg, GCPtrMem, cbLimit);
     if (rcStrict == VINF_SUCCESS)
     {
         if (pVCpu->iem.s.enmCpuMode == IEMMODE_16BIT)
@@ -10268,14 +10544,15 @@ iemMemFetchSelDescWithErr(PVMCPU pVCpu, PIEMSELDESC pDesc, uint16_t uSel, uint8_
         rcStrict = iemMemFetchSysU64(pVCpu, &pDesc->Legacy.u, UINT8_MAX, GCPtrBase + (uSel & X86_SEL_MASK));
     else
     {
-        rcStrict = iemMemFetchSysU16(pVCpu, &pDesc->Legacy.au16[0], UINT8_MAX, GCPtrBase + (uSel & X86_SEL_MASK) + 0);
-        if (rcStrict != VINF_SUCCESS)
-            return rcStrict;
-        rcStrict = iemMemFetchSysU16(pVCpu, &pDesc->Legacy.au16[1], UINT8_MAX, GCPtrBase + (uSel & X86_SEL_MASK) + 2);
-        if (rcStrict != VINF_SUCCESS)
+        rcStrict     = iemMemFetchSysU16(pVCpu, &pDesc->Legacy.au16[0], UINT8_MAX, GCPtrBase + (uSel & X86_SEL_MASK) + 0);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemFetchSysU16(pVCpu, &pDesc->Legacy.au16[1], UINT8_MAX, GCPtrBase + (uSel & X86_SEL_MASK) + 2);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemFetchSysU16(pVCpu, &pDesc->Legacy.au16[2], UINT8_MAX, GCPtrBase + (uSel & X86_SEL_MASK) + 4);
+        if (rcStrict == VINF_SUCCESS)
+            pDesc->Legacy.au16[3] = 0;
+        else
             return rcStrict;
-        rcStrict = iemMemFetchSysU16(pVCpu, &pDesc->Legacy.au16[2], UINT8_MAX, GCPtrBase + (uSel & X86_SEL_MASK) + 4);
-        pDesc->Legacy.au16[3] = 0;
     }
 
     if (rcStrict == VINF_SUCCESS)
@@ -10440,6 +10717,15 @@ IEM_STATIC VBOXSTRICTRC iemMemMarkSelDescAccessed(PVMCPU pVCpu, uint16_t uSel)
         if ((pVCpu)->iem.s.CTX_SUFF(pCtx)->CTX_SUFF(pXState)->x87.FSW & X86_FSW_ES) \
             return iemRaiseMathFault(pVCpu); \
     } while (0)
+#define IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT() \
+    do { \
+        if (   (IEM_GET_CTX(pVCpu)->cr0 & X86_CR0_EM) \
+            || !(IEM_GET_CTX(pVCpu)->cr4 & X86_CR4_OSFXSR) \
+            || !IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse3) \
+            return iemRaiseUndefinedOpcode(pVCpu); \
+        if (IEM_GET_CTX(pVCpu)->cr0 & X86_CR0_TS) \
+            return iemRaiseDeviceNotAvailable(pVCpu); \
+    } while (0)
 #define IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT() \
     do { \
         if (   (IEM_GET_CTX(pVCpu)->cr0 & X86_CR0_EM) \
@@ -10480,6 +10766,11 @@ IEM_STATIC VBOXSTRICTRC iemMemMarkSelDescAccessed(PVMCPU pVCpu, uint16_t uSel)
         if (pVCpu->iem.s.uCpl != 0) \
             return iemRaiseGeneralProtectionFault0(pVCpu); \
     } while (0)
+#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
+    do { \
+        if (!((a_EffAddr) & ((a_cbAlign) - 1))) { /* likely */ } \
+        else return iemRaiseGeneralProtectionFault0(pVCpu); \
+    } while (0)
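The test relies on a_cbAlign being a power of two; a worked example, assuming a 16-byte alignment requirement:

    /* Worked example for IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED with
       a_cbAlign = 16, i.e. mask 0xf:
         a_EffAddr = 0x1008: 0x1008 & 0xf = 0x8 -> #GP(0)
         a_EffAddr = 0x1010: 0x1010 & 0xf = 0x0 -> no fault */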
 
 
 #define IEM_MC_LOCAL(a_Type, a_Name)                    a_Type a_Name
@@ -10655,13 +10946,19 @@ IEM_STATIC VBOXSTRICTRC iemMemMarkSelDescAccessed(PVMCPU pVCpu, uint16_t uSel)
         (a_pu32Dst) = ((uint32_t const *)&IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aRegs[(a_iMReg)].mmx)
 
 #define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
-    do { (a_u128Value) = IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].xmm; } while (0)
+    do { (a_u128Value).au64[0] = IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].au64[0]; \
+         (a_u128Value).au64[1] = IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].au64[1]; \
+    } while (0)
 #define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg) \
     do { (a_u64Value) = IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].au64[0]; } while (0)
 #define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg) \
     do { (a_u32Value) = IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].au32[0]; } while (0)
+#define IEM_MC_FETCH_XREG_HI_U64(a_u64Value, a_iXReg) \
+    do { (a_u64Value) = IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].au64[1]; } while (0)
 #define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
-    do { IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].xmm = (a_u128Value); } while (0)
+    do { IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].au64[0] = (a_u128Value).au64[0]; \
+         IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].au64[1] = (a_u128Value).au64[1]; \
+    } while (0)
 #define IEM_MC_STORE_XREG_U64(a_iXReg, a_u64Value) \
     do { IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].au64[0] = (a_u64Value); } while (0)
 #define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
@@ -10675,14 +10972,17 @@ IEM_STATIC VBOXSTRICTRC iemMemMarkSelDescAccessed(PVMCPU pVCpu, uint16_t uSel)
          IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].au64[1] = 0; \
     } while (0)
 #define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg)       \
-    (a_pu128Dst) = (&IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].xmm)
+    (a_pu128Dst) = (&IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].uXmm)
 #define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
-    (a_pu128Dst) = ((uint128_t const *)&IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].xmm)
+    (a_pu128Dst) = ((PCRTUINT128U)&IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].uXmm)
 #define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
     (a_pu64Dst) = ((uint64_t const *)&IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXReg)].au64[0])
 #define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
-    do { IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXRegDst)].xmm \
-            = IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXRegSrc)].xmm; } while (0)
+    do { IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXRegDst)].au64[0] \
+            = IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXRegSrc)].au64[0]; \
+         IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXRegDst)].au64[1] \
+            = IEM_GET_CTX(pVCpu)->CTX_SUFF(pXState)->x87.aXMM[(a_iXRegSrc)].au64[1]; \
+    } while (0)
 
 #ifndef IEM_WITH_SETJMP
 # define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
@@ -11235,7 +11535,7 @@ IEM_STATIC VBOXSTRICTRC iemMemMarkSelDescAccessed(PVMCPU pVCpu, uint16_t uSel)
     iemFpuUpdateFSWWithMemOpThenPop(pVCpu, a_u16FSW, a_iEffSeg, a_GCPtrEff)
 /** Updates the FSW, FOP, FPUIP, and FPUCS, and then pops the stack twice. */
 #define IEM_MC_UPDATE_FSW_THEN_POP_POP(a_u16FSW) \
-    iemFpuUpdateFSWThenPop(pVCpu, a_u16FSW)
+    iemFpuUpdateFSWThenPopPop(pVCpu, a_u16FSW)
 
 /** Raises a FPU stack underflow exception.  Sets FPUIP, FPUCS and FOP. */
 #define IEM_MC_FPU_STACK_UNDERFLOW(a_iStDst) \
@@ -11286,9 +11586,9 @@ IEM_STATIC VBOXSTRICTRC iemMemMarkSelDescAccessed(PVMCPU pVCpu, uint16_t uSel)
  * Ensures that we can use the host SSE/FPU in the current context (RC+R0).
  * Ensures the guest SSE state in the CPUMCTX is up to date. */
 #define IEM_MC_PREPARE_SSE_USAGE()              iemFpuPrepareUsageSse(pVCpu)
-/** Actualizes the guest XMM0..15 register state for read-only access. */
+/** Actualizes the guest XMM0..15 and MXCSR register state for read-only access. */
 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ()   iemFpuActualizeSseStateForRead(pVCpu)
-/** Actualizes the guest XMM0..15 register state for read-write access. */
+/** Actualizes the guest XMM0..15 and MXCSR register state for read-write access. */
 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() iemFpuActualizeSseStateForChange(pVCpu)
 
 /**
@@ -11424,18 +11724,112 @@ IEM_STATIC VBOXSTRICTRC iemMemMarkSelDescAccessed(PVMCPU pVCpu, uint16_t uSel)
 /** @name   Opcode Debug Helpers.
  * @{
  */
-#ifdef DEBUG
-# define IEMOP_MNEMONIC(a_szMnemonic) \
-    Log4(("decode - %04x:%RGv %s%s [#%u]\n", IEM_GET_CTX(pVCpu)->cs.Sel, IEM_GET_CTX(pVCpu)->rip, \
-          pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK ? "lock " : "", a_szMnemonic, pVCpu->iem.s.cInstructions))
-# define IEMOP_MNEMONIC2(a_szMnemonic, a_szOps) \
-    Log4(("decode - %04x:%RGv %s%s %s [#%u]\n", IEM_GET_CTX(pVCpu)->cs.Sel, IEM_GET_CTX(pVCpu)->rip, \
-          pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK ? "lock " : "", a_szMnemonic, a_szOps, pVCpu->iem.s.cInstructions))
+#ifdef VBOX_WITH_STATISTICS
+# define IEMOP_INC_STATS(a_Stats) do { pVCpu->iem.s.CTX_SUFF(pStats)->a_Stats += 1; } while (0)
 #else
-# define IEMOP_MNEMONIC(a_szMnemonic) do { } while (0)
-# define IEMOP_MNEMONIC2(a_szMnemonic, a_szOps) do { } while (0)
+# define IEMOP_INC_STATS(a_Stats) do { } while (0)
 #endif
 
+#ifdef DEBUG
+# define IEMOP_MNEMONIC(a_Stats, a_szMnemonic) \
+    do { \
+        IEMOP_INC_STATS(a_Stats); \
+        Log4(("decode - %04x:%RGv %s%s [#%u]\n", IEM_GET_CTX(pVCpu)->cs.Sel, IEM_GET_CTX(pVCpu)->rip, \
+              pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK ? "lock " : "", a_szMnemonic, pVCpu->iem.s.cInstructions)); \
+    } while (0)
+
+# define IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints) \
+    do { \
+        IEMOP_MNEMONIC(a_Stats, a_szMnemonic); \
+        (void)RT_CONCAT(IEMOPFORM_, a_Form); \
+        (void)RT_CONCAT(OP_,a_Upper); \
+        (void)(a_fDisHints); \
+        (void)(a_fIemHints); \
+    } while (0)
+
+# define IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints) \
+    do { \
+        IEMOP_MNEMONIC(a_Stats, a_szMnemonic); \
+        (void)RT_CONCAT(IEMOPFORM_, a_Form); \
+        (void)RT_CONCAT(OP_,a_Upper); \
+        (void)RT_CONCAT(OP_PARM_,a_Op1); \
+        (void)(a_fDisHints); \
+        (void)(a_fIemHints); \
+    } while (0)
+
+# define IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints) \
+    do { \
+        IEMOP_MNEMONIC(a_Stats, a_szMnemonic); \
+        (void)RT_CONCAT(IEMOPFORM_, a_Form); \
+        (void)RT_CONCAT(OP_,a_Upper); \
+        (void)RT_CONCAT(OP_PARM_,a_Op1); \
+        (void)RT_CONCAT(OP_PARM_,a_Op2); \
+        (void)(a_fDisHints); \
+        (void)(a_fIemHints); \
+    } while (0)
+
+# define IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints) \
+    do { \
+        IEMOP_MNEMONIC(a_Stats, a_szMnemonic); \
+        (void)RT_CONCAT(IEMOPFORM_, a_Form); \
+        (void)RT_CONCAT(OP_,a_Upper); \
+        (void)RT_CONCAT(OP_PARM_,a_Op1); \
+        (void)RT_CONCAT(OP_PARM_,a_Op2); \
+        (void)RT_CONCAT(OP_PARM_,a_Op3); \
+        (void)(a_fDisHints); \
+        (void)(a_fIemHints); \
+    } while (0)
+
+# define IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints) \
+    do { \
+        IEMOP_MNEMONIC(a_Stats, a_szMnemonic); \
+        (void)RT_CONCAT(IEMOPFORM_, a_Form); \
+        (void)RT_CONCAT(OP_,a_Upper); \
+        (void)RT_CONCAT(OP_PARM_,a_Op1); \
+        (void)RT_CONCAT(OP_PARM_,a_Op2); \
+        (void)RT_CONCAT(OP_PARM_,a_Op3); \
+        (void)RT_CONCAT(OP_PARM_,a_Op4); \
+        (void)(a_fDisHints); \
+        (void)(a_fIemHints); \
+    } while (0)
+
+#else
+# define IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
+
+# define IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints) \
+         IEMOP_MNEMONIC(a_Stats, a_szMnemonic)
+# define IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints) \
+         IEMOP_MNEMONIC(a_Stats, a_szMnemonic)
+# define IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints) \
+         IEMOP_MNEMONIC(a_Stats, a_szMnemonic)
+# define IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints) \
+         IEMOP_MNEMONIC(a_Stats, a_szMnemonic)
+# define IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints) \
+         IEMOP_MNEMONIC(a_Stats, a_szMnemonic)
+
+#endif
+
+#define IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints) \
+    IEMOP_MNEMONIC0EX(a_Lower, \
+                      #a_Lower, \
+                      a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
+#define IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints) \
+    IEMOP_MNEMONIC1EX(RT_CONCAT3(a_Lower,_,a_Op1), \
+                      #a_Lower " " #a_Op1, \
+                      a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
+#define IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints) \
+    IEMOP_MNEMONIC2EX(RT_CONCAT5(a_Lower,_,a_Op1,_,a_Op2), \
+                      #a_Lower " " #a_Op1 "," #a_Op2, \
+                      a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
+#define IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints) \
+    IEMOP_MNEMONIC3EX(RT_CONCAT7(a_Lower,_,a_Op1,_,a_Op2,_,a_Op3), \
+                      #a_Lower " " #a_Op1 "," #a_Op2 "," #a_Op3, \
+                      a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
+#define IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints) \
+    IEMOP_MNEMONIC4EX(RT_CONCAT9(a_Lower,_,a_Op1,_,a_Op2,_,a_Op3,_,a_Op4), \
+                      #a_Lower " " #a_Op1 "," #a_Op2 "," #a_Op3 "," #a_Op4, \
+                      a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
+
 /** @} */
 
 
@@ -11515,8 +11909,8 @@ IEM_STATIC VBOXSTRICTRC iemMemMarkSelDescAccessed(PVMCPU pVCpu, uint16_t uSel)
 #define IEMOP_HLP_NO_REAL_OR_V86_MODE() \
     do \
     { \
-        if (IEM_IS_REAL_OR_V86_MODE(pVCpu)) \
-            return IEMOP_RAISE_INVALID_LOCK_PREFIX(); \
+        if (!IEM_IS_REAL_OR_V86_MODE(pVCpu)) { /* likely */ } \
+        else return IEMOP_RAISE_INVALID_OPCODE(); \
     } while (0)
 
 /** The instruction is not available in 64-bit mode, throw \#UD if we're in
@@ -11590,6 +11984,7 @@ IEM_STATIC VBOXSTRICTRC iemMemMarkSelDescAccessed(PVMCPU pVCpu, uint16_t uSel)
         else \
             return IEMOP_RAISE_INVALID_LOCK_PREFIX(); \
     } while (0)
+
 #define IEMOP_HLP_DECODED_NL_1(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_fDisOpType) \
     do \
     { \
@@ -11628,6 +12023,64 @@ IEM_STATIC VBOXSTRICTRC iemMemMarkSelDescAccessed(PVMCPU pVCpu, uint16_t uSel)
 
 
 /**
+ * Done decoding VEX.
+ * Raises \#UD exception if rex, rep, opsize or lock prefixes are present, or if
+ * we're in real or v8086 mode.
+ */
+#define IEMOP_HLP_DONE_VEX_DECODING() \
+    do \
+    { \
+        if (RT_LIKELY(   !(  pVCpu->iem.s.fPrefixes \
+                           & (IEM_OP_PRF_LOCK | IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REX)) \
+                      && !IEM_IS_REAL_OR_V86_MODE(pVCpu) )) \
+        { /* likely */ } \
+        else \
+            return IEMOP_RAISE_INVALID_OPCODE(); \
+    } while (0)
+
+/**
+ * Done decoding VEX, no V, no L.
+ * Raises \#UD exception if rex, rep, opsize or lock prefixes are present, if
+ * we're in real or v8086 mode, if VEX.V!=0xf, or if VEX.L!=0.
+ */
+#define IEMOP_HLP_DONE_VEX_DECODING_L_ZERO_NO_VVV() \
+    do \
+    { \
+        if (RT_LIKELY(   !(  pVCpu->iem.s.fPrefixes \
+                           & (IEM_OP_PRF_LOCK | IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REX)) \
+                      && pVCpu->iem.s.uVexLength == 0 \
+                      && pVCpu->iem.s.uVex3rdReg == 0 \
+                      && !IEM_IS_REAL_OR_V86_MODE(pVCpu))) \
+        { /* likely */ } \
+        else \
+            return IEMOP_RAISE_INVALID_OPCODE(); \
+    } while (0)
+
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/** Checks and handles the SVM nested-guest control & instruction intercepts. */
+# define IEMOP_HLP_SVM_CTRL_INTERCEPT(a_pVCpu, a_Intercept, a_uExitCode, a_uExitInfo1, a_uExitInfo2) \
+    do \
+    { \
+        if (IEM_IS_SVM_CTRL_INTERCEPT_SET(a_pVCpu, a_Intercept)) \
+            IEM_RETURN_SVM_NST_GST_VMEXIT(a_pVCpu, a_uExitCode, a_uExitInfo1, a_uExitInfo2); \
+    } while (0)
+
+/** Checks and handles the SVM nested-guest CRx read intercept. */
+# define IEMOP_HLP_SVM_READ_CR_INTERCEPT(a_pVCpu, a_uCr, a_uExitInfo1, a_uExitInfo2) \
+    do \
+    { \
+        if (IEM_IS_SVM_READ_CR_INTERCEPT_SET(a_pVCpu, a_uCr)) \
+            IEM_RETURN_SVM_NST_GST_VMEXIT(a_pVCpu, SVM_EXIT_READ_CR0 + (a_uCr), a_uExitInfo1, a_uExitInfo2); \
+    } while (0)
+
+#else
+# define IEMOP_HLP_SVM_CTRL_INTERCEPT(a_pVCpu, a_Intercept, a_uExitCode, a_uExitInfo1, a_uExitInfo2)    do { } while (0)
+# define IEMOP_HLP_SVM_READ_CR_INTERCEPT(a_pVCpu, a_uCr, a_uExitInfo1, a_uExitInfo2)                    do { } while (0)
+
+#endif /* VBOX_WITH_NESTED_HWVIRT */
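A hedged sketch of how an opcode handler is expected to use these helpers (the handler below is hypothetical; real exit codes and exit-info values are instruction specific):

    /* Hypothetical handler fragment; the exit code/info choices are illustrative. */
    FNIEMOP_DEF(iemOp_sketch_rdtsc)
    {
        /* #VMEXIT with SVM_EXIT_RDTSC when the nested guest intercepts RDTSC. */
        IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_RDTSC, SVM_EXIT_RDTSC,
                                     0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rdtsc);
    }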
+
+
+/**
  * Calculates the effective address of a ModR/M memory operand.
  *
  * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
@@ -14755,6 +15208,133 @@ VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedXsetbv(PVMCPU pVCpu, uint8_t cbInstr)
     return iemUninitExecAndFiddleStatusAndMaybeReenter(pVCpu, rcStrict);
 }
 
+
+/**
+ * Checks if IEM is in the process of delivering an event (interrupt or
+ * exception).
+ *
+ * @returns true if we're in the process of raising an interrupt or exception,
+ *          false otherwise.
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   puVector        Where to store the vector associated with the
+ *                          currently delivered event, optional.
+ * @param   pfFlags         Where to store the event delivery flags (see
+ *                          IEM_XCPT_FLAGS_XXX), optional.
+ * @param   puErr           Where to store the error code associated with the
+ *                          event, optional.
+ * @param   puCr2           Where to store the CR2 associated with the event,
+ *                          optional.
+ */
+VMM_INT_DECL(bool) IEMGetCurrentXcpt(PVMCPU pVCpu, uint8_t *puVector, uint32_t *pfFlags, uint32_t *puErr, uint64_t *puCr2)
+{
+    bool const fRaisingXcpt = pVCpu->iem.s.cXcptRecursions > 0;
+    if (fRaisingXcpt)
+    {
+        if (puVector)
+            *puVector = pVCpu->iem.s.uCurXcpt;
+        if (pfFlags)
+            *pfFlags = pVCpu->iem.s.fCurXcpt;
+        /* The caller should check the flags to determine if the error code & CR2 are valid for the event. */
+        if (puErr)
+            *puErr = pVCpu->iem.s.uCurXcptErr;
+        if (puCr2)
+            *puCr2 = pVCpu->iem.s.uCurXcptCr2;
+    }
+    return fRaisingXcpt;
+}
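A usage sketch, assuming a caller such as HM wants to know whether a #PF is mid-delivery before injecting another event (the locals are illustrative):

    /* Hedged usage sketch for IEMGetCurrentXcpt; local names are illustrative. */
    uint8_t  uVector;
    uint32_t fFlags;
    uint32_t uErr;
    uint64_t uCr2;
    if (IEMGetCurrentXcpt(pVCpu, &uVector, &fFlags, &uErr, &uCr2))
    {
        if (   uVector == X86_XCPT_PF
            && (fFlags & IEM_XCPT_FLAGS_CR2))
            Log(("IEM is delivering a #PF: uErr=%#x uCr2=%#RX64\n", uErr, uCr2));
    }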
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Interface for HM and EM to emulate the CLGI instruction.
+ *
+ * @returns Strict VBox status code.
+ * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
+ * @param   cbInstr     The instruction length in bytes.
+ * @thread  EMT(pVCpu)
+ */
+VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedClgi(PVMCPU pVCpu, uint8_t cbInstr)
+{
+    IEMEXEC_ASSERT_INSTR_LEN_RETURN(cbInstr, 3);
+
+    iemInitExec(pVCpu, false /*fBypassHandlers*/);
+    VBOXSTRICTRC rcStrict = IEM_CIMPL_CALL_0(iemCImpl_clgi);
+    return iemUninitExecAndFiddleStatusAndMaybeReenter(pVCpu, rcStrict);
+}
+
+
+/**
+ * Interface for HM and EM to emulate the STGI instruction.
+ *
+ * @returns Strict VBox status code.
+ * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
+ * @param   cbInstr     The instruction length in bytes.
+ * @thread  EMT(pVCpu)
+ */
+VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedStgi(PVMCPU pVCpu, uint8_t cbInstr)
+{
+    IEMEXEC_ASSERT_INSTR_LEN_RETURN(cbInstr, 3);
+
+    iemInitExec(pVCpu, false /*fBypassHandlers*/);
+    VBOXSTRICTRC rcStrict = IEM_CIMPL_CALL_0(iemCImpl_stgi);
+    return iemUninitExecAndFiddleStatusAndMaybeReenter(pVCpu, rcStrict);
+}
+
+
+/**
+ * Interface for HM and EM to emulate the VMLOAD instruction.
+ *
+ * @returns Strict VBox status code.
+ * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
+ * @param   cbInstr     The instruction length in bytes.
+ * @thread  EMT(pVCpu)
+ */
+VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedVmload(PVMCPU pVCpu, uint8_t cbInstr)
+{
+    IEMEXEC_ASSERT_INSTR_LEN_RETURN(cbInstr, 3);
+
+    iemInitExec(pVCpu, false /*fBypassHandlers*/);
+    VBOXSTRICTRC rcStrict = IEM_CIMPL_CALL_0(iemCImpl_vmload);
+    return iemUninitExecAndFiddleStatusAndMaybeReenter(pVCpu, rcStrict);
+}
+
+
+/**
+ * Interface for HM and EM to emulate the VMSAVE instruction.
+ *
+ * @returns Strict VBox status code.
+ * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
+ * @param   cbInstr     The instruction length in bytes.
+ * @thread  EMT(pVCpu)
+ */
+VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedVmsave(PVMCPU pVCpu, uint8_t cbInstr)
+{
+    IEMEXEC_ASSERT_INSTR_LEN_RETURN(cbInstr, 3);
+
+    iemInitExec(pVCpu, false /*fBypassHandlers*/);
+    VBOXSTRICTRC rcStrict = IEM_CIMPL_CALL_0(iemCImpl_vmsave);
+    return iemUninitExecAndFiddleStatusAndMaybeReenter(pVCpu, rcStrict);
+}
+
+
+/**
+ * Interface for HM and EM to emulate the INVLPGA instruction.
+ *
+ * @returns Strict VBox status code.
+ * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
+ * @param   cbInstr     The instruction length in bytes.
+ * @thread  EMT(pVCpu)
+ */
+VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedInvlpga(PVMCPU pVCpu, uint8_t cbInstr)
+{
+    IEMEXEC_ASSERT_INSTR_LEN_RETURN(cbInstr, 3);
+
+    iemInitExec(pVCpu, false /*fBypassHandlers*/);
+    VBOXSTRICTRC rcStrict = IEM_CIMPL_CALL_0(iemCImpl_invlpga);
+    return iemUninitExecAndFiddleStatusAndMaybeReenter(pVCpu, rcStrict);
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT */
+
 #ifdef IN_RING3
 
 /**
diff --git a/src/VBox/VMM/VMMAll/IEMAllAImpl.asm b/src/VBox/VMM/VMMAll/IEMAllAImpl.asm
index 320d6df..53848fa 100644
--- a/src/VBox/VMM/VMMAll/IEMAllAImpl.asm
+++ b/src/VBox/VMM/VMMAll/IEMAllAImpl.asm
@@ -719,6 +719,8 @@ ENDPROC iemAImpl_xadd_u64_locked
 ; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
 ;                                             uint32_t *pEFlags));
 ;
+; Note! Identical to iemAImpl_cmpxchg16b.
+;
 BEGINCODE
 BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
 %ifdef RT_ARCH_AMD64
@@ -800,6 +802,74 @@ BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
         jmp     NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
 ENDPROC iemAImpl_cmpxchg8b_locked
 
+%ifdef RT_ARCH_AMD64
+
+;
+; CMPXCHG16B.
+;
+; These are tricky register wise, so the code is duplicated for each calling
+; convention.
+;
+; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
+;
+; C-proto:
+; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
+;                                              uint32_t *pEFlags));
+;
+; Note! Identical to iemAImpl_cmpxchg8b.
+;
+BEGINCODE
+BEGINPROC_FASTCALL iemAImpl_cmpxchg16b, 16
+ %ifdef ASM_CALL64_MSC
+        push    rbx
+
+        mov     r11, rdx                ; pu128RaxRdx (is also T1)
+        mov     r10, rcx                ; pu64Dst
+
+        mov     rbx, [r8]
+        mov     rcx, [r8 + 8]
+        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
+        mov     rax, [r11]
+        mov     rdx, [r11 + 8]
+
+        lock cmpxchg16b [r10]
+
+        mov     [r11], rax
+        mov     [r11 + 8], rdx
+        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
+
+        pop     rbx
+        ret
+ %else
+        push    rbx
+
+        mov     r10, rcx                ; pEFlags
+        mov     r11, rdx                ; pu128RbxRcx (is also T1)
+
+        mov     rbx, [r11]
+        mov     rcx, [r11 + 8]
+        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
+        mov     rax, [rsi]
+        mov     rdx, [rsi + 8]
+
+        lock cmpxchg16b [rdi]
+
+        mov     [rsi], rax
+        mov     [rsi + 8], rdx
+        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
+
+        pop     rbx
+        ret
+
+ %endif
+ENDPROC iemAImpl_cmpxchg16b
+
+BEGINPROC_FASTCALL iemAImpl_cmpxchg16b_locked, 16
+        ; Lazy bird always lock prefixes cmpxchg16b.
+        jmp     NAME_FASTCALL(iemAImpl_cmpxchg16b,16,$@)
+ENDPROC iemAImpl_cmpxchg16b_locked
+
+%endif ; RT_ARCH_AMD64
 
 
 ;
@@ -2482,6 +2552,7 @@ ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
 %endmacro
 
 IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
+IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
 IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
 
 
@@ -2588,7 +2659,6 @@ ENDPROC iemAImpl_ %+ %1 %+ _r80
 IEMIMPL_FPU_R80 fchs
 IEMIMPL_FPU_R80 fabs
 IEMIMPL_FPU_R80 f2xm1
-IEMIMPL_FPU_R80 fyl2x
 IEMIMPL_FPU_R80 fsqrt
 IEMIMPL_FPU_R80 frndint
 IEMIMPL_FPU_R80 fsin
diff --git a/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp b/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp
index 4d5989e..c086364 100644
--- a/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp
+++ b/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp
@@ -1351,3 +1351,39 @@ IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint3
         *pfEFlags &= ~X86_EFL_ZF;
 }
 
+
+
+IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
+                                                      PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
+{
+    RTUINT128U u128Tmp = *pu128Dst;
+    if (   u128Tmp.s.Lo == pu128RaxRdx->s.Lo
+        && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
+    {
+        *pu128Dst = *pu128RbxRcx;
+        *pEFlags |= X86_EFL_ZF;
+    }
+    else
+    {
+        *pu128RaxRdx = u128Tmp;
+        *pEFlags &= ~X86_EFL_ZF;
+    }
+}
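A worked view of the fallback semantics, matching the architectural CMPXCHG16B definition:

    /* Worked example (values illustrative):
         *pu128Dst == RDX:RAX -> *pu128Dst := RCX:RBX and ZF := 1 (success);
         otherwise            -> RDX:RAX := *pu128Dst and ZF := 0, letting the
                                 caller reload the comparand and retry. */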
+
+
+IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
+{
+    RT_NOREF(pFpuState);
+    puDst->au32[0] = puSrc->au32[0];
+    puDst->au32[1] = puSrc->au32[0];
+    puDst->au32[2] = puSrc->au32[2];
+    puDst->au32[3] = puSrc->au32[2];
+}
+
+IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
+{
+    RT_NOREF(pFpuState);
+    puDst->au64[0] = uSrc;
+    puDst->au64[1] = uSrc;
+}
+
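A lane-level view of the two shuffle helpers above, following the SSE3 instructions they implement:

    /* Illustrative lane diagram:
         movsldup: src dwords {a0,a1,a2,a3} -> dst {a0,a0,a2,a2}
         movddup:  src qword   q            -> dst {q,q}           */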
diff --git a/src/VBox/VMM/VMMAll/IEMAllCImpl.cpp.h b/src/VBox/VMM/VMMAll/IEMAllCImpl.cpp.h
index bde6c6e..e771ec7 100644
--- a/src/VBox/VMM/VMMAll/IEMAllCImpl.cpp.h
+++ b/src/VBox/VMM/VMMAll/IEMAllCImpl.cpp.h
@@ -188,8 +188,31 @@ static void iemHlpUpdateArithEFlagsU8(PVMCPU pVCpu, uint8_t u8Result, uint32_t f
 
 
 /**
+ * Updates the specified flags according to a 16-bit result.
+ *
+ * @param   pVCpu               The cross context virtual CPU structure of the calling thread.
+ * @param   u16Result           The result to set the flags according to.
+ * @param   fToUpdate           The flags to update.
+ * @param   fUndefined          The flags that are specified as undefined.
+ */
+static void iemHlpUpdateArithEFlagsU16(PVMCPU pVCpu, uint16_t u16Result, uint32_t fToUpdate, uint32_t fUndefined)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+
+    uint32_t fEFlags = pCtx->eflags.u;
+    iemAImpl_test_u16(&u16Result, u16Result, &fEFlags);
+    pCtx->eflags.u &= ~(fToUpdate | fUndefined);
+    pCtx->eflags.u |= (fToUpdate | fUndefined) & fEFlags;
+#ifdef IEM_VERIFICATION_MODE_FULL
+    pVCpu->iem.s.fUndefinedEFlags |= fUndefined;
+#endif
+}
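A hedged usage sketch for the new 16-bit flags helper (the flag selection below is illustrative):

    /* Illustrative call: refresh ZF/SF/PF from a 16-bit result and mark AF as
       undefined; which flags a real caller passes depends on the instruction. */
    iemHlpUpdateArithEFlagsU16(pVCpu, u16Result,
                               X86_EFL_ZF | X86_EFL_SF | X86_EFL_PF /* fToUpdate */,
                               X86_EFL_AF /* fUndefined */);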
+
+
+/**
  * Helper used by iret.
  *
+ * @param   pVCpu               The cross context virtual CPU structure of the calling thread.
  * @param   uCpl                The new CPL.
  * @param   pSReg               Pointer to the segment register.
  */
@@ -532,6 +555,13 @@ IEM_CIMPL_DEF_0(iemCImpl_pusha_32)
 IEM_CIMPL_DEF_1(iemCImpl_pushf, IEMMODE, enmEffOpSize)
 {
     PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+    VBOXSTRICTRC rcStrict;
+
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_PUSHF))
+    {
+        Log2(("pushf: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_PUSHF, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
 
     /*
      * If we're in V8086 mode some care is required (which is why we're in
@@ -547,30 +577,32 @@ IEM_CIMPL_DEF_1(iemCImpl_pushf, IEMMODE, enmEffOpSize)
             return iemRaiseGeneralProtectionFault0(pVCpu);
         fEfl &= ~X86_EFL_IF;          /* (RF and VM are out of range) */
         fEfl |= (fEfl & X86_EFL_VIF) >> (19 - 9);
-        return iemMemStackPushU16(pVCpu, (uint16_t)fEfl);
+        rcStrict = iemMemStackPushU16(pVCpu, (uint16_t)fEfl);
     }
+    else
+    {
 
-    /*
-     * Ok, clear RF and VM, adjust for ancient CPUs, and push the flags.
-     */
-    fEfl &= ~(X86_EFL_RF | X86_EFL_VM);
+        /*
+         * Ok, clear RF and VM, adjust for ancient CPUs, and push the flags.
+         */
+        fEfl &= ~(X86_EFL_RF | X86_EFL_VM);
 
-    VBOXSTRICTRC rcStrict;
-    switch (enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            AssertCompile(IEMTARGETCPU_8086 <= IEMTARGETCPU_186 && IEMTARGETCPU_V20 <= IEMTARGETCPU_186 && IEMTARGETCPU_286 > IEMTARGETCPU_186);
-            if (IEM_GET_TARGET_CPU(pVCpu) <= IEMTARGETCPU_186)
-                fEfl |= UINT16_C(0xf000);
-            rcStrict = iemMemStackPushU16(pVCpu, (uint16_t)fEfl);
-            break;
-        case IEMMODE_32BIT:
-            rcStrict = iemMemStackPushU32(pVCpu, fEfl);
-            break;
-        case IEMMODE_64BIT:
-            rcStrict = iemMemStackPushU64(pVCpu, fEfl);
-            break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        switch (enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                AssertCompile(IEMTARGETCPU_8086 <= IEMTARGETCPU_186 && IEMTARGETCPU_V20 <= IEMTARGETCPU_186 && IEMTARGETCPU_286 > IEMTARGETCPU_186);
+                if (IEM_GET_TARGET_CPU(pVCpu) <= IEMTARGETCPU_186)
+                    fEfl |= UINT16_C(0xf000);
+                rcStrict = iemMemStackPushU16(pVCpu, (uint16_t)fEfl);
+                break;
+            case IEMMODE_32BIT:
+                rcStrict = iemMemStackPushU32(pVCpu, fEfl);
+                break;
+            case IEMMODE_64BIT:
+                rcStrict = iemMemStackPushU64(pVCpu, fEfl);
+                break;
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
     }
     if (rcStrict != VINF_SUCCESS)
         return rcStrict;
@@ -592,6 +624,12 @@ IEM_CIMPL_DEF_1(iemCImpl_popf, IEMMODE, enmEffOpSize)
     VBOXSTRICTRC    rcStrict;
     uint32_t        fEflNew;
 
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_POPF))
+    {
+        Log2(("popf: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_POPF, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
     /*
      * V8086 is special as usual.
      */
@@ -1375,6 +1413,8 @@ IEM_CIMPL_DEF_4(iemCImpl_BranchCallGate, uint16_t, uSel, IEMBRANCH, enmBranch, I
             else
             {
                 /* Just grab the new (NULL) SS descriptor. */
+                /** @todo testcase: Check whether the zero GDT entry is actually loaded here
+                 *        like we do... */
                 rcStrict = iemMemFetchSelDesc(pVCpu, &DescSS, uNewSS, X86_XCPT_SS);
                 if (rcStrict != VINF_SUCCESS)
                     return rcStrict;
@@ -1385,6 +1425,10 @@ IEM_CIMPL_DEF_4(iemCImpl_BranchCallGate, uint16_t, uSel, IEMBRANCH, enmBranch, I
             /** @todo: According to Intel, new stack is checked for enough space first,
              *         then switched. According to AMD, the stack is switched first and
              *         then pushes might fault!
+             *         NB: OS/2 Warp 3/4 actively relies on the fact that possible
+             *         incoming stack #PF happens before actual stack switch. AMD is
+             *         either lying or implicitly assumes that new state is committed
+             *         only if and when an instruction doesn't fault.
              */
 
             /** @todo: According to AMD, CS is loaded first, then SS.
@@ -1404,9 +1448,29 @@ IEM_CIMPL_DEF_4(iemCImpl_BranchCallGate, uint16_t, uSel, IEMBRANCH, enmBranch, I
 
             /* Remember the old SS:rSP and their linear address. */
             uOldSS  = pCtx->ss.Sel;
-            uOldRsp = pCtx->rsp;
-
-            GCPtrParmWds = pCtx->ss.u64Base + pCtx->rsp;
+            uOldRsp = pCtx->ss.Attr.n.u1DefBig ? pCtx->rsp : pCtx->sp;
+
+            GCPtrParmWds = pCtx->ss.u64Base + uOldRsp;
+
+            /* HACK ALERT! Probe whether the write to the new stack will succeed. This may
+                           raise #SS(NewSS) or #PF; the former is not implemented in this workaround. */
+            /** @todo Properly fix call gate target stack exceptions. */
+            /** @todo testcase: Cover callgates with partially or fully inaccessible
+             *        target stacks. */
+            void    *pvNewFrame;
+            RTGCPTR  GCPtrNewStack = X86DESC_BASE(&DescSS.Legacy) + uNewRsp - cbNewStack;
+            rcStrict = iemMemMap(pVCpu, &pvNewFrame, cbNewStack, UINT8_MAX, GCPtrNewStack, IEM_ACCESS_SYS_RW);
+            if (rcStrict != VINF_SUCCESS)
+            {
+                Log(("BranchCallGate: Incoming stack (%04x:%08RX64) not accessible, rc=%Rrc\n", uNewSS, uNewRsp, VBOXSTRICTRC_VAL(rcStrict)));
+                return rcStrict;
+            }
+            rcStrict = iemMemCommitAndUnmap(pVCpu, pvNewFrame, IEM_ACCESS_SYS_RW);
+            if (rcStrict != VINF_SUCCESS)
+            {
+                Log(("BranchCallGate: New stack probe unmapping failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
+                return rcStrict;
+            }
 
             /* Commit new SS:rSP. */
             pCtx->ss.Sel      = uNewSS;
@@ -1416,18 +1480,16 @@ IEM_CIMPL_DEF_4(iemCImpl_BranchCallGate, uint16_t, uSel, IEMBRANCH, enmBranch, I
             pCtx->ss.u64Base  = X86DESC_BASE(&DescSS.Legacy);
             pCtx->ss.fFlags   = CPUMSELREG_FLAGS_VALID;
             pCtx->rsp         = uNewRsp;
-            pVCpu->iem.s.uCpl     = uNewCSDpl;
+            pVCpu->iem.s.uCpl = uNewCSDpl;
             Assert(CPUMSELREG_ARE_HIDDEN_PARTS_VALID(pVCpu, &pCtx->ss));
             CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_HIDDEN_SEL_REGS);
 
-            /* Check new stack - may #SS(NewSS). */
+            /* At this point the stack access must not fail because new state was already committed. */
+            /** @todo this can still fail because SS.LIMIT is not checked. */
             rcStrict = iemMemStackPushBeginSpecial(pVCpu, cbNewStack,
                                                    &uPtrRet.pv, &uNewRsp);
-            if (rcStrict != VINF_SUCCESS)
-            {
-                Log(("BranchCallGate: New stack mapping failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
-                return rcStrict;
-            }
+            AssertMsgReturn(rcStrict == VINF_SUCCESS, ("BranchCallGate: New stack mapping failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)),
+                            VERR_INTERNAL_ERROR_5);
 
             if (!IEM_IS_LONG_MODE(pVCpu))
             {
@@ -1437,24 +1499,27 @@ IEM_CIMPL_DEF_4(iemCImpl_BranchCallGate, uint16_t, uSel, IEMBRANCH, enmBranch, I
                     uPtrRet.pu32[0] = pCtx->eip + cbInstr;
                     uPtrRet.pu32[1] = pCtx->cs.Sel; /** @todo Testcase: What is written to the high word when pushing CS? */
 
-                    /* Map the relevant chunk of the old stack. */
-                    rcStrict = iemMemMap(pVCpu, &uPtrParmWds.pv, cbWords * 4, UINT8_MAX, GCPtrParmWds, IEM_ACCESS_DATA_R);
-                    if (rcStrict != VINF_SUCCESS)
-                    {
-                        Log(("BranchCallGate: Old stack mapping (32-bit) failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
-                        return rcStrict;
-                    }
-
-                    /* Copy the parameter (d)words. */
-                    for (int i = 0; i < cbWords; ++i)
-                        uPtrRet.pu32[2 + i] = uPtrParmWds.pu32[i];
-
-                    /* Unmap the old stack. */
-                    rcStrict = iemMemCommitAndUnmap(pVCpu, uPtrParmWds.pv, IEM_ACCESS_DATA_R);
-                    if (rcStrict != VINF_SUCCESS)
+                    if (cbWords)
                     {
-                        Log(("BranchCallGate: Old stack unmapping (32-bit) failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
-                        return rcStrict;
+                        /* Map the relevant chunk of the old stack. */
+                        rcStrict = iemMemMap(pVCpu, &uPtrParmWds.pv, cbWords * 4, UINT8_MAX, GCPtrParmWds, IEM_ACCESS_DATA_R);
+                        if (rcStrict != VINF_SUCCESS)
+                        {
+                            Log(("BranchCallGate: Old stack mapping (32-bit) failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
+                            return rcStrict;
+                        }
+
+                        /* Copy the parameter (d)words. */
+                        for (int i = 0; i < cbWords; ++i)
+                            uPtrRet.pu32[2 + i] = uPtrParmWds.pu32[i];
+
+                        /* Unmap the old stack. */
+                        rcStrict = iemMemCommitAndUnmap(pVCpu, uPtrParmWds.pv, IEM_ACCESS_DATA_R);
+                        if (rcStrict != VINF_SUCCESS)
+                        {
+                            Log(("BranchCallGate: Old stack unmapping (32-bit) failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
+                            return rcStrict;
+                        }
                     }
 
                     /* Push the old SS:rSP. */
@@ -1469,24 +1534,27 @@ IEM_CIMPL_DEF_4(iemCImpl_BranchCallGate, uint16_t, uSel, IEMBRANCH, enmBranch, I
                     uPtrRet.pu16[0] = pCtx->ip + cbInstr;
                     uPtrRet.pu16[1] = pCtx->cs.Sel;
 
-                    /* Map the relevant chunk of the old stack. */
-                    rcStrict = iemMemMap(pVCpu, &uPtrParmWds.pv, cbWords * 2, UINT8_MAX, GCPtrParmWds, IEM_ACCESS_DATA_R);
-                    if (rcStrict != VINF_SUCCESS)
-                    {
-                        Log(("BranchCallGate: Old stack mapping (16-bit) failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
-                        return rcStrict;
-                    }
-
-                    /* Copy the parameter words. */
-                    for (int i = 0; i < cbWords; ++i)
-                        uPtrRet.pu16[2 + i] = uPtrParmWds.pu16[i];
-
-                    /* Unmap the old stack. */
-                    rcStrict = iemMemCommitAndUnmap(pVCpu, uPtrParmWds.pv, IEM_ACCESS_DATA_R);
-                    if (rcStrict != VINF_SUCCESS)
+                    if (cbWords)
                     {
-                        Log(("BranchCallGate: Old stack unmapping (32-bit) failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
-                        return rcStrict;
+                        /* Map the relevant chunk of the old stack. */
+                        rcStrict = iemMemMap(pVCpu, &uPtrParmWds.pv, cbWords * 2, UINT8_MAX, GCPtrParmWds, IEM_ACCESS_DATA_R);
+                        if (rcStrict != VINF_SUCCESS)
+                        {
+                            Log(("BranchCallGate: Old stack mapping (16-bit) failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
+                            return rcStrict;
+                        }
+
+                        /* Copy the parameter words. */
+                        for (int i = 0; i < cbWords; ++i)
+                            uPtrRet.pu16[2 + i] = uPtrParmWds.pu16[i];
+
+                        /* Unmap the old stack. */
+                        rcStrict = iemMemCommitAndUnmap(pVCpu, uPtrParmWds.pv, IEM_ACCESS_DATA_R);
+                        if (rcStrict != VINF_SUCCESS)
+                        {
+                            Log(("BranchCallGate: Old stack unmapping (32-bit) failed (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
+                            return rcStrict;
+                        }
                     }
 
                     /* Push the old SS:rSP. */
@@ -2433,7 +2501,6 @@ IEM_CIMPL_DEF_2(iemCImpl_retf, IEMMODE, enmEffOpSize, uint16_t, cbPop)
         pCtx->cs.u32Limit       = cbLimitCs;
         pCtx->cs.u64Base        = u64Base;
         pVCpu->iem.s.enmCpuMode     = iemCalcCpuMode(pCtx);
-        pCtx->rsp               = uNewOuterRsp;
         pCtx->ss.Sel            = uNewOuterSs;
         pCtx->ss.ValidSel       = uNewOuterSs;
         pCtx->ss.fFlags         = CPUMSELREG_FLAGS_VALID;
@@ -2443,6 +2510,10 @@ IEM_CIMPL_DEF_2(iemCImpl_retf, IEMMODE, enmEffOpSize, uint16_t, cbPop)
             pCtx->ss.u64Base    = 0;
         else
             pCtx->ss.u64Base    = X86DESC_BASE(&DescSs.Legacy);
+        if (!pCtx->ss.Attr.n.u1DefBig)
+            pCtx->sp            = (uint16_t)uNewOuterRsp;
+        else
+            pCtx->rsp           = uNewOuterRsp;
 
         pVCpu->iem.s.uCpl           = (uNewCs & X86_SEL_RPL);
         iemHlpAdjustSelectorForNewCpl(pVCpu, uNewCs & X86_SEL_RPL, &pCtx->ds);
@@ -2506,7 +2577,10 @@ IEM_CIMPL_DEF_2(iemCImpl_retf, IEMMODE, enmEffOpSize, uint16_t, cbPop)
         }
 
         /* commit */
-        pCtx->rsp           = uNewRsp;
+        if (!pCtx->ss.Attr.n.u1DefBig)
+            pCtx->sp        = (uint16_t)uNewRsp;
+        else
+            pCtx->rsp       = uNewRsp;
         if (enmEffOpSize == IEMMODE_16BIT)
             pCtx->rip       = uNewRip & UINT16_MAX; /** @todo Testcase: When exactly does this occur? With call it happens prior to the limit check according to Intel... */
         else
@@ -3382,16 +3456,17 @@ IEM_CIMPL_DEF_1(iemCImpl_iret_prot, IEMMODE, enmEffOpSize)
         pCtx->cs.u32Limit   = cbLimitCS;
         pCtx->cs.u64Base    = X86DESC_BASE(&DescCS.Legacy);
         pVCpu->iem.s.enmCpuMode = iemCalcCpuMode(pCtx);
-        if (!pCtx->ss.Attr.n.u1DefBig)
-            pCtx->sp        = (uint16_t)uNewESP;
-        else
-            pCtx->rsp       = uNewESP;
+
         pCtx->ss.Sel        = uNewSS;
         pCtx->ss.ValidSel   = uNewSS;
         pCtx->ss.fFlags     = CPUMSELREG_FLAGS_VALID;
         pCtx->ss.Attr.u     = X86DESC_GET_HID_ATTR(&DescSS.Legacy);
         pCtx->ss.u32Limit   = cbLimitSs;
         pCtx->ss.u64Base    = X86DESC_BASE(&DescSS.Legacy);
+        if (!pCtx->ss.Attr.n.u1DefBig)
+            pCtx->sp        = (uint16_t)uNewESP;
+        else
+            pCtx->rsp       = uNewESP;
 
         pVCpu->iem.s.uCpl       = uNewCs & X86_SEL_RPL;
         iemHlpAdjustSelectorForNewCpl(pVCpu, uNewCs & X86_SEL_RPL, &pCtx->ds);
@@ -3455,7 +3530,10 @@ IEM_CIMPL_DEF_1(iemCImpl_iret_prot, IEMMODE, enmEffOpSize)
         pCtx->cs.u32Limit   = cbLimitCS;
         pCtx->cs.u64Base    = X86DESC_BASE(&DescCS.Legacy);
         pVCpu->iem.s.enmCpuMode = iemCalcCpuMode(pCtx);
-        pCtx->rsp           = uNewRsp;
+        if (!pCtx->ss.Attr.n.u1DefBig)
+            pCtx->sp        = (uint16_t)uNewRsp;
+        else
+            pCtx->rsp       = uNewRsp;
         /* Done! */
     }
 
@@ -3789,6 +3867,16 @@ IEM_CIMPL_DEF_1(iemCImpl_iret, IEMMODE, enmEffOpSize)
     VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
 
     /*
+     * The SVM nested-guest intercept for iret takes priority over all exceptions,
+     * see AMD spec. "15.9 Instruction Intercepts".
+     */
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_IRET))
+    {
+        Log(("iret: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_IRET, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    /*
      * Call a mode specific worker.
      */
     if (IEM_IS_REAL_OR_V86_MODE(pVCpu))
@@ -4565,6 +4653,12 @@ IEM_CIMPL_DEF_3(iemCImpl_lgdt, uint8_t, iEffSeg, RTGCPTR, GCPtrEffSrc, IEMMODE,
         return iemRaiseGeneralProtectionFault0(pVCpu);
     Assert(!IEM_GET_CTX(pVCpu)->eflags.Bits.u1VM);
 
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_GDTR_WRITES))
+    {
+        Log(("lgdt: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_GDTR_WRITE, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
     /*
      * Fetch the limit and base address.
      */
@@ -4631,6 +4725,12 @@ IEM_CIMPL_DEF_3(iemCImpl_lidt, uint8_t, iEffSeg, RTGCPTR, GCPtrEffSrc, IEMMODE,
         return iemRaiseGeneralProtectionFault0(pVCpu);
     Assert(!IEM_GET_CTX(pVCpu)->eflags.Bits.u1VM);
 
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_IDTR_WRITES))
+    {
+        Log(("lidt: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_IDTR_WRITE, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
     /*
      * Fetch the limit and base address.
      */
@@ -4716,6 +4816,13 @@ IEM_CIMPL_DEF_1(iemCImpl_lldt, uint16_t, uNewLdt)
      */
     if (!(uNewLdt & X86_SEL_MASK_OFF_RPL))
     {
+        /* Nested-guest SVM intercept. */
+        if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_LDTR_WRITES))
+        {
+            Log(("lldt: Guest intercept -> #VMEXIT\n"));
+            IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_LDTR_WRITE, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+        }
+
         Log(("lldt %04x: Loading NULL selector.\n",  uNewLdt));
         if (!IEM_FULL_VERIFICATION_ENABLED(pVCpu))
             CPUMSetGuestLDTR(pVCpu, uNewLdt);
@@ -4790,6 +4897,13 @@ IEM_CIMPL_DEF_1(iemCImpl_lldt, uint16_t, uNewLdt)
         return iemRaiseSelectorNotPresentBySelector(pVCpu, uNewLdt);
     }
 
+    /* Nested-guest SVM intercept. */
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_LDTR_WRITES))
+    {
+        Log(("lldt: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_LDTR_WRITE, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
     /*
      * It checks out alright, update the registers.
      */
@@ -4841,6 +4955,11 @@ IEM_CIMPL_DEF_1(iemCImpl_ltr, uint16_t, uNewTr)
         Log(("ltr %04x - NULL selector -> #GP(0)\n", uNewTr));
         return iemRaiseGeneralProtectionFault0(pVCpu);
     }
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_TR_WRITES))
+    {
+        Log(("ltr: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_TR_WRITE, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
 
     /*
      * Read the descriptor.
@@ -4943,6 +5062,12 @@ IEM_CIMPL_DEF_2(iemCImpl_mov_Rd_Cd, uint8_t, iGReg, uint8_t, iCrReg)
         return iemRaiseGeneralProtectionFault0(pVCpu);
     Assert(!pCtx->eflags.Bits.u1VM);
 
+    if (IEM_IS_SVM_READ_CR_INTERCEPT_SET(pVCpu, iCrReg))
+    {
+        Log(("iemCImpl_load_Cr%#x: Guest intercept -> #VMEXIT\n", iCrReg));
+        IEM_RETURN_SVM_NST_GST_CRX_VMEXIT(pVCpu, SVM_EXIT_READ_CR0 + iCrReg, IEMACCESSCRX_MOV_CRX, iGReg);
+    }
+
     /* read it */
     uint64_t crX;
     switch (iCrReg)
@@ -4984,12 +5109,18 @@ IEM_CIMPL_DEF_2(iemCImpl_mov_Rd_Cd, uint8_t, iGReg, uint8_t, iCrReg)
  *
  * @param   iCrReg          The CRx register to write (valid).
  * @param   uNewCrX         The new value.
+ * @param   enmAccessCrX    The instruction that caused the CrX load.
+ * @param   iGReg           The general register in case of a 'mov CRx,GReg'
+ *                          instruction.
  */
-IEM_CIMPL_DEF_2(iemCImpl_load_CrX, uint8_t, iCrReg, uint64_t, uNewCrX)
+IEM_CIMPL_DEF_4(iemCImpl_load_CrX, uint8_t, iCrReg, uint64_t, uNewCrX, IEMACCESSCRX, enmAccessCrX, uint8_t, iGReg)
 {
     PCPUMCTX        pCtx  = IEM_GET_CTX(pVCpu);
     VBOXSTRICTRC    rcStrict;
     int             rc;
+#ifndef VBOX_WITH_NESTED_HWVIRT
+    RT_NOREF2(iGReg, enmAccessCrX);
+#endif
 
     /*
      * Try store it.
@@ -5061,6 +5192,26 @@ IEM_CIMPL_DEF_2(iemCImpl_load_CrX, uint8_t, iCrReg, uint64_t, uNewCrX)
             /** @todo check reserved PDPTR bits as AMD states. */
 
             /*
+             * SVM nested-guest CR0 write intercepts.
+             */
+            if (IEM_IS_SVM_WRITE_CR_INTERCEPT_SET(pVCpu, iCrReg))
+            {
+                Log(("iemCImpl_load_Cr%#x: Guest intercept -> #VMEXIT\n", iCrReg));
+                IEM_RETURN_SVM_NST_GST_CRX_VMEXIT(pVCpu, SVM_EXIT_WRITE_CR0, enmAccessCrX, iGReg);
+            }
+            if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_CR0_SEL_WRITES))
+            {
+                /* 'lmsw' intercepts regardless of whether the TS/MP bits are actually toggled. */
+                if (   enmAccessCrX == IEMACCESSCRX_LMSW
+                    || (uNewCrX & ~(X86_CR0_TS | X86_CR0_MP)) != (uOldCrX & ~(X86_CR0_TS | X86_CR0_MP)))
+                {
+                    Assert(enmAccessCrX != IEMACCESSCRX_CLTS);
+                    Log(("iemCImpl_load_Cr%#x: TS/MP bit changed or lmsw instr: Guest intercept -> #VMEXIT\n", iCrReg));
+                    IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_CR0_SEL_WRITE, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+                }
+            }
+
+            /*
              * Change CR0.
              */
             if (!IEM_VERIFICATION_ENABLED(pVCpu))
@@ -5119,9 +5270,16 @@ IEM_CIMPL_DEF_2(iemCImpl_load_CrX, uint8_t, iCrReg, uint64_t, uNewCrX)
          * CR2 can be changed without any restrictions.
          */
         case 2:
+        {
+            if (IEM_IS_SVM_WRITE_CR_INTERCEPT_SET(pVCpu, /*cr*/ 2))
+            {
+                Log(("iemCImpl_load_Cr%#x: Guest intercept -> #VMEXIT\n", iCrReg));
+                IEM_RETURN_SVM_NST_GST_CRX_VMEXIT(pVCpu, SVM_EXIT_WRITE_CR2, enmAccessCrX, iGReg);
+            }
             pCtx->cr2 = uNewCrX;
             rcStrict  = VINF_SUCCESS;
             break;
+        }
 
         /*
          * CR3 is relatively simple, although AMD and Intel have different
@@ -5154,6 +5312,12 @@ IEM_CIMPL_DEF_2(iemCImpl_load_CrX, uint8_t, iCrReg, uint64_t, uNewCrX)
                 uNewCrX &= fValid;
             }
 
+            if (IEM_IS_SVM_WRITE_CR_INTERCEPT_SET(pVCpu, /*cr*/ 3))
+            {
+                Log(("iemCImpl_load_Cr%#x: Guest intercept -> #VMEXIT\n", iCrReg));
+                IEM_RETURN_SVM_NST_GST_CRX_VMEXIT(pVCpu, SVM_EXIT_WRITE_CR3, enmAccessCrX, iGReg);
+            }
+
             /** @todo If we're in PAE mode we should check the PDPTRs for
              *        invalid bits. */
 
@@ -5217,6 +5381,11 @@ IEM_CIMPL_DEF_2(iemCImpl_load_CrX, uint8_t, iCrReg, uint64_t, uNewCrX)
                 return iemRaiseGeneralProtectionFault0(pVCpu);
             }
 
+            if (IEM_IS_SVM_WRITE_CR_INTERCEPT_SET(pVCpu, /*cr*/ 4))
+            {
+                Log(("iemCImpl_load_Cr%#x: Guest intercept -> #VMEXIT\n", iCrReg));
+                IEM_RETURN_SVM_NST_GST_CRX_VMEXIT(pVCpu, SVM_EXIT_WRITE_CR4, enmAccessCrX, iGReg);
+            }
 
             /*
              * Change it.
@@ -5270,6 +5439,12 @@ IEM_CIMPL_DEF_2(iemCImpl_load_CrX, uint8_t, iCrReg, uint64_t, uNewCrX)
                 return iemRaiseGeneralProtectionFault0(pVCpu);
             }
 
+            if (IEM_IS_SVM_WRITE_CR_INTERCEPT_SET(pVCpu, /*cr*/ 8))
+            {
+                Log(("iemCImpl_load_Cr%#x: Guest intercept -> #VMEXIT\n", iCrReg));
+                IEM_RETURN_SVM_NST_GST_CRX_VMEXIT(pVCpu, SVM_EXIT_WRITE_CR8, enmAccessCrX, iGReg);
+            }
+
             if (!IEM_FULL_VERIFICATION_ENABLED(pVCpu))
                 PDMApicSetTPR(pVCpu, (uint8_t)uNewCrX << 4);
             rcStrict = VINF_SUCCESS;
@@ -5312,7 +5487,7 @@ IEM_CIMPL_DEF_2(iemCImpl_mov_Cd_Rd, uint8_t, iCrReg, uint8_t, iGReg)
         uNewCrX = iemGRegFetchU64(pVCpu, iGReg);
     else
         uNewCrX = iemGRegFetchU32(pVCpu, iGReg);
-    return IEM_CIMPL_CALL_2(iemCImpl_load_CrX, iCrReg, uNewCrX);
+    return IEM_CIMPL_CALL_4(iemCImpl_load_CrX, iCrReg, uNewCrX, IEMACCESSCRX_MOV_CRX, iGReg);
 }
 
 
@@ -5334,7 +5509,7 @@ IEM_CIMPL_DEF_1(iemCImpl_lmsw, uint16_t, u16NewMsw)
      */
     uint64_t uNewCr0 = pCtx->cr0     & ~(X86_CR0_MP | X86_CR0_EM | X86_CR0_TS);
     uNewCr0 |= u16NewMsw & (X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS);
-    return IEM_CIMPL_CALL_2(iemCImpl_load_CrX, /*cr*/ 0, uNewCr0);
+    return IEM_CIMPL_CALL_4(iemCImpl_load_CrX, /*cr*/ 0, uNewCr0, IEMACCESSCRX_LMSW, UINT8_MAX /* iGReg */);
 }
 
 
@@ -5349,7 +5524,7 @@ IEM_CIMPL_DEF_0(iemCImpl_clts)
     PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
     uint64_t uNewCr0 = pCtx->cr0;
     uNewCr0 &= ~X86_CR0_TS;
-    return IEM_CIMPL_CALL_2(iemCImpl_load_CrX, /*cr*/ 0, uNewCr0);
+    return IEM_CIMPL_CALL_4(iemCImpl_load_CrX, /*cr*/ 0, uNewCr0, IEMACCESSCRX_CLTS, UINT8_MAX /* iGReg */);
 }
 
 
@@ -5411,6 +5586,17 @@ IEM_CIMPL_DEF_2(iemCImpl_mov_Rd_Dd, uint8_t, iGReg, uint8_t, iDrReg)
         IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* call checks */
     }
 
+    /** @todo SVM nested-guest intercept for DR8-DR15? */
+    /*
+     * Check for any SVM nested-guest intercepts for the DRx read.
+     */
+    if (IEM_IS_SVM_READ_DR_INTERCEPT_SET(pVCpu, iDrReg))
+    {
+        Log(("mov r%u,dr%u: Guest intercept -> #VMEXIT\n", iGReg, iDrReg));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_READ_DR0 + (iDrReg & 0xf),
+                               IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSvmDecodeAssist ? (iGReg & 7) : 0, 0 /* uExitInfo2 */);
+    }
+
     if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
         *(uint64_t *)iemGRegRef(pVCpu, iGReg) = drX;
     else
@@ -5501,6 +5687,17 @@ IEM_CIMPL_DEF_2(iemCImpl_mov_Dd_Rd, uint8_t, iDrReg, uint8_t, iGReg)
         IEM_NOT_REACHED_DEFAULT_CASE_RET();
     }
 
+    /** @todo SVM nested-guest intercept for DR8-DR15? */
+    /*
+     * Check for any SVM nested-guest intercepts for the DRx write.
+     */
+    if (IEM_IS_SVM_WRITE_DR_INTERCEPT_SET(pVCpu, iDrReg))
+    {
+        Log2(("mov dr%u,r%u: Guest intercept -> #VMEXIT\n", iDrReg, iGReg));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_WRITE_DR0 + (iDrReg & 0xf),
+                               IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSvmDecodeAssist ? (iGReg & 7) : 0, 0 /* uExitInfo2 */);
+    }
+
     /*
      * Do the actual setting.
      */
@@ -5530,6 +5727,13 @@ IEM_CIMPL_DEF_1(iemCImpl_invlpg, RTGCPTR, GCPtrPage)
         return iemRaiseGeneralProtectionFault0(pVCpu);
     Assert(!IEM_GET_CTX(pVCpu)->eflags.Bits.u1VM);
 
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_INVLPG))
+    {
+        Log(("invlpg: Guest intercept (%RGp) -> #VMEXIT\n", GCPtrPage));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_INVLPG,
+                               IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSvmDecodeAssist ? GCPtrPage : 0, 0 /* uExitInfo2 */);
+    }
+
     int rc = PGMInvalidatePage(pVCpu, GCPtrPage);
     iemRegAddToRipAndClearRF(pVCpu, cbInstr);
 
@@ -5564,6 +5768,12 @@ IEM_CIMPL_DEF_0(iemCImpl_rdtsc)
         return iemRaiseGeneralProtectionFault0(pVCpu);
     }
 
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_RDTSC))
+    {
+        Log(("rdtsc: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_RDTSC, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
     /*
      * Do the job.
      */
@@ -5580,6 +5790,77 @@ IEM_CIMPL_DEF_0(iemCImpl_rdtsc)
 
 
 /**
+ * Implements RDTSCP.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_rdtscp)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+
+    /*
+     * Check preconditions.
+     */
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fRdTscP)
+        return iemRaiseUndefinedOpcode(pVCpu);
+
+    if (   (pCtx->cr4 & X86_CR4_TSD)
+        && pVCpu->iem.s.uCpl != 0)
+    {
+        Log(("rdtscp: CR4.TSD and CPL=%u -> #GP(0)\n", pVCpu->iem.s.uCpl));
+        return iemRaiseGeneralProtectionFault0(pVCpu);
+    }
+
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_RDTSCP))
+    {
+        Log(("rdtscp: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_RDTSCP, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    /*
+     * Do the job.
+     * Query the MSR first in case of trips to ring-3.
+     */
+    VBOXSTRICTRC rcStrict = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pCtx->rcx);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        /* Low dword of the TSC_AUX MSR only. */
+        pCtx->rcx &= UINT32_C(0xffffffff);
+
+        uint64_t uTicks = TMCpuTickGet(pVCpu);
+        pCtx->rax = (uint32_t)uTicks;
+        pCtx->rdx = uTicks >> 32;
+#ifdef IEM_VERIFICATION_MODE_FULL
+        pVCpu->iem.s.fIgnoreRaxRdx = true;
+#endif
+        iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Implements RDPMC.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_rdpmc)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+    if (   pVCpu->iem.s.uCpl != 0
+        && !(pCtx->cr4 & X86_CR4_PCE))
+        return iemRaiseGeneralProtectionFault0(pVCpu);
+
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_RDPMC))
+    {
+        Log(("rdpmc: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_RDPMC, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    /** @todo Implement RDPMC for the regular guest execution case (the above only
+     *        handles nested-guest intercepts). */
+    RT_NOREF(cbInstr);
+    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+}
+
+
+/**
  * Implements RDMSR.
  */
 IEM_CIMPL_DEF_0(iemCImpl_rdmsr)
@@ -5598,7 +5879,20 @@ IEM_CIMPL_DEF_0(iemCImpl_rdmsr)
      * Do the job.
      */
     RTUINT64U uValue;
-    VBOXSTRICTRC rcStrict = CPUMQueryGuestMsr(pVCpu, pCtx->ecx, &uValue.u);
+    VBOXSTRICTRC rcStrict;
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_MSR_PROT))
+    {
+        rcStrict = IEM_SVM_NST_GST_MSR_INTERCEPT(pVCpu, pCtx->ecx, false /* fWrite */);
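+        /* VINF_SVM_VMEXIT means the access was intercepted and the #VMEXIT
+           (world switch back to the nested-guest's host context) has already
+           been performed, so the instruction has nothing left to do. */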
+        if (rcStrict == VINF_SVM_VMEXIT)
+            return VINF_SUCCESS;
+        if (rcStrict != VINF_HM_INTERCEPT_NOT_ACTIVE)
+        {
+            Log(("IEM: SVM intercepted rdmsr(%#x) failed. rc=%Rrc\n", pCtx->ecx, VBOXSTRICTRC_VAL(rcStrict)));
+            return rcStrict;
+        }
+    }
+
+    rcStrict = CPUMQueryGuestMsr(pVCpu, pCtx->ecx, &uValue.u);
     if (rcStrict == VINF_SUCCESS)
     {
         pCtx->rax = uValue.s.Lo;
@@ -5651,6 +5945,18 @@ IEM_CIMPL_DEF_0(iemCImpl_wrmsr)
     uValue.s.Hi = pCtx->edx;
 
     VBOXSTRICTRC rcStrict;
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_MSR_PROT))
+    {
+        rcStrict = IEM_SVM_NST_GST_MSR_INTERCEPT(pVCpu, pCtx->ecx, true /* fWrite */);
+        if (rcStrict == VINF_SVM_VMEXIT)
+            return VINF_SUCCESS;
+        if (rcStrict != VINF_HM_INTERCEPT_NOT_ACTIVE)
+        {
+            Log(("IEM: SVM intercepted rdmsr(%#x) failed. rc=%Rrc\n", pCtx->ecx, VBOXSTRICTRC_VAL(rcStrict)));
+            return rcStrict;
+        }
+    }
+
     if (!IEM_VERIFICATION_ENABLED(pVCpu))
         rcStrict = CPUMSetGuestMsr(pVCpu, pCtx->ecx, uValue.u);
     else
@@ -5675,7 +5981,7 @@ IEM_CIMPL_DEF_0(iemCImpl_wrmsr)
     /* Deferred to ring-3. */
     if (rcStrict == VINF_CPUM_R3_MSR_WRITE)
     {
-        Log(("IEM: rdmsr(%#x) -> ring-3\n", pCtx->ecx));
+        Log(("IEM: wrmsr(%#x) -> ring-3\n", pCtx->ecx));
         return rcStrict;
     }
 #else /* IN_RING3 */
@@ -5709,6 +6015,23 @@ IEM_CIMPL_DEF_2(iemCImpl_in, uint16_t, u16Port, uint8_t, cbReg)
         return rcStrict;
 
     /*
+     * Check SVM nested-guest IO intercept.
+     */
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_IOIO_PROT))
+    {
+        rcStrict = iemSvmHandleIOIntercept(pVCpu, u16Port, SVMIOIOTYPE_IN, cbReg, 0 /* N/A - cAddrSizeBits */,
+                                           0 /* N/A - iEffSeg */, false /* fRep */, false /* fStrIo */, cbInstr);
+        if (rcStrict == VINF_SVM_VMEXIT)
+            return VINF_SUCCESS;
+        if (rcStrict != VINF_HM_INTERCEPT_NOT_ACTIVE)
+        {
+            Log(("iemCImpl_in: iemSvmHandleIOIntercept failed (u16Port=%#x, cbReg=%u) rc=%Rrc\n", u16Port, cbReg,
+                 VBOXSTRICTRC_VAL(rcStrict)));
+            return rcStrict;
+        }
+    }
+
+    /*
      * Perform the I/O.
      */
     uint32_t u32Value;
@@ -5779,6 +6102,23 @@ IEM_CIMPL_DEF_2(iemCImpl_out, uint16_t, u16Port, uint8_t, cbReg)
         return rcStrict;
 
     /*
+     * Check SVM nested-guest IO intercept.
+     */
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_IOIO_PROT))
+    {
+        rcStrict = iemSvmHandleIOIntercept(pVCpu, u16Port, SVMIOIOTYPE_OUT, cbReg, 0 /* N/A - cAddrSizeBits */,
+                                           0 /* N/A - iEffSeg */, false /* fRep */, false /* fStrIo */, cbInstr);
+        if (rcStrict == VINF_SVM_VMEXIT)
+            return VINF_SUCCESS;
+        if (rcStrict != VINF_HM_INTERCEPT_NOT_ACTIVE)
+        {
+            Log(("iemCImpl_out: iemSvmHandleIOIntercept failed (u16Port=%#x, cbReg=%u) rc=%Rrc\n", u16Port, cbReg,
+                 VBOXSTRICTRC_VAL(rcStrict)));
+            return rcStrict;
+        }
+    }
+
+    /*
      * Perform the I/O.
      */
     uint32_t u32Value;
@@ -5830,26 +6170,274 @@ IEM_CIMPL_DEF_1(iemCImpl_out_DX_eAX, uint8_t, cbReg)
 }
 
 
+#ifdef VBOX_WITH_NESTED_HWVIRT
 /**
- * Implements 'CLI'.
+ * Implements 'VMRUN'.
  */
-IEM_CIMPL_DEF_0(iemCImpl_cli)
+IEM_CIMPL_DEF_0(iemCImpl_vmrun)
 {
-    PCPUMCTX        pCtx    = IEM_GET_CTX(pVCpu);
-    uint32_t        fEfl    = IEMMISC_GET_EFL(pVCpu, pCtx);
-    uint32_t const  fEflOld = fEfl;
-    if (pCtx->cr0 & X86_CR0_PE)
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+    IEM_SVM_INSTR_COMMON_CHECKS(pVCpu, vmrun);
+
+    RTGCPHYS const GCPhysVmcb = pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT ? pCtx->rax : pCtx->eax;
+    if (   (GCPhysVmcb & X86_PAGE_4K_OFFSET_MASK)
+        || !PGMPhysIsGCPhysNormal(pVCpu->CTX_SUFF(pVM), GCPhysVmcb))
     {
-        uint8_t const uIopl = X86_EFL_GET_IOPL(fEfl);
-        if (!(fEfl & X86_EFL_VM))
-        {
-            if (pVCpu->iem.s.uCpl <= uIopl)
-                fEfl &= ~X86_EFL_IF;
-            else if (   pVCpu->iem.s.uCpl == 3
-                     && (pCtx->cr4 & X86_CR4_PVI) )
-                fEfl &= ~X86_EFL_VIF;
-            else
-                return iemRaiseGeneralProtectionFault0(pVCpu);
+        Log(("vmrun: VMCB physaddr (%#RGp) not valid -> #GP(0)\n", GCPhysVmcb));
+        return iemRaiseGeneralProtectionFault0(pVCpu);
+    }
+
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_VMRUN))
+    {
+        Log(("vmrun: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_VMRUN, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    VBOXSTRICTRC rcStrict = HMSvmVmrun(pVCpu, pCtx, GCPhysVmcb);
+    /* If VMRUN execution causes a #VMEXIT, we continue executing the instruction following the VMRUN. */
+    if (rcStrict == VINF_SVM_VMEXIT)
+    {
+        iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+        rcStrict = VINF_SUCCESS;
+    }
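+    /* Per the AMD spec, a failure while performing the #VMEXIT puts the CPU
+       into a shutdown state, which iemInitiateCpuShutdown models. */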
+    else if (rcStrict == VERR_SVM_VMEXIT_FAILED)
+        rcStrict = iemInitiateCpuShutdown(pVCpu);
+    return rcStrict;
+}
+
+
+/**
+ * Implements 'VMMCALL'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_vmmcall)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_VMMCALL))
+    {
+        Log(("vmmcall: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_VMMCALL, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    bool fUpdatedRipAndRF;
+    VBOXSTRICTRC rcStrict = HMSvmVmmcall(pVCpu, pCtx, &fUpdatedRipAndRF);
+    if (RT_SUCCESS(rcStrict))
+    {
+        if (!fUpdatedRipAndRF)
+            iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+        return rcStrict;
+    }
+
+    return iemRaiseUndefinedOpcode(pVCpu);
+}
+
+
+/**
+ * Implements 'VMLOAD'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_vmload)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+    IEM_SVM_INSTR_COMMON_CHECKS(pVCpu, vmload);
+
+    RTGCPHYS const GCPhysVmcb = pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT ? pCtx->rax : pCtx->eax;
+    if (   (GCPhysVmcb & X86_PAGE_4K_OFFSET_MASK)
+        || !PGMPhysIsGCPhysNormal(pVCpu->CTX_SUFF(pVM), GCPhysVmcb))
+    {
+        Log(("vmload: VMCB physaddr (%#RGp) not valid -> #GP(0)\n", GCPhysVmcb));
+        return iemRaiseGeneralProtectionFault0(pVCpu);
+    }
+
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_VMLOAD))
+    {
+        Log(("vmload: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_VMLOAD, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    void *pvVmcb;
+    PGMPAGEMAPLOCK PgLockVmcb;
+    VBOXSTRICTRC rcStrict = iemMemPageMap(pVCpu, GCPhysVmcb, IEM_ACCESS_DATA_R, &pvVmcb, &PgLockVmcb);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        PCSVMVMCB pVmcb = (PCSVMVMCB)pvVmcb;
+        HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, &pVmcb->guest, FS, fs);
+        HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, &pVmcb->guest, GS, gs);
+        HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, &pVmcb->guest, TR, tr);
+        HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, &pVmcb->guest, LDTR, ldtr);
+
+        pCtx->msrKERNELGSBASE = pVmcb->guest.u64KernelGSBase;
+        pCtx->msrSTAR         = pVmcb->guest.u64STAR;
+        pCtx->msrLSTAR        = pVmcb->guest.u64LSTAR;
+        pCtx->msrCSTAR        = pVmcb->guest.u64CSTAR;
+        pCtx->msrSFMASK       = pVmcb->guest.u64SFMASK;
+
+        pCtx->SysEnter.cs     = pVmcb->guest.u64SysEnterCS;
+        pCtx->SysEnter.esp    = pVmcb->guest.u64SysEnterESP;
+        pCtx->SysEnter.eip    = pVmcb->guest.u64SysEnterEIP;
+
+        iemMemPageUnmap(pVCpu, GCPhysVmcb, IEM_ACCESS_DATA_R, pvVmcb, &PgLockVmcb);
+        iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Implements 'VMSAVE'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_vmsave)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+    IEM_SVM_INSTR_COMMON_CHECKS(pVCpu, vmsave);
+
+    RTGCPHYS const GCPhysVmcb = pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT ? pCtx->rax : pCtx->eax;
+    if (   (GCPhysVmcb & X86_PAGE_4K_OFFSET_MASK)
+        || !PGMPhysIsGCPhysNormal(pVCpu->CTX_SUFF(pVM), GCPhysVmcb))
+    {
+        Log(("vmsave: VMCB physaddr (%#RGp) not valid -> #GP(0)\n", GCPhysVmcb));
+        return iemRaiseGeneralProtectionFault0(pVCpu);
+    }
+
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_VMSAVE))
+    {
+        Log(("vmsave: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_VMSAVE, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    void *pvVmcb;
+    PGMPAGEMAPLOCK PgLockVmcb;
+    VBOXSTRICTRC rcStrict = iemMemPageMap(pVCpu, GCPhysVmcb, IEM_ACCESS_DATA_RW, &pvVmcb, &PgLockVmcb);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        PSVMVMCB pVmcb = (PSVMVMCB)pvVmcb;
+        HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, FS, fs);
+        HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, GS, gs);
+        HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, TR, tr);
+        HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, LDTR, ldtr);
+
+        pVmcb->guest.u64KernelGSBase  = pCtx->msrKERNELGSBASE;
+        pVmcb->guest.u64STAR          = pCtx->msrSTAR;
+        pVmcb->guest.u64LSTAR         = pCtx->msrLSTAR;
+        pVmcb->guest.u64CSTAR         = pCtx->msrCSTAR;
+        pVmcb->guest.u64SFMASK        = pCtx->msrSFMASK;
+
+        pVmcb->guest.u64SysEnterCS    = pCtx->SysEnter.cs;
+        pVmcb->guest.u64SysEnterESP   = pCtx->SysEnter.esp;
+        pVmcb->guest.u64SysEnterEIP   = pCtx->SysEnter.eip;
+
+        iemMemPageUnmap(pVCpu, GCPhysVmcb, IEM_ACCESS_DATA_RW, pvVmcb, &PgLockVmcb);
+        iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Implements 'CLGI'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_clgi)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+    IEM_SVM_INSTR_COMMON_CHECKS(pVCpu, clgi);
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_CLGI))
+    {
+        Log(("clgi: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_CLGI, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
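+    /* While GIF is clear, interrupts, NMIs and SMIs are held pending; they
+       are only delivered once STGI sets GIF again. */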
+    pCtx->hwvirt.svm.fGif = 0;
+    iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements 'STGI'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_stgi)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+    IEM_SVM_INSTR_COMMON_CHECKS(pVCpu, stgi);
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_STGI))
+    {
+        Log2(("stgi: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_STGI, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    pCtx->hwvirt.svm.fGif = 1;
+    iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements 'INVLPGA'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_invlpga)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+    RTGCPTR  const GCPtrPage = pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT ? pCtx->rax : pCtx->eax;
+    /** @todo PGM needs virtual ASID support. */
+#if 0
+    uint32_t const uAsid     = pCtx->ecx;
+#endif
+
+    IEM_SVM_INSTR_COMMON_CHECKS(pVCpu, invlpga);
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_INVLPGA))
+    {
+        Log2(("invlpga: Guest intercept (%RGp) -> #VMEXIT\n", GCPtrPage));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_INVLPGA, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    PGMInvalidatePage(pVCpu, GCPtrPage);
+    iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements 'SKINIT'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_skinit)
+{
+    IEM_SVM_INSTR_COMMON_CHECKS(pVCpu, skinit);
+
+    uint32_t uIgnore;
+    uint32_t fFeaturesECX;
+    CPUMGetGuestCpuId(pVCpu, 0x80000001, 0, &uIgnore, &uIgnore, &fFeaturesECX, &uIgnore);
+    if (!(fFeaturesECX & X86_CPUID_AMD_FEATURE_ECX_SKINIT))
+        return iemRaiseUndefinedOpcode(pVCpu);
+
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_SKINIT))
+    {
+        Log2(("skinit: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_SKINIT, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    RT_NOREF(cbInstr);
+    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT */
+
+/**
+ * Implements 'CLI'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_cli)
+{
+    PCPUMCTX        pCtx    = IEM_GET_CTX(pVCpu);
+    uint32_t        fEfl    = IEMMISC_GET_EFL(pVCpu, pCtx);
+    uint32_t const  fEflOld = fEfl;
+    if (pCtx->cr0 & X86_CR0_PE)
+    {
+        uint8_t const uIopl = X86_EFL_GET_IOPL(fEfl);
+        if (!(fEfl & X86_EFL_VM))
+        {
+            if (pVCpu->iem.s.uCpl <= uIopl)
+                fEfl &= ~X86_EFL_IF;
+            else if (   pVCpu->iem.s.uCpl == 3
+                     && (pCtx->cr4 & X86_CR4_PVI) )
+                fEfl &= ~X86_EFL_VIF;
+            else
+                return iemRaiseGeneralProtectionFault0(pVCpu);
         }
         /* V8086 */
         else if (uIopl == 3)
@@ -5926,6 +6514,13 @@ IEM_CIMPL_DEF_0(iemCImpl_hlt)
 {
     if (pVCpu->iem.s.uCpl != 0)
         return iemRaiseGeneralProtectionFault0(pVCpu);
+
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_HLT))
+    {
+        Log2(("hlt: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_HLT, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
     iemRegAddToRipAndClearRF(pVCpu, cbInstr);
     return VINF_EM_HALT;
 }
@@ -5974,6 +6569,12 @@ IEM_CIMPL_DEF_1(iemCImpl_monitor, uint8_t, iEffSeg)
     if (rcStrict != VINF_SUCCESS)
         return rcStrict;
 
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_MONITOR))
+    {
+        Log2(("monitor: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_MONITOR, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
     /*
      * Call EM to prepare the monitor/wait.
      */
@@ -6032,6 +6633,21 @@ IEM_CIMPL_DEF_0(iemCImpl_mwait)
     }
 
     /*
+     * Check SVM nested-guest mwait intercepts.
+     */
+    if (   IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_MWAIT_ARMED)
+        && EMMonitorIsArmed(pVCpu))
+    {
+        Log2(("mwait: Guest intercept (monitor hardware armed) -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_MWAIT_ARMED, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_MWAIT))
+    {
+        Log2(("mwait: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_MWAIT, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
+    /*
      * Call EM to prepare the monitor/wait.
      */
     VBOXSTRICTRC rcStrict = EMMonitorWaitPerform(pVCpu, uEax, uEcx);
@@ -6077,6 +6693,12 @@ IEM_CIMPL_DEF_0(iemCImpl_cpuid)
 {
     PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
 
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_CPUID))
+    {
+        Log2(("cpuid: Guest intercept -> #VMEXIT\n"));
+        IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_CPUID, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+    }
+
     CPUMGetGuestCpuId(pVCpu, pCtx->eax, pCtx->ecx, &pCtx->eax, &pCtx->ebx, &pCtx->ecx, &pCtx->edx);
     pCtx->rax &= UINT32_C(0xffffffff);
     pCtx->rbx &= UINT32_C(0xffffffff);
@@ -6201,6 +6823,147 @@ IEM_CIMPL_DEF_0(iemCImpl_das)
 }
 
 
+/**
+ * Implements 'AAA'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_aaa)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+
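+    /* AMD and Intel CPUs differ in how the "undefined" flags come out of AAA:
+       this emulation derives SF/ZF/PF/OF from the 16-bit addition on AMD,
+       but from the final AL value on Intel. */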
+    if (IEM_IS_GUEST_CPU_AMD(pVCpu))
+    {
+        if (   pCtx->eflags.Bits.u1AF
+            || (pCtx->ax & 0xf) >= 10)
+        {
+            iemAImpl_add_u16(&pCtx->ax, 0x106, &pCtx->eflags.u32);
+            pCtx->eflags.Bits.u1AF = 1;
+            pCtx->eflags.Bits.u1CF = 1;
+#ifdef IEM_VERIFICATION_MODE_FULL
+            pVCpu->iem.s.fUndefinedEFlags |= X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_OF;
+#endif
+        }
+        else
+        {
+            iemHlpUpdateArithEFlagsU16(pVCpu, pCtx->ax, X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF, X86_EFL_OF);
+            pCtx->eflags.Bits.u1AF = 0;
+            pCtx->eflags.Bits.u1CF = 0;
+        }
+        pCtx->ax &= UINT16_C(0xff0f);
+    }
+    else
+    {
+        if (   pCtx->eflags.Bits.u1AF
+            || (pCtx->ax & 0xf) >= 10)
+        {
+            pCtx->ax += UINT16_C(0x106);
+            pCtx->eflags.Bits.u1AF = 1;
+            pCtx->eflags.Bits.u1CF = 1;
+        }
+        else
+        {
+            pCtx->eflags.Bits.u1AF = 0;
+            pCtx->eflags.Bits.u1CF = 0;
+        }
+        pCtx->ax &= UINT16_C(0xff0f);
+        iemHlpUpdateArithEFlagsU8(pVCpu, pCtx->al, X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF, X86_EFL_OF);
+    }
+
+    iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements 'AAS'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_aas)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+
+    if (IEM_IS_GUEST_CPU_AMD(pVCpu))
+    {
+        if (   pCtx->eflags.Bits.u1AF
+            || (pCtx->ax & 0xf) >= 10)
+        {
+            iemAImpl_sub_u16(&pCtx->ax, 0x106, &pCtx->eflags.u32);
+            pCtx->eflags.Bits.u1AF = 1;
+            pCtx->eflags.Bits.u1CF = 1;
+#ifdef IEM_VERIFICATION_MODE_FULL
+            pVCpu->iem.s.fUndefinedEFlags |= X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_OF;
+#endif
+        }
+        else
+        {
+            iemHlpUpdateArithEFlagsU16(pVCpu, pCtx->ax, X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF, X86_EFL_OF);
+            pCtx->eflags.Bits.u1AF = 0;
+            pCtx->eflags.Bits.u1CF = 0;
+        }
+        pCtx->ax &= UINT16_C(0xff0f);
+    }
+    else
+    {
+        if (   pCtx->eflags.Bits.u1AF
+            || (pCtx->ax & 0xf) >= 10)
+        {
+            pCtx->ax -= UINT16_C(0x106);
+            pCtx->eflags.Bits.u1AF = 1;
+            pCtx->eflags.Bits.u1CF = 1;
+        }
+        else
+        {
+            pCtx->eflags.Bits.u1AF = 0;
+            pCtx->eflags.Bits.u1CF = 0;
+        }
+        pCtx->ax &= UINT16_C(0xff0f);
+        iemHlpUpdateArithEFlagsU8(pVCpu, pCtx->al, X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF, X86_EFL_OF);
+    }
+
+    iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements the 16-bit version of 'BOUND'.
+ *
+ * @note    We have separate 16-bit and 32-bit variants of this function due to
+ *          the decoder using unsigned parameters, whereas we want signed ones to
+ *          do the job.  This is significant for a recompiler.
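+ *          For example, a 16-bit index of 0xFFFF must compare as -1 rather
+ *          than 65535 when checked against the bounds.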
+ */
+IEM_CIMPL_DEF_3(iemCImpl_bound_16, int16_t, idxArray, int16_t, idxLowerBound, int16_t, idxUpperBound)
+{
+    /*
+     * Check if the index is inside the bounds, otherwise raise #BR.
+     */
+    if (   idxArray >= idxLowerBound
+        && idxArray <= idxUpperBound)
+    {
+        iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+        return VINF_SUCCESS;
+    }
+
+    return iemRaiseBoundRangeExceeded(pVCpu);
+}
+
+
+/**
+ * Implements the 32-bit version of 'BOUND'.
+ */
+IEM_CIMPL_DEF_3(iemCImpl_bound_32, int32_t, idxArray, int32_t, idxLowerBound, int32_t, idxUpperBound)
+{
+    /*
+     * Check if the index is inside the bounds, otherwise raise #BR.
+     */
+    if (   idxArray >= idxLowerBound
+        && idxArray <= idxUpperBound)
+    {
+        iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+        return VINF_SUCCESS;
+    }
+
+    return iemRaiseBoundRangeExceeded(pVCpu);
+}
+
 
 
 /*
@@ -6283,6 +7046,12 @@ IEM_CIMPL_DEF_0(iemCImpl_xsetbv)
     PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
     if (pCtx->cr4 & X86_CR4_OSXSAVE)
     {
+        if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_XSETBV))
+        {
+            Log2(("xsetbv: Guest intercept -> #VMEXIT\n"));
+            IEM_RETURN_SVM_NST_GST_VMEXIT(pVCpu, SVM_EXIT_XSETBV, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+        }
+
         if (pVCpu->iem.s.uCpl == 0)
         {
             uint32_t uEcx = pCtx->ecx;
@@ -6317,10 +7086,114 @@ IEM_CIMPL_DEF_0(iemCImpl_xsetbv)
     return iemRaiseUndefinedOpcode(pVCpu);
 }
 
+#ifdef IN_RING3
 
+/** Argument package for iemCImpl_cmpxchg16b_fallback_rendezvous_callback. */
+struct IEMCIMPLCX16ARGS
+{
+    PRTUINT128U     pu128Dst;
+    PRTUINT128U     pu128RaxRdx;
+    PRTUINT128U     pu128RbxRcx;
+    uint32_t       *pEFlags;
+# ifdef VBOX_STRICT
+    uint32_t        cCalls;
+# endif
+};
 
 /**
- * Implements 'FINIT' and 'FNINIT'.
+ * @callback_method_impl{FNVMMEMTRENDEZVOUS,
+ *                       Worker for iemCImpl_cmpxchg16b_fallback_rendezvous}
+ */
+static DECLCALLBACK(VBOXSTRICTRC) iemCImpl_cmpxchg16b_fallback_rendezvous_callback(PVM pVM, PVMCPU pVCpu, void *pvUser)
+{
+    RT_NOREF(pVM, pVCpu);
+    struct IEMCIMPLCX16ARGS *pArgs = (struct IEMCIMPLCX16ARGS *)pvUser;
+# ifdef VBOX_STRICT
+    Assert(pArgs->cCalls == 0);
+    pArgs->cCalls++;
+# endif
+
+    iemAImpl_cmpxchg16b_fallback(pArgs->pu128Dst, pArgs->pu128RaxRdx, pArgs->pu128RbxRcx, pArgs->pEFlags);
+    return VINF_SUCCESS;
+}
+
+#endif /* IN_RING3 */
+
+/**
+ * Implements 'CMPXCHG16B' fallback using rendezvous.
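+ *
+ * Presumably needed when the host CPU lacks a native CMPXCHG16B: bringing all
+ * EMTs to a rendezvous ensures no other vCPU can race the non-atomic fallback,
+ * making the emulated compare-exchange effectively atomic.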
+ */
+IEM_CIMPL_DEF_4(iemCImpl_cmpxchg16b_fallback_rendezvous, PRTUINT128U, pu128Dst, PRTUINT128U, pu128RaxRdx,
+                PRTUINT128U, pu128RbxRcx, uint32_t *, pEFlags)
+{
+#ifdef IN_RING3
+    struct IEMCIMPLCX16ARGS Args;
+    Args.pu128Dst       = pu128Dst;
+    Args.pu128RaxRdx    = pu128RaxRdx;
+    Args.pu128RbxRcx    = pu128RbxRcx;
+    Args.pEFlags        = pEFlags;
+# ifdef VBOX_STRICT
+    Args.cCalls         = 0;
+# endif
+    VBOXSTRICTRC rcStrict = VMMR3EmtRendezvous(pVCpu->CTX_SUFF(pVM), VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE,
+                                               iemCImpl_cmpxchg16b_fallback_rendezvous_callback, &Args);
+    Assert(Args.cCalls == 1);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        /* Duplicated tail code. */
+        rcStrict = iemMemCommitAndUnmap(pVCpu, pu128Dst, IEM_ACCESS_DATA_RW);
+        if (rcStrict == VINF_SUCCESS)
+        {
+            PCPUMCTX pCtx = pVCpu->iem.s.CTX_SUFF(pCtx);
+            pCtx->eflags.u = *pEFlags; /* IEM_MC_COMMIT_EFLAGS */
+            if (!(*pEFlags & X86_EFL_ZF))
+            {
+                pCtx->rax = pu128RaxRdx->s.Lo;
+                pCtx->rdx = pu128RaxRdx->s.Hi;
+            }
+            iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+        }
+    }
+    return rcStrict;
+#else
+    RT_NOREF(pVCpu, cbInstr, pu128Dst, pu128RaxRdx, pu128RbxRcx, pEFlags);
+    return VERR_IEM_ASPECT_NOT_IMPLEMENTED; /* This should get us to ring-3 for now.  Should perhaps be replaced later. */
+#endif
+}
+
+
+/**
+ * Implements 'CLFLUSH' and 'CLFLUSHOPT'.
+ *
+ * This is implemented in C because it triggers a load like behviour without
+ * actually reading anything.  Since that's not so common, it's implemented
+ * here.
+ *
+ * @param   iEffSeg         The effective segment.
+ * @param   GCPtrEff        The address of the image.
+ */
+IEM_CIMPL_DEF_2(iemCImpl_clflush_clflushopt, uint8_t, iEffSeg, RTGCPTR, GCPtrEff)
+{
+    /*
+     * Pretend to do a load w/o reading (see also iemCImpl_monitor and iemMemMap).
+     */
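+    /* Note that no cache line is actually flushed; guest caches are not
+       modelled, so translating the address and raising the appropriate
+       exceptions is all that is required here. */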
+    VBOXSTRICTRC rcStrict = iemMemApplySegment(pVCpu, IEM_ACCESS_TYPE_READ | IEM_ACCESS_WHAT_DATA, iEffSeg, 1, &GCPtrEff);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        RTGCPHYS GCPhysMem;
+        rcStrict = iemMemPageTranslateAndCheckAccess(pVCpu, GCPtrEff, IEM_ACCESS_TYPE_READ | IEM_ACCESS_WHAT_DATA, &GCPhysMem);
+        if (rcStrict == VINF_SUCCESS)
+        {
+            iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+            return VINF_SUCCESS;
+        }
+    }
+
+    return rcStrict;
+}
+
+
+/**
+ * Implements 'FINIT' and 'FNINIT'.
  *
  * @param   fCheckXcpts     Whether to check for umasked pending exceptions or
  *                          not.
@@ -6406,7 +7279,7 @@ IEM_CIMPL_DEF_3(iemCImpl_fxsave, uint8_t, iEffSeg, RTGCPTR, GCPtrEff, IEMMODE, e
     pDst->FTW           = pSrc->FTW & UINT16_C(0xff);
     pDst->FOP           = pSrc->FOP;
     pDst->MXCSR         = pSrc->MXCSR;
-    pDst->MXCSR_MASK    = pSrc->MXCSR_MASK;
+    pDst->MXCSR_MASK    = CPUMGetGuestMxCsrMask(pVCpu->CTX_SUFF(pVM));
     for (uint32_t i = 0; i < RT_ELEMENTS(pDst->aRegs); i++)
     {
         /** @todo Testcase: What actually happens to the 6 reserved bytes? I'm clearing
@@ -6502,7 +7375,7 @@ IEM_CIMPL_DEF_3(iemCImpl_fxrstor, uint8_t, iEffSeg, RTGCPTR, GCPtrEff, IEMMODE,
      * Check the state for stuff which will #GP(0).
      */
     uint32_t const fMXCSR      = pSrc->MXCSR;
-    uint32_t const fMXCSR_MASK = pDst->MXCSR_MASK ? pDst->MXCSR_MASK : UINT32_C(0xffbf);
+    uint32_t const fMXCSR_MASK = CPUMGetGuestMxCsrMask(pVCpu->CTX_SUFF(pVM));
     if (fMXCSR & ~fMXCSR_MASK)
     {
         Log(("fxrstor: MXCSR=%#x (MXCSR_MASK=%#x) -> #GP(0)\n", fMXCSR, fMXCSR_MASK));
@@ -6574,6 +7447,483 @@ IEM_CIMPL_DEF_3(iemCImpl_fxrstor, uint8_t, iEffSeg, RTGCPTR, GCPtrEff, IEMMODE,
 
 
 /**
+ * Implements 'XSAVE'.
+ *
+ * @param   iEffSeg         The effective segment.
+ * @param   GCPtrEff        The address of the image.
+ * @param   enmEffOpSize    The operand size (only REX.W really matters).
+ */
+IEM_CIMPL_DEF_3(iemCImpl_xsave, uint8_t, iEffSeg, RTGCPTR, GCPtrEff, IEMMODE, enmEffOpSize)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+
+    /*
+     * Raise exceptions.
+     */
+    if (!(pCtx->cr4 & X86_CR4_OSXSAVE))
+        return iemRaiseUndefinedOpcode(pVCpu);
+    if (pCtx->cr0 & X86_CR0_TS)
+        return iemRaiseDeviceNotAvailable(pVCpu);
+    if (GCPtrEff & 63)
+    {
+        /** @todo CPU/VM detection possible! \#AC might not be signalled for
+         * all/any misalignment sizes; Intel says it's an implementation detail. */
+        if (   (pCtx->cr0 & X86_CR0_AM)
+            && pCtx->eflags.Bits.u1AC
+            && pVCpu->iem.s.uCpl == 3)
+            return iemRaiseAlignmentCheckException(pVCpu);
+        return iemRaiseGeneralProtectionFault0(pVCpu);
+    }
+
+    /*
+     * Calc the requested mask
+     */
+    uint64_t const fReqComponents = RT_MAKE_U64(pCtx->eax, pCtx->edx) & pCtx->aXcr[0];
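+    /* Only components both requested in EDX:EAX and enabled in XCR0 are
+       saved; anything beyond x87/SSE/YMM is not implemented yet. */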
+    AssertLogRelReturn(!(fReqComponents & ~(XSAVE_C_X87 | XSAVE_C_SSE | XSAVE_C_YMM)), VERR_IEM_ASPECT_NOT_IMPLEMENTED);
+    uint64_t const fXInUse        = pCtx->aXcr[0];
+
+/** @todo figure out the exact protocol for the memory access.  Currently we
+ *        just need this crap to work halfway to make it possible to test
+ *        AVX instructions. */
+/** @todo figure out the XINUSE and XMODIFIED   */
+
+    /*
+     * Access the x87 memory state.
+     */
+    /* The x87+SSE state.  */
+    void *pvMem512;
+    VBOXSTRICTRC rcStrict = iemMemMap(pVCpu, &pvMem512, 512, iEffSeg, GCPtrEff, IEM_ACCESS_DATA_W | IEM_ACCESS_PARTIAL_WRITE);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+    PX86FXSTATE  pDst = (PX86FXSTATE)pvMem512;
+    PCX86FXSTATE pSrc = &pCtx->CTX_SUFF(pXState)->x87;
+
+    /* The header.  */
+    PX86XSAVEHDR pHdr;
+    rcStrict = iemMemMap(pVCpu, (void **)&pHdr, sizeof(*pHdr), iEffSeg, GCPtrEff + 512, IEM_ACCESS_DATA_RW);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    /*
+     * Store the X87 state.
+     */
+    if (fReqComponents & XSAVE_C_X87)
+    {
+        /* common for all formats */
+        pDst->FCW    = pSrc->FCW;
+        pDst->FSW    = pSrc->FSW;
+        pDst->FTW    = pSrc->FTW & UINT16_C(0xff);
+        pDst->FOP    = pSrc->FOP;
+        pDst->FPUIP  = pSrc->FPUIP;
+        pDst->CS     = pSrc->CS;
+        pDst->FPUDP  = pSrc->FPUDP;
+        pDst->DS     = pSrc->DS;
+        if (enmEffOpSize == IEMMODE_64BIT)
+        {
+            /* Save upper 16-bits of FPUIP (IP:CS:Rsvd1) and FPUDP (DP:DS:Rsvd2). */
+            pDst->Rsrvd1 = pSrc->Rsrvd1;
+            pDst->Rsrvd2 = pSrc->Rsrvd2;
+            pDst->au32RsrvdForSoftware[0] = 0;
+        }
+        else
+        {
+            pDst->Rsrvd1 = 0;
+            pDst->Rsrvd2 = 0;
+            pDst->au32RsrvdForSoftware[0] = X86_FXSTATE_RSVD_32BIT_MAGIC;
+        }
+        for (uint32_t i = 0; i < RT_ELEMENTS(pDst->aRegs); i++)
+        {
+            /** @todo Testcase: What actually happens to the 6 reserved bytes? I'm clearing
+             *        them for now... */
+            pDst->aRegs[i].au32[0] = pSrc->aRegs[i].au32[0];
+            pDst->aRegs[i].au32[1] = pSrc->aRegs[i].au32[1];
+            pDst->aRegs[i].au32[2] = pSrc->aRegs[i].au32[2] & UINT32_C(0xffff);
+            pDst->aRegs[i].au32[3] = 0;
+        }
+
+    }
+
+    if (fReqComponents & (XSAVE_C_SSE | XSAVE_C_YMM))
+    {
+        pDst->MXCSR         = pSrc->MXCSR;
+        pDst->MXCSR_MASK    = CPUMGetGuestMxCsrMask(pVCpu->CTX_SUFF(pVM));
+    }
+
+    if (fReqComponents & XSAVE_C_SSE)
+    {
+        /* XMM registers. */
+        uint32_t cXmmRegs = enmEffOpSize == IEMMODE_64BIT ? 16 : 8;
+        for (uint32_t i = 0; i < cXmmRegs; i++)
+            pDst->aXMM[i] = pSrc->aXMM[i];
+        /** @todo Testcase: What happens to the reserved XMM registers? Untouched,
+         *        right? */
+    }
+
+    /* Commit the x87 state bits. (probably wrong) */
+    rcStrict = iemMemCommitAndUnmap(pVCpu, pvMem512, IEM_ACCESS_DATA_W | IEM_ACCESS_PARTIAL_WRITE);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    /*
+     * Store AVX state.
+     */
+    if (fReqComponents & XSAVE_C_YMM)
+    {
+        /** @todo testcase: xsave64 vs xsave32 wrt XSAVE_C_YMM. */
+        AssertLogRelReturn(pCtx->aoffXState[XSAVE_C_YMM_BIT] != UINT16_MAX, VERR_IEM_IPE_9);
+        PCX86XSAVEYMMHI pCompSrc = CPUMCTX_XSAVE_C_PTR(pCtx, XSAVE_C_YMM_BIT, PCX86XSAVEYMMHI);
+        PX86XSAVEYMMHI  pCompDst;
+        rcStrict = iemMemMap(pVCpu, (void **)&pCompDst, sizeof(*pCompDst), iEffSeg, GCPtrEff + pCtx->aoffXState[XSAVE_C_YMM_BIT],
+                             IEM_ACCESS_DATA_W | IEM_ACCESS_PARTIAL_WRITE);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+
+        uint32_t cXmmRegs = enmEffOpSize == IEMMODE_64BIT ? 16 : 8;
+        for (uint32_t i = 0; i < cXmmRegs; i++)
+            pCompDst->aYmmHi[i] = pCompSrc->aYmmHi[i];
+
+        rcStrict = iemMemCommitAndUnmap(pVCpu, pCompDst, IEM_ACCESS_DATA_W | IEM_ACCESS_PARTIAL_WRITE);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+    }
+
+    /*
+     * Update the header.
+     */
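+    /* XSTATE_BV semantics: each requested component records whether it is in
+       use, while the bits of components not requested are left untouched. */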
+    pHdr->bmXState = (pHdr->bmXState & ~fReqComponents)
+                   | (fReqComponents & fXInUse);
+
+    rcStrict = iemMemCommitAndUnmap(pVCpu, pHdr, IEM_ACCESS_DATA_RW);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements 'XRSTOR'.
+ *
+ * @param   iEffSeg         The effective segment.
+ * @param   GCPtrEff        The address of the image.
+ * @param   enmEffOpSize    The operand size (only REX.W really matters).
+ */
+IEM_CIMPL_DEF_3(iemCImpl_xrstor, uint8_t, iEffSeg, RTGCPTR, GCPtrEff, IEMMODE, enmEffOpSize)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+
+    /*
+     * Raise exceptions.
+     */
+    if (!(pCtx->cr4 & X86_CR4_OSXSAVE))
+        return iemRaiseUndefinedOpcode(pVCpu);
+    if (pCtx->cr0 & X86_CR0_TS)
+        return iemRaiseDeviceNotAvailable(pVCpu);
+    if (GCPtrEff & 63)
+    {
+        /** @todo CPU/VM detection possible! \#AC might not be signalled for
+         * all/any misalignment sizes; Intel says it's an implementation detail. */
+        if (   (pCtx->cr0 & X86_CR0_AM)
+            && pCtx->eflags.Bits.u1AC
+            && pVCpu->iem.s.uCpl == 3)
+            return iemRaiseAlignmentCheckException(pVCpu);
+        return iemRaiseGeneralProtectionFault0(pVCpu);
+    }
+
+/** @todo figure out the exact protocol for the memory access.  Currently we
+ *        just need this crap to work halfway to make it possible to test
+ *        AVX instructions. */
+/** @todo figure out the XINUSE and XMODIFIED   */
+
+    /*
+     * Access the x87 memory state.
+     */
+    /* The x87+SSE state.  */
+    void *pvMem512;
+    VBOXSTRICTRC rcStrict = iemMemMap(pVCpu, &pvMem512, 512, iEffSeg, GCPtrEff, IEM_ACCESS_DATA_R);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+    PCX86FXSTATE pSrc = (PCX86FXSTATE)pvMem512;
+    PX86FXSTATE  pDst = &pCtx->CTX_SUFF(pXState)->x87;
+
+    /*
+     * Calc the requested mask
+     */
+    PX86XSAVEHDR  pHdrDst = &pCtx->CTX_SUFF(pXState)->Hdr;
+    PCX86XSAVEHDR pHdrSrc;
+    rcStrict = iemMemMap(pVCpu, (void **)&pHdrSrc, sizeof(*pHdrSrc), iEffSeg, GCPtrEff + 512, IEM_ACCESS_DATA_R);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    uint64_t const fReqComponents = RT_MAKE_U64(pCtx->eax, pCtx->edx) & pCtx->aXcr[0];
+    AssertLogRelReturn(!(fReqComponents & ~(XSAVE_C_X87 | XSAVE_C_SSE | XSAVE_C_YMM)), VERR_IEM_ASPECT_NOT_IMPLEMENTED);
+    //uint64_t const fXInUse        = pCtx->aXcr[0];
+    uint64_t const fRstorMask     = pHdrSrc->bmXState;
+    uint64_t const fCompMask      = pHdrSrc->bmXComp;
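+    /* A set compaction bit (bit 63 of XCOMP_BV) would select the compacted
+       format, which is not implemented; hence the assertion below. */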
+
+    AssertLogRelReturn(!(fCompMask & XSAVE_C_X), VERR_IEM_ASPECT_NOT_IMPLEMENTED);
+
+    uint32_t const cXmmRegs = enmEffOpSize == IEMMODE_64BIT ? 16 : 8;
+
+    /* We won't need this any longer. */
+    rcStrict = iemMemCommitAndUnmap(pVCpu, (void *)pHdrSrc, IEM_ACCESS_DATA_R);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    /*
+     * Store the X87 state.
+     */
+    if (fReqComponents & XSAVE_C_X87)
+    {
+        if (fRstorMask & XSAVE_C_X87)
+        {
+            pDst->FCW    = pSrc->FCW;
+            pDst->FSW    = pSrc->FSW;
+            pDst->FTW    = pSrc->FTW & UINT16_C(0xff);
+            pDst->FOP    = pSrc->FOP;
+            pDst->FPUIP  = pSrc->FPUIP;
+            pDst->CS     = pSrc->CS;
+            pDst->FPUDP  = pSrc->FPUDP;
+            pDst->DS     = pSrc->DS;
+            if (enmEffOpSize == IEMMODE_64BIT)
+            {
+                /* Save upper 16-bits of FPUIP (IP:CS:Rsvd1) and FPUDP (DP:DS:Rsvd2). */
+                pDst->Rsrvd1 = pSrc->Rsrvd1;
+                pDst->Rsrvd2 = pSrc->Rsrvd2;
+            }
+            else
+            {
+                pDst->Rsrvd1 = 0;
+                pDst->Rsrvd2 = 0;
+            }
+            for (uint32_t i = 0; i < RT_ELEMENTS(pDst->aRegs); i++)
+            {
+                pDst->aRegs[i].au32[0] = pSrc->aRegs[i].au32[0];
+                pDst->aRegs[i].au32[1] = pSrc->aRegs[i].au32[1];
+                pDst->aRegs[i].au32[2] = pSrc->aRegs[i].au32[2] & UINT32_C(0xffff);
+                pDst->aRegs[i].au32[3] = 0;
+            }
+        }
+        else
+        {
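+            /* The component is not marked in XSTATE_BV, so load the x87
+               initial state instead (as after FINIT). */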
+            pDst->FCW   = 0x37f;
+            pDst->FSW   = 0;
+            pDst->FTW   = 0x00;         /* 0 - empty. */
+            pDst->FPUDP = 0;
+            pDst->DS    = 0; //??
+            pDst->Rsrvd2= 0;
+            pDst->FPUIP = 0;
+            pDst->CS    = 0; //??
+            pDst->Rsrvd1= 0;
+            pDst->FOP   = 0;
+            for (uint32_t i = 0; i < RT_ELEMENTS(pSrc->aRegs); i++)
+            {
+                pDst->aRegs[i].au32[0] = 0;
+                pDst->aRegs[i].au32[1] = 0;
+                pDst->aRegs[i].au32[2] = 0;
+                pDst->aRegs[i].au32[3] = 0;
+            }
+        }
+        pHdrDst->bmXState |= XSAVE_C_X87; /* playing safe for now */
+    }
+
+    /* MXCSR */
+    if (fReqComponents & (XSAVE_C_SSE | XSAVE_C_YMM))
+    {
+        if (fRstorMask & (XSAVE_C_SSE | XSAVE_C_YMM))
+            pDst->MXCSR = pSrc->MXCSR;
+        else
+            pDst->MXCSR = 0x1f80;
+    }
+
+    /* XMM registers. */
+    if (fReqComponents & XSAVE_C_SSE)
+    {
+        if (fRstorMask & XSAVE_C_SSE)
+        {
+            for (uint32_t i = 0; i < cXmmRegs; i++)
+                pDst->aXMM[i] = pSrc->aXMM[i];
+            /** @todo Testcase: What happens to the reserved XMM registers? Untouched,
+             *        right? */
+        }
+        else
+        {
+            for (uint32_t i = 0; i < cXmmRegs; i++)
+            {
+                pDst->aXMM[i].au64[0] = 0;
+                pDst->aXMM[i].au64[1] = 0;
+            }
+        }
+        pHdrDst->bmXState |= XSAVE_C_SSE; /* playing safe for now */
+    }
+
+    /* Unmap the x87 state bits (so we don't run out of mappings). */
+    rcStrict = iemMemCommitAndUnmap(pVCpu, pvMem512, IEM_ACCESS_DATA_R);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    /*
+     * Restore AVX state.
+     */
+    if (fReqComponents & XSAVE_C_YMM)
+    {
+        AssertLogRelReturn(pCtx->aoffXState[XSAVE_C_YMM_BIT] != UINT16_MAX, VERR_IEM_IPE_9);
+        PX86XSAVEYMMHI  pCompDst = CPUMCTX_XSAVE_C_PTR(pCtx, XSAVE_C_YMM_BIT, PX86XSAVEYMMHI);
+
+        if (fRstorMask & XSAVE_C_YMM)
+        {
+            /** @todo testcase: xsave64 vs xsave32 wrt XSAVE_C_YMM. */
+            PCX86XSAVEYMMHI pCompSrc;
+            rcStrict = iemMemMap(pVCpu, (void **)&pCompSrc, sizeof(*pCompDst),
+                                 iEffSeg, GCPtrEff + pCtx->aoffXState[XSAVE_C_YMM_BIT], IEM_ACCESS_DATA_R);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+
+            for (uint32_t i = 0; i < cXmmRegs; i++)
+            {
+                pCompDst->aYmmHi[i].au64[0] = pCompSrc->aYmmHi[i].au64[0];
+                pCompDst->aYmmHi[i].au64[1] = pCompSrc->aYmmHi[i].au64[1];
+            }
+
+            rcStrict = iemMemCommitAndUnmap(pVCpu, (void *)pCompSrc, IEM_ACCESS_DATA_R);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+        }
+        else
+        {
+            for (uint32_t i = 0; i < cXmmRegs; i++)
+            {
+                pCompDst->aYmmHi[i].au64[0] = 0;
+                pCompDst->aYmmHi[i].au64[1] = 0;
+            }
+        }
+        pHdrDst->bmXState |= XSAVE_C_YMM; /* playing safe for now */
+    }
+
+    iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+    return VINF_SUCCESS;
+}
+
+
+
+
+/**
+ * Implements 'STMXCSR'.
+ *
+ * @param   iEffSeg         The effective segment.
+ * @param   GCPtrEff        The address of the image.
+ */
+IEM_CIMPL_DEF_2(iemCImpl_stmxcsr, uint8_t, iEffSeg, RTGCPTR, GCPtrEff)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+
+    /*
+     * Raise exceptions.
+     */
+    if (   !(pCtx->cr0 & X86_CR0_EM)
+        && (pCtx->cr4 & X86_CR4_OSFXSR))
+    {
+        if (!(pCtx->cr0 & X86_CR0_TS))
+        {
+            /*
+             * Do the job.
+             */
+            VBOXSTRICTRC rcStrict = iemMemStoreDataU32(pVCpu, iEffSeg, GCPtrEff, pCtx->CTX_SUFF(pXState)->x87.MXCSR);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+                return VINF_SUCCESS;
+            }
+            return rcStrict;
+        }
+        return iemRaiseDeviceNotAvailable(pVCpu);
+    }
+    return iemRaiseUndefinedOpcode(pVCpu);
+}
+
+
+/**
+ * Implements 'VSTMXCSR'.
+ *
+ * @param   iEffSeg         The effective segment.
+ * @param   GCPtrEff        The address of the image.
+ */
+IEM_CIMPL_DEF_2(iemCImpl_vstmxcsr, uint8_t, iEffSeg, RTGCPTR, GCPtrEff)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+
+    /*
+     * Raise exceptions.
+     */
+    if (   (   !IEM_IS_GUEST_CPU_AMD(pVCpu)
+            ? (pCtx->aXcr[0] & (XSAVE_C_SSE | XSAVE_C_YMM)) == (XSAVE_C_SSE | XSAVE_C_YMM)
+            : !(pCtx->cr0 & X86_CR0_EM)) /* AMD Jaguar CPU (f0x16,m0,s1) behaviour */
+        && (pCtx->cr4 & X86_CR4_OSXSAVE))
+    {
+        if (!(pCtx->cr0 & X86_CR0_TS))
+        {
+            /*
+             * Do the job.
+             */
+            VBOXSTRICTRC rcStrict = iemMemStoreDataU32(pVCpu, iEffSeg, GCPtrEff, pCtx->CTX_SUFF(pXState)->x87.MXCSR);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+                return VINF_SUCCESS;
+            }
+            return rcStrict;
+        }
+        return iemRaiseDeviceNotAvailable(pVCpu);
+    }
+    return iemRaiseUndefinedOpcode(pVCpu);
+}
+
+
+/**
+ * Implements 'LDMXCSR'.
+ *
+ * @param   iEffSeg         The effective segment.
+ * @param   GCPtrEff        The address of the image.
+ */
+IEM_CIMPL_DEF_2(iemCImpl_ldmxcsr, uint8_t, iEffSeg, RTGCPTR, GCPtrEff)
+{
+    PCPUMCTX pCtx = IEM_GET_CTX(pVCpu);
+
+    /*
+     * Raise exceptions.
+     */
+    /** @todo testcase - order of LDMXCSR faults.  Does \#PF, \#GP and \#SS
+     *        happen after or before \#UD and \#EM? */
+    if (   !(pCtx->cr0 & X86_CR0_EM)
+        && (pCtx->cr4 & X86_CR4_OSFXSR))
+    {
+        if (!(pCtx->cr0 & X86_CR0_TS))
+        {
+            /*
+             * Do the job.
+             */
+            uint32_t fNewMxCsr;
+            VBOXSTRICTRC rcStrict = iemMemFetchDataU32(pVCpu, &fNewMxCsr, iEffSeg, GCPtrEff);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                uint32_t const fMxCsrMask = CPUMGetGuestMxCsrMask(pVCpu->CTX_SUFF(pVM));
+                if (!(fNewMxCsr & ~fMxCsrMask))
+                {
+                    pCtx->CTX_SUFF(pXState)->x87.MXCSR = fNewMxCsr;
+                    iemRegAddToRipAndClearRF(pVCpu, cbInstr);
+                    return VINF_SUCCESS;
+                }
+                Log(("lddmxcsr: New MXCSR=%#RX32 & ~MASK=%#RX32 = %#RX32 -> #GP(0)\n",
+                     fNewMxCsr, fMxCsrMask, fNewMxCsr & ~fMxCsrMask));
+                return iemRaiseGeneralProtectionFault0(pVCpu);
+            }
+            return rcStrict;
+        }
+        return iemRaiseDeviceNotAvailable(pVCpu);
+    }
+    return iemRaiseUndefinedOpcode(pVCpu);
+}
+
+
+/**
 * Common routine for fnstenv and fnsave.
  *
  * @param   uPtr                Where to store the state.
diff --git a/src/VBox/VMM/VMMAll/IEMAllCImplStrInstr.cpp.h b/src/VBox/VMM/VMMAll/IEMAllCImplStrInstr.cpp.h
index 295eb31..1040c52 100644
--- a/src/VBox/VMM/VMMAll/IEMAllCImplStrInstr.cpp.h
+++ b/src/VBox/VMM/VMMAll/IEMAllCImplStrInstr.cpp.h
@@ -1217,6 +1217,23 @@ IEM_CIMPL_DEF_1(RT_CONCAT4(iemCImpl_ins_op,OP_SIZE,_addr,ADDR_SIZE), bool, fIoCh
             return rcStrict;
     }
 
+    /*
+     * Check SVM nested-guest IO intercept.
+     */
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_IOIO_PROT))
+    {
+        rcStrict = iemSvmHandleIOIntercept(pVCpu, pCtx->dx, SVMIOIOTYPE_IN, OP_SIZE / 8, ADDR_SIZE, X86_SREG_ES, false /* fRep */,
+                                           true /* fStrIo */, cbInstr);
+        if (rcStrict == VINF_SVM_VMEXIT)
+            return VINF_SUCCESS;
+        if (rcStrict != VINF_HM_INTERCEPT_NOT_ACTIVE)
+        {
+            Log(("iemCImpl_ins_op: iemSvmHandleIOIntercept failed (u16Port=%#x, cbReg=%u) rc=%Rrc\n", pCtx->dx, OP_SIZE / 8,
+                 VBOXSTRICTRC_VAL(rcStrict)));
+            return rcStrict;
+        }
+    }
+
     OP_TYPE        *puMem;
     rcStrict = iemMemMap(pVCpu, (void **)&puMem, OP_SIZE / 8, X86_SREG_ES, pCtx->ADDR_rDI, IEM_ACCESS_DATA_W);
     if (rcStrict != VINF_SUCCESS)
@@ -1270,6 +1287,23 @@ IEM_CIMPL_DEF_1(RT_CONCAT4(iemCImpl_rep_ins_op,OP_SIZE,_addr,ADDR_SIZE), bool, f
             return rcStrict;
     }
 
+    /*
+     * Check SVM nested-guest IO intercept.
+     */
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_IOIO_PROT))
+    {
+        rcStrict = iemSvmHandleIOIntercept(pVCpu, u16Port, SVMIOIOTYPE_IN, OP_SIZE / 8, ADDR_SIZE, X86_SREG_ES, true /* fRep */,
+                                           true /* fStrIo */, cbInstr);
+        if (rcStrict == VINF_SVM_VMEXIT)
+            return VINF_SUCCESS;
+        if (rcStrict != VINF_HM_INTERCEPT_NOT_ACTIVE)
+        {
+            Log(("iemCImpl_rep_ins_op: iemSvmHandleIOIntercept failed (u16Port=%#x, cbReg=%u) rc=%Rrc\n", u16Port, OP_SIZE / 8,
+                 VBOXSTRICTRC_VAL(rcStrict)));
+            return rcStrict;
+        }
+    }
+
     ADDR_TYPE       uCounterReg = pCtx->ADDR_rCX;
     if (uCounterReg == 0)
     {
@@ -1454,6 +1488,23 @@ IEM_CIMPL_DEF_2(RT_CONCAT4(iemCImpl_outs_op,OP_SIZE,_addr,ADDR_SIZE), uint8_t, i
             return rcStrict;
     }
 
+    /*
+     * Check SVM nested-guest IO intercept.
+     */
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_IOIO_PROT))
+    {
+        rcStrict = iemSvmHandleIOIntercept(pVCpu, pCtx->dx, SVMIOIOTYPE_OUT, OP_SIZE / 8, ADDR_SIZE, iEffSeg, false /* fRep */,
+                                           true /* fStrIo */, cbInstr);
+        if (rcStrict == VINF_SVM_VMEXIT)
+            return VINF_SUCCESS;
+        if (rcStrict != VINF_HM_INTERCEPT_NOT_ACTIVE)
+        {
+            Log(("iemCImpl_outs_op: iemSvmHandleIOIntercept failed (u16Port=%#x, cbReg=%u) rc=%Rrc\n", pCtx->dx, OP_SIZE / 8,
+                 VBOXSTRICTRC_VAL(rcStrict)));
+            return rcStrict;
+        }
+    }
+
     OP_TYPE uValue;
     rcStrict = RT_CONCAT(iemMemFetchDataU,OP_SIZE)(pVCpu, &uValue, iEffSeg, pCtx->ADDR_rSI);
     if (rcStrict == VINF_SUCCESS)
@@ -1497,6 +1548,23 @@ IEM_CIMPL_DEF_2(RT_CONCAT4(iemCImpl_rep_outs_op,OP_SIZE,_addr,ADDR_SIZE), uint8_
             return rcStrict;
     }
 
+    /*
+     * Check SVM nested-guest IO intercept.
+     */
+    if (IEM_IS_SVM_CTRL_INTERCEPT_SET(pVCpu, SVM_CTRL_INTERCEPT_IOIO_PROT))
+    {
+        rcStrict = iemSvmHandleIOIntercept(pVCpu, u16Port, SVMIOIOTYPE_OUT, OP_SIZE / 8, ADDR_SIZE, iEffSeg, true /* fRep */,
+                                           true /* fStrIo */, cbInstr);
+        if (rcStrict == VINF_SVM_VMEXIT)
+            return VINF_SUCCESS;
+        if (rcStrict != VINF_HM_INTERCEPT_NOT_ACTIVE)
+        {
+            Log(("iemCImpl_rep_outs_op: iemSvmHandleIOIntercept failed (u16Port=%#x, cbReg=%u) rc=%Rrc\n", u16Port, OP_SIZE / 8,
+                 VBOXSTRICTRC_VAL(rcStrict)));
+            return rcStrict;
+        }
+    }
+
     ADDR_TYPE       uCounterReg = pCtx->ADDR_rCX;
     if (uCounterReg == 0)
     {
diff --git a/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h b/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h
index 1aec818..c93edfe 100644
--- a/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h
+++ b/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h
@@ -534,7 +534,7 @@ FNIEMOP_DEF_1(iemOpHlpBinaryOperator_rAX_Iz, PCIEMOPBINSIZES, pImpl)
 /** Opcodes 0xf1, 0xd6. */
 FNIEMOP_DEF(iemOp_Invalid)
 {
-    IEMOP_MNEMONIC("Invalid");
+    IEMOP_MNEMONIC(Invalid, "Invalid");
     return IEMOP_RAISE_INVALID_OPCODE();
 }
 
@@ -543,17595 +543,213 @@ FNIEMOP_DEF(iemOp_Invalid)
 FNIEMOPRM_DEF(iemOp_InvalidWithRM)
 {
     RT_NOREF_PV(bRm);
-    IEMOP_MNEMONIC("InvalidWithRM");
+    IEMOP_MNEMONIC(InvalidWithRm, "InvalidWithRM");
     return IEMOP_RAISE_INVALID_OPCODE();
 }
 
 
-
-/** @name ..... opcodes.
- *
- * @{
- */
-
-/** @}  */
-
-
-/** @name Two byte opcodes (first byte 0x0f).
- *
- * @{
- */
-
-/** Opcode 0x0f 0x00 /0. */
-FNIEMOPRM_DEF(iemOp_Grp6_sldt)
-{
-    IEMOP_MNEMONIC("sldt Rv/Mw");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DECODED_NL_1(OP_SLDT, IEMOPFORM_M_REG, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Ldtr);
-                IEM_MC_FETCH_LDTR_U16(u16Ldtr);
-                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Ldtr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Ldtr);
-                IEM_MC_FETCH_LDTR_U32(u32Ldtr);
-                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Ldtr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Ldtr);
-                IEM_MC_FETCH_LDTR_U64(u64Ldtr);
-                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Ldtr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint16_t, u16Ldtr);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DECODED_NL_1(OP_SLDT, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        IEM_MC_FETCH_LDTR_U16(u16Ldtr);
-        IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Ldtr);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x00 /1. */
-FNIEMOPRM_DEF(iemOp_Grp6_str)
-{
-    IEMOP_MNEMONIC("str Rv/Mw");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DECODED_NL_1(OP_STR, IEMOPFORM_M_REG, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Tr);
-                IEM_MC_FETCH_TR_U16(u16Tr);
-                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Tr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Tr);
-                IEM_MC_FETCH_TR_U32(u32Tr);
-                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Tr);
-                IEM_MC_FETCH_TR_U64(u64Tr);
-                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint16_t, u16Tr);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DECODED_NL_1(OP_STR, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        IEM_MC_FETCH_TR_U16(u16Tr);
-        IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Tr);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x00 /2. */
-FNIEMOPRM_DEF(iemOp_Grp6_lldt)
-{
-    IEMOP_MNEMONIC("lldt Ew");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DECODED_NL_1(OP_LLDT, IEMOPFORM_M_REG, OP_PARM_Ew, DISOPTYPE_DANGEROUS);
-        IEM_MC_BEGIN(1, 0);
-        IEM_MC_ARG(uint16_t, u16Sel, 0);
-        IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_lldt, u16Sel);
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(1, 1);
-        IEM_MC_ARG(uint16_t, u16Sel, 0);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DECODED_NL_1(OP_LLDT, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS);
-        IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO(); /** @todo test order */
-        IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_lldt, u16Sel);
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x00 /3. */
-FNIEMOPRM_DEF(iemOp_Grp6_ltr)
-{
-    IEMOP_MNEMONIC("ltr Ew");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(1, 0);
-        IEM_MC_ARG(uint16_t, u16Sel, 0);
-        IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_ltr, u16Sel);
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(1, 1);
-        IEM_MC_ARG(uint16_t, u16Sel, 0);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO(); /** @todo test order */
-        IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_ltr, u16Sel);
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Common worker for VERR (0x0f 0x00 /4) and VERW (0x0f 0x00 /5). */
-FNIEMOP_DEF_2(iemOpCommonGrp6VerX, uint8_t, bRm, bool, fWrite)
-{
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DECODED_NL_1(fWrite ? OP_VERW : OP_VERR, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        IEM_MC_BEGIN(2, 0);
-        IEM_MC_ARG(uint16_t,    u16Sel,            0);
-        IEM_MC_ARG_CONST(bool,  fWriteArg, fWrite, 1);
-        IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_CALL_CIMPL_2(iemCImpl_VerX, u16Sel, fWriteArg);
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(2, 1);
-        IEM_MC_ARG(uint16_t,    u16Sel,            0);
-        IEM_MC_ARG_CONST(bool,  fWriteArg, fWrite, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DECODED_NL_1(fWrite ? OP_VERW : OP_VERR, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-        IEM_MC_CALL_CIMPL_2(iemCImpl_VerX, u16Sel, fWriteArg);
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x00 /4. */
-FNIEMOPRM_DEF(iemOp_Grp6_verr)
-{
-    IEMOP_MNEMONIC("verr Ew");
-    IEMOP_HLP_MIN_286();
-    return FNIEMOP_CALL_2(iemOpCommonGrp6VerX, bRm, false);
-}
-
-
-/** Opcode 0x0f 0x00 /5. */
-FNIEMOPRM_DEF(iemOp_Grp6_verw)
-{
-    IEMOP_MNEMONIC("verr Ew");
-    IEMOP_HLP_MIN_286();
-    return FNIEMOP_CALL_2(iemOpCommonGrp6VerX, bRm, true);
-}
-
-
-/**
- * Group 6 jump table.
- */
-IEM_STATIC const PFNIEMOPRM g_apfnGroup6[8] =
-{
-    iemOp_Grp6_sldt,
-    iemOp_Grp6_str,
-    iemOp_Grp6_lldt,
-    iemOp_Grp6_ltr,
-    iemOp_Grp6_verr,
-    iemOp_Grp6_verw,
-    iemOp_InvalidWithRM,
-    iemOp_InvalidWithRM
-};
-
-/** Opcode 0x0f 0x00. */
-FNIEMOP_DEF(iemOp_Grp6)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    return FNIEMOP_CALL_1(g_apfnGroup6[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK], bRm);
-}
-
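
This dispatcher (like iemOp_Grp7 further down) indexes its handler table by the ModR/M reg field. For reference, a self-contained decode of the three ModR/M fields using the architectural layout that the X86_MODRM_* shift/mask constants encode (mod in bits 7-6, reg in bits 5-3, rm in bits 2-0):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t bRm = 0xd0;               /* example: mod=3, reg=2, rm=0 */
        unsigned mod = (bRm >> 6) & 3;
        unsigned reg = (bRm >> 3) & 7;    /* selects the g_apfnGroup6 entry */
        unsigned rm  = bRm & 7;
        printf("mod=%u /%u rm=%u\n", mod, reg, rm);  /* mod=3 /2 rm=0 */
        return 0;
    }
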
-
-/** Opcode 0x0f 0x01 /0. */
-FNIEMOP_DEF_1(iemOp_Grp7_sgdt, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("sgdt Ms");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_64BIT_OP_SIZE();
-    IEM_MC_BEGIN(2, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                                    0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                                1);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_2(iemCImpl_sgdt, iEffSeg, GCPtrEffSrc);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 /0. */
-FNIEMOP_DEF(iemOp_Grp7_vmcall)
-{
-    IEMOP_BITCH_ABOUT_STUB();
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 /0. */
-FNIEMOP_DEF(iemOp_Grp7_vmlaunch)
-{
-    IEMOP_BITCH_ABOUT_STUB();
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 /0. */
-FNIEMOP_DEF(iemOp_Grp7_vmresume)
-{
-    IEMOP_BITCH_ABOUT_STUB();
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 /0. */
-FNIEMOP_DEF(iemOp_Grp7_vmxoff)
-{
-    IEMOP_BITCH_ABOUT_STUB();
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 /1. */
-FNIEMOP_DEF_1(iemOp_Grp7_sidt, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("sidt Ms");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_64BIT_OP_SIZE();
-    IEM_MC_BEGIN(2, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                                    0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                                1);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_2(iemCImpl_sidt, iEffSeg, GCPtrEffSrc);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 /1. */
-FNIEMOP_DEF(iemOp_Grp7_monitor)
-{
-    IEMOP_MNEMONIC("monitor");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX(); /** @todo Verify that monitor is allergic to lock prefixes. */
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_monitor, pVCpu->iem.s.iEffSeg);
-}
-
-
-/** Opcode 0x0f 0x01 /1. */
-FNIEMOP_DEF(iemOp_Grp7_mwait)
-{
-    IEMOP_MNEMONIC("mwait"); /** @todo Verify that mwait is allergic to lock prefixes. */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_mwait);
-}
-
-
-/** Opcode 0x0f 0x01 /2. */
-FNIEMOP_DEF_1(iemOp_Grp7_lgdt, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("lgdt");
-    IEMOP_HLP_64BIT_OP_SIZE();
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                                    0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                                1);
-    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSizeArg,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_lgdt, iEffSeg, GCPtrEffSrc, enmEffOpSizeArg);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 0xd0. */
-FNIEMOP_DEF(iemOp_Grp7_xgetbv)
-{
-    IEMOP_MNEMONIC("xgetbv");
-    if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fXSaveRstor)
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_xgetbv);
-    }
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 0xd1. */
-FNIEMOP_DEF(iemOp_Grp7_xsetbv)
-{
-    IEMOP_MNEMONIC("xsetbv");
-    if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fXSaveRstor)
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_xsetbv);
-    }
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 /3. */
-FNIEMOP_DEF_1(iemOp_Grp7_lidt, uint8_t, bRm)
-{
-    IEMMODE enmEffOpSize = pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT
-                         ? IEMMODE_64BIT
-                         : pVCpu->iem.s.enmEffOpSize;
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                            0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                        1);
-    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSizeArg,/*=*/enmEffOpSize,  2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_lidt, iEffSeg, GCPtrEffSrc, enmEffOpSizeArg);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 0xd8. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmrun);
-
-/** Opcode 0x0f 0x01 0xd9. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmmcall);
-
-/** Opcode 0x0f 0x01 0xda. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmload);
-
-/** Opcode 0x0f 0x01 0xdb. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmsave);
-
-/** Opcode 0x0f 0x01 0xdc. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_stgi);
-
-/** Opcode 0x0f 0x01 0xdd. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_clgi);
-
-/** Opcode 0x0f 0x01 0xde. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_skinit);
-
-/** Opcode 0x0f 0x01 0xdf. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_invlpga);
-
-/** Opcode 0x0f 0x01 /4. */
-FNIEMOP_DEF_1(iemOp_Grp7_smsw, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("smsw");
-    IEMOP_HLP_MIN_286();
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Tmp);
-                IEM_MC_FETCH_CR0_U16(u16Tmp);
-                if (IEM_GET_TARGET_CPU(pVCpu) > IEMTARGETCPU_386)
-                { /* likely */ }
-                else if (IEM_GET_TARGET_CPU(pVCpu) >= IEMTARGETCPU_386)
-                    IEM_MC_OR_LOCAL_U16(u16Tmp, 0xffe0);
-                else
-                    IEM_MC_OR_LOCAL_U16(u16Tmp, 0xfff0);
-                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Tmp);
-                IEM_MC_FETCH_CR0_U32(u32Tmp);
-                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                IEM_MC_FETCH_CR0_U64(u64Tmp);
-                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* Ignore operand size here, memory refs are always 16-bit. */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint16_t, u16Tmp);
-        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_CR0_U16(u16Tmp);
-        if (IEM_GET_TARGET_CPU(pVCpu) > IEMTARGETCPU_386)
-        { /* likely */ }
-        else if (pVCpu->iem.s.uTargetCpu >= IEMTARGETCPU_386)
-            IEM_MC_OR_LOCAL_U16(u16Tmp, 0xffe0);
-        else
-            IEM_MC_OR_LOCAL_U16(u16Tmp, 0xfff0);
-        IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Tmp);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-        return VINF_SUCCESS;
-    }
-}
-
-
-/** Opcode 0x0f 0x01 /6. */
-FNIEMOP_DEF_1(iemOp_Grp7_lmsw, uint8_t, bRm)
-{
-    /* The operand size is effectively ignored; everything is 16-bit and only
-       the lower 4 bits (PE, MP, EM, TS) are used. */
-    IEMOP_MNEMONIC("lmsw");
-    IEMOP_HLP_MIN_286();
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(1, 0);
-        IEM_MC_ARG(uint16_t, u16Tmp, 0);
-        IEM_MC_FETCH_GREG_U16(u16Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_lmsw, u16Tmp);
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(1, 1);
-        IEM_MC_ARG(uint16_t, u16Tmp, 0);
-        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_MEM_U16(u16Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_lmsw, u16Tmp);
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
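
For context on the lmsw worker above: architecturally, LMSW loads only the low four CR0 bits (PE, MP, EM, TS), and it can set PE but never clear it; the actual register update is deferred to iemCImpl_lmsw. A sketch of that architectural rule, with illustrative constants rather than VBox's X86_CR0_* names:

    #include <stdint.h>

    #define CR0_PE        0x1u
    #define CR0_MSW_BITS  0xfu   /* PE | MP | EM | TS */

    /* PE is sticky: once set, LMSW cannot clear it (Intel SDM). */
    static uint32_t lmsw_apply(uint32_t cr0, uint16_t u16NewMsw)
    {
        uint32_t uNew = (u16NewMsw & CR0_MSW_BITS) | (cr0 & CR0_PE);
        return (cr0 & ~(uint32_t)CR0_MSW_BITS) | uNew;
    }
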
-
-/** Opcode 0x0f 0x01 /7. */
-FNIEMOP_DEF_1(iemOp_Grp7_invlpg, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("invlpg");
-    IEMOP_HLP_MIN_486();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(1, 1);
-    IEM_MC_ARG(RTGCPTR, GCPtrEffDst, 0);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEM_MC_CALL_CIMPL_1(iemCImpl_invlpg, GCPtrEffDst);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 /7. */
-FNIEMOP_DEF(iemOp_Grp7_swapgs)
-{
-    IEMOP_MNEMONIC("swapgs");
-    IEMOP_HLP_ONLY_64BIT();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_swapgs);
-}
-
-
-/** Opcode 0x0f 0x01 /7. */
-FNIEMOP_DEF(iemOp_Grp7_rdtscp)
-{
-    NOREF(pVCpu);
-    IEMOP_BITCH_ABOUT_STUB();
-    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
-}
-
-
-/** Opcode 0x0f 0x01. */
-FNIEMOP_DEF(iemOp_Grp7)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0:
-            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp7_sgdt, bRm);
-            switch (bRm & X86_MODRM_RM_MASK)
-            {
-                case 1: return FNIEMOP_CALL(iemOp_Grp7_vmcall);
-                case 2: return FNIEMOP_CALL(iemOp_Grp7_vmlaunch);
-                case 3: return FNIEMOP_CALL(iemOp_Grp7_vmresume);
-                case 4: return FNIEMOP_CALL(iemOp_Grp7_vmxoff);
-            }
-            return IEMOP_RAISE_INVALID_OPCODE();
-
-        case 1:
-            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp7_sidt, bRm);
-            switch (bRm & X86_MODRM_RM_MASK)
-            {
-                case 0: return FNIEMOP_CALL(iemOp_Grp7_monitor);
-                case 1: return FNIEMOP_CALL(iemOp_Grp7_mwait);
-            }
-            return IEMOP_RAISE_INVALID_OPCODE();
-
-        case 2:
-            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp7_lgdt, bRm);
-            switch (bRm & X86_MODRM_RM_MASK)
-            {
-                case 0: return FNIEMOP_CALL(iemOp_Grp7_xgetbv);
-                case 1: return FNIEMOP_CALL(iemOp_Grp7_xsetbv);
-            }
-            return IEMOP_RAISE_INVALID_OPCODE();
-
-        case 3:
-            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp7_lidt, bRm);
-            switch (bRm & X86_MODRM_RM_MASK)
-            {
-                case 0: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmrun);
-                case 1: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmmcall);
-                case 2: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmload);
-                case 3: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmsave);
-                case 4: return FNIEMOP_CALL(iemOp_Grp7_Amd_stgi);
-                case 5: return FNIEMOP_CALL(iemOp_Grp7_Amd_clgi);
-                case 6: return FNIEMOP_CALL(iemOp_Grp7_Amd_skinit);
-                case 7: return FNIEMOP_CALL(iemOp_Grp7_Amd_invlpga);
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-
-        case 4:
-            return FNIEMOP_CALL_1(iemOp_Grp7_smsw, bRm);
-
-        case 5:
-            return IEMOP_RAISE_INVALID_OPCODE();
-
-        case 6:
-            return FNIEMOP_CALL_1(iemOp_Grp7_lmsw, bRm);
-
-        case 7:
-            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp7_invlpg, bRm);
-            switch (bRm & X86_MODRM_RM_MASK)
-            {
-                case 0: return FNIEMOP_CALL(iemOp_Grp7_swapgs);
-                case 1: return FNIEMOP_CALL(iemOp_Grp7_rdtscp);
-            }
-            return IEMOP_RAISE_INVALID_OPCODE();
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-/** Common worker for LAR (0x0f 0x02) and LSL (0x0f 0x03). */
-FNIEMOP_DEF_1(iemOpCommonLarLsl_Gv_Ew, bool, fIsLar)
-{
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DECODED_NL_2(fIsLar ? OP_LAR : OP_LSL, IEMOPFORM_RM_REG, OP_PARM_Gv, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-            {
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,  pu16Dst,           0);
-                IEM_MC_ARG(uint16_t,    u16Sel,            1);
-                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
-
-                IEM_MC_REF_GREG_U16(pu16Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u16, pu16Dst, u16Sel, fIsLarArg);
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_32BIT:
-            case IEMMODE_64BIT:
-            {
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,  pu64Dst,           0);
-                IEM_MC_ARG(uint16_t,    u16Sel,            1);
-                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
-
-                IEM_MC_REF_GREG_U64(pu64Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u64, pu64Dst, u16Sel, fIsLarArg);
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-            {
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint16_t *,  pu16Dst,           0);
-                IEM_MC_ARG(uint16_t,    u16Sel,            1);
-                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
-                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DECODED_NL_2(fIsLar ? OP_LAR : OP_LSL, IEMOPFORM_RM_MEM, OP_PARM_Gv, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-
-                IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_REF_GREG_U16(pu16Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u16, pu16Dst, u16Sel, fIsLarArg);
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_32BIT:
-            case IEMMODE_64BIT:
-            {
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint64_t *,  pu64Dst,           0);
-                IEM_MC_ARG(uint16_t,    u16Sel,            1);
-                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
-                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DECODED_NL_2(fIsLar ? OP_LAR : OP_LSL, IEMOPFORM_RM_MEM, OP_PARM_Gv, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-/** @todo testcase: make sure it's a 16-bit read. */
-
-                IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_REF_GREG_U64(pu64Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u64, pu64Dst, u16Sel, fIsLarArg);
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-
-/** Opcode 0x0f 0x02. */
-FNIEMOP_DEF(iemOp_lar_Gv_Ew)
-{
-    IEMOP_MNEMONIC("lar Gv,Ew");
-    return FNIEMOP_CALL_1(iemOpCommonLarLsl_Gv_Ew, true);
-}
-
-
-/** Opcode 0x0f 0x03. */
-FNIEMOP_DEF(iemOp_lsl_Gv_Ew)
-{
-    IEMOP_MNEMONIC("lsl Gv,Ew");
-    return FNIEMOP_CALL_1(iemOpCommonLarLsl_Gv_Ew, false);
-}
-
-
-/** Opcode 0x0f 0x05. */
-FNIEMOP_DEF(iemOp_syscall)
-{
-    IEMOP_MNEMONIC("syscall"); /** @todo 286 LOADALL   */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_syscall);
-}
-
-
-/** Opcode 0x0f 0x06. */
-FNIEMOP_DEF(iemOp_clts)
-{
-    IEMOP_MNEMONIC("clts");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_clts);
-}
-
-
-/** Opcode 0x0f 0x07. */
-FNIEMOP_DEF(iemOp_sysret)
-{
-    IEMOP_MNEMONIC("sysret");  /** @todo 386 LOADALL   */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_sysret);
-}
-
-
-/** Opcode 0x0f 0x08. */
-FNIEMOP_STUB(iemOp_invd);
-// IEMOP_HLP_MIN_486();
-
-
-/** Opcode 0x0f 0x09. */
-FNIEMOP_DEF(iemOp_wbinvd)
-{
-    IEMOP_MNEMONIC("wbinvd");
-    IEMOP_HLP_MIN_486();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO();
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS; /* ignore for now */
-}
-
-
-/** Opcode 0x0f 0x0b. */
-FNIEMOP_DEF(iemOp_ud2)
-{
-    IEMOP_MNEMONIC("ud2");
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-/** Opcode 0x0f 0x0d. */
-FNIEMOP_DEF(iemOp_nop_Ev_GrpP)
-{
-    /* AMD prefetch group; Intel implements this as NOP Ev (and so do we). */
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->f3DNowPrefetch)
-    {
-        IEMOP_MNEMONIC("GrpP");
-        return IEMOP_RAISE_INVALID_OPCODE();
-    }
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_MNEMONIC("GrpP");
-        return IEMOP_RAISE_INVALID_OPCODE();
-    }
-
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 2: /* Aliased to /0 for the time being. */
-        case 4: /* Aliased to /0 for the time being. */
-        case 5: /* Aliased to /0 for the time being. */
-        case 6: /* Aliased to /0 for the time being. */
-        case 7: /* Aliased to /0 for the time being. */
-        case 0: IEMOP_MNEMONIC("prefetch"); break;
-        case 1: IEMOP_MNEMONIC("prefetchw"); break;
-        case 3: IEMOP_MNEMONIC("prefetchw"); break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    /* Currently a NOP. */
-    NOREF(GCPtrEffSrc);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x0e. */
-FNIEMOP_STUB(iemOp_femms);
-
-
-/** Opcode 0x0f 0x0f 0x0c. */
-FNIEMOP_STUB(iemOp_3Dnow_pi2fw_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x0d. */
-FNIEMOP_STUB(iemOp_3Dnow_pi2fd_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x1c. */
-FNIEMOP_STUB(iemOp_3Dnow_pf2fw_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x1d. */
-FNIEMOP_STUB(iemOp_3Dnow_pf2fd_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x8a. */
-FNIEMOP_STUB(iemOp_3Dnow_pfnacc_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x8e. */
-FNIEMOP_STUB(iemOp_3Dnow_pfpnacc_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x90. */
-FNIEMOP_STUB(iemOp_3Dnow_pfcmpge_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x94. */
-FNIEMOP_STUB(iemOp_3Dnow_pfmin_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x96. */
-FNIEMOP_STUB(iemOp_3Dnow_pfrcp_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x97. */
-FNIEMOP_STUB(iemOp_3Dnow_pfrsqrt_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x9a. */
-FNIEMOP_STUB(iemOp_3Dnow_pfsub_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x9e. */
-FNIEMOP_STUB(iemOp_3Dnow_pfadd_PQ_Qq);
-
-/** Opcode 0x0f 0x0f 0xa0. */
-FNIEMOP_STUB(iemOp_3Dnow_pfcmpgt_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xa4. */
-FNIEMOP_STUB(iemOp_3Dnow_pfmax_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xa6. */
-FNIEMOP_STUB(iemOp_3Dnow_pfrcpit1_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xa7. */
-FNIEMOP_STUB(iemOp_3Dnow_pfrsqit1_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xaa. */
-FNIEMOP_STUB(iemOp_3Dnow_pfsubr_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xae. */
-FNIEMOP_STUB(iemOp_3Dnow_pfacc_PQ_Qq);
-
-/** Opcode 0x0f 0x0f 0xb0. */
-FNIEMOP_STUB(iemOp_3Dnow_pfcmpeq_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xb4. */
-FNIEMOP_STUB(iemOp_3Dnow_pfmul_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xb6. */
-FNIEMOP_STUB(iemOp_3Dnow_pfrcpit2_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xb7. */
-FNIEMOP_STUB(iemOp_3Dnow_pmulhrw_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xbb. */
-FNIEMOP_STUB(iemOp_3Dnow_pswapd_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xbf. */
-FNIEMOP_STUB(iemOp_3Dnow_pavgusb_PQ_Qq);
-
-
-/** Opcode 0x0f 0x0f. */
-FNIEMOP_DEF(iemOp_3Dnow)
-{
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->f3DNow)
-    {
-        IEMOP_MNEMONIC("3Dnow");
-        return IEMOP_RAISE_INVALID_OPCODE();
-    }
-
-    /* This is pretty sparse, use switch instead of table. */
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    switch (b)
-    {
-        case 0x0c: return FNIEMOP_CALL(iemOp_3Dnow_pi2fw_Pq_Qq);
-        case 0x0d: return FNIEMOP_CALL(iemOp_3Dnow_pi2fd_Pq_Qq);
-        case 0x1c: return FNIEMOP_CALL(iemOp_3Dnow_pf2fw_Pq_Qq);
-        case 0x1d: return FNIEMOP_CALL(iemOp_3Dnow_pf2fd_Pq_Qq);
-        case 0x8a: return FNIEMOP_CALL(iemOp_3Dnow_pfnacc_Pq_Qq);
-        case 0x8e: return FNIEMOP_CALL(iemOp_3Dnow_pfpnacc_Pq_Qq);
-        case 0x90: return FNIEMOP_CALL(iemOp_3Dnow_pfcmpge_Pq_Qq);
-        case 0x94: return FNIEMOP_CALL(iemOp_3Dnow_pfmin_Pq_Qq);
-        case 0x96: return FNIEMOP_CALL(iemOp_3Dnow_pfrcp_Pq_Qq);
-        case 0x97: return FNIEMOP_CALL(iemOp_3Dnow_pfrsqrt_Pq_Qq);
-        case 0x9a: return FNIEMOP_CALL(iemOp_3Dnow_pfsub_Pq_Qq);
-        case 0x9e: return FNIEMOP_CALL(iemOp_3Dnow_pfadd_PQ_Qq);
-        case 0xa0: return FNIEMOP_CALL(iemOp_3Dnow_pfcmpgt_Pq_Qq);
-        case 0xa4: return FNIEMOP_CALL(iemOp_3Dnow_pfmax_Pq_Qq);
-        case 0xa6: return FNIEMOP_CALL(iemOp_3Dnow_pfrcpit1_Pq_Qq);
-        case 0xa7: return FNIEMOP_CALL(iemOp_3Dnow_pfrsqit1_Pq_Qq);
-        case 0xaa: return FNIEMOP_CALL(iemOp_3Dnow_pfsubr_Pq_Qq);
-        case 0xae: return FNIEMOP_CALL(iemOp_3Dnow_pfacc_PQ_Qq);
-        case 0xb0: return FNIEMOP_CALL(iemOp_3Dnow_pfcmpeq_Pq_Qq);
-        case 0xb4: return FNIEMOP_CALL(iemOp_3Dnow_pfmul_Pq_Qq);
-        case 0xb6: return FNIEMOP_CALL(iemOp_3Dnow_pfrcpit2_Pq_Qq);
-        case 0xb7: return FNIEMOP_CALL(iemOp_3Dnow_pmulhrw_Pq_Qq);
-        case 0xbb: return FNIEMOP_CALL(iemOp_3Dnow_pswapd_Pq_Qq);
-        case 0xbf: return FNIEMOP_CALL(iemOp_3Dnow_pavgusb_PQ_Qq);
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
-
-/** Opcode 0x0f 0x10. */
-FNIEMOP_STUB(iemOp_movups_Vps_Wps__movupd_Vpd_Wpd__movss_Vss_Wss__movsd_Vsd_Wsd);
-
-
-/** Opcode 0x0f 0x11. */
-FNIEMOP_DEF(iemOp_movups_Wps_Vps__movupd_Wpd_Vpd__movss_Wss_Vss__movsd_Vsd_Wsd)
-{
-    /* Quick hack. Need to restructure all of this later some time. */
-    uint32_t const fRelevantPrefix = pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ);
-    if (fRelevantPrefix == 0)
-    {
-        IEMOP_MNEMONIC("movups Wps,Vps");
-        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-        if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        {
-            /*
-             * Register, register.
-             */
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-            IEM_MC_BEGIN(0, 0);
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-            IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
-                                  ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            /*
-             * Memory, register.
-             */
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint128_t,                 uSrc); /** @todo optimize this one day... */
-            IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES(); /** @todo check if this is delayed this long for REPZ/NZ - yes it generally is! */
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-            IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_MEM_U128(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    else if (fRelevantPrefix == IEM_OP_PRF_REPNZ)
-    {
-        IEMOP_MNEMONIC("movsd Wsd,Vsd");
-        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-        if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        {
-            /*
-             * Register, register.
-             */
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t,                  uSrc);
-
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-            IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_XREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            /*
-             * Memory, register.
-             */
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint64_t,                  uSrc);
-            IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-            IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    else if (fRelevantPrefix == IEM_OP_PRF_REPZ /*0xf3*/)
-    {
-        IEMOP_MNEMONIC("movss Wss,Vss");
-        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-        if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        {
-            /*
-             * Register, register.
-             */
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t,                  uSrc);
-
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-            IEM_MC_FETCH_XREG_U32(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_XREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            /*
-             * Memory, register.
-             */
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint32_t,                  uSrc);
-            IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-            IEM_MC_FETCH_XREG_U32(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    else
-    {
-        IEMOP_BITCH_ABOUT_STUB();
-        return VERR_IEM_INSTR_NOT_IMPLEMENTED;
-    }
-    return VINF_SUCCESS;
-}
-
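
The "quick hack" above selects the instruction for opcode 0x0f 0x11 from the mandatory-prefix bits: no relevant prefix decodes movups, F2 (REPNZ) decodes movsd, F3 (REPZ) decodes movss, and the 66 (operand-size) case, movupd, still falls through to the stub. A compilable summary of that selection; the flag values are made up for illustration, the real IEM_OP_PRF_* constants are IEM-internal:

    #include <stdint.h>
    #include <stdio.h>

    #define PRF_SIZE_OP 0x1u   /* 0x66; illustrative values only */
    #define PRF_REPNZ   0x2u   /* 0xf2 */
    #define PRF_REPZ    0x4u   /* 0xf3 */

    static const char *op0f11_mnemonic(uint32_t fPrefixes)
    {
        switch (fPrefixes & (PRF_SIZE_OP | PRF_REPNZ | PRF_REPZ))
        {
            case 0:         return "movups Wps,Vps";
            case PRF_REPNZ: return "movsd Wsd,Vsd";
            case PRF_REPZ:  return "movss Wss,Vss";
            default:        return "stubbed (movupd and prefix combos)";
        }
    }

    int main(void)
    {
        printf("%s\n", op0f11_mnemonic(PRF_REPZ));  /* movss Wss,Vss */
        return 0;
    }
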
-
-/** Opcode 0x0f 0x12. */
-FNIEMOP_STUB(iemOp_movlps_Vq_Mq__movhlps_Vq_Uq__movlpd_Vq_Mq__movsldup_Vq_Wq__movddup_Vq_Wq); //NEXT
-
-
-/** Opcode 0x0f 0x13. */
-FNIEMOP_DEF(iemOp_movlps_Mq_Vq__movlpd_Mq_Vq)
-{
-    /* Quick hack. Need to restructure all of this later some time. */
-    if (pVCpu->iem.s.fPrefixes == IEM_OP_PRF_SIZE_OP)
-    {
-        IEMOP_MNEMONIC("movlpd Mq,Vq");
-        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-        if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        {
-#if 0
-            /*
-             * Register, register.
-             */
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t,                  uSrc);
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-            IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_XREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-#else
-            return IEMOP_RAISE_INVALID_OPCODE();
-#endif
-        }
-        else
-        {
-            /*
-             * Memory, register.
-             */
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint64_t,                  uSrc);
-            IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES(); /** @todo check if this is delayed this long for REPZ/NZ - yes it generally is! */
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-            IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        return VINF_SUCCESS;
-    }
-
-    IEMOP_BITCH_ABOUT_STUB();
-    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
-}
-
-
-/** Opcode 0x0f 0x14. */
-FNIEMOP_STUB(iemOp_unpckhlps_Vps_Wq__unpcklpd_Vpd_Wq);
-/** Opcode 0x0f 0x15. */
-FNIEMOP_STUB(iemOp_unpckhps_Vps_Wq__unpckhpd_Vpd_Wq);
-/** Opcode 0x0f 0x16. */
-FNIEMOP_STUB(iemOp_movhps_Vq_Mq__movlhps_Vq_Uq__movhpd_Vq_Mq__movshdup_Vq_Wq); //NEXT
-/** Opcode 0x0f 0x17. */
-FNIEMOP_STUB(iemOp_movhps_Mq_Vq__movhpd_Mq_Vq); //NEXT
-
-
-/** Opcode 0x0f 0x18. */
-FNIEMOP_DEF(iemOp_prefetch_Grp16)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 4: /* Aliased to /0 for the time being according to AMD. */
-            case 5: /* Aliased to /0 for the time being according to AMD. */
-            case 6: /* Aliased to /0 for the time being according to AMD. */
-            case 7: /* Aliased to /0 for the time being according to AMD. */
-            case 0: IEMOP_MNEMONIC("prefetchNTA m8"); break;
-            case 1: IEMOP_MNEMONIC("prefetchT0  m8"); break;
-            case 2: IEMOP_MNEMONIC("prefetchT1  m8"); break;
-            case 3: IEMOP_MNEMONIC("prefetchT2  m8"); break;
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        /* Currently a NOP. */
-        NOREF(GCPtrEffSrc);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-        return VINF_SUCCESS;
-    }
-
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x19..0x1f. */
-FNIEMOP_DEF(iemOp_nop_Ev)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        /* Currently a NOP. */
-        NOREF(GCPtrEffSrc);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x20. */
-FNIEMOP_DEF(iemOp_mov_Rd_Cd)
-{
-    /* mod is ignored, as are operand size overrides. */
-    IEMOP_MNEMONIC("mov Rd,Cd");
-    IEMOP_HLP_MIN_386();
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_64BIT;
-    else
-        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    uint8_t iCrReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg;
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK)
-    {
-        /* The lock prefix can be used to encode CR8 accesses on some CPUs. */
-        if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fMovCr8In32Bit)
-            return IEMOP_RAISE_INVALID_OPCODE(); /* #UD takes precedence over #GP(), see test. */
-        iCrReg |= 8;
-    }
-    switch (iCrReg)
-    {
-        case 0: case 2: case 3: case 4: case 8:
-            break;
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-    IEMOP_HLP_DONE_DECODING();
-
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Rd_Cd, (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB, iCrReg);
-}
-
-
-/** Opcode 0x0f 0x21. */
-FNIEMOP_DEF(iemOp_mov_Rd_Dd)
-{
-    IEMOP_MNEMONIC("mov Rd,Dd");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REX_R)
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Rd_Dd,
-                                   (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB,
-                                   ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK));
-}
-
-
-/** Opcode 0x0f 0x22. */
-FNIEMOP_DEF(iemOp_mov_Cd_Rd)
-{
-    /* mod is ignored, as are operand size overrides. */
-    IEMOP_MNEMONIC("mov Cd,Rd");
-    IEMOP_HLP_MIN_386();
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_64BIT;
-    else
-        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    uint8_t iCrReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg;
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK)
-    {
-        /* The lock prefix can be used to encode CR8 accesses on some CPUs. */
-        if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fMovCr8In32Bit)
-            return IEMOP_RAISE_INVALID_OPCODE(); /* #UD takes precedence over #GP(), see test. */
-        iCrReg |= 8;
-    }
-    switch (iCrReg)
-    {
-        case 0: case 2: case 3: case 4: case 8:
-            break;
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-    IEMOP_HLP_DONE_DECODING();
-
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Cd_Rd, iCrReg, (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB);
-}
-
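
Both CR-move decoders above share one quirk worth spelling out: on CPUs advertising the AMD alternative encoding (fMovCr8In32Bit), a LOCK prefix turns a CR0 access into a CR8 access; otherwise LOCK raises #UD, and any CR other than 0, 2, 3, 4 or 8 is #UD as well. A standalone sketch of the index computation; the helper name and boolean parameters are mine, not VBox's:

    #include <stdint.h>
    #include <stdbool.h>

    /* Returns the selected CR index, or -1 where the decoder raises #UD. */
    static int decode_cr_index(uint8_t bRm, unsigned uRexReg /* 0 or 8 */,
                               bool fLock, bool fMovCr8In32Bit)
    {
        int iCrReg = ((bRm >> 3) & 7) | (int)uRexReg;
        if (fLock)
        {
            if (!fMovCr8In32Bit)
                return -1;        /* #UD takes precedence over #GP here */
            iCrReg |= 8;
        }
        switch (iCrReg)
        {
            case 0: case 2: case 3: case 4: case 8:
                return iCrReg;
            default:
                return -1;        /* #UD */
        }
    }
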
-
-/** Opcode 0x0f 0x23. */
-FNIEMOP_DEF(iemOp_mov_Dd_Rd)
-{
-    IEMOP_MNEMONIC("mov Dd,Rd");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REX_R)
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Dd_Rd,
-                                   ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK),
-                                   (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0x24. */
-FNIEMOP_DEF(iemOp_mov_Rd_Td)
-{
-    IEMOP_MNEMONIC("mov Rd,Td");
-    /** @todo works on 386 and 486. */
-    /* The RM byte is not considered, see testcase. */
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x26. */
-FNIEMOP_DEF(iemOp_mov_Td_Rd)
-{
-    IEMOP_MNEMONIC("mov Td,Rd");
-    /** @todo works on 386 and 486. */
-    /* The RM byte is not considered, see testcase. */
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x28. */
-FNIEMOP_DEF(iemOp_movaps_Vps_Wps__movapd_Vpd_Wpd)
-{
-    IEMOP_MNEMONIC(!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? "movaps r,mr" : "movapd r,mr");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /*
-         * Register, register.
-         */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-        IEM_MC_BEGIN(0, 0);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP))
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-        else
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-        IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
-                              (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * Register, memory.
-         */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint128_t,                 uSrc); /** @todo optimize this one day... */
-        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES(); /** @todo check if this is delayed this long for REPZ/NZ */
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP))
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-        else
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-
-        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-        IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x29. */
-FNIEMOP_DEF(iemOp_movaps_Wps_Vps__movapd_Wpd_Vpd)
-{
-    IEMOP_MNEMONIC(!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? "movaps mr,r" : "movapd mr,r");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /*
-         * Register, register.
-         */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-        IEM_MC_BEGIN(0, 0);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP))
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-        else
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
-                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * Memory, register.
-         */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint128_t,                 uSrc); /** @todo optimize this one day... */
-        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES(); /** @todo check if this is delayed this long for REPZ/NZ */
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP))
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-        else
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x2a. */
-FNIEMOP_STUB(iemOp_cvtpi2ps_Vps_Qpi__cvtpi2pd_Vpd_Qpi__cvtsi2ss_Vss_Ey__cvtsi2sd_Vsd_Ey); //NEXT
-
-
-/** Opcode 0x0f 0x2b. */
-FNIEMOP_DEF(iemOp_movntps_Mps_Vps__movntpd_Mpd_Vpd)
-{
-    IEMOP_MNEMONIC(!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? "movntps mr,r" : "movntpd mr,r");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /*
-         * memory, register.
-         */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint128_t,                 uSrc); /** @todo optimize this one day... */
-        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES(); /** @todo check if this is delayed this long for REPZ/NZ */
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP))
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-        else
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-
-        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    /* The register, register encoding is invalid. */
-    else
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x2c. */
-FNIEMOP_STUB(iemOp_cvttps2pi_Ppi_Wps__cvttpd2pi_Ppi_Wpd__cvttss2si_Gy_Wss__cvttsd2si_Yu_Wsd); //NEXT
-/** Opcode 0x0f 0x2d. */
-FNIEMOP_STUB(iemOp_cvtps2pi_Ppi_Wps__cvtpd2pi_QpiWpd__cvtss2si_Gy_Wss__cvtsd2si_Gy_Wsd);
-/** Opcode 0x0f 0x2e. */
-FNIEMOP_STUB(iemOp_ucomiss_Vss_Wss__ucomisd_Vsd_Wsd); //NEXT
-/** Opcode 0x0f 0x2f. */
-FNIEMOP_STUB(iemOp_comiss_Vss_Wss__comisd_Vsd_Wsd);
-
-
-/** Opcode 0x0f 0x30. */
-FNIEMOP_DEF(iemOp_wrmsr)
-{
-    IEMOP_MNEMONIC("wrmsr");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_wrmsr);
-}
-
-
-/** Opcode 0x0f 0x31. */
-FNIEMOP_DEF(iemOp_rdtsc)
-{
-    IEMOP_MNEMONIC("rdtsc");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rdtsc);
-}
-
-
-/** Opcode 0x0f 0x32. */
-FNIEMOP_DEF(iemOp_rdmsr)
-{
-    IEMOP_MNEMONIC("rdmsr");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rdmsr);
-}
-
-
-/** Opcode 0x0f 0x33. */
-FNIEMOP_STUB(iemOp_rdpmc);
-/** Opcode 0x0f 0x34. */
-FNIEMOP_STUB(iemOp_sysenter);
-/** Opcode 0x0f 0x35. */
-FNIEMOP_STUB(iemOp_sysexit);
-/** Opcode 0x0f 0x37. */
-FNIEMOP_STUB(iemOp_getsec);
-/** Opcode 0x0f 0x38. */
-FNIEMOP_UD_STUB(iemOp_3byte_Esc_A4); /* Here there be dragons... */
-/** Opcode 0x0f 0x3a. */
-FNIEMOP_UD_STUB(iemOp_3byte_Esc_A5); /* Here there be dragons... */
-
-
-/**
- * Implements a conditional move.
- *
- * Wish there were an obvious way to do this where we could share and reduce
- * code bloat.
- *
- * @param   a_Cnd       The conditional "microcode" operation.
- */
-#define CMOV_X(a_Cnd) \
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm); \
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT)) \
-    { \
-        switch (pVCpu->iem.s.enmEffOpSize) \
-        { \
-            case IEMMODE_16BIT: \
-                IEM_MC_BEGIN(0, 1); \
-                IEM_MC_LOCAL(uint16_t, u16Tmp); \
-                a_Cnd { \
-                    IEM_MC_FETCH_GREG_U16(u16Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB); \
-                    IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Tmp); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            case IEMMODE_32BIT: \
-                IEM_MC_BEGIN(0, 1); \
-                IEM_MC_LOCAL(uint32_t, u32Tmp); \
-                a_Cnd { \
-                    IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB); \
-                    IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp); \
-                } IEM_MC_ELSE() { \
-                    IEM_MC_CLEAR_HIGH_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            case IEMMODE_64BIT: \
-                IEM_MC_BEGIN(0, 1); \
-                IEM_MC_LOCAL(uint64_t, u64Tmp); \
-                a_Cnd { \
-                    IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB); \
-                    IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            IEM_NOT_REACHED_DEFAULT_CASE_RET(); \
-        } \
-    } \
-    else \
-    { \
-        switch (pVCpu->iem.s.enmEffOpSize) \
-        { \
-            case IEMMODE_16BIT: \
-                IEM_MC_BEGIN(0, 2); \
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc); \
-                IEM_MC_LOCAL(uint16_t, u16Tmp); \
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
-                IEM_MC_FETCH_MEM_U16(u16Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
-                a_Cnd { \
-                    IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Tmp); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            case IEMMODE_32BIT: \
-                IEM_MC_BEGIN(0, 2); \
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc); \
-                IEM_MC_LOCAL(uint32_t, u32Tmp); \
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
-                IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
-                a_Cnd { \
-                    IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp); \
-                } IEM_MC_ELSE() { \
-                    IEM_MC_CLEAR_HIGH_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            case IEMMODE_64BIT: \
-                IEM_MC_BEGIN(0, 2); \
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc); \
-                IEM_MC_LOCAL(uint64_t, u64Tmp); \
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
-                IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
-                a_Cnd { \
-                    IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            IEM_NOT_REACHED_DEFAULT_CASE_RET(); \
-        } \
-    } do {} while (0)
-
-
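-/*
- * Editor's note: a minimal C sketch of what CMOV_X above encodes, for the
- * hypothetical 32-bit destination case (the helper name is illustrative, not
- * an IEM symbol).  Two details worth calling out: the source operand is
- * always fetched -- a memory source can fault even when the condition is
- * false -- and a 32-bit operand size always zeroes bits 63:32 of the
- * destination, which is exactly what the IEM_MC_ELSE() clearing above does.
- */
-#if 0 /* illustrative sketch only */
-static void iemSketchCmov32(uint64_t *pu64Dst, uint32_t uSrc, bool fCond)
-{
-    if (fCond)
-        *pu64Dst = uSrc;                    /* taken: move and zero-extend */
-    else
-        *pu64Dst = (uint32_t)*pu64Dst;      /* not taken: still clear the high half */
-}
-#endif
-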
-
-/** Opcode 0x0f 0x40. */
-FNIEMOP_DEF(iemOp_cmovo_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovo Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF));
-}
-
-
-/** Opcode 0x0f 0x41. */
-FNIEMOP_DEF(iemOp_cmovno_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovno Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_OF));
-}
-
-
-/** Opcode 0x0f 0x42. */
-FNIEMOP_DEF(iemOp_cmovc_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovc Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF));
-}
-
-
-/** Opcode 0x0f 0x43. */
-FNIEMOP_DEF(iemOp_cmovnc_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovnc Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_CF));
-}
-
-
-/** Opcode 0x0f 0x44. */
-FNIEMOP_DEF(iemOp_cmove_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmove Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF));
-}
-
-
-/** Opcode 0x0f 0x45. */
-FNIEMOP_DEF(iemOp_cmovne_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovne Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_ZF));
-}
-
-
-/** Opcode 0x0f 0x46. */
-FNIEMOP_DEF(iemOp_cmovbe_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovbe Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF));
-}
-
-
-/** Opcode 0x0f 0x47. */
-FNIEMOP_DEF(iemOp_cmovnbe_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovnbe Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_NO_BITS_SET(X86_EFL_CF | X86_EFL_ZF));
-}
-
-
-/** Opcode 0x0f 0x48. */
-FNIEMOP_DEF(iemOp_cmovs_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovs Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF));
-}
-
-
-/** Opcode 0x0f 0x49. */
-FNIEMOP_DEF(iemOp_cmovns_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovns Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_SF));
-}
-
-
-/** Opcode 0x0f 0x4a. */
-FNIEMOP_DEF(iemOp_cmovp_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovp Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF));
-}
-
-
-/** Opcode 0x0f 0x4b. */
-FNIEMOP_DEF(iemOp_cmovnp_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovnp Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_PF));
-}
-
-
-/** Opcode 0x0f 0x4c. */
-FNIEMOP_DEF(iemOp_cmovl_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovl Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF));
-}
-
-
-/** Opcode 0x0f 0x4d. */
-FNIEMOP_DEF(iemOp_cmovnl_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovnl Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BITS_EQ(X86_EFL_SF, X86_EFL_OF));
-}
-
-
-/** Opcode 0x0f 0x4e. */
-FNIEMOP_DEF(iemOp_cmovle_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovle Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF));
-}
-
-
-/** Opcode 0x0f 0x4f. */
-FNIEMOP_DEF(iemOp_cmovnle_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovnle Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF));
-}
-
-#undef CMOV_X
-
-/** Opcode 0x0f 0x50. */
-FNIEMOP_STUB(iemOp_movmskps_Gy_Ups__movmskpd_Gy_Upd);
-/** Opcode 0x0f 0x51. */
-FNIEMOP_STUB(iemOp_sqrtps_Wps_Vps__sqrtpd_Wpd_Vpd__sqrtss_Vss_Wss__sqrtsd_Vsd_Wsd);
-/** Opcode 0x0f 0x52. */
-FNIEMOP_STUB(iemOp_rsqrtps_Wps_Vps__rsqrtss_Vss_Wss);
-/** Opcode 0x0f 0x53. */
-FNIEMOP_STUB(iemOp_rcpps_Wps_Vps__rcpss_Vs_Wss);
-/** Opcode 0x0f 0x54. */
-FNIEMOP_STUB(iemOp_andps_Vps_Wps__andpd_Wpd_Vpd);
-/** Opcode 0x0f 0x55. */
-FNIEMOP_STUB(iemOp_andnps_Vps_Wps__andnpd_Wpd_Vpd);
-/** Opcode 0x0f 0x56. */
-FNIEMOP_STUB(iemOp_orps_Wpd_Vpd__orpd_Wpd_Vpd);
-/** Opcode 0x0f 0x57. */
-FNIEMOP_STUB(iemOp_xorps_Vps_Wps__xorpd_Wpd_Vpd);
-/** Opcode 0x0f 0x58. */
-FNIEMOP_STUB(iemOp_addps_Vps_Wps__addpd_Vpd_Wpd__addss_Vss_Wss__addsd_Vsd_Wsd); //NEXT
-/** Opcode 0x0f 0x59. */
-FNIEMOP_STUB(iemOp_mulps_Vps_Wps__mulpd_Vpd_Wpd__mulss_Vss__Wss__mulsd_Vsd_Wsd);//NEXT
-/** Opcode 0x0f 0x5a. */
-FNIEMOP_STUB(iemOp_cvtps2pd_Vpd_Wps__cvtpd2ps_Vps_Wpd__cvtss2sd_Vsd_Wss__cvtsd2ss_Vss_Wsd);
-/** Opcode 0x0f 0x5b. */
-FNIEMOP_STUB(iemOp_cvtdq2ps_Vps_Wdq__cvtps2dq_Vdq_Wps__cvtps2dq_Vdq_Wps);
-/** Opcode 0x0f 0x5c. */
-FNIEMOP_STUB(iemOp_subps_Vps_Wps__subpd_Vps_Wdp__subss_Vss_Wss__subsd_Vsd_Wsd);
-/** Opcode 0x0f 0x5d. */
-FNIEMOP_STUB(iemOp_minps_Vps_Wps__minpd_Vpd_Wpd__minss_Vss_Wss__minsd_Vsd_Wsd);
-/** Opcode 0x0f 0x5e. */
-FNIEMOP_STUB(iemOp_divps_Vps_Wps__divpd_Vpd_Wpd__divss_Vss_Wss__divsd_Vsd_Wsd);
-/** Opcode 0x0f 0x5f. */
-FNIEMOP_STUB(iemOp_maxps_Vps_Wps__maxpd_Vpd_Wpd__maxss_Vss_Wss__maxsd_Vsd_Wsd);
-
-
-/**
- * Common worker for SSE2 and MMX instructions on the forms:
- *      pxxxx xmm1, xmm2/mem128
- *      pxxxx mm1, mm2/mem32
- *
- * The 2nd operand is the first half of a register, which in the memory case
- * means a 32-bit memory access for MMX, and for SSE a 128-bit aligned access
- * where it may read the full 128 bits or only the low 64 bits.
- *
- * Exceptions type 4.
- */
-FNIEMOP_DEF_1(iemOpCommonMmxSse_LowLow_To_Full, PCIEMOPMEDIAF1L1, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint128_t *,          pDst, 0);
-                IEM_MC_ARG(uint64_t const *,     pSrc, 1);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_XREG_U64_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint128_t *,                 pDst,       0);
-                IEM_MC_LOCAL(uint64_t,                  uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U64_ALIGN_U128(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            if (!pImpl->pfnU64)
-                return IEMOP_RAISE_INVALID_OPCODE();
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
-                /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint64_t *,          pDst, 0);
-                IEM_MC_ARG(uint32_t const *,    pSrc, 1);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_REF_MREG_U32_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint64_t *,                  pDst,       0);
-                IEM_MC_LOCAL(uint32_t,                  uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint32_t const *,  pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U32(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
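-/*
- * Editor's note: a plain-C sketch of the low-low interleave the worker above
- * dispatches, using the 64-bit MMX punpcklbw form (hypothetical helper name,
- * little-endian host assumed).  Walking the elements downwards keeps the
- * in-place update safe.
- */
-#if 0 /* illustrative sketch only */
-static void iemSketchPunpcklbwU64(uint64_t *puDst, uint64_t const *puSrc)
-{
-    uint8_t       *pbDst = (uint8_t *)puDst;
-    uint8_t const *pbSrc = (uint8_t const *)puSrc;
-    for (int i = 3; i >= 0; i--)
-    {
-        pbDst[2 * i + 1] = pbSrc[i];    /* odd result bytes come from the source */
-        pbDst[2 * i]     = pbDst[i];    /* even result bytes from the destination */
-    }
-}
-#endif
-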
-
-/** Opcode 0x0f 0x60. */
-FNIEMOP_DEF(iemOp_punpcklbw_Pq_Qd__punpcklbw_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpcklbw");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_LowLow_To_Full, &g_iemAImpl_punpcklbw);
-}
-
-
-/** Opcode 0x0f 0x61. */
-FNIEMOP_DEF(iemOp_punpcklwd_Pq_Qd__punpcklwd_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpcklwd"); /** @todo AMD mark the MMX version as 3DNow!. Intel says MMX CPUID req. */
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_LowLow_To_Full, &g_iemAImpl_punpcklwd);
-}
-
-
-/** Opcode 0x0f 0x62. */
-FNIEMOP_DEF(iemOp_punpckldq_Pq_Qd__punpckldq_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpckldq");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_LowLow_To_Full, &g_iemAImpl_punpckldq);
-}
-
-
-/** Opcode 0x0f 0x63. */
-FNIEMOP_STUB(iemOp_packsswb_Pq_Qq__packsswb_Vdq_Wdq);
-/** Opcode 0x0f 0x64. */
-FNIEMOP_STUB(iemOp_pcmpgtb_Pq_Qq__pcmpgtb_Vdq_Wdq);
-/** Opcode 0x0f 0x65. */
-FNIEMOP_STUB(iemOp_pcmpgtw_Pq_Qq__pcmpgtw_Vdq_Wdq);
-/** Opcode 0x0f 0x66. */
-FNIEMOP_STUB(iemOp_pcmpgtd_Pq_Qq__pcmpgtd_Vdq_Wdq);
-/** Opcode 0x0f 0x67. */
-FNIEMOP_STUB(iemOp_packuswb_Pq_Qq__packuswb_Vdq_Wdq);
-
-
-/**
- * Common worker for SSE2 and MMX instructions on the forms:
- *      pxxxx xmm1, xmm2/mem128
- *      pxxxx mm1, mm2/mem64
- *
- * The 2nd operand is the second half of a register, which in the memory case
- * means a 64-bit memory access for MMX, and for SSE a 128-bit aligned access
- * where it may read the full 128 bits or only the upper 64 bits.
- *
- * Exceptions type 4.
- */
-FNIEMOP_DEF_1(iemOpCommonMmxSse_HighHigh_To_Full, PCIEMOPMEDIAF1H1, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint128_t *,          pDst, 0);
-                IEM_MC_ARG(uint128_t const *,    pSrc, 1);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint128_t *,                 pDst,       0);
-                IEM_MC_LOCAL(uint128_t,                 uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint128_t const *, pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); /* Most CPUs probably only read the high qword. */
-
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            if (!pImpl->pfnU64)
-                return IEMOP_RAISE_INVALID_OPCODE();
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
-                /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint64_t *,          pDst, 0);
-                IEM_MC_ARG(uint64_t const *,    pSrc, 1);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint64_t *,                  pDst,       0);
-                IEM_MC_LOCAL(uint64_t,                  uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
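-/*
- * Editor's note: the high-high counterpart to the sketch after the low-low
- * worker; same shape, but the elements come from the upper half of each
- * operand (hypothetical helper name, little-endian host assumed).
- */
-#if 0 /* illustrative sketch only */
-static void iemSketchPunpckhbwU64(uint64_t *puDst, uint64_t const *puSrc)
-{
-    uint8_t       *pbDst = (uint8_t *)puDst;
-    uint8_t const *pbSrc = (uint8_t const *)puSrc;
-    for (int i = 0; i < 4; i++)
-    {
-        uint8_t const bDst = pbDst[i + 4];  /* grab before overwriting */
-        pbDst[2 * i]     = bDst;
-        pbDst[2 * i + 1] = pbSrc[i + 4];
-    }
-}
-#endif
-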
-
-/** Opcode 0x0f 0x68. */
-FNIEMOP_DEF(iemOp_punpckhbw_Pq_Qq__punpckhbw_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpckhbw");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_HighHigh_To_Full, &g_iemAImpl_punpckhbw);
-}
-
-
-/** Opcode 0x0f 0x69. */
-FNIEMOP_DEF(iemOp_punpckhwd_Pq_Qd__punpckhwd_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpckhwd");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_HighHigh_To_Full, &g_iemAImpl_punpckhwd);
-}
-
-
-/** Opcode 0x0f 0x6a. */
-FNIEMOP_DEF(iemOp_punpckhdq_Pq_Qd__punpckhdq_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpckhdq");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_HighHigh_To_Full, &g_iemAImpl_punpckhdq);
-}
-
-/** Opcode 0x0f 0x6b. */
-FNIEMOP_STUB(iemOp_packssdw_Pq_Qd__packssdq_Vdq_Wdq);
-
-
-/** Opcode 0x0f 0x6c. */
-FNIEMOP_DEF(iemOp_punpcklqdq_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpcklqdq");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_LowLow_To_Full, &g_iemAImpl_punpcklqdq);
-}
-
-
-/** Opcode 0x0f 0x6d. */
-FNIEMOP_DEF(iemOp_punpckhqdq_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpckhqdq");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_HighHigh_To_Full, &g_iemAImpl_punpckhqdq);
-}
-
-
-/** Opcode 0x0f 0x6e. */
-FNIEMOP_DEF(iemOp_movd_q_Pd_Ey__movd_q_Vy_Ey)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            IEMOP_MNEMONIC("movd/q Wd/q,Ed/q");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* XMM, greg */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                    IEM_MC_STORE_XREG_U64_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                    IEM_MC_STORE_XREG_U32_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* XMM, [mem] */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT(); /** @todo order */
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                    IEM_MC_STORE_XREG_U64_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                    IEM_MC_STORE_XREG_U32_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            IEMOP_MNEMONIC("movq/d Pd/q,Ed/q");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* MMX, greg */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                    IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                else
-                    IEM_MC_FETCH_GREG_U32_ZX_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* MMX, [mem] */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                    IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                    IEM_MC_STORE_MREG_U32_ZX_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
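-/*
- * Editor's note: in the handler above REX.W is what turns movd into movq --
- * with REX.W the full 64-bit general register or memory operand is moved,
- * without it a 32-bit operand is fetched and zero-extended into the MMX/XMM
- * destination.
- */
-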
-
-/** Opcode 0x0f 0x6f. */
-FNIEMOP_DEF(iemOp_movq_Pq_Qq__movdqa_Vdq_Wdq__movdqu_Vdq_Wdq)
-{
-    bool fAligned = false;
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE aligned */
-            fAligned = true;
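-            /* fall thru */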
-        case IEM_OP_PRF_REPZ: /* SSE unaligned */
-            if (fAligned)
-                IEMOP_MNEMONIC("movdqa Vdq,Wdq");
-            else
-                IEMOP_MNEMONIC("movdqu Vdq,Wdq");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 0);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-                IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
-                                      (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint128_t,  u128Tmp);
-                IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-                if (fAligned)
-                    IEM_MC_FETCH_MEM_U128_ALIGN_SSE(u128Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                else
-                    IEM_MC_FETCH_MEM_U128(u128Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u128Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            IEMOP_MNEMONIC("movq Pq,Qq");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
-                /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-                IEM_MC_FETCH_MREG_U64(u64Tmp, bRm & X86_MODRM_RM_MASK);
-                IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-                IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
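-/*
- * Editor's note: the only difference between the movdqa and movdqu paths in
- * the handler above is the memory fetch; IEM_MC_FETCH_MEM_U128_ALIGN_SSE
- * enforces the 16-byte alignment check (faulting on a misaligned access)
- * that the plain IEM_MC_FETCH_MEM_U128 fetch skips.
- */
-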
-
-/** Opcode 0x0f 0x70. The immediate here is evil! */
-FNIEMOP_DEF(iemOp_pshufw_Pq_Qq_Ib__pshufd_Vdq_Wdq_Ib__pshufhw_Vdq_Wdq_Ib__pshuflq_Vdq_Wdq_Ib)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-        case IEM_OP_PRF_REPNZ:   /* SSE */
-        case IEM_OP_PRF_REPZ:    /* SSE */
-        {
-            PFNIEMAIMPLMEDIAPSHUF pfnAImpl;
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-            {
-                case IEM_OP_PRF_SIZE_OP:
-                    IEMOP_MNEMONIC("pshufd Vdq,Wdq,Ib");
-                    pfnAImpl = iemAImpl_pshufd;
-                    break;
-                case IEM_OP_PRF_REPNZ:
-                    IEMOP_MNEMONIC("pshuflw Vdq,Wdq,Ib");
-                    pfnAImpl = iemAImpl_pshuflw;
-                    break;
-                case IEM_OP_PRF_REPZ:
-                    IEMOP_MNEMONIC("pshufhw Vdq,Wdq,Ib");
-                    pfnAImpl = iemAImpl_pshufhw;
-                    break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint128_t *,         pDst, 0);
-                IEM_MC_ARG(uint128_t const *,   pSrc, 1);
-                IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_SSE_AIMPL_3(pfnAImpl, pDst, pSrc, bEvilArg);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint128_t *,                 pDst,       0);
-                IEM_MC_LOCAL(uint128_t,                 uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint128_t const *, pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
-                IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-
-                IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_SSE_AIMPL_3(pfnAImpl, pDst, pSrc, bEvilArg);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-        }
-
-        case 0: /* MMX Extension */
-            IEMOP_MNEMONIC("pshufw Pq,Qq,Ib");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,          pDst, 0);
-                IEM_MC_ARG(uint64_t const *,    pSrc, 1);
-                IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT();
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
-                IEM_MC_CALL_MMX_AIMPL_3(iemAImpl_pshufw, pDst, pSrc, bEvilArg);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,                  pDst,       0);
-                IEM_MC_LOCAL(uint64_t,                  uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
-                IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT();
-
-                IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_CALL_MMX_AIMPL_3(iemAImpl_pshufw, pDst, pSrc, bEvilArg);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
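-/*
- * Editor's note: the whole pshuf* family above boils down to one formula;
- * each 2-bit field of the "evil" immediate selects a source element.  Sketch
- * for the 4x16-bit pshufw case (hypothetical helper name, little-endian host
- * assumed; the temporary keeps src == dst safe):
- */
-#if 0 /* illustrative sketch only */
-static void iemSketchPshufwU64(uint64_t *puDst, uint64_t const *puSrc, uint8_t bEvil)
-{
-    uint16_t const *pu16Src = (uint16_t const *)puSrc;
-    uint16_t       *pu16Dst = (uint16_t *)puDst;
-    uint16_t        au16Res[4];
-    for (int i = 0; i < 4; i++)
-        au16Res[i] = pu16Src[(bEvil >> (2 * i)) & 3];
-    for (int i = 0; i < 4; i++)
-        pu16Dst[i] = au16Res[i];
-}
-#endif
-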
-
-/** Opcode 0x0f 0x71 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp12_psrlw_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x71 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp12_psrlw_Udq_Ib, uint8_t, bRm);
-
-/** Opcode 0x0f 0x71 11/4. */
-FNIEMOP_STUB_1(iemOp_Grp12_psraw_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x71 11/4. */
-FNIEMOP_STUB_1(iemOp_Grp12_psraw_Udq_Ib, uint8_t, bRm);
-
-/** Opcode 0x0f 0x71 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp12_psllw_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x71 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp12_psllw_Udq_Ib, uint8_t, bRm);
-
-
-/** Opcode 0x0f 0x71. */
-FNIEMOP_DEF(iemOp_Grp12)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: case 1: case 3: case 5: case 7:
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 2:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp12_psrlw_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp12_psrlw_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 4:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp12_psraw_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp12_psraw_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 6:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp12_psllw_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp12_psllw_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
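-
-/*
- * Editor's note: the Grp12 dispatcher above and the Grp13/14 ones below all
- * key off the ModR/M byte the same way.  In the register form (the only
- * valid one here; the memory forms are #UD):
- *
- *     mod = (bRm >> 6) & 3;   must be 11b
- *     reg = (bRm >> 3) & 7;   the /digit selecting the shift operation
- *     rm  =  bRm       & 7;   the MMX/XMM register operand
- */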
-
-
-/** Opcode 0x0f 0x72 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp13_psrld_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x72 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp13_psrld_Udq_Ib, uint8_t, bRm);
-
-/** Opcode 0x0f 0x72 11/4. */
-FNIEMOP_STUB_1(iemOp_Grp13_psrad_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x72 11/4. */
-FNIEMOP_STUB_1(iemOp_Grp13_psrad_Udq_Ib, uint8_t, bRm);
-
-/** Opcode 0x0f 0x72 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp13_pslld_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x72 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp13_pslld_Udq_Ib, uint8_t, bRm);
-
-
-/** Opcode 0x0f 0x72. */
-FNIEMOP_DEF(iemOp_Grp13)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: case 1: case 3: case 5: case 7:
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 2:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp13_psrld_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp13_psrld_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 4:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp13_psrad_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp13_psrad_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 6:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp13_pslld_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp13_pslld_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0x0f 0x73 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp14_psrlq_Nq_Ib,   uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x73 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp14_psrlq_Udq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x73 11/3. */
-FNIEMOP_STUB_1(iemOp_Grp14_psrldq_Udq_Ib, uint8_t, bRm); //NEXT
-
-/** Opcode 0x0f 0x73 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp14_psllq_Nq_Ib,   uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x73 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp14_psllq_Udq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x73 11/7. */
-FNIEMOP_STUB_1(iemOp_Grp14_pslldq_Udq_Ib, uint8_t, bRm); //NEXT
-
-
-/** Opcode 0x0f 0x73. */
-FNIEMOP_DEF(iemOp_Grp14)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: case 1: case 4: case 5:
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 2:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp14_psrlq_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp14_psrlq_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 3:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp14_psrldq_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 6:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp14_psllq_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp14_psllq_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 7:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp14_pslldq_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/**
- * Common worker for SSE2 and MMX instructions on the forms:
- *      pxxx    mm1, mm2/mem64
- *      pxxx    xmm1, xmm2/mem128
- *
- * Proper alignment of the 128-bit operand is enforced.
- * Exceptions type 4. SSE2 and MMX cpuid checks.
- */
-FNIEMOP_DEF_1(iemOpCommonMmxSse2_FullFull_To_Full, PCIEMOPMEDIAF2, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint128_t *,          pDst, 0);
-                IEM_MC_ARG(uint128_t const *,    pSrc, 1);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint128_t *,                 pDst,       0);
-                IEM_MC_LOCAL(uint128_t,                 uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint128_t const *, pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
-                /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint64_t *,          pDst, 0);
-                IEM_MC_ARG(uint64_t const *,    pSrc, 1);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint64_t *,                  pDst,       0);
-                IEM_MC_LOCAL(uint64_t,                  uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
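-/*
- * Editor's note: sketch of a typical full-width worker dispatched above,
- * pcmpeqb on 64-bit MMX operands (hypothetical helper name): each byte
- * becomes all ones on equality and all zeros otherwise.
- */
-#if 0 /* illustrative sketch only */
-static void iemSketchPcmpeqbU64(uint64_t *puDst, uint64_t const *puSrc)
-{
-    uint8_t       *pbDst = (uint8_t *)puDst;
-    uint8_t const *pbSrc = (uint8_t const *)puSrc;
-    for (int i = 0; i < 8; i++)
-        pbDst[i] = pbDst[i] == pbSrc[i] ? 0xff : 0x00;
-}
-#endif
-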
-
-/** Opcode 0x0f 0x74. */
-FNIEMOP_DEF(iemOp_pcmpeqb_Pq_Qq__pcmpeqb_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("pcmpeqb");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqb);
-}
-
-
-/** Opcode 0x0f 0x75. */
-FNIEMOP_DEF(iemOp_pcmpeqw_Pq_Qq__pcmpeqw_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("pcmpeqw");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqw);
-}
-
-
-/** Opcode 0x0f 0x76. */
-FNIEMOP_DEF(iemOp_pcmped_Pq_Qq__pcmpeqd_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("pcmpeqd");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqd);
-}
-
-
-/** Opcode 0x0f 0x77. */
-FNIEMOP_STUB(iemOp_emms);
-/** Opcode 0x0f 0x78. */
-FNIEMOP_UD_STUB(iemOp_vmread_AmdGrp17);
-/** Opcode 0x0f 0x79. */
-FNIEMOP_UD_STUB(iemOp_vmwrite);
-/** Opcode 0x0f 0x7c. */
-FNIEMOP_STUB(iemOp_haddpd_Vdp_Wpd__haddps_Vps_Wps);
-/** Opcode 0x0f 0x7d. */
-FNIEMOP_STUB(iemOp_hsubpd_Vpd_Wpd__hsubps_Vps_Wps);
-
-
-/** Opcode 0x0f 0x7e. */
-FNIEMOP_DEF(iemOp_movd_q_Ey_Pd__movd_q_Ey_Vy__movq_Vq_Wq)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            IEMOP_MNEMONIC("movd/q Ed/q,Wd/q");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* greg, XMM */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_XREG_U64(u64Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                    IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_XREG_U32(u32Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                    IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* [mem], XMM */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_XREG_U64(u64Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                    IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_XREG_U32(u32Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                    IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            IEMOP_MNEMONIC("movq/d Ed/q,Pd/q");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* greg, MMX */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                    IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_MREG_U32(u32Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                    IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* [mem], MMX */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                    IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_MREG_U32(u32Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                    IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
-
-/** Opcode 0x0f 0x7f. */
-FNIEMOP_DEF(iemOp_movq_Qq_Pq__movq_movdqa_Wdq_Vdq__movdqu_Wdq_Vdq)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    bool fAligned = false;
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE aligned */
-            fAligned = true;
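-            /* fall thru */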
-        case IEM_OP_PRF_REPZ: /* SSE unaligned */
-            if (fAligned)
-                IEMOP_MNEMONIC("movdqa Wdq,Vdq");
-            else
-                IEMOP_MNEMONIC("movdqu Wdq,Vdq");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 0);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-                IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
-                                      ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint128_t,  u128Tmp);
-                IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-                IEM_MC_FETCH_XREG_U128(u128Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                if (fAligned)
-                    IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u128Tmp);
-                else
-                    IEM_MC_STORE_MEM_U128(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u128Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            IEMOP_MNEMONIC("movq Qq,Pq");
-
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
-                /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-                IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_STORE_MREG_U64(bRm & X86_MODRM_RM_MASK, u64Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-
-                IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u64Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
-
-
-/** Opcode 0x0f 0x80. */
-FNIEMOP_DEF(iemOp_jo_Jv)
-{
-    IEMOP_MNEMONIC("jo  Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
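-/*
- * Editor's note: the remaining two-byte Jcc handlers below follow the exact
- * shape of iemOp_jo_Jv above, varying only the EFLAGS predicate.  The signed
- * 16- or 32-bit displacement is fetched by effective operand size before the
- * condition is tested, so the full instruction is always consumed even on
- * the not-taken path.
- */
-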
-
-/** Opcode 0x0f 0x81. */
-FNIEMOP_DEF(iemOp_jno_Jv)
-{
-    IEMOP_MNEMONIC("jno Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x82. */
-FNIEMOP_DEF(iemOp_jc_Jv)
-{
-    IEMOP_MNEMONIC("jc/jb/jnae Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x83. */
-FNIEMOP_DEF(iemOp_jnc_Jv)
-{
-    IEMOP_MNEMONIC("jnc/jnb/jae Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x84. */
-FNIEMOP_DEF(iemOp_je_Jv)
-{
-    IEMOP_MNEMONIC("je/jz Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x85. */
-FNIEMOP_DEF(iemOp_jne_Jv)
-{
-    IEMOP_MNEMONIC("jne/jnz Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x86. */
-FNIEMOP_DEF(iemOp_jbe_Jv)
-{
-    IEMOP_MNEMONIC("jbe/jna Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x87. */
-FNIEMOP_DEF(iemOp_jnbe_Jv)
-{
-    IEMOP_MNEMONIC("jnbe/ja Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x88. */
-FNIEMOP_DEF(iemOp_js_Jv)
-{
-    IEMOP_MNEMONIC("js  Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x89. */
-FNIEMOP_DEF(iemOp_jns_Jv)
-{
-    IEMOP_MNEMONIC("jns Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8a. */
-FNIEMOP_DEF(iemOp_jp_Jv)
-{
-    IEMOP_MNEMONIC("jp  Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8b. */
-FNIEMOP_DEF(iemOp_jnp_Jv)
-{
-    IEMOP_MNEMONIC("jo  Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8c. */
-FNIEMOP_DEF(iemOp_jl_Jv)
-{
-    IEMOP_MNEMONIC("jl/jnge Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8d. */
-FNIEMOP_DEF(iemOp_jnl_Jv)
-{
-    IEMOP_MNEMONIC("jnl/jge Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8e. */
-FNIEMOP_DEF(iemOp_jle_Jv)
-{
-    IEMOP_MNEMONIC("jle/jng Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8f. */
-FNIEMOP_DEF(iemOp_jnle_Jv)
-{
-    IEMOP_MNEMONIC("jnle/jg Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
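-
-/*
- * Sketch, assumed helper rather than upstream code: the sixteen Jcc handlers
- * above (opcodes 0x0f 0x80..0x8f) share one template, so this condenses the
- * EFLAGS predicate each one feeds to IEM_MC_IF_*.  Even opcodes test the
- * condition, odd opcodes its negation.
- */
-static bool iemExampleJccTaken(uint8_t bOpcode, uint32_t fEfl)
-{
-    bool fTaken;
-    switch (bOpcode & 0xe)
-    {
-        case 0x0: fTaken = RT_BOOL(fEfl & X86_EFL_OF); break;                /* jo/jno   */
-        case 0x2: fTaken = RT_BOOL(fEfl & X86_EFL_CF); break;                /* jc/jnc   */
-        case 0x4: fTaken = RT_BOOL(fEfl & X86_EFL_ZF); break;                /* je/jne   */
-        case 0x6: fTaken = RT_BOOL(fEfl & (X86_EFL_CF | X86_EFL_ZF)); break; /* jbe/jnbe */
-        case 0x8: fTaken = RT_BOOL(fEfl & X86_EFL_SF); break;                /* js/jns   */
-        case 0xa: fTaken = RT_BOOL(fEfl & X86_EFL_PF); break;                /* jp/jnp   */
-        case 0xc: fTaken = RT_BOOL(fEfl & X86_EFL_SF) != RT_BOOL(fEfl & X86_EFL_OF); break;  /* jl/jnl   */
-        default:  fTaken = RT_BOOL(fEfl & X86_EFL_ZF)
-                        || RT_BOOL(fEfl & X86_EFL_SF) != RT_BOOL(fEfl & X86_EFL_OF); break;  /* jle/jnle */
-    }
-    return (bOpcode & 1) ? !fTaken : fTaken;
-}
-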
-
-/** Opcode 0x0f 0x90. */
-FNIEMOP_DEF(iemOp_seto_Eb)
-{
-    IEMOP_MNEMONIC("seto Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x91. */
-FNIEMOP_DEF(iemOp_setno_Eb)
-{
-    IEMOP_MNEMONIC("setno Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x92. */
-FNIEMOP_DEF(iemOp_setc_Eb)
-{
-    IEMOP_MNEMONIC("setc Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x93. */
-FNIEMOP_DEF(iemOp_setnc_Eb)
-{
-    IEMOP_MNEMONIC("setnc Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x94. */
-FNIEMOP_DEF(iemOp_sete_Eb)
-{
-    IEMOP_MNEMONIC("sete Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x95. */
-FNIEMOP_DEF(iemOp_setne_Eb)
-{
-    IEMOP_MNEMONIC("setne Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x96. */
-FNIEMOP_DEF(iemOp_setbe_Eb)
-{
-    IEMOP_MNEMONIC("setbe Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x97. */
-FNIEMOP_DEF(iemOp_setnbe_Eb)
-{
-    IEMOP_MNEMONIC("setnbe Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x98. */
-FNIEMOP_DEF(iemOp_sets_Eb)
-{
-    IEMOP_MNEMONIC("sets Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x99. */
-FNIEMOP_DEF(iemOp_setns_Eb)
-{
-    IEMOP_MNEMONIC("setns Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9a. */
-FNIEMOP_DEF(iemOp_setp_Eb)
-{
-    IEMOP_MNEMONIC("setnp Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9b. */
-FNIEMOP_DEF(iemOp_setnp_Eb)
-{
-    IEMOP_MNEMONIC("setnp Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9c. */
-FNIEMOP_DEF(iemOp_setl_Eb)
-{
-    IEMOP_MNEMONIC("setl Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9d. */
-FNIEMOP_DEF(iemOp_setnl_Eb)
-{
-    IEMOP_MNEMONIC("setnl Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9e. */
-FNIEMOP_DEF(iemOp_setle_Eb)
-{
-    IEMOP_MNEMONIC("setle Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9f. */
-FNIEMOP_DEF(iemOp_setnle_Eb)
-{
-    IEMOP_MNEMONIC("setnle Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
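-
-/*
- * Sketch (hypothetical helper): the SETcc handlers above (0x0f 0x90..0x9f)
- * evaluate exactly the same predicates as Jcc (0x0f 0x80..0x8f, see
- * iemExampleJccTaken above) but store a 0/1 byte instead of branching,
- * which is why each body is an IEM_MC_IF_* around two U8_CONST stores.
- */
-static uint8_t iemExampleSetccValue(uint8_t bOpcode, uint32_t fEfl)
-{
-    return iemExampleJccTaken((uint8_t)(bOpcode - 0x10), fEfl) ? 1 : 0;
-}
-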
-
-/**
- * Common 'push segment-register' helper.
- */
-FNIEMOP_DEF_1(iemOpCommonPushSReg, uint8_t, iReg)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (iReg < X86_SREG_FS)
-        IEMOP_HLP_NO_64BIT();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint16_t, u16Value);
-            IEM_MC_FETCH_SREG_U16(u16Value, iReg);
-            IEM_MC_PUSH_U16(u16Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_FETCH_SREG_ZX_U32(u32Value, iReg);
-            IEM_MC_PUSH_U32_SREG(u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_FETCH_SREG_ZX_U64(u64Value, iReg);
-            IEM_MC_PUSH_U64(u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-    }
-
-    return VINF_SUCCESS;
-}
-
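-
-/*
- * Minimal sketch of the stack effect selected by the IEMMODE switch above
- * (hypothetical helper; the real code goes through the IEM_MC_PUSH_*
- * microcode with all segment and fault checks).  IEM_MC_PUSH_U32_SREG is a
- * distinct variant because some CPUs only write the low 16 bits when a
- * segment register is pushed with 32-bit operand size.
- */
-static void iemExamplePushSReg(uint8_t *pbStack, uint64_t *puRsp, uint16_t uSel, unsigned cbOp)
-{
-    *puRsp -= cbOp;                         /* cbOp = 2, 4 or 8 per the IEMMODE above */
-    for (unsigned i = 0; i < cbOp; i++)     /* little-endian store, zero extended */
-        pbStack[*puRsp + i] = i < 2 ? (uint8_t)(uSel >> (i * 8)) : 0;
-}
-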
-
-/** Opcode 0x0f 0xa0. */
-FNIEMOP_DEF(iemOp_push_fs)
-{
-    IEMOP_MNEMONIC("push fs");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_FS);
-}
-
-
-/** Opcode 0x0f 0xa1. */
-FNIEMOP_DEF(iemOp_pop_fs)
-{
-    IEMOP_MNEMONIC("pop fs");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_FS, pVCpu->iem.s.enmEffOpSize);
-}
-
-
-/** Opcode 0x0f 0xa2. */
-FNIEMOP_DEF(iemOp_cpuid)
-{
-    IEMOP_MNEMONIC("cpuid");
-    IEMOP_HLP_MIN_486(); /* not all 486s have CPUID. */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_cpuid);
-}
-
-
-/**
- * Common worker for iemOp_bt_Ev_Gv, iemOp_btc_Ev_Gv, iemOp_btr_Ev_Gv and
- * iemOp_bts_Ev_Gv.
- */
-FNIEMOP_DEF_1(iemOpCommonBit_Ev_Gv, PCIEMOPBINSIZES, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register destination. */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,        u16Src,                 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_AND_LOCAL_U16(u16Src, 0xf);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,        u32Src,                 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_AND_LOCAL_U32(u32Src, 0x1f);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,        u64Src,                 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_AND_LOCAL_U64(u64Src, 0x3f);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* memory destination. */
-
-        uint32_t fAccess;
-        if (pImpl->pfnLockedU16)
-            fAccess = IEM_ACCESS_DATA_RW;
-        else /* BT only reads its destination. */
-            fAccess = IEM_ACCESS_DATA_R;
-
-        /* Note: fAccess is currently unused (hence the NOREF); the
-           IEM_MC_MEM_MAP calls below hard-code IEM_ACCESS_DATA_RW. */
-        NOREF(fAccess);
-
-        /** @todo test negative bit offsets! */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,                u16Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-                IEM_MC_LOCAL(int16_t,               i16AddrAdj);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_ASSIGN(i16AddrAdj, u16Src);
-                IEM_MC_AND_ARG_U16(u16Src, 0x0f);
-                IEM_MC_SAR_LOCAL_S16(i16AddrAdj, 4);
-                IEM_MC_SHL_LOCAL_S16(i16AddrAdj, 1);
-                IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(GCPtrEffDst, i16AddrAdj);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,                u32Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-                IEM_MC_LOCAL(int32_t,               i32AddrAdj);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_ASSIGN(i32AddrAdj, u32Src);
-                IEM_MC_AND_ARG_U32(u32Src, 0x1f);
-                IEM_MC_SAR_LOCAL_S32(i32AddrAdj, 5);
-                IEM_MC_SHL_LOCAL_S32(i32AddrAdj, 2);
-                IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(GCPtrEffDst, i32AddrAdj);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,                u64Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-                IEM_MC_LOCAL(int64_t,               i64AddrAdj);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_ASSIGN(i64AddrAdj, u64Src);
-                IEM_MC_AND_ARG_U64(u64Src, 0x3f);
-                IEM_MC_SAR_LOCAL_S64(i64AddrAdj, 6);
-                IEM_MC_SHL_LOCAL_S64(i64AddrAdj, 3);
-                IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(GCPtrEffDst, i64AddrAdj);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
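-
-/*
- * Sketch of the SAR/SHL address adjustment in the memory paths above
- * (hypothetical helper): for BT/BTS/BTR/BTC with a register operand, the
- * bit offset is signed relative to the effective address, so the word
- * actually accessed may lie below it.  The 16-bit case is shown; the 32 and
- * 64-bit cases shift by 5/6 and scale by 4/8.  An arithmetic right shift is
- * assumed, matching IEM_MC_SAR_LOCAL_S16.
- */
-static void iemExampleBtMemAdjust16(uint64_t *pGCPtrEff, int16_t i16BitOffset, uint16_t *puBitNo)
-{
-    *puBitNo    = (uint16_t)i16BitOffset & 0xf;     /* bit number within the word (the AND 0x0f) */
-    *pGCPtrEff += (int64_t)(i16BitOffset >> 4) * 2; /* signed word index (SAR 4) times 2 (SHL 1)  */
-}
-/* E.g. a bit offset of -5 gives word index -1 (effective address - 2), bit 11. */
-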
-
-/** Opcode 0x0f 0xa3. */
-FNIEMOP_DEF(iemOp_bt_Ev_Gv)
-{
-    IEMOP_MNEMONIC("bt  Gv,Gv");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_bt);
-}
-
-
-/**
- * Common worker for iemOp_shrd_Ev_Gv_Ib and iemOp_shld_Ev_Gv_Ib.
- */
-FNIEMOP_DEF_1(iemOpCommonShldShrd_Ib, PCIEMOPSHIFTDBLSIZES, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF | X86_EFL_OF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,        u16Src,                 1);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, /*=*/cShift, 2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,        u32Src,                 1);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, /*=*/cShift, 2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,        u64Src,                 1);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, /*=*/cShift, 2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,                u16Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-                IEM_MC_ASSIGN(cShiftArg, cShift);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,                u32Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-                IEM_MC_ASSIGN(cShiftArg, cShift);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,                u64Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-                IEM_MC_ASSIGN(cShiftArg, cShift);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
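-
-/*
- * Sketch of the double-precision shift the worker above dispatches to via
- * pfnNormalU16/U32/U64 (hypothetical helper; EFLAGS updates and the
- * undefined count >= width cases are omitted).  SHLD shifts uDst left and
- * fills the vacated low bits from the top of uSrc; SHRD mirrors it.
- */
-static uint32_t iemExampleShld32(uint32_t uDst, uint32_t uSrc, uint8_t cShift)
-{
-    cShift &= 31;                           /* the count is masked mod 32 */
-    if (!cShift)
-        return uDst;
-    return (uDst << cShift) | (uSrc >> (32 - cShift));
-}
-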
-
-/**
- * Common worker for iemOp_shrd_Ev_Gv_CL and iemOp_shld_Ev_Gv_CL.
- */
-FNIEMOP_DEF_1(iemOpCommonShldShrd_CL, PCIEMOPSHIFTDBLSIZES, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF | X86_EFL_OF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,        u16Src,                 1);
-                IEM_MC_ARG(uint8_t,         cShiftArg,              2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,        u32Src,                 1);
-                IEM_MC_ARG(uint8_t,         cShiftArg,              2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,        u64Src,                 1);
-                IEM_MC_ARG(uint8_t,         cShiftArg,              2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,                u16Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,                u32Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,                u64Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xa4. */
-FNIEMOP_DEF(iemOp_shld_Ev_Gv_Ib)
-{
-    IEMOP_MNEMONIC("shld Ev,Gv,Ib");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonShldShrd_Ib, &g_iemAImpl_shld);
-}
-
-
-/** Opcode 0x0f 0xa5. */
-FNIEMOP_DEF(iemOp_shld_Ev_Gv_CL)
-{
-    IEMOP_MNEMONIC("shld Ev,Gv,CL");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonShldShrd_CL, &g_iemAImpl_shld);
-}
-
-
-/** Opcode 0x0f 0xa8. */
-FNIEMOP_DEF(iemOp_push_gs)
-{
-    IEMOP_MNEMONIC("push gs");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_GS);
-}
-
-
-/** Opcode 0x0f 0xa9. */
-FNIEMOP_DEF(iemOp_pop_gs)
-{
-    IEMOP_MNEMONIC("pop gs");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_GS, pVCpu->iem.s.enmEffOpSize);
-}
-
-
-/** Opcode 0x0f 0xaa. */
-FNIEMOP_STUB(iemOp_rsm);
-//IEMOP_HLP_MIN_386();
-
-
-/** Opcode 0x0f 0xab. */
-FNIEMOP_DEF(iemOp_bts_Ev_Gv)
-{
-    IEMOP_MNEMONIC("bts Ev,Gv");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_bts);
-}
-
-
-/** Opcode 0x0f 0xac. */
-FNIEMOP_DEF(iemOp_shrd_Ev_Gv_Ib)
-{
-    IEMOP_MNEMONIC("shrd Ev,Gv,Ib");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonShldShrd_Ib, &g_iemAImpl_shrd);
-}
-
-
-/** Opcode 0x0f 0xad. */
-FNIEMOP_DEF(iemOp_shrd_Ev_Gv_CL)
-{
-    IEMOP_MNEMONIC("shrd Ev,Gv,CL");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonShldShrd_CL, &g_iemAImpl_shrd);
-}
-
-
-/** Opcode 0x0f 0xae mem/0. */
-FNIEMOP_DEF_1(iemOp_Grp15_fxsave,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fxsave m512");
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fFxSaveRstor)
-        return IEMOP_RAISE_INVALID_OPCODE();
-
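-    /* The effective operand size is passed along, presumably so the
-       implementation can tell the legacy image layout apart from the
-       64-bit (REX.W) one. */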
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
-    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_fxsave, iEffSeg, GCPtrEff, enmEffOpSize);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xae mem/1. */
-FNIEMOP_DEF_1(iemOp_Grp15_fxrstor,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fxrstor m512");
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fFxSaveRstor)
-        return IEMOP_RAISE_INVALID_OPCODE();
-
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
-    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_fxrstor, iEffSeg, GCPtrEff, enmEffOpSize);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xae mem/2. */
-FNIEMOP_STUB_1(iemOp_Grp15_ldmxcsr,  uint8_t, bRm);
-
-/** Opcode 0x0f 0xae mem/3. */
-FNIEMOP_STUB_1(iemOp_Grp15_stmxcsr,  uint8_t, bRm);
-
-/** Opcode 0x0f 0xae mem/4. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_xsave,    uint8_t, bRm);
-
-/** Opcode 0x0f 0xae mem/5. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_xrstor,   uint8_t, bRm);
-
-/** Opcode 0x0f 0xae mem/6. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_xsaveopt, uint8_t, bRm);
-
-/** Opcode 0x0f 0xae mem/7. */
-FNIEMOP_STUB_1(iemOp_Grp15_clflush,  uint8_t, bRm);
-
-
-/** Opcode 0x0f 0xae 11b/5. */
-FNIEMOP_DEF_1(iemOp_Grp15_lfence,   uint8_t, bRm)
-{
-    RT_NOREF_PV(bRm);
-    IEMOP_MNEMONIC("lfence");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
-        return IEMOP_RAISE_INVALID_OPCODE();
-
-    IEM_MC_BEGIN(0, 0);
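-    /* The guest requires SSE2, but if the host lacks it the real LFENCE
-       instruction is unavailable and an alternative fence helper is used. */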
-    if (IEM_GET_HOST_CPU_FEATURES(pVCpu)->fSse2)
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_lfence);
-    else
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_alt_mem_fence);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xae 11b/6. */
-FNIEMOP_DEF_1(iemOp_Grp15_mfence,   uint8_t, bRm)
-{
-    RT_NOREF_PV(bRm);
-    IEMOP_MNEMONIC("mfence");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
-        return IEMOP_RAISE_INVALID_OPCODE();
-
-    IEM_MC_BEGIN(0, 0);
-    if (IEM_GET_HOST_CPU_FEATURES(pVCpu)->fSse2)
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_mfence);
-    else
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_alt_mem_fence);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xae 11b/7. */
-FNIEMOP_DEF_1(iemOp_Grp15_sfence,   uint8_t, bRm)
-{
-    RT_NOREF_PV(bRm);
-    IEMOP_MNEMONIC("sfence");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
-        return IEMOP_RAISE_INVALID_OPCODE();
-
-    IEM_MC_BEGIN(0, 0);
-    if (IEM_GET_HOST_CPU_FEATURES(pVCpu)->fSse2)
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_sfence);
-    else
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_alt_mem_fence);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xf3 0x0f 0xae 11b/0. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_rdfsbase, uint8_t, bRm);
-
-/** Opcode 0xf3 0x0f 0xae 11b/1. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_rdgsbase, uint8_t, bRm);
-
-/** Opcode 0xf3 0x0f 0xae 11b/2. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_wrfsbase, uint8_t, bRm);
-
-/** Opcode 0xf3 0x0f 0xae 11b/3. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_wrgsbase, uint8_t, bRm);
-
-
-/** Opcode 0x0f 0xae. */
-FNIEMOP_DEF(iemOp_Grp15)
-{
-    IEMOP_HLP_MIN_586(); /* Not entirely accurate nor needed, but useful for debugging 286 code. */
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
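-    /* The memory forms dispatch purely on the reg field; the register forms
-       dispatch on the reg field and the repeat/size/lock prefixes. */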
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_Grp15_fxsave,  bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_Grp15_fxrstor, bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_Grp15_ldmxcsr, bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_Grp15_stmxcsr, bRm);
-            case 4: return FNIEMOP_CALL_1(iemOp_Grp15_xsave,   bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_Grp15_xrstor,  bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_Grp15_xsaveopt,bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_Grp15_clflush, bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ | IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_LOCK))
-        {
-            case 0:
-                switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-                {
-                    case 0: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 1: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 2: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 3: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 4: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 5: return FNIEMOP_CALL_1(iemOp_Grp15_lfence, bRm);
-                    case 6: return FNIEMOP_CALL_1(iemOp_Grp15_mfence, bRm);
-                    case 7: return FNIEMOP_CALL_1(iemOp_Grp15_sfence, bRm);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-
-            case IEM_OP_PRF_REPZ:
-                switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-                {
-                    case 0: return FNIEMOP_CALL_1(iemOp_Grp15_rdfsbase, bRm);
-                    case 1: return FNIEMOP_CALL_1(iemOp_Grp15_rdgsbase, bRm);
-                    case 2: return FNIEMOP_CALL_1(iemOp_Grp15_wrfsbase, bRm);
-                    case 3: return FNIEMOP_CALL_1(iemOp_Grp15_wrgsbase, bRm);
-                    case 4: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 5: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 6: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 7: return IEMOP_RAISE_INVALID_OPCODE();
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-
-            default:
-                return IEMOP_RAISE_INVALID_OPCODE();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xaf. */
-FNIEMOP_DEF(iemOp_imul_Gv_Ev)
-{
-    IEMOP_MNEMONIC("imul Gv,Ev");
-    IEMOP_HLP_MIN_386();
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_imul_two);
-}
-
-
-/** Opcode 0x0f 0xb0. */
-FNIEMOP_DEF(iemOp_cmpxchg_Eb_Gb)
-{
-    IEMOP_MNEMONIC("cmpxchg Eb,Gb");
-    IEMOP_HLP_MIN_486();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
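-    /* CMPXCHG compares AL with the destination and either stores the source
-       (on match) or loads the destination value into AL; a lock prefix
-       selects the atomic helper variant. */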
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING();
-        IEM_MC_BEGIN(4, 0);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,                 0);
-        IEM_MC_ARG(uint8_t *,       pu8Al,                  1);
-        IEM_MC_ARG(uint8_t,         u8Src,                  2);
-        IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-        IEM_MC_FETCH_GREG_U8(u8Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_GREG_U8(pu8Al, X86_GREG_xAX);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8, pu8Dst, pu8Al, u8Src, pEFlags);
-        else
-            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8_locked, pu8Dst, pu8Al, u8Src, pEFlags);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(4, 3);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,                 0);
-        IEM_MC_ARG(uint8_t *,       pu8Al,                  1);
-        IEM_MC_ARG(uint8_t,         u8Src,                  2);
-        IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
-        IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
-        IEM_MC_LOCAL(uint8_t,       u8Al);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING();
-        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        IEM_MC_FETCH_GREG_U8(u8Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_FETCH_GREG_U8(u8Al, X86_GREG_xAX);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        IEM_MC_REF_LOCAL(pu8Al, u8Al);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8, pu8Dst, pu8Al, u8Src, pEFlags);
-        else
-            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8_locked, pu8Dst, pu8Al, u8Src, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_STORE_GREG_U8(X86_GREG_xAX, u8Al);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-/** Opcode 0x0f 0xb1. */
-FNIEMOP_DEF(iemOp_cmpxchg_Ev_Gv)
-{
-    IEMOP_MNEMONIC("cmpxchg Ev,Gv");
-    IEMOP_HLP_MIN_486();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t *,      pu16Ax,                 1);
-                IEM_MC_ARG(uint16_t,        u16Src,                 2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U16(pu16Ax, X86_GREG_xAX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16, pu16Dst, pu16Ax, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16_locked, pu16Dst, pu16Ax, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t *,      pu32Eax,                1);
-                IEM_MC_ARG(uint32_t,        u32Src,                 2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U32(pu32Eax, X86_GREG_xAX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32, pu32Dst, pu32Eax, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32_locked, pu32Dst, pu32Eax, u32Src, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Eax);
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t *,      pu64Rax,                1);
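-                /* On 32-bit hosts (RT_ARCH_X86) the 64-bit source is passed
-                   by reference, presumably because it does not fit in a
-                   single host register. */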
-#ifdef RT_ARCH_X86
-                IEM_MC_ARG(uint64_t *,      pu64Src,                2);
-#else
-                IEM_MC_ARG(uint64_t,        u64Src,                 2);
-#endif
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U64(pu64Rax, X86_GREG_xAX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-#ifdef RT_ARCH_X86
-                IEM_MC_REF_GREG_U64(pu64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, pu64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, pu64Src, pEFlags);
-#else
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, u64Src, pEFlags);
-#endif
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 3);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t *,      pu16Ax,                 1);
-                IEM_MC_ARG(uint16_t,        u16Src,                 2);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
-                IEM_MC_LOCAL(uint16_t,      u16Ax);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING();
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U16(u16Ax, X86_GREG_xAX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_REF_LOCAL(pu16Ax, u16Ax);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16, pu16Dst, pu16Ax, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16_locked, pu16Dst, pu16Ax, u16Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Ax);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 3);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t *,      pu32Eax,                1);
-                IEM_MC_ARG(uint32_t,        u32Src,                 2);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
-                IEM_MC_LOCAL(uint32_t,      u32Eax);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING();
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U32(u32Eax, X86_GREG_xAX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_REF_LOCAL(pu32Eax, u32Eax);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32, pu32Dst, pu32Eax, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32_locked, pu32Dst, pu32Eax, u32Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U32(X86_GREG_xAX, u32Eax);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 3);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t *,      pu64Rax,                1);
-#ifdef RT_ARCH_X86
-                IEM_MC_ARG(uint64_t *,      pu64Src,                2);
-#else
-                IEM_MC_ARG(uint64_t,        u64Src,                 2);
-#endif
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
-                IEM_MC_LOCAL(uint64_t,      u64Rax);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING();
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_FETCH_GREG_U64(u64Rax, X86_GREG_xAX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_REF_LOCAL(pu64Rax, u64Rax);
-#ifdef RT_ARCH_X86
-                IEM_MC_REF_GREG_U64(pu64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, pu64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, pu64Src, pEFlags);
-#else
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, u64Src, pEFlags);
-#endif
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U64(X86_GREG_xAX, u64Rax);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-FNIEMOP_DEF_2(iemOpCommonLoadSRegAndGreg, uint8_t, iSegReg, uint8_t, bRm)
-{
-    Assert((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT)); /* Caller checks this */
-    uint8_t const iGReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg;
-
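-    /* The far pointer is read with the offset first and the 16-bit selector
-       immediately after it (at +2, +4 or +8 depending on operand size). */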
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(5, 1);
-            IEM_MC_ARG(uint16_t,        uSel,                                    0);
-            IEM_MC_ARG(uint16_t,        offSeg,                                  1);
-            IEM_MC_ARG_CONST(uint8_t,   iSegRegArg,/*=*/iSegReg,                 2);
-            IEM_MC_ARG_CONST(uint8_t,   iGRegArg,  /*=*/iGReg,                   3);
-            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 4);
-            IEM_MC_LOCAL(RTGCPTR,       GCPtrEff);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
-            IEM_MC_FETCH_MEM_U16_DISP(uSel, pVCpu->iem.s.iEffSeg, GCPtrEff, 2);
-            IEM_MC_CALL_CIMPL_5(iemCImpl_load_SReg_Greg, uSel, offSeg, iSegRegArg, iGRegArg, enmEffOpSize);
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(5, 1);
-            IEM_MC_ARG(uint16_t,        uSel,                                    0);
-            IEM_MC_ARG(uint32_t,        offSeg,                                  1);
-            IEM_MC_ARG_CONST(uint8_t,   iSegRegArg,/*=*/iSegReg,                 2);
-            IEM_MC_ARG_CONST(uint8_t,   iGRegArg,  /*=*/iGReg,                   3);
-            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 4);
-            IEM_MC_LOCAL(RTGCPTR,       GCPtrEff);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U32(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
-            IEM_MC_FETCH_MEM_U16_DISP(uSel, pVCpu->iem.s.iEffSeg, GCPtrEff, 4);
-            IEM_MC_CALL_CIMPL_5(iemCImpl_load_SReg_Greg, uSel, offSeg, iSegRegArg, iGRegArg, enmEffOpSize);
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(5, 1);
-            IEM_MC_ARG(uint16_t,        uSel,                                    0);
-            IEM_MC_ARG(uint64_t,        offSeg,                                  1);
-            IEM_MC_ARG_CONST(uint8_t,   iSegRegArg,/*=*/iSegReg,                 2);
-            IEM_MC_ARG_CONST(uint8_t,   iGRegArg,  /*=*/iGReg,                   3);
-            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 4);
-            IEM_MC_LOCAL(RTGCPTR,       GCPtrEff);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            if (IEM_IS_GUEST_CPU_AMD(pVCpu)) /** @todo testcase: Rev 3.15 of the AMD manuals claims it only loads a 32-bit greg. */
-                IEM_MC_FETCH_MEM_U32_SX_U64(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
-            else
-                IEM_MC_FETCH_MEM_U64(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
-            IEM_MC_FETCH_MEM_U16_DISP(uSel, pVCpu->iem.s.iEffSeg, GCPtrEff, 8);
-            IEM_MC_CALL_CIMPL_5(iemCImpl_load_SReg_Greg, uSel, offSeg, iSegRegArg, iGRegArg, enmEffOpSize);
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0x0f 0xb2. */
-FNIEMOP_DEF(iemOp_lss_Gv_Mp)
-{
-    IEMOP_MNEMONIC("lss Gv,Mp");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_SS, bRm);
-}
-
-
-/** Opcode 0x0f 0xb3. */
-FNIEMOP_DEF(iemOp_btr_Ev_Gv)
-{
-    IEMOP_MNEMONIC("btr Ev,Gv");
-    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_btr);
-}
-
-
-/** Opcode 0x0f 0xb4. */
-FNIEMOP_DEF(iemOp_lfs_Gv_Mp)
-{
-    IEMOP_MNEMONIC("lfs Gv,Mp");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_FS, bRm);
-}
-
-
-/** Opcode 0x0f 0xb5. */
-FNIEMOP_DEF(iemOp_lgs_Gv_Mp)
-{
-    IEMOP_MNEMONIC("lgs Gv,Mp");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_GS, bRm);
-}
-
-
-/** Opcode 0x0f 0xb6. */
-FNIEMOP_DEF(iemOp_movzx_Gv_Eb)
-{
-    IEMOP_MNEMONIC("movzx Gv,Eb");
-    IEMOP_HLP_MIN_386();
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_FETCH_GREG_U8_ZX_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_FETCH_GREG_U8_ZX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_FETCH_GREG_U8_ZX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_ZX_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_ZX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_ZX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xb7. */
-FNIEMOP_DEF(iemOp_movzx_Gv_Ew)
-{
-    IEMOP_MNEMONIC("movzx Gv,Ew");
-    IEMOP_HLP_MIN_386();
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Not entirely sure how the operand size prefix is handled here,
-     *        assuming that it will be ignored. Would be nice to have a few
-     *        tests for this. */
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
-        {
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_FETCH_GREG_U16_ZX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_FETCH_GREG_U16_ZX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
-        {
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16_ZX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16_ZX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xb8. */
-FNIEMOP_STUB(iemOp_popcnt_Gv_Ev_jmpe);
-
-
-/** Opcode 0x0f 0xb9. */
-FNIEMOP_DEF(iemOp_Grp10)
-{
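-    /* 0x0f 0xb9 is the reserved UD1 encoding, so raising #UD is the
-       architecturally expected behaviour. */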
-    Log(("iemOp_Grp10 -> #UD\n"));
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0xba. */
-FNIEMOP_DEF(iemOp_Grp8)
-{
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    PCIEMOPBINSIZES pImpl;
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: case 1: case 2: case 3:
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 4: pImpl = &g_iemAImpl_bt;  IEMOP_MNEMONIC("bt  Ev,Ib"); break;
-        case 5: pImpl = &g_iemAImpl_bts; IEMOP_MNEMONIC("bts Ev,Ib"); break;
-        case 6: pImpl = &g_iemAImpl_btr; IEMOP_MNEMONIC("btr Ev,Ib"); break;
-        case 7: pImpl = &g_iemAImpl_btc; IEMOP_MNEMONIC("btc Ev,Ib"); break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
-
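-    /* For the immediate form the bit offset is masked to the operand width
-       (0x0f, 0x1f or 0x3f below), so it never reaches beyond the operand. */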
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register destination. */
-        uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
-                IEM_MC_ARG_CONST(uint16_t,  u16Src, /*=*/ u8Bit & 0x0f, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
-                IEM_MC_ARG_CONST(uint32_t,  u32Src, /*=*/ u8Bit & 0x1f, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
-                IEM_MC_ARG_CONST(uint64_t,  u64Src, /*=*/ u8Bit & 0x3f, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* memory destination. */
-
-        uint32_t fAccess;
-        if (pImpl->pfnLockedU16)
-            fAccess = IEM_ACCESS_DATA_RW;
-        else /* BT */
-            fAccess = IEM_ACCESS_DATA_R;
-
-        /** @todo test negative bit offsets! */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,                u16Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
-                IEM_MC_ASSIGN(u16Src, u8Bit & 0x0f);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu16Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, fAccess);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,                u32Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
-                IEM_MC_ASSIGN(u32Src, u8Bit & 0x1f);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu32Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, fAccess);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,                u64Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
-                IEM_MC_ASSIGN(u64Src, u8Bit & 0x3f);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu64Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, fAccess);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xbb. */
-FNIEMOP_DEF(iemOp_btc_Ev_Gv)
-{
-    IEMOP_MNEMONIC("btc Ev,Gv");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_btc);
-}
-
-
-/** Opcode 0x0f 0xbc. */
-FNIEMOP_DEF(iemOp_bsf_Gv_Ev)
-{
-    IEMOP_MNEMONIC("bsf Gv,Ev");
-    IEMOP_HLP_MIN_386();
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF);
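-    /* Only ZF has defined behaviour for BSF (and BSR below): it is set when
-       the source operand is zero. */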
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_bsf);
-}
-
-
-/** Opcode 0x0f 0xbd. */
-FNIEMOP_DEF(iemOp_bsr_Gv_Ev)
-{
-    IEMOP_MNEMONIC("bsr Gv,Ev");
-    IEMOP_HLP_MIN_386();
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_bsr);
-}
-
-
-/** Opcode 0x0f 0xbe. */
-FNIEMOP_DEF(iemOp_movsx_Gv_Eb)
-{
-    IEMOP_MNEMONIC("movsx Gv,Eb");
-    IEMOP_HLP_MIN_386();
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_FETCH_GREG_U8_SX_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_FETCH_GREG_U8_SX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_FETCH_GREG_U8_SX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_SX_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_SX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_SX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xbf. */
-FNIEMOP_DEF(iemOp_movsx_Gv_Ew)
-{
-    IEMOP_MNEMONIC("movsx Gv,Ew");
-    IEMOP_HLP_MIN_386();
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Not entirely sure how the operand size prefix is handled here,
-     *        assuming that it will be ignored. Would be nice to have a few
-     *        tests for this. */
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
-        {
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_FETCH_GREG_U16_SX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_FETCH_GREG_U16_SX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
-        {
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16_SX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16_SX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xc0. */
-FNIEMOP_DEF(iemOp_xadd_Eb_Gb)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_HLP_MIN_486();
-    IEMOP_MNEMONIC("xadd Eb,Gb");
-
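-    /* XADD exchanges the register operand with the destination and stores
-       the sum in the destination; the memory form honours the lock prefix. */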
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(3, 0);
-        IEM_MC_ARG(uint8_t *,  pu8Dst,  0);
-        IEM_MC_ARG(uint8_t *,  pu8Reg,  1);
-        IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_GREG_U8(pu8Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u8, pu8Dst, pu8Reg, pEFlags);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * We're accessing memory.
-         */
-        IEM_MC_BEGIN(3, 3);
-        IEM_MC_ARG(uint8_t *,   pu8Dst,          0);
-        IEM_MC_ARG(uint8_t *,   pu8Reg,          1);
-        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-        IEM_MC_LOCAL(uint8_t,  u8RegCopy);
-        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_FETCH_GREG_U8(u8RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_REF_LOCAL(pu8Reg, u8RegCopy);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-            IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u8, pu8Dst, pu8Reg, pEFlags);
-        else
-            IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u8_locked, pu8Dst, pu8Reg, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_STORE_GREG_U8(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u8RegCopy);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xc1. */
-FNIEMOP_DEF(iemOp_xadd_Ev_Gv)
-{
-    IEMOP_MNEMONIC("xadd Ev,Gv");
-    IEMOP_HLP_MIN_486();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *, pu16Dst,  0);
-                IEM_MC_ARG(uint16_t *, pu16Reg,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U16(pu16Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u16, pu16Dst, pu16Reg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *, pu32Dst,  0);
-                IEM_MC_ARG(uint32_t *, pu32Reg,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U32(pu32Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u32, pu32Dst, pu32Reg, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Reg);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *, pu64Dst,  0);
-                IEM_MC_ARG(uint64_t *, pu64Reg,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U64(pu64Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u64, pu64Dst, pu64Reg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /*
-         * We're accessing memory.
-         */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 3);
-                IEM_MC_ARG(uint16_t *,  pu16Dst,         0);
-                IEM_MC_ARG(uint16_t *,  pu16Reg,         1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(uint16_t,  u16RegCopy);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_GREG_U16(u16RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_LOCAL(pu16Reg, u16RegCopy);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u16, pu16Dst, pu16Reg, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u16_locked, pu16Dst, pu16Reg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16RegCopy);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 3);
-                IEM_MC_ARG(uint32_t *,  pu32Dst,         0);
-                IEM_MC_ARG(uint32_t *,  pu32Reg,         1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(uint32_t,  u32RegCopy);
-                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_GREG_U32(u32RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_LOCAL(pu32Reg, u32RegCopy);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u32, pu32Dst, pu32Reg, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u32_locked, pu32Dst, pu32Reg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32RegCopy);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 3);
-                IEM_MC_ARG(uint64_t *,  pu64Dst,         0);
-                IEM_MC_ARG(uint64_t *,  pu64Reg,         1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(uint64_t,  u64RegCopy);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_GREG_U64(u64RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_LOCAL(pu64Reg, u64RegCopy);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u64, pu64Dst, pu64Reg, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u64_locked, pu64Dst, pu64Reg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64RegCopy);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-/** Opcode 0x0f 0xc2. */
-FNIEMOP_STUB(iemOp_cmpps_Vps_Wps_Ib__cmppd_Vpd_Wpd_Ib__cmpss_Vss_Wss_Ib__cmpsd_Vsd_Wsd_Ib);
-
-
-/** Opcode 0x0f 0xc3. */
-FNIEMOP_DEF(iemOp_movnti_My_Gy)
-{
-    IEMOP_MNEMONIC("movnti My,Gy");
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /* Only the register -> memory form makes sense, assuming #UD for the other form. */
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
-                    return IEMOP_RAISE_INVALID_OPCODE();
-
-                IEM_MC_FETCH_GREG_U32(u32Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
-                    return IEMOP_RAISE_INVALID_OPCODE();
-
-                IEM_MC_FETCH_GREG_U64(u64Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_16BIT:
-                /** @todo check this form.   */
-                return IEMOP_RAISE_INVALID_OPCODE();
-        }
-    }
-    else
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xc4. */
-FNIEMOP_STUB(iemOp_pinsrw_Pq_Ry_Mw_Ib__pinsrw_Vdq_Ry_Mw_Ib);
-
-/** Opcode 0x0f 0xc5. */
-FNIEMOP_STUB(iemOp_pextrw_Gd_Nq_Ib__pextrw_Gd_Udq_Ib);
-
-/** Opcode 0x0f 0xc6. */
-FNIEMOP_STUB(iemOp_shufps_Vps_Wps_Ib__shufdp_Vpd_Wpd_Ib);
-
-
-/** Opcode 0x0f 0xc7 !11/1. */
-FNIEMOP_DEF_1(iemOp_Grp9_cmpxchg8b_Mq, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("cmpxchg8b Mq");
-
-    IEM_MC_BEGIN(4, 3);
-    IEM_MC_ARG(uint64_t *, pu64MemDst,     0);
-    IEM_MC_ARG(PRTUINT64U, pu64EaxEdx,     1);
-    IEM_MC_ARG(PRTUINT64U, pu64EbxEcx,     2);
-    IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 3);
-    IEM_MC_LOCAL(RTUINT64U, u64EaxEdx);
-    IEM_MC_LOCAL(RTUINT64U, u64EbxEcx);
-    IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING();
-    IEM_MC_MEM_MAP(pu64MemDst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-
-    IEM_MC_FETCH_GREG_U32(u64EaxEdx.s.Lo, X86_GREG_xAX);
-    IEM_MC_FETCH_GREG_U32(u64EaxEdx.s.Hi, X86_GREG_xDX);
-    IEM_MC_REF_LOCAL(pu64EaxEdx, u64EaxEdx);
-
-    IEM_MC_FETCH_GREG_U32(u64EbxEcx.s.Lo, X86_GREG_xBX);
-    IEM_MC_FETCH_GREG_U32(u64EbxEcx.s.Hi, X86_GREG_xCX);
-    IEM_MC_REF_LOCAL(pu64EbxEcx, u64EbxEcx);
-
-    IEM_MC_FETCH_EFLAGS(EFlags);
-    if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-        IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg8b, pu64MemDst, pu64EaxEdx, pu64EbxEcx, pEFlags);
-    else
-        IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg8b_locked, pu64MemDst, pu64EaxEdx, pu64EbxEcx, pEFlags);
-
-    IEM_MC_MEM_COMMIT_AND_UNMAP(pu64MemDst, IEM_ACCESS_DATA_RW);
-    IEM_MC_COMMIT_EFLAGS(EFlags);
-    IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_ZF)
-        /** @todo Testcase: Check effect of cmpxchg8b on bits 63:32 in rax and rdx. */
-        IEM_MC_STORE_GREG_U32(X86_GREG_xAX, u64EaxEdx.s.Lo);
-        IEM_MC_STORE_GREG_U32(X86_GREG_xDX, u64EaxEdx.s.Hi);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
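-
-/* For reference, the architectural CMPXCHG8B semantics emulated above (per
-   the Intel SDM; pseudocode only):
-       if (EDX:EAX == m64) { ZF = 1; m64 = ECX:EBX; }
-       else                { ZF = 0; EDX:EAX = m64; }
-   which matches the ZF-gated register write-back above; with a LOCK prefix
-   the compare-and-exchange is performed atomically. */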
-
-
-/** Opcode REX.W 0x0f 0xc7 !11/1. */
-FNIEMOP_DEF_1(iemOp_Grp9_cmpxchg16b_Mdq, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("cmpxchg16b Mdq");
-    if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fMovCmpXchg16b)
-    {
-        RT_NOREF(bRm);
-        IEMOP_BITCH_ABOUT_STUB();
-        return VERR_IEM_INSTR_NOT_IMPLEMENTED;
-    }
-    Log(("cmpxchg16b -> #UD\n"));
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
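-
-/* Note: CMPXCHG16B support is reported via CPUID.01H:ECX bit 13, presumably
-   what the fMovCmpXchg16b feature flag checked above reflects; the
-   instruction additionally requires a 16-byte aligned m128 operand. */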
-
-
-/** Opcode 0x0f 0xc7 11/6. */
-FNIEMOP_UD_STUB_1(iemOp_Grp9_rdrand_Rv, uint8_t, bRm);
-
-/** Opcode 0x0f 0xc7 !11/6. */
-FNIEMOP_UD_STUB_1(iemOp_Grp9_vmptrld_Mq, uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0xc7 !11/6. */
-FNIEMOP_UD_STUB_1(iemOp_Grp9_vmclear_Mq, uint8_t, bRm);
-
-/** Opcode 0xf3 0x0f 0xc7 !11/6. */
-FNIEMOP_UD_STUB_1(iemOp_Grp9_vmxon_Mq, uint8_t, bRm);
-
-/** Opcode [0xf3] 0x0f 0xc7 !11/7. */
-FNIEMOP_UD_STUB_1(iemOp_Grp9_vmptrst_Mq, uint8_t, bRm);
-
-
-/** Opcode 0x0f 0xc7. */
-FNIEMOP_DEF(iemOp_Grp9)
-{
-    /** @todo Testcase: Check mixing 0x66 and 0xf3. Check the effect of 0xf2. */
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: case 2: case 3: case 4: case 5:
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 1:
-            /** @todo Testcase: Check prefix effects on cmpxchg8b/16b. */
-            if (   (bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT)
-                || (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ))) /** @todo Testcase: AMD seems to express a different idea here wrt prefixes. */
-                return IEMOP_RAISE_INVALID_OPCODE();
-            if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                return FNIEMOP_CALL_1(iemOp_Grp9_cmpxchg16b_Mdq, bRm);
-            return FNIEMOP_CALL_1(iemOp_Grp9_cmpxchg8b_Mq, bRm);
-        case 6:
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp9_rdrand_Rv, bRm);
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ))
-            {
-                case 0:
-                    return FNIEMOP_CALL_1(iemOp_Grp9_vmptrld_Mq, bRm);
-                case IEM_OP_PRF_SIZE_OP:
-                    return FNIEMOP_CALL_1(iemOp_Grp9_vmclear_Mq, bRm);
-                case IEM_OP_PRF_REPZ:
-                    return FNIEMOP_CALL_1(iemOp_Grp9_vmxon_Mq, bRm);
-                default:
-                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 7:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ))
-            {
-                case 0:
-                case IEM_OP_PRF_REPZ:
-                    return FNIEMOP_CALL_1(iemOp_Grp9_vmptrst_Mq, bRm);
-                default:
-                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
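-
-/* Summary of the group 9 dispatch above (0x0f 0xc7, selected by ModRM.reg;
-   informal table for reference only):
-       /1 mem:  CMPXCHG8B, or CMPXCHG16B with REX.W
-       /6 reg:  RDRAND
-       /6 mem:  VMPTRLD (no prefix), VMCLEAR (0x66), VMXON (0xf3)
-       /7:      VMPTRST (no prefix or 0xf3)
-   All other encodings raise #UD. */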
-
-
-/**
- * Common 'bswap register' helper.
- */
-FNIEMOP_DEF_1(iemOpCommonBswapGReg, uint8_t, iReg)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(1, 0);
-            IEM_MC_ARG(uint32_t *,  pu32Dst, 0);
-            IEM_MC_REF_GREG_U32(pu32Dst, iReg);     /* Don't clear the high dword! */
-            IEM_MC_CALL_VOID_AIMPL_1(iemAImpl_bswap_u16, pu32Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(1, 0);
-            IEM_MC_ARG(uint32_t *,  pu32Dst, 0);
-            IEM_MC_REF_GREG_U32(pu32Dst, iReg);
-            IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-            IEM_MC_CALL_VOID_AIMPL_1(iemAImpl_bswap_u32, pu32Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(1, 0);
-            IEM_MC_ARG(uint64_t *,  pu64Dst, 0);
-            IEM_MC_REF_GREG_U64(pu64Dst, iReg);
-            IEM_MC_CALL_VOID_AIMPL_1(iemAImpl_bswap_u64, pu64Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
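-
-/* For reference: BSWAP reverses the byte order of the destination register.
-   With a 16-bit operand size the result is undefined per the Intel SDM,
-   which is why the helper above just hands the low dword to the 16-bit
-   worker without clearing the high dword. */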
-
-
-/** Opcode 0x0f 0xc8. */
-FNIEMOP_DEF(iemOp_bswap_rAX_r8)
-{
-    IEMOP_MNEMONIC("bswap rAX/r8");
-    /* Note! The Intel manual states that R8-R15 can be accessed by using a
-             REX.X prefix, but REX.B appears to be the correct one.  For a
-             parallel case, see iemOp_mov_AL_Ib and iemOp_mov_eAX_Iv. */
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xAX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xc9. */
-FNIEMOP_DEF(iemOp_bswap_rCX_r9)
-{
-    IEMOP_MNEMONIC("bswap rCX/r9");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xCX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xca. */
-FNIEMOP_DEF(iemOp_bswap_rDX_r10)
-{
-    IEMOP_MNEMONIC("bswap rDX/r9");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xDX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xcb. */
-FNIEMOP_DEF(iemOp_bswap_rBX_r11)
-{
-    IEMOP_MNEMONIC("bswap rBX/r9");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xBX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xcc. */
-FNIEMOP_DEF(iemOp_bswap_rSP_r12)
-{
-    IEMOP_MNEMONIC("bswap rSP/r12");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xSP | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xcd. */
-FNIEMOP_DEF(iemOp_bswap_rBP_r13)
-{
-    IEMOP_MNEMONIC("bswap rBP/r13");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xBP | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xce. */
-FNIEMOP_DEF(iemOp_bswap_rSI_r14)
-{
-    IEMOP_MNEMONIC("bswap rSI/r14");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xSI | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xcf. */
-FNIEMOP_DEF(iemOp_bswap_rDI_r15)
-{
-    IEMOP_MNEMONIC("bswap rDI/r15");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xDI | pVCpu->iem.s.uRexB);
-}
-
-
-
-/** Opcode 0x0f 0xd0. */
-FNIEMOP_STUB(iemOp_addsubpd_Vpd_Wpd__addsubps_Vps_Wps);
-/** Opcode 0x0f 0xd1. */
-FNIEMOP_STUB(iemOp_psrlw_Pp_Qp__psrlw_Vdp_Wdq);
-/** Opcode 0x0f 0xd2. */
-FNIEMOP_STUB(iemOp_psrld_Pq_Qq__psrld_Vdq_Wdq);
-/** Opcode 0x0f 0xd3. */
-FNIEMOP_STUB(iemOp_psrlq_Pq_Qq__psrlq_Vdq_Wdq);
-/** Opcode 0x0f 0xd4. */
-FNIEMOP_STUB(iemOp_paddq_Pq_Qq__paddq_Vdq_Wdq);
-/** Opcode 0x0f 0xd5. */
-FNIEMOP_STUB(iemOp_pmulq_Pq_Qq__pmullw_Vdq_Wdq);
-/** Opcode 0x0f 0xd6. */
-FNIEMOP_STUB(iemOp_movq_Wq_Vq__movq2dq_Vdq_Nq__movdq2q_Pq_Uq);
-
-
-/** Opcode 0x0f 0xd7. */
-FNIEMOP_DEF(iemOp_pmovmskb_Gd_Nq__pmovmskb_Gd_Udq)
-{
-    /* The docs say this is register-only. */
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT)) /** @todo test that this is registers only. */
-        return IEMOP_RAISE_INVALID_OPCODE();
-
-    /* Note! Taking the lazy approach here wrt the high 32 bits of the GREG. */
-    /** @todo testcase: Check that the instruction implicitly clears the high
-     *        bits in 64-bit mode.  REX.W only becomes necessary once VLMAX > 256
-     *        and the opcode is modified to work with the whole width (not
-     *        just 128 bits). */
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            IEMOP_MNEMONIC("pmovmskb Gd,Nq");
-            IEMOP_HLP_DECODED_NL_2(OP_PMOVMSKB, IEMOPFORM_RM_REG, OP_PARM_Gd, OP_PARM_Vdq, DISOPTYPE_SSE | DISOPTYPE_HARMLESS);
-            IEM_MC_BEGIN(2, 0);
-            IEM_MC_ARG(uint64_t *,           pDst, 0);
-            IEM_MC_ARG(uint128_t const *,    pSrc, 1);
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-            IEM_MC_PREPARE_SSE_USAGE();
-            IEM_MC_REF_GREG_U64(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-            IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_pmovmskb_u128, pDst, pSrc);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            IEMOP_MNEMONIC("pmovmskb Gd,Udq");
-            IEMOP_HLP_DECODED_NL_2(OP_PMOVMSKB, IEMOPFORM_RM_REG, OP_PARM_Gd, OP_PARM_Vdq, DISOPTYPE_MMX | DISOPTYPE_HARMLESS);
-            IEM_MC_BEGIN(2, 0);
-            IEM_MC_ARG(uint64_t *,          pDst, 0);
-            IEM_MC_ARG(uint64_t const *,    pSrc, 1);
-            IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT();
-            IEM_MC_PREPARE_FPU_USAGE();
-            IEM_MC_REF_GREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-            IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
-            IEM_MC_CALL_MMX_AIMPL_2(iemAImpl_pmovmskb_u64, pDst, pSrc);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
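-
-/* For reference, PMOVMSKB gathers the most significant bit of each packed
-   byte into the low bits of the destination GPR, zeroing the rest; for the
-   SSE form roughly (SDM-style pseudocode, not this file's macro language):
-       for (i = 0; i < 16; i++)
-           DEST.bit[i] = SRC.byte[i].bit[7];
-       DEST upper bits = 0;
-   The MMX form does the same over 8 bytes. */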
-
-
-/** Opcode 0x0f 0xd8. */
-FNIEMOP_STUB(iemOp_psubusb_Pq_Qq__psubusb_Vdq_Wdq);
-/** Opcode 0x0f 0xd9. */
-FNIEMOP_STUB(iemOp_psubusw_Pq_Qq__psubusw_Vdq_Wdq);
-/** Opcode 0x0f 0xda. */
-FNIEMOP_STUB(iemOp_pminub_Pq_Qq__pminub_Vdq_Wdq);
-/** Opcode 0x0f 0xdb. */
-FNIEMOP_STUB(iemOp_pand_Pq_Qq__pand_Vdq_Wdq);
-/** Opcode 0x0f 0xdc. */
-FNIEMOP_STUB(iemOp_paddusb_Pq_Qq__paddusb_Vdq_Wdq);
-/** Opcode 0x0f 0xdd. */
-FNIEMOP_STUB(iemOp_paddusw_Pq_Qq__paddusw_Vdq_Wdq);
-/** Opcode 0x0f 0xde. */
-FNIEMOP_STUB(iemOp_pmaxub_Pq_Qq__pamxub_Vdq_Wdq);
-/** Opcode 0x0f 0xdf. */
-FNIEMOP_STUB(iemOp_pandn_Pq_Qq__pandn_Vdq_Wdq);
-/** Opcode 0x0f 0xe0. */
-FNIEMOP_STUB(iemOp_pavgb_Pq_Qq__pavgb_Vdq_Wdq);
-/** Opcode 0x0f 0xe1. */
-FNIEMOP_STUB(iemOp_psraw_Pq_Qq__psraw_Vdq_Wdq);
-/** Opcode 0x0f 0xe2. */
-FNIEMOP_STUB(iemOp_psrad_Pq_Qq__psrad_Vdq_Wdq);
-/** Opcode 0x0f 0xe3. */
-FNIEMOP_STUB(iemOp_pavgw_Pq_Qq__pavgw_Vdq_Wdq);
-/** Opcode 0x0f 0xe4. */
-FNIEMOP_STUB(iemOp_pmulhuw_Pq_Qq__pmulhuw_Vdq_Wdq);
-/** Opcode 0x0f 0xe5. */
-FNIEMOP_STUB(iemOp_pmulhw_Pq_Qq__pmulhw_Vdq_Wdq);
-/** Opcode 0x0f 0xe6. */
-FNIEMOP_STUB(iemOp_cvttpd2dq_Vdq_Wdp__cvtdq2pd_Vdq_Wpd__cvtpd2dq_Vdq_Wpd);
-
-
-/** Opcode 0x0f 0xe7. */
-FNIEMOP_DEF(iemOp_movntq_Mq_Pq__movntdq_Mdq_Vdq)
-{
-    IEMOP_MNEMONIC(!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? "movntq mr,r" : "movntdq mr,r");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /*
-         * Register, memory.
-         */
-/** @todo check when the REPNZ/Z bits kick in. Same as lock, probably... */
-        switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-        {
-
-            case IEM_OP_PRF_SIZE_OP: /* SSE */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint128_t,                 uSrc);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-                IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case 0: /* MMX */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t,                  uSrc);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-
-                IEM_MC_FETCH_MREG_U64(uSrc, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            default:
-                return IEMOP_RAISE_INVALID_OPCODE();
-        }
-    }
-    /* The register, register encoding is invalid. */
-    else
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return VINF_SUCCESS;
-}
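-
-/* Note: MOVNTQ/MOVNTDQ are non-temporal store hints; architecturally they
-   store like MOVQ/MOVDQA (MOVNTDQ requiring a 16-byte aligned operand), so
-   the emulation above performs plain stores and ignores the caching hint. */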
-
-
-/** Opcode 0x0f 0xe8. */
-FNIEMOP_STUB(iemOp_psubsb_Pq_Qq__psubsb_Vdq_Wdq);
-/** Opcode 0x0f 0xe9. */
-FNIEMOP_STUB(iemOp_psubsw_Pq_Qq__psubsw_Vdq_Wdq);
-/** Opcode 0x0f 0xea. */
-FNIEMOP_STUB(iemOp_pminsw_Pq_Qq__pminsw_Vdq_Wdq);
-/** Opcode 0x0f 0xeb. */
-FNIEMOP_STUB(iemOp_por_Pq_Qq__por_Vdq_Wdq);
-/** Opcode 0x0f 0xec. */
-FNIEMOP_STUB(iemOp_paddsb_Pq_Qq__paddsb_Vdq_Wdq);
-/** Opcode 0x0f 0xed. */
-FNIEMOP_STUB(iemOp_paddsw_Pq_Qq__paddsw_Vdq_Wdq);
-/** Opcode 0x0f 0xee. */
-FNIEMOP_STUB(iemOp_pmaxsw_Pq_Qq__pmaxsw_Vdq_Wdq);
-
-
-/** Opcode 0x0f 0xef. */
-FNIEMOP_DEF(iemOp_pxor_Pq_Qq__pxor_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("pxor");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse2_FullFull_To_Full, &g_iemAImpl_pxor);
-}
-
-
-/** Opcode 0x0f 0xf0. */
-FNIEMOP_STUB(iemOp_lddqu_Vdq_Mdq);
-/** Opcode 0x0f 0xf1. */
-FNIEMOP_STUB(iemOp_psllw_Pq_Qq__pslw_Vdq_Wdq);
-/** Opcode 0x0f 0xf2. */
-FNIEMOP_STUB(iemOp_psld_Pq_Qq__pslld_Vdq_Wdq);
-/** Opcode 0x0f 0xf3. */
-FNIEMOP_STUB(iemOp_psllq_Pq_Qq__pslq_Vdq_Wdq);
-/** Opcode 0x0f 0xf4. */
-FNIEMOP_STUB(iemOp_pmuludq_Pq_Qq__pmuludq_Vdq_Wdq);
-/** Opcode 0x0f 0xf5. */
-FNIEMOP_STUB(iemOp_pmaddwd_Pq_Qq__pmaddwd_Vdq_Wdq);
-/** Opcode 0x0f 0xf6. */
-FNIEMOP_STUB(iemOp_psadbw_Pq_Qq__psadbw_Vdq_Wdq);
-/** Opcode 0x0f 0xf7. */
-FNIEMOP_STUB(iemOp_maskmovq_Pq_Nq__maskmovdqu_Vdq_Udq);
-/** Opcode 0x0f 0xf8. */
-FNIEMOP_STUB(iemOp_psubb_Pq_Qq_psubb_Vdq_Wdq); //NEXT
-/** Opcode 0x0f 0xf9. */
-FNIEMOP_STUB(iemOp_psubw_Pq_Qq__psubw_Vdq_Wdq);
-/** Opcode 0x0f 0xfa. */
-FNIEMOP_STUB(iemOp_psubd_Pq_Qq__psubd_Vdq_Wdq);
-/** Opcode 0x0f 0xfb. */
-FNIEMOP_STUB(iemOp_psubq_Pq_Qq__psbuq_Vdq_Wdq);
-/** Opcode 0x0f 0xfc. */
-FNIEMOP_STUB(iemOp_paddb_Pq_Qq__paddb_Vdq_Wdq);
-/** Opcode 0x0f 0xfd. */
-FNIEMOP_STUB(iemOp_paddw_Pq_Qq__paddw_Vdq_Wdq);
-/** Opcode 0x0f 0xfe. */
-FNIEMOP_STUB(iemOp_paddd_Pq_Qq__paddd_Vdq_Wdq);
-
-
-IEM_STATIC const PFNIEMOP g_apfnTwoByteMap[256] =
-{
-    /* 0x00 */  iemOp_Grp6,
-    /* 0x01 */  iemOp_Grp7,
-    /* 0x02 */  iemOp_lar_Gv_Ew,
-    /* 0x03 */  iemOp_lsl_Gv_Ew,
-    /* 0x04 */  iemOp_Invalid,
-    /* 0x05 */  iemOp_syscall,
-    /* 0x06 */  iemOp_clts,
-    /* 0x07 */  iemOp_sysret,
-    /* 0x08 */  iemOp_invd,
-    /* 0x09 */  iemOp_wbinvd,
-    /* 0x0a */  iemOp_Invalid,
-    /* 0x0b */  iemOp_ud2,
-    /* 0x0c */  iemOp_Invalid,
-    /* 0x0d */  iemOp_nop_Ev_GrpP,
-    /* 0x0e */  iemOp_femms,
-    /* 0x0f */  iemOp_3Dnow,
-    /* 0x10 */  iemOp_movups_Vps_Wps__movupd_Vpd_Wpd__movss_Vss_Wss__movsd_Vsd_Wsd,
-    /* 0x11 */  iemOp_movups_Wps_Vps__movupd_Wpd_Vpd__movss_Wss_Vss__movsd_Vsd_Wsd,
-    /* 0x12 */  iemOp_movlps_Vq_Mq__movhlps_Vq_Uq__movlpd_Vq_Mq__movsldup_Vq_Wq__movddup_Vq_Wq,
-    /* 0x13 */  iemOp_movlps_Mq_Vq__movlpd_Mq_Vq,
-    /* 0x14 */  iemOp_unpckhlps_Vps_Wq__unpcklpd_Vpd_Wq,
-    /* 0x15 */  iemOp_unpckhps_Vps_Wq__unpckhpd_Vpd_Wq,
-    /* 0x16 */  iemOp_movhps_Vq_Mq__movlhps_Vq_Uq__movhpd_Vq_Mq__movshdup_Vq_Wq,
-    /* 0x17 */  iemOp_movhps_Mq_Vq__movhpd_Mq_Vq,
-    /* 0x18 */  iemOp_prefetch_Grp16,
-    /* 0x19 */  iemOp_nop_Ev,
-    /* 0x1a */  iemOp_nop_Ev,
-    /* 0x1b */  iemOp_nop_Ev,
-    /* 0x1c */  iemOp_nop_Ev,
-    /* 0x1d */  iemOp_nop_Ev,
-    /* 0x1e */  iemOp_nop_Ev,
-    /* 0x1f */  iemOp_nop_Ev,
-    /* 0x20 */  iemOp_mov_Rd_Cd,
-    /* 0x21 */  iemOp_mov_Rd_Dd,
-    /* 0x22 */  iemOp_mov_Cd_Rd,
-    /* 0x23 */  iemOp_mov_Dd_Rd,
-    /* 0x24 */  iemOp_mov_Rd_Td,
-    /* 0x25 */  iemOp_Invalid,
-    /* 0x26 */  iemOp_mov_Td_Rd,
-    /* 0x27 */  iemOp_Invalid,
-    /* 0x28 */  iemOp_movaps_Vps_Wps__movapd_Vpd_Wpd,
-    /* 0x29 */  iemOp_movaps_Wps_Vps__movapd_Wpd_Vpd,
-    /* 0x2a */  iemOp_cvtpi2ps_Vps_Qpi__cvtpi2pd_Vpd_Qpi__cvtsi2ss_Vss_Ey__cvtsi2sd_Vsd_Ey,
-    /* 0x2b */  iemOp_movntps_Mps_Vps__movntpd_Mpd_Vpd,
-    /* 0x2c */  iemOp_cvttps2pi_Ppi_Wps__cvttpd2pi_Ppi_Wpd__cvttss2si_Gy_Wss__cvttsd2si_Yu_Wsd,
-    /* 0x2d */  iemOp_cvtps2pi_Ppi_Wps__cvtpd2pi_QpiWpd__cvtss2si_Gy_Wss__cvtsd2si_Gy_Wsd,
-    /* 0x2e */  iemOp_ucomiss_Vss_Wss__ucomisd_Vsd_Wsd,
-    /* 0x2f */  iemOp_comiss_Vss_Wss__comisd_Vsd_Wsd,
-    /* 0x30 */  iemOp_wrmsr,
-    /* 0x31 */  iemOp_rdtsc,
-    /* 0x32 */  iemOp_rdmsr,
-    /* 0x33 */  iemOp_rdpmc,
-    /* 0x34 */  iemOp_sysenter,
-    /* 0x35 */  iemOp_sysexit,
-    /* 0x36 */  iemOp_Invalid,
-    /* 0x37 */  iemOp_getsec,
-    /* 0x38 */  iemOp_3byte_Esc_A4,
-    /* 0x39 */  iemOp_Invalid,
-    /* 0x3a */  iemOp_3byte_Esc_A5,
-    /* 0x3b */  iemOp_Invalid,
-    /* 0x3c */  iemOp_Invalid,
-    /* 0x3d */  iemOp_Invalid,
-    /* 0x3e */  iemOp_Invalid,
-    /* 0x3f */  iemOp_Invalid,
-    /* 0x40 */  iemOp_cmovo_Gv_Ev,
-    /* 0x41 */  iemOp_cmovno_Gv_Ev,
-    /* 0x42 */  iemOp_cmovc_Gv_Ev,
-    /* 0x43 */  iemOp_cmovnc_Gv_Ev,
-    /* 0x44 */  iemOp_cmove_Gv_Ev,
-    /* 0x45 */  iemOp_cmovne_Gv_Ev,
-    /* 0x46 */  iemOp_cmovbe_Gv_Ev,
-    /* 0x47 */  iemOp_cmovnbe_Gv_Ev,
-    /* 0x48 */  iemOp_cmovs_Gv_Ev,
-    /* 0x49 */  iemOp_cmovns_Gv_Ev,
-    /* 0x4a */  iemOp_cmovp_Gv_Ev,
-    /* 0x4b */  iemOp_cmovnp_Gv_Ev,
-    /* 0x4c */  iemOp_cmovl_Gv_Ev,
-    /* 0x4d */  iemOp_cmovnl_Gv_Ev,
-    /* 0x4e */  iemOp_cmovle_Gv_Ev,
-    /* 0x4f */  iemOp_cmovnle_Gv_Ev,
-    /* 0x50 */  iemOp_movmskps_Gy_Ups__movmskpd_Gy_Upd,
-    /* 0x51 */  iemOp_sqrtps_Wps_Vps__sqrtpd_Wpd_Vpd__sqrtss_Vss_Wss__sqrtsd_Vsd_Wsd,
-    /* 0x52 */  iemOp_rsqrtps_Wps_Vps__rsqrtss_Vss_Wss,
-    /* 0x53 */  iemOp_rcpps_Wps_Vps__rcpss_Vs_Wss,
-    /* 0x54 */  iemOp_andps_Vps_Wps__andpd_Wpd_Vpd,
-    /* 0x55 */  iemOp_andnps_Vps_Wps__andnpd_Wpd_Vpd,
-    /* 0x56 */  iemOp_orps_Wpd_Vpd__orpd_Wpd_Vpd,
-    /* 0x57 */  iemOp_xorps_Vps_Wps__xorpd_Wpd_Vpd,
-    /* 0x58 */  iemOp_addps_Vps_Wps__addpd_Vpd_Wpd__addss_Vss_Wss__addsd_Vsd_Wsd,
-    /* 0x59 */  iemOp_mulps_Vps_Wps__mulpd_Vpd_Wpd__mulss_Vss__Wss__mulsd_Vsd_Wsd,
-    /* 0x5a */  iemOp_cvtps2pd_Vpd_Wps__cvtpd2ps_Vps_Wpd__cvtss2sd_Vsd_Wss__cvtsd2ss_Vss_Wsd,
-    /* 0x5b */  iemOp_cvtdq2ps_Vps_Wdq__cvtps2dq_Vdq_Wps__cvtps2dq_Vdq_Wps,
-    /* 0x5c */  iemOp_subps_Vps_Wps__subpd_Vps_Wdp__subss_Vss_Wss__subsd_Vsd_Wsd,
-    /* 0x5d */  iemOp_minps_Vps_Wps__minpd_Vpd_Wpd__minss_Vss_Wss__minsd_Vsd_Wsd,
-    /* 0x5e */  iemOp_divps_Vps_Wps__divpd_Vpd_Wpd__divss_Vss_Wss__divsd_Vsd_Wsd,
-    /* 0x5f */  iemOp_maxps_Vps_Wps__maxpd_Vpd_Wpd__maxss_Vss_Wss__maxsd_Vsd_Wsd,
-    /* 0x60 */  iemOp_punpcklbw_Pq_Qd__punpcklbw_Vdq_Wdq,
-    /* 0x61 */  iemOp_punpcklwd_Pq_Qd__punpcklwd_Vdq_Wdq,
-    /* 0x62 */  iemOp_punpckldq_Pq_Qd__punpckldq_Vdq_Wdq,
-    /* 0x63 */  iemOp_packsswb_Pq_Qq__packsswb_Vdq_Wdq,
-    /* 0x64 */  iemOp_pcmpgtb_Pq_Qq__pcmpgtb_Vdq_Wdq,
-    /* 0x65 */  iemOp_pcmpgtw_Pq_Qq__pcmpgtw_Vdq_Wdq,
-    /* 0x66 */  iemOp_pcmpgtd_Pq_Qq__pcmpgtd_Vdq_Wdq,
-    /* 0x67 */  iemOp_packuswb_Pq_Qq__packuswb_Vdq_Wdq,
-    /* 0x68 */  iemOp_punpckhbw_Pq_Qq__punpckhbw_Vdq_Wdq,
-    /* 0x69 */  iemOp_punpckhwd_Pq_Qd__punpckhwd_Vdq_Wdq,
-    /* 0x6a */  iemOp_punpckhdq_Pq_Qd__punpckhdq_Vdq_Wdq,
-    /* 0x6b */  iemOp_packssdw_Pq_Qd__packssdq_Vdq_Wdq,
-    /* 0x6c */  iemOp_punpcklqdq_Vdq_Wdq,
-    /* 0x6d */  iemOp_punpckhqdq_Vdq_Wdq,
-    /* 0x6e */  iemOp_movd_q_Pd_Ey__movd_q_Vy_Ey,
-    /* 0x6f */  iemOp_movq_Pq_Qq__movdqa_Vdq_Wdq__movdqu_Vdq_Wdq,
-    /* 0x70 */  iemOp_pshufw_Pq_Qq_Ib__pshufd_Vdq_Wdq_Ib__pshufhw_Vdq_Wdq_Ib__pshuflq_Vdq_Wdq_Ib,
-    /* 0x71 */  iemOp_Grp12,
-    /* 0x72 */  iemOp_Grp13,
-    /* 0x73 */  iemOp_Grp14,
-    /* 0x74 */  iemOp_pcmpeqb_Pq_Qq__pcmpeqb_Vdq_Wdq,
-    /* 0x75 */  iemOp_pcmpeqw_Pq_Qq__pcmpeqw_Vdq_Wdq,
-    /* 0x76 */  iemOp_pcmped_Pq_Qq__pcmpeqd_Vdq_Wdq,
-    /* 0x77 */  iemOp_emms,
-    /* 0x78 */  iemOp_vmread_AmdGrp17,
-    /* 0x79 */  iemOp_vmwrite,
-    /* 0x7a */  iemOp_Invalid,
-    /* 0x7b */  iemOp_Invalid,
-    /* 0x7c */  iemOp_haddpd_Vdp_Wpd__haddps_Vps_Wps,
-    /* 0x7d */  iemOp_hsubpd_Vpd_Wpd__hsubps_Vps_Wps,
-    /* 0x7e */  iemOp_movd_q_Ey_Pd__movd_q_Ey_Vy__movq_Vq_Wq,
-    /* 0x7f */  iemOp_movq_Qq_Pq__movq_movdqa_Wdq_Vdq__movdqu_Wdq_Vdq,
-    /* 0x80 */  iemOp_jo_Jv,
-    /* 0x81 */  iemOp_jno_Jv,
-    /* 0x82 */  iemOp_jc_Jv,
-    /* 0x83 */  iemOp_jnc_Jv,
-    /* 0x84 */  iemOp_je_Jv,
-    /* 0x85 */  iemOp_jne_Jv,
-    /* 0x86 */  iemOp_jbe_Jv,
-    /* 0x87 */  iemOp_jnbe_Jv,
-    /* 0x88 */  iemOp_js_Jv,
-    /* 0x89 */  iemOp_jns_Jv,
-    /* 0x8a */  iemOp_jp_Jv,
-    /* 0x8b */  iemOp_jnp_Jv,
-    /* 0x8c */  iemOp_jl_Jv,
-    /* 0x8d */  iemOp_jnl_Jv,
-    /* 0x8e */  iemOp_jle_Jv,
-    /* 0x8f */  iemOp_jnle_Jv,
-    /* 0x90 */  iemOp_seto_Eb,
-    /* 0x91 */  iemOp_setno_Eb,
-    /* 0x92 */  iemOp_setc_Eb,
-    /* 0x93 */  iemOp_setnc_Eb,
-    /* 0x94 */  iemOp_sete_Eb,
-    /* 0x95 */  iemOp_setne_Eb,
-    /* 0x96 */  iemOp_setbe_Eb,
-    /* 0x97 */  iemOp_setnbe_Eb,
-    /* 0x98 */  iemOp_sets_Eb,
-    /* 0x99 */  iemOp_setns_Eb,
-    /* 0x9a */  iemOp_setp_Eb,
-    /* 0x9b */  iemOp_setnp_Eb,
-    /* 0x9c */  iemOp_setl_Eb,
-    /* 0x9d */  iemOp_setnl_Eb,
-    /* 0x9e */  iemOp_setle_Eb,
-    /* 0x9f */  iemOp_setnle_Eb,
-    /* 0xa0 */  iemOp_push_fs,
-    /* 0xa1 */  iemOp_pop_fs,
-    /* 0xa2 */  iemOp_cpuid,
-    /* 0xa3 */  iemOp_bt_Ev_Gv,
-    /* 0xa4 */  iemOp_shld_Ev_Gv_Ib,
-    /* 0xa5 */  iemOp_shld_Ev_Gv_CL,
-    /* 0xa6 */  iemOp_Invalid,
-    /* 0xa7 */  iemOp_Invalid,
-    /* 0xa8 */  iemOp_push_gs,
-    /* 0xa9 */  iemOp_pop_gs,
-    /* 0xaa */  iemOp_rsm,
-    /* 0xab */  iemOp_bts_Ev_Gv,
-    /* 0xac */  iemOp_shrd_Ev_Gv_Ib,
-    /* 0xad */  iemOp_shrd_Ev_Gv_CL,
-    /* 0xae */  iemOp_Grp15,
-    /* 0xaf */  iemOp_imul_Gv_Ev,
-    /* 0xb0 */  iemOp_cmpxchg_Eb_Gb,
-    /* 0xb1 */  iemOp_cmpxchg_Ev_Gv,
-    /* 0xb2 */  iemOp_lss_Gv_Mp,
-    /* 0xb3 */  iemOp_btr_Ev_Gv,
-    /* 0xb4 */  iemOp_lfs_Gv_Mp,
-    /* 0xb5 */  iemOp_lgs_Gv_Mp,
-    /* 0xb6 */  iemOp_movzx_Gv_Eb,
-    /* 0xb7 */  iemOp_movzx_Gv_Ew,
-    /* 0xb8 */  iemOp_popcnt_Gv_Ev_jmpe,
-    /* 0xb9 */  iemOp_Grp10,
-    /* 0xba */  iemOp_Grp8,
    /* 0xbb */  iemOp_btc_Ev_Gv,
-    /* 0xbc */  iemOp_bsf_Gv_Ev,
-    /* 0xbd */  iemOp_bsr_Gv_Ev,
-    /* 0xbe */  iemOp_movsx_Gv_Eb,
-    /* 0xbf */  iemOp_movsx_Gv_Ew,
-    /* 0xc0 */  iemOp_xadd_Eb_Gb,
-    /* 0xc1 */  iemOp_xadd_Ev_Gv,
-    /* 0xc2 */  iemOp_cmpps_Vps_Wps_Ib__cmppd_Vpd_Wpd_Ib__cmpss_Vss_Wss_Ib__cmpsd_Vsd_Wsd_Ib,
-    /* 0xc3 */  iemOp_movnti_My_Gy,
-    /* 0xc4 */  iemOp_pinsrw_Pq_Ry_Mw_Ib__pinsrw_Vdq_Ry_Mw_Ib,
-    /* 0xc5 */  iemOp_pextrw_Gd_Nq_Ib__pextrw_Gd_Udq_Ib,
-    /* 0xc6 */  iemOp_shufps_Vps_Wps_Ib__shufdp_Vpd_Wpd_Ib,
-    /* 0xc7 */  iemOp_Grp9,
-    /* 0xc8 */  iemOp_bswap_rAX_r8,
-    /* 0xc9 */  iemOp_bswap_rCX_r9,
-    /* 0xca */  iemOp_bswap_rDX_r10,
-    /* 0xcb */  iemOp_bswap_rBX_r11,
-    /* 0xcc */  iemOp_bswap_rSP_r12,
-    /* 0xcd */  iemOp_bswap_rBP_r13,
-    /* 0xce */  iemOp_bswap_rSI_r14,
-    /* 0xcf */  iemOp_bswap_rDI_r15,
-    /* 0xd0 */  iemOp_addsubpd_Vpd_Wpd__addsubps_Vps_Wps,
-    /* 0xd1 */  iemOp_psrlw_Pp_Qp__psrlw_Vdp_Wdq,
-    /* 0xd2 */  iemOp_psrld_Pq_Qq__psrld_Vdq_Wdq,
-    /* 0xd3 */  iemOp_psrlq_Pq_Qq__psrlq_Vdq_Wdq,
-    /* 0xd4 */  iemOp_paddq_Pq_Qq__paddq_Vdq_Wdq,
-    /* 0xd5 */  iemOp_pmulq_Pq_Qq__pmullw_Vdq_Wdq,
-    /* 0xd6 */  iemOp_movq_Wq_Vq__movq2dq_Vdq_Nq__movdq2q_Pq_Uq,
-    /* 0xd7 */  iemOp_pmovmskb_Gd_Nq__pmovmskb_Gd_Udq,
-    /* 0xd8 */  iemOp_psubusb_Pq_Qq__psubusb_Vdq_Wdq,
-    /* 0xd9 */  iemOp_psubusw_Pq_Qq__psubusw_Vdq_Wdq,
-    /* 0xda */  iemOp_pminub_Pq_Qq__pminub_Vdq_Wdq,
-    /* 0xdb */  iemOp_pand_Pq_Qq__pand_Vdq_Wdq,
-    /* 0xdc */  iemOp_paddusb_Pq_Qq__paddusb_Vdq_Wdq,
-    /* 0xdd */  iemOp_paddusw_Pq_Qq__paddusw_Vdq_Wdq,
-    /* 0xde */  iemOp_pmaxub_Pq_Qq__pamxub_Vdq_Wdq,
-    /* 0xdf */  iemOp_pandn_Pq_Qq__pandn_Vdq_Wdq,
-    /* 0xe0 */  iemOp_pavgb_Pq_Qq__pavgb_Vdq_Wdq,
-    /* 0xe1 */  iemOp_psraw_Pq_Qq__psraw_Vdq_Wdq,
-    /* 0xe2 */  iemOp_psrad_Pq_Qq__psrad_Vdq_Wdq,
-    /* 0xe3 */  iemOp_pavgw_Pq_Qq__pavgw_Vdq_Wdq,
-    /* 0xe4 */  iemOp_pmulhuw_Pq_Qq__pmulhuw_Vdq_Wdq,
-    /* 0xe5 */  iemOp_pmulhw_Pq_Qq__pmulhw_Vdq_Wdq,
-    /* 0xe6 */  iemOp_cvttpd2dq_Vdq_Wdp__cvtdq2pd_Vdq_Wpd__cvtpd2dq_Vdq_Wpd,
-    /* 0xe7 */  iemOp_movntq_Mq_Pq__movntdq_Mdq_Vdq,
-    /* 0xe8 */  iemOp_psubsb_Pq_Qq__psubsb_Vdq_Wdq,
-    /* 0xe9 */  iemOp_psubsw_Pq_Qq__psubsw_Vdq_Wdq,
-    /* 0xea */  iemOp_pminsw_Pq_Qq__pminsw_Vdq_Wdq,
-    /* 0xeb */  iemOp_por_Pq_Qq__por_Vdq_Wdq,
-    /* 0xec */  iemOp_paddsb_Pq_Qq__paddsb_Vdq_Wdq,
-    /* 0xed */  iemOp_paddsw_Pq_Qq__paddsw_Vdq_Wdq,
-    /* 0xee */  iemOp_pmaxsw_Pq_Qq__pmaxsw_Vdq_Wdq,
-    /* 0xef */  iemOp_pxor_Pq_Qq__pxor_Vdq_Wdq,
-    /* 0xf0 */  iemOp_lddqu_Vdq_Mdq,
-    /* 0xf1 */  iemOp_psllw_Pq_Qq__pslw_Vdq_Wdq,
-    /* 0xf2 */  iemOp_psld_Pq_Qq__pslld_Vdq_Wdq,
-    /* 0xf3 */  iemOp_psllq_Pq_Qq__pslq_Vdq_Wdq,
-    /* 0xf4 */  iemOp_pmuludq_Pq_Qq__pmuludq_Vdq_Wdq,
-    /* 0xf5 */  iemOp_pmaddwd_Pq_Qq__pmaddwd_Vdq_Wdq,
-    /* 0xf6 */  iemOp_psadbw_Pq_Qq__psadbw_Vdq_Wdq,
-    /* 0xf7 */  iemOp_maskmovq_Pq_Nq__maskmovdqu_Vdq_Udq,
-    /* 0xf8 */  iemOp_psubb_Pq_Qq_psubb_Vdq_Wdq,
-    /* 0xf9 */  iemOp_psubw_Pq_Qq__psubw_Vdq_Wdq,
-    /* 0xfa */  iemOp_psubd_Pq_Qq__psubd_Vdq_Wdq,
-    /* 0xfb */  iemOp_psubq_Pq_Qq__psbuq_Vdq_Wdq,
-    /* 0xfc */  iemOp_paddb_Pq_Qq__paddb_Vdq_Wdq,
-    /* 0xfd */  iemOp_paddw_Pq_Qq__paddw_Vdq_Wdq,
-    /* 0xfe */  iemOp_paddd_Pq_Qq__paddd_Vdq_Wdq,
-    /* 0xff */  iemOp_Invalid
-};
-
-/** @}  */
-
-
-/** @name One byte opcodes.
- *
- * @{
- */
-
-/** Opcode 0x00. */
-FNIEMOP_DEF(iemOp_add_Eb_Gb)
-{
-    IEMOP_MNEMONIC("add Eb,Gb");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_add);
-}
-
-
-/** Opcode 0x01. */
-FNIEMOP_DEF(iemOp_add_Ev_Gv)
-{
-    IEMOP_MNEMONIC("add Ev,Gv");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_add);
-}
-
-
-/** Opcode 0x02. */
-FNIEMOP_DEF(iemOp_add_Gb_Eb)
-{
-    IEMOP_MNEMONIC("add Gb,Eb");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_add);
-}
-
-
-/** Opcode 0x03. */
-FNIEMOP_DEF(iemOp_add_Gv_Ev)
-{
-    IEMOP_MNEMONIC("add Gv,Ev");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_add);
-}
-
-
-/** Opcode 0x04. */
-FNIEMOP_DEF(iemOp_add_Al_Ib)
-{
-    IEMOP_MNEMONIC("add al,Ib");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_add);
-}
-
-
-/** Opcode 0x05. */
-FNIEMOP_DEF(iemOp_add_eAX_Iz)
-{
-    IEMOP_MNEMONIC("add rAX,Iz");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_add);
-}
-
-
-/** Opcode 0x06. */
-FNIEMOP_DEF(iemOp_push_ES)
-{
-    IEMOP_MNEMONIC("push es");
-    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_ES);
-}
-
-
-/** Opcode 0x07. */
-FNIEMOP_DEF(iemOp_pop_ES)
-{
-    IEMOP_MNEMONIC("pop es");
-    IEMOP_HLP_NO_64BIT();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_ES, pVCpu->iem.s.enmEffOpSize);
-}
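-
-/* Note: the legacy segment-register push/pop opcodes (0x06/0x07, 0x0e,
-   0x16/0x17, 0x1e/0x1f) are invalid in 64-bit mode, hence the
-   IEMOP_HLP_NO_64BIT check in the pop handlers here; the common push helper
-   presumably applies the equivalent check.  FS/GS remain pushable/poppable
-   via 0x0f 0xa0/0xa1 and 0x0f 0xa8/0xa9. */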
-
-
-/** Opcode 0x08. */
-FNIEMOP_DEF(iemOp_or_Eb_Gb)
-{
-    IEMOP_MNEMONIC("or  Eb,Gb");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_or);
-}
-
-
-/** Opcode 0x09. */
-FNIEMOP_DEF(iemOp_or_Ev_Gv)
-{
-    IEMOP_MNEMONIC("or  Ev,Gv ");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_or);
-}
-
-
-/** Opcode 0x0a. */
-FNIEMOP_DEF(iemOp_or_Gb_Eb)
-{
-    IEMOP_MNEMONIC("or  Gb,Eb");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_or);
-}
-
-
-/** Opcode 0x0b. */
-FNIEMOP_DEF(iemOp_or_Gv_Ev)
-{
-    IEMOP_MNEMONIC("or  Gv,Ev");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_or);
-}
-
-
-/** Opcode 0x0c. */
-FNIEMOP_DEF(iemOp_or_Al_Ib)
-{
-    IEMOP_MNEMONIC("or  al,Ib");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_or);
-}
-
-
-/** Opcode 0x0d. */
-FNIEMOP_DEF(iemOp_or_eAX_Iz)
-{
-    IEMOP_MNEMONIC("or  rAX,Iz");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_or);
-}
-
-
-/** Opcode 0x0e. */
-FNIEMOP_DEF(iemOp_push_CS)
-{
-    IEMOP_MNEMONIC("push cs");
-    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_CS);
-}
-
-
-/** Opcode 0x0f. */
-FNIEMOP_DEF(iemOp_2byteEscape)
-{
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    /** @todo PUSH CS on 8086, undefined on 80186. */
-    IEMOP_HLP_MIN_286();
-    return FNIEMOP_CALL(g_apfnTwoByteMap[b]);
-}
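-
-/* Dispatch sketch: after the 0x0f escape byte, the next opcode byte indexes
-   g_apfnTwoByteMap directly, e.g. 0x0f 0xc8 reaches iemOp_bswap_rAX_r8 via
-   g_apfnTwoByteMap[0xc8]; operand-size/repeat prefix selection (none, 0x66,
-   0xf3, 0xf2) is handled inside the individual handlers, not in the table. */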
-
-/** Opcode 0x10. */
-FNIEMOP_DEF(iemOp_adc_Eb_Gb)
-{
-    IEMOP_MNEMONIC("adc Eb,Gb");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_adc);
-}
-
-
-/** Opcode 0x11. */
-FNIEMOP_DEF(iemOp_adc_Ev_Gv)
-{
-    IEMOP_MNEMONIC("adc Ev,Gv");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_adc);
-}
-
-
-/** Opcode 0x12. */
-FNIEMOP_DEF(iemOp_adc_Gb_Eb)
-{
-    IEMOP_MNEMONIC("adc Gb,Eb");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_adc);
-}
-
-
-/** Opcode 0x13. */
-FNIEMOP_DEF(iemOp_adc_Gv_Ev)
-{
-    IEMOP_MNEMONIC("adc Gv,Ev");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_adc);
-}
-
-
-/** Opcode 0x14. */
-FNIEMOP_DEF(iemOp_adc_Al_Ib)
-{
-    IEMOP_MNEMONIC("adc al,Ib");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_adc);
-}
-
-
-/** Opcode 0x15. */
-FNIEMOP_DEF(iemOp_adc_eAX_Iz)
-{
-    IEMOP_MNEMONIC("adc rAX,Iz");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_adc);
-}
-
-
-/** Opcode 0x16. */
-FNIEMOP_DEF(iemOp_push_SS)
-{
-    IEMOP_MNEMONIC("push ss");
-    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_SS);
-}
-
-
-/** Opcode 0x17. */
-FNIEMOP_DEF(iemOp_pop_SS)
-{
-    IEMOP_MNEMONIC("pop ss"); /** @todo implies instruction fusing? */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_NO_64BIT();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_SS, pVCpu->iem.s.enmEffOpSize);
-}
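-
-/* Note: architecturally, POP SS (like MOV SS) inhibits interrupts and most
-   debug exceptions until the following instruction completes, which is
-   presumably what the "instruction fusing" question above alludes to. */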
-
-
-/** Opcode 0x18. */
-FNIEMOP_DEF(iemOp_sbb_Eb_Gb)
-{
-    IEMOP_MNEMONIC("sbb Eb,Gb");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_sbb);
-}
-
-
-/** Opcode 0x19. */
-FNIEMOP_DEF(iemOp_sbb_Ev_Gv)
-{
-    IEMOP_MNEMONIC("sbb Ev,Gv");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_sbb);
-}
-
-
-/** Opcode 0x1a. */
-FNIEMOP_DEF(iemOp_sbb_Gb_Eb)
-{
-    IEMOP_MNEMONIC("sbb Gb,Eb");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_sbb);
-}
-
-
-/** Opcode 0x1b. */
-FNIEMOP_DEF(iemOp_sbb_Gv_Ev)
-{
-    IEMOP_MNEMONIC("sbb Gv,Ev");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_sbb);
-}
-
-
-/** Opcode 0x1c. */
-FNIEMOP_DEF(iemOp_sbb_Al_Ib)
-{
-    IEMOP_MNEMONIC("sbb al,Ib");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_sbb);
-}
-
-
-/** Opcode 0x1d. */
-FNIEMOP_DEF(iemOp_sbb_eAX_Iz)
-{
-    IEMOP_MNEMONIC("sbb rAX,Iz");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_sbb);
-}
-
-
-/** Opcode 0x1e. */
-FNIEMOP_DEF(iemOp_push_DS)
-{
-    IEMOP_MNEMONIC("push ds");
-    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_DS);
-}
-
-
-/** Opcode 0x1f. */
-FNIEMOP_DEF(iemOp_pop_DS)
-{
-    IEMOP_MNEMONIC("pop ds");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_NO_64BIT();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_DS, pVCpu->iem.s.enmEffOpSize);
-}
-
-
-/** Opcode 0x20. */
-FNIEMOP_DEF(iemOp_and_Eb_Gb)
-{
-    IEMOP_MNEMONIC("and Eb,Gb");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_and);
-}
-
-
-/** Opcode 0x21. */
-FNIEMOP_DEF(iemOp_and_Ev_Gv)
-{
-    IEMOP_MNEMONIC("and Ev,Gv");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_and);
-}
-
-
-/** Opcode 0x22. */
-FNIEMOP_DEF(iemOp_and_Gb_Eb)
-{
-    IEMOP_MNEMONIC("and Gb,Eb");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_and);
-}
-
-
-/** Opcode 0x23. */
-FNIEMOP_DEF(iemOp_and_Gv_Ev)
-{
-    IEMOP_MNEMONIC("and Gv,Ev");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_and);
-}
-
-
-/** Opcode 0x24. */
-FNIEMOP_DEF(iemOp_and_Al_Ib)
-{
-    IEMOP_MNEMONIC("and al,Ib");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_and);
-}
-
-
-/** Opcode 0x25. */
-FNIEMOP_DEF(iemOp_and_eAX_Iz)
-{
-    IEMOP_MNEMONIC("and rAX,Iz");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_and);
-}
-
-
-/** Opcode 0x26. */
-FNIEMOP_DEF(iemOp_seg_ES)
-{
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg es");
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SEG_ES;
-    pVCpu->iem.s.iEffSeg    = X86_SREG_ES;
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
-
-/** Opcode 0x27. */
-FNIEMOP_DEF(iemOp_daa)
-{
-    IEMOP_MNEMONIC("daa AL");
-    IEMOP_HLP_NO_64BIT();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF);
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_daa);
-}
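-
-/* For reference, a simplified sketch of the DAA adjustment deferred to
-   iemCImpl_daa above (per the Intel SDM; pseudocode only):
-       if ((AL & 0x0f) > 9 || AF) { AL += 0x06; AF = 1; }
-       if (oldAL > 0x99 || oldCF) { AL += 0x60; CF = 1; }
-   OF is left undefined, matching the verification mask above. */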
-
-
-/** Opcode 0x28. */
-FNIEMOP_DEF(iemOp_sub_Eb_Gb)
-{
-    IEMOP_MNEMONIC("sub Eb,Gb");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_sub);
-}
-
-
-/** Opcode 0x29. */
-FNIEMOP_DEF(iemOp_sub_Ev_Gv)
-{
-    IEMOP_MNEMONIC("sub Ev,Gv");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_sub);
-}
-
-
-/** Opcode 0x2a. */
-FNIEMOP_DEF(iemOp_sub_Gb_Eb)
-{
-    IEMOP_MNEMONIC("sub Gb,Eb");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_sub);
-}
-
-
-/** Opcode 0x2b. */
-FNIEMOP_DEF(iemOp_sub_Gv_Ev)
-{
-    IEMOP_MNEMONIC("sub Gv,Ev");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_sub);
-}
-
-
-/** Opcode 0x2c. */
-FNIEMOP_DEF(iemOp_sub_Al_Ib)
-{
-    IEMOP_MNEMONIC("sub al,Ib");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_sub);
-}
-
-
-/** Opcode 0x2d. */
-FNIEMOP_DEF(iemOp_sub_eAX_Iz)
-{
-    IEMOP_MNEMONIC("sub rAX,Iz");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_sub);
-}
-
-
-/** Opcode 0x2e. */
-FNIEMOP_DEF(iemOp_seg_CS)
-{
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg cs");
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SEG_CS;
-    pVCpu->iem.s.iEffSeg    = X86_SREG_CS;
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
-
-/** Opcode 0x2f. */
-FNIEMOP_DEF(iemOp_das)
-{
-    IEMOP_MNEMONIC("das AL");
-    IEMOP_HLP_NO_64BIT();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF);
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_das);
-}
-
-
-/** Opcode 0x30. */
-FNIEMOP_DEF(iemOp_xor_Eb_Gb)
-{
-    IEMOP_MNEMONIC("xor Eb,Gb");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_xor);
-}
-
-
-/** Opcode 0x31. */
-FNIEMOP_DEF(iemOp_xor_Ev_Gv)
-{
-    IEMOP_MNEMONIC("xor Ev,Gv");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_xor);
-}
-
-
-/** Opcode 0x32. */
-FNIEMOP_DEF(iemOp_xor_Gb_Eb)
-{
-    IEMOP_MNEMONIC("xor Gb,Eb");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_xor);
-}
-
-
-/** Opcode 0x33. */
-FNIEMOP_DEF(iemOp_xor_Gv_Ev)
-{
-    IEMOP_MNEMONIC("xor Gv,Ev");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_xor);
-}
-
-
-/** Opcode 0x34. */
-FNIEMOP_DEF(iemOp_xor_Al_Ib)
-{
-    IEMOP_MNEMONIC("xor al,Ib");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_xor);
-}
-
-
-/** Opcode 0x35. */
-FNIEMOP_DEF(iemOp_xor_eAX_Iz)
-{
-    IEMOP_MNEMONIC("xor rAX,Iz");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_xor);
-}
-
-
-/** Opcode 0x36. */
-FNIEMOP_DEF(iemOp_seg_SS)
-{
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg ss");
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SEG_SS;
-    pVCpu->iem.s.iEffSeg    = X86_SREG_SS;
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
-
-/** Opcode 0x37. */
-FNIEMOP_STUB(iemOp_aaa);
-
-
-/** Opcode 0x38. */
-FNIEMOP_DEF(iemOp_cmp_Eb_Gb)
-{
-    IEMOP_MNEMONIC("cmp Eb,Gb");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_cmp);
-}
-
-
-/** Opcode 0x39. */
-FNIEMOP_DEF(iemOp_cmp_Ev_Gv)
-{
-    IEMOP_MNEMONIC("cmp Ev,Gv");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_cmp);
-}
-
-
-/** Opcode 0x3a. */
-FNIEMOP_DEF(iemOp_cmp_Gb_Eb)
-{
-    IEMOP_MNEMONIC("cmp Gb,Eb");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_cmp);
-}
-
-
-/** Opcode 0x3b. */
-FNIEMOP_DEF(iemOp_cmp_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmp Gv,Ev");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_cmp);
-}
-
-
-/** Opcode 0x3c. */
-FNIEMOP_DEF(iemOp_cmp_Al_Ib)
-{
-    IEMOP_MNEMONIC("cmp al,Ib");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_cmp);
-}
-
-
-/** Opcode 0x3d. */
-FNIEMOP_DEF(iemOp_cmp_eAX_Iz)
-{
-    IEMOP_MNEMONIC("cmp rAX,Iz");
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_cmp);
-}
-
-
-/** Opcode 0x3e. */
-FNIEMOP_DEF(iemOp_seg_DS)
-{
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg ds");
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SEG_DS;
-    pVCpu->iem.s.iEffSeg    = X86_SREG_DS;
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
-
-/** Opcode 0x3f. */
-FNIEMOP_STUB(iemOp_aas);
-
-/**
- * Common 'inc/dec/not/neg register' helper.
- */
-FNIEMOP_DEF_2(iemOpCommonUnaryGReg, PCIEMOPUNARYSIZES, pImpl, uint8_t, iReg)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(2, 0);
-            IEM_MC_ARG(uint16_t *,  pu16Dst, 0);
-            IEM_MC_ARG(uint32_t *,  pEFlags, 1);
-            IEM_MC_REF_GREG_U16(pu16Dst, iReg);
-            IEM_MC_REF_EFLAGS(pEFlags);
-            IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU16, pu16Dst, pEFlags);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(2, 0);
-            IEM_MC_ARG(uint32_t *,  pu32Dst, 0);
-            IEM_MC_ARG(uint32_t *,  pEFlags, 1);
-            IEM_MC_REF_GREG_U32(pu32Dst, iReg);
-            IEM_MC_REF_EFLAGS(pEFlags);
-            IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU32, pu32Dst, pEFlags);
-            IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(2, 0);
-            IEM_MC_ARG(uint64_t *,  pu64Dst, 0);
-            IEM_MC_ARG(uint32_t *,  pEFlags, 1);
-            IEM_MC_REF_GREG_U64(pu64Dst, iReg);
-            IEM_MC_REF_EFLAGS(pEFlags);
-            IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU64, pu64Dst, pEFlags);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-    }
-    return VINF_SUCCESS;
-}
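-
-/* Note: only the 32-bit case above needs IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF,
-   since AMD64 zero-extends 32-bit results into the full 64-bit register
-   whereas 8-bit and 16-bit writes leave the upper bits untouched. */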
-
-
-/** Opcode 0x40. */
-FNIEMOP_DEF(iemOp_inc_eAX)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX;
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("inc eAX");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xAX);
-}
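-
-/* For reference, the REX prefix layout driving the 0x40-0x4f handlers in
-   this range (per the AMD64 architecture): 0100WRXB, where W selects 64-bit
-   operand size, R extends ModRM.reg, X extends SIB.index, and B extends
-   ModRM.rm / SIB.base / the opcode register field. */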
-
-
-/** Opcode 0x41. */
-FNIEMOP_DEF(iemOp_inc_eCX)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.b");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_B;
-        pVCpu->iem.s.uRexB     = 1 << 3;
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("inc eCX");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xCX);
-}
-
-
-/** Opcode 0x42. */
-FNIEMOP_DEF(iemOp_inc_eDX)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.x");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_X;
-        pVCpu->iem.s.uRexIndex = 1 << 3;
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("inc eDX");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xDX);
-}
-
-
-
-/** Opcode 0x43. */
-FNIEMOP_DEF(iemOp_inc_eBX)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.bx");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_B | IEM_OP_PRF_REX_X;
-        pVCpu->iem.s.uRexB     = 1 << 3;
-        pVCpu->iem.s.uRexIndex = 1 << 3;
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("inc eBX");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xBX);
-}
-
-
-/** Opcode 0x44. */
-FNIEMOP_DEF(iemOp_inc_eSP)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.r");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R;
-        pVCpu->iem.s.uRexReg   = 1 << 3;
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("inc eSP");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xSP);
-}
-
-
-/** Opcode 0x45. */
-FNIEMOP_DEF(iemOp_inc_eBP)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.rb");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_B;
-        pVCpu->iem.s.uRexReg   = 1 << 3;
-        pVCpu->iem.s.uRexB     = 1 << 3;
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("inc eBP");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xBP);
-}
-
-
-/** Opcode 0x46. */
-FNIEMOP_DEF(iemOp_inc_eSI)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.rx");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_X;
-        pVCpu->iem.s.uRexReg   = 1 << 3;
-        pVCpu->iem.s.uRexIndex = 1 << 3;
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("inc eSI");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xSI);
-}
-
-
-/** Opcode 0x47. */
-FNIEMOP_DEF(iemOp_inc_eDI)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.rbx");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_B | IEM_OP_PRF_REX_X;
-        pVCpu->iem.s.uRexReg   = 1 << 3;
-        pVCpu->iem.s.uRexB     = 1 << 3;
-        pVCpu->iem.s.uRexIndex = 1 << 3;
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("inc eDI");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xDI);
-}
-
-
-/** Opcode 0x48. */
-FNIEMOP_DEF(iemOp_dec_eAX)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.w");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_SIZE_REX_W;
-        iemRecalEffOpSize(pVCpu);
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("dec eAX");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xAX);
-}
-
-
-/** Opcode 0x49. */
-FNIEMOP_DEF(iemOp_dec_eCX)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.bw");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_B | IEM_OP_PRF_SIZE_REX_W;
-        pVCpu->iem.s.uRexB     = 1 << 3;
-        iemRecalEffOpSize(pVCpu);
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("dec eCX");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xCX);
-}
-
-
-/** Opcode 0x4a. */
-FNIEMOP_DEF(iemOp_dec_eDX)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.xw");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_X | IEM_OP_PRF_SIZE_REX_W;
-        pVCpu->iem.s.uRexIndex = 1 << 3;
-        iemRecalEffOpSize(pVCpu);
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("dec eDX");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xDX);
-}
-
-
-/** Opcode 0x4b. */
-FNIEMOP_DEF(iemOp_dec_eBX)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.bxw");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_B | IEM_OP_PRF_REX_X | IEM_OP_PRF_SIZE_REX_W;
-        pVCpu->iem.s.uRexB     = 1 << 3;
-        pVCpu->iem.s.uRexIndex = 1 << 3;
-        iemRecalEffOpSize(pVCpu);
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("dec eBX");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xBX);
-}
-
-
-/** Opcode 0x4c. */
-FNIEMOP_DEF(iemOp_dec_eSP)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.rw");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_SIZE_REX_W;
-        pVCpu->iem.s.uRexReg   = 1 << 3;
-        iemRecalEffOpSize(pVCpu);
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("dec eSP");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xSP);
-}
-
-
-/** Opcode 0x4d. */
-FNIEMOP_DEF(iemOp_dec_eBP)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.rbw");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_B | IEM_OP_PRF_SIZE_REX_W;
-        pVCpu->iem.s.uRexReg   = 1 << 3;
-        pVCpu->iem.s.uRexB     = 1 << 3;
-        iemRecalEffOpSize(pVCpu);
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("dec eBP");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xBP);
-}
-
-
-/** Opcode 0x4e. */
-FNIEMOP_DEF(iemOp_dec_eSI)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.rxw");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_X | IEM_OP_PRF_SIZE_REX_W;
-        pVCpu->iem.s.uRexReg   = 1 << 3;
-        pVCpu->iem.s.uRexIndex = 1 << 3;
-        iemRecalEffOpSize(pVCpu);
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("dec eSI");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xSI);
-}
-
-
-/** Opcode 0x4f. */
-FNIEMOP_DEF(iemOp_dec_eDI)
-{
-    /*
-     * This is a REX prefix in 64-bit mode.
-     */
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("rex.rbxw");
-        pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_B | IEM_OP_PRF_REX_X | IEM_OP_PRF_SIZE_REX_W;
-        pVCpu->iem.s.uRexReg   = 1 << 3;
-        pVCpu->iem.s.uRexB     = 1 << 3;
-        pVCpu->iem.s.uRexIndex = 1 << 3;
-        iemRecalEffOpSize(pVCpu);
-
-        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-    }
-
-    IEMOP_MNEMONIC("dec eDI");
-    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xDI);
-}
-
-
-/**
- * Common 'push register' helper.
- */
-FNIEMOP_DEF_1(iemOpCommonPushGReg, uint8_t, iReg)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        iReg |= pVCpu->iem.s.uRexB;
-        pVCpu->iem.s.enmDefOpSize = IEMMODE_64BIT;
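-        /* Pushes default to 64 bits here; only the 0x66 prefix can narrow
-           them to 16 bits, a 32-bit push is not encodable in 64-bit mode. */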
-        pVCpu->iem.s.enmEffOpSize = !(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? IEMMODE_64BIT : IEMMODE_16BIT;
-    }
-
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint16_t, u16Value);
-            IEM_MC_FETCH_GREG_U16(u16Value, iReg);
-            IEM_MC_PUSH_U16(u16Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_FETCH_GREG_U32(u32Value, iReg);
-            IEM_MC_PUSH_U32(u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_FETCH_GREG_U64(u64Value, iReg);
-            IEM_MC_PUSH_U64(u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-    }
-
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x50. */
-FNIEMOP_DEF(iemOp_push_eAX)
-{
-    IEMOP_MNEMONIC("push rAX");
-    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xAX);
-}
-
-
-/** Opcode 0x51. */
-FNIEMOP_DEF(iemOp_push_eCX)
-{
-    IEMOP_MNEMONIC("push rCX");
-    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xCX);
-}
-
-
-/** Opcode 0x52. */
-FNIEMOP_DEF(iemOp_push_eDX)
-{
-    IEMOP_MNEMONIC("push rDX");
-    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xDX);
-}
-
-
-/** Opcode 0x53. */
-FNIEMOP_DEF(iemOp_push_eBX)
-{
-    IEMOP_MNEMONIC("push rBX");
-    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xBX);
-}
-
-
-/** Opcode 0x54. */
-FNIEMOP_DEF(iemOp_push_eSP)
-{
-    IEMOP_MNEMONIC("push rSP");
-    if (IEM_GET_TARGET_CPU(pVCpu) == IEMTARGETCPU_8086)
-    {
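-        /* The 8086/8088 pushes the value of SP *after* decrementing it by
-           two; later CPUs push the original value. */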
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(uint16_t, u16Value);
-        IEM_MC_FETCH_GREG_U16(u16Value, X86_GREG_xSP);
-        IEM_MC_SUB_LOCAL_U16(u16Value, 2);
-        IEM_MC_PUSH_U16(u16Value);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xSP);
-}
-
-
-/** Opcode 0x55. */
-FNIEMOP_DEF(iemOp_push_eBP)
-{
-    IEMOP_MNEMONIC("push rBP");
-    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xBP);
-}
-
-
-/** Opcode 0x56. */
-FNIEMOP_DEF(iemOp_push_eSI)
-{
-    IEMOP_MNEMONIC("push rSI");
-    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xSI);
-}
-
-
-/** Opcode 0x57. */
-FNIEMOP_DEF(iemOp_push_eDI)
-{
-    IEMOP_MNEMONIC("push rDI");
-    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xDI);
-}
-
-
-/**
- * Common 'pop register' helper.
- */
-FNIEMOP_DEF_1(iemOpCommonPopGReg, uint8_t, iReg)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
-        iReg |= pVCpu->iem.s.uRexB;
-        pVCpu->iem.s.enmDefOpSize = IEMMODE_64BIT;
-        pVCpu->iem.s.enmEffOpSize = !(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? IEMMODE_64BIT : IEMMODE_16BIT;
-    }
-
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint16_t *, pu16Dst);
-            IEM_MC_REF_GREG_U16(pu16Dst, iReg);
-            IEM_MC_POP_U16(pu16Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t *, pu32Dst);
-            IEM_MC_REF_GREG_U32(pu32Dst, iReg);
-            IEM_MC_POP_U32(pu32Dst);
-            IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst); /** @todo testcase */
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t *, pu64Dst);
-            IEM_MC_REF_GREG_U64(pu64Dst, iReg);
-            IEM_MC_POP_U64(pu64Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-    }
-
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x58. */
-FNIEMOP_DEF(iemOp_pop_eAX)
-{
-    IEMOP_MNEMONIC("pop rAX");
-    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xAX);
-}
-
-
-/** Opcode 0x59. */
-FNIEMOP_DEF(iemOp_pop_eCX)
-{
-    IEMOP_MNEMONIC("pop rCX");
-    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xCX);
-}
-
-
-/** Opcode 0x5a. */
-FNIEMOP_DEF(iemOp_pop_eDX)
-{
-    IEMOP_MNEMONIC("pop rDX");
-    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xDX);
-}
-
-
-/** Opcode 0x5b. */
-FNIEMOP_DEF(iemOp_pop_eBX)
-{
-    IEMOP_MNEMONIC("pop rBX");
-    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xBX);
-}
-
-
-/** Opcode 0x5c. */
-FNIEMOP_DEF(iemOp_pop_eSP)
-{
-    IEMOP_MNEMONIC("pop rSP");
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-    {
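-        /* With REX.B the encoding really means 'pop r12', which the common
-           helper handles fine; only a true 'pop rsp' needs the special
-           handling below. */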
-        if (pVCpu->iem.s.uRexB)
-            return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xSP);
-        pVCpu->iem.s.enmDefOpSize = IEMMODE_64BIT;
-        pVCpu->iem.s.enmEffOpSize = !(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? IEMMODE_64BIT : IEMMODE_16BIT;
-    }
-
-    IEMOP_HLP_DECODED_NL_1(OP_POP, IEMOPFORM_FIXED, OP_PARM_REG_ESP,
-                           DISOPTYPE_HARMLESS | DISOPTYPE_DEFAULT_64_OP_SIZE | DISOPTYPE_REXB_EXTENDS_OPREG);
-    /** @todo add testcase for this instruction. */
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint16_t, u16Dst);
-            IEM_MC_POP_U16(&u16Dst); /** @todo not correct MC, fix later. */
-            IEM_MC_STORE_GREG_U16(X86_GREG_xSP, u16Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t, u32Dst);
-            IEM_MC_POP_U32(&u32Dst);
-            IEM_MC_STORE_GREG_U32(X86_GREG_xSP, u32Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t, u64Dst);
-            IEM_MC_POP_U64(&u64Dst);
-            IEM_MC_STORE_GREG_U64(X86_GREG_xSP, u64Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-    }
-
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x5d. */
-FNIEMOP_DEF(iemOp_pop_eBP)
-{
-    IEMOP_MNEMONIC("pop rBP");
-    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xBP);
-}
-
-
-/** Opcode 0x5e. */
-FNIEMOP_DEF(iemOp_pop_eSI)
-{
-    IEMOP_MNEMONIC("pop rSI");
-    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xSI);
-}
-
-
-/** Opcode 0x5f. */
-FNIEMOP_DEF(iemOp_pop_eDI)
-{
-    IEMOP_MNEMONIC("pop rDI");
-    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xDI);
-}
-
-
-/** Opcode 0x60. */
-FNIEMOP_DEF(iemOp_pusha)
-{
-    IEMOP_MNEMONIC("pusha");
-    IEMOP_HLP_MIN_186();
-    IEMOP_HLP_NO_64BIT();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_pusha_16);
-    Assert(pVCpu->iem.s.enmEffOpSize == IEMMODE_32BIT);
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_pusha_32);
-}
-
-
-/** Opcode 0x61. */
-FNIEMOP_DEF(iemOp_popa)
-{
-    IEMOP_MNEMONIC("popa");
-    IEMOP_HLP_MIN_186();
-    IEMOP_HLP_NO_64BIT();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_popa_16);
-    Assert(pVCpu->iem.s.enmEffOpSize == IEMMODE_32BIT);
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_popa_32);
-}
-
-
-/** Opcode 0x62. */
-FNIEMOP_STUB(iemOp_bound_Gv_Ma_evex);
-//    IEMOP_HLP_MIN_186();
-
-
-/** Opcode 0x63 - non-64-bit modes. */
-FNIEMOP_DEF(iemOp_arpl_Ew_Gw)
-{
-    IEMOP_MNEMONIC("arpl Ew,Gw");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
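-    /* ARPL raises the RPL field of the destination selector to that of the
-       source when it is lower, setting ZF iff an adjustment was made. */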
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* Register */
-        IEMOP_HLP_DECODED_NL_2(OP_ARPL, IEMOPFORM_MR_REG, OP_PARM_Ew, OP_PARM_Gw, DISOPTYPE_HARMLESS);
-        IEM_MC_BEGIN(3, 0);
-        IEM_MC_ARG(uint16_t *,      pu16Dst,    0);
-        IEM_MC_ARG(uint16_t,        u16Src,     1);
-        IEM_MC_ARG(uint32_t *,      pEFlags,    2);
-
-        IEM_MC_FETCH_GREG_U16(u16Src, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-        IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK));
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_arpl, pu16Dst, u16Src, pEFlags);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* Memory */
-        IEM_MC_BEGIN(3, 2);
-        IEM_MC_ARG(uint16_t *, pu16Dst,          0);
-        IEM_MC_ARG(uint16_t,   u16Src,           1);
-        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DECODED_NL_2(OP_ARPL, IEMOPFORM_MR_REG, OP_PARM_Ew, OP_PARM_Gw, DISOPTYPE_HARMLESS);
-        IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_FETCH_GREG_U16(u16Src, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_arpl, pu16Dst, u16Src, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x63.
- * @note This is a weird one. It works like a regular move instruction if
- *       REX.W isn't set, at least according to AMD docs (rev 3.15, 2009-11).
- * @todo This definitely needs a testcase to verify the odd cases.  */
-FNIEMOP_DEF(iemOp_movsxd_Gv_Ev)
-{
-    Assert(pVCpu->iem.s.enmEffOpSize == IEMMODE_64BIT); /* Caller branched already. */
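-    /* Example: with REX.W set, 'movsxd rax, ecx' copies ECX into RAX with
-       bit 31 replicated through bits 32..63. */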
-
-    IEMOP_MNEMONIC("movsxd Gv,Ev");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /*
-         * Register to register.
-         */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(uint64_t, u64Value);
-        IEM_MC_FETCH_GREG_U32_SX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint64_t, u64Value);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_MEM_U32_SX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-        IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x64. */
-FNIEMOP_DEF(iemOp_seg_FS)
-{
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg fs");
-    IEMOP_HLP_MIN_386();
-
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SEG_FS;
-    pVCpu->iem.s.iEffSeg    = X86_SREG_FS;
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
-
-/** Opcode 0x65. */
-FNIEMOP_DEF(iemOp_seg_GS)
-{
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg gs");
-    IEMOP_HLP_MIN_386();
-
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SEG_GS;
-    pVCpu->iem.s.iEffSeg    = X86_SREG_GS;
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
-
-/** Opcode 0x66. */
-FNIEMOP_DEF(iemOp_op_size)
-{
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("op size");
-    IEMOP_HLP_MIN_386();
-
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SIZE_OP;
-    iemRecalEffOpSize(pVCpu);
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
-
-/** Opcode 0x67. */
-FNIEMOP_DEF(iemOp_addr_size)
-{
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("addr size");
-    IEMOP_HLP_MIN_386();
-
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SIZE_ADDR;
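-    /* The prefix selects the alternate address size: 16<->32 in legacy
-       modes, and 32-bit addressing in 64-bit mode (16-bit addressing is
-       not reachable there). */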
-    switch (pVCpu->iem.s.enmDefAddrMode)
-    {
-        case IEMMODE_16BIT: pVCpu->iem.s.enmEffAddrMode = IEMMODE_32BIT; break;
-        case IEMMODE_32BIT: pVCpu->iem.s.enmEffAddrMode = IEMMODE_16BIT; break;
-        case IEMMODE_64BIT: pVCpu->iem.s.enmEffAddrMode = IEMMODE_32BIT; break;
-        default: AssertFailed();
-    }
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
-
-/** Opcode 0x68. */
-FNIEMOP_DEF(iemOp_push_Iz)
-{
-    IEMOP_MNEMONIC("push Iz");
-    IEMOP_HLP_MIN_186();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-        {
-            uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_PUSH_U16(u16Imm);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-        }
-
-        case IEMMODE_32BIT:
-        {
-            uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_PUSH_U32(u32Imm);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-        }
-
-        case IEMMODE_64BIT:
-        {
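-            /* Even with a 64-bit operand size the immediate stays 32 bits
-               and is sign-extended to 64. */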
-            uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_PUSH_U64(u64Imm);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-        }
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0x69. */
-FNIEMOP_DEF(iemOp_imul_Gv_Ev_Iz)
-{
-    IEMOP_MNEMONIC("imul Gv,Ev,Iz"); /* Gv = Ev * Iz; */
-    IEMOP_HLP_MIN_186();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
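-    /* Only CF and OF are architecturally defined for IMUL (set when the
-       result overflows the destination width); SF, ZF, AF and PF are
-       undefined, hence the exclusion above. */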
-
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-        {
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* register operand */
-                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,            0);
-                IEM_MC_ARG_CONST(uint16_t,  u16Src,/*=*/ u16Imm,1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
-                IEM_MC_LOCAL(uint16_t,      u16Tmp);
-
-                IEM_MC_FETCH_GREG_U16(u16Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_LOCAL(pu16Dst, u16Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u16, pu16Dst, u16Src, pEFlags);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* memory operand */
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,            0);
-                IEM_MC_ARG(uint16_t,        u16Src,             1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
-                IEM_MC_LOCAL(uint16_t,      u16Tmp);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 2);
-                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-                IEM_MC_ASSIGN(u16Src, u16Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U16(u16Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_LOCAL(pu16Dst, u16Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u16, pu16Dst, u16Src, pEFlags);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-        }
-
-        case IEMMODE_32BIT:
-        {
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* register operand */
-                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,            0);
-                IEM_MC_ARG_CONST(uint32_t,  u32Src,/*=*/ u32Imm,1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
-                IEM_MC_LOCAL(uint32_t,      u32Tmp);
-
-                IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_LOCAL(pu32Dst, u32Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u32, pu32Dst, u32Src, pEFlags);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* memory operand */
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,            0);
-                IEM_MC_ARG(uint32_t,        u32Src,             1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
-                IEM_MC_LOCAL(uint32_t,      u32Tmp);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 4);
-                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-                IEM_MC_ASSIGN(u32Src, u32Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_LOCAL(pu32Dst, u32Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u32, pu32Dst, u32Src, pEFlags);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-        }
-
-        case IEMMODE_64BIT:
-        {
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* register operand */
-                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,            0);
-                IEM_MC_ARG_CONST(uint64_t,  u64Src,/*=*/ u64Imm,1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
-                IEM_MC_LOCAL(uint64_t,      u64Tmp);
-
-                IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_LOCAL(pu64Dst, u64Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u64, pu64Dst, u64Src, pEFlags);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* memory operand */
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,            0);
-                IEM_MC_ARG(uint64_t,        u64Src,             1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
-                IEM_MC_LOCAL(uint64_t,      u64Tmp);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 4);
-                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-                IEM_MC_ASSIGN(u64Src, u64Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_LOCAL(pu64Dst, u64Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u64, pu64Dst, u64Src, pEFlags);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-        }
-    }
-    AssertFailedReturn(VERR_IEM_IPE_9);
-}
-
-
-/** Opcode 0x6a. */
-FNIEMOP_DEF(iemOp_push_Ib)
-{
-    IEMOP_MNEMONIC("push Ib");
-    IEMOP_HLP_MIN_186();
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0,0);
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_PUSH_U16(i8Imm);
-            break;
-        case IEMMODE_32BIT:
-            IEM_MC_PUSH_U32(i8Imm);
-            break;
-        case IEMMODE_64BIT:
-            IEM_MC_PUSH_U64(i8Imm);
-            break;
-    }
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x6b. */
-FNIEMOP_DEF(iemOp_imul_Gv_Ev_Ib)
-{
-    IEMOP_MNEMONIC("imul Gv,Ev,Ib"); /* Gv = Ev * Iz; */
-    IEMOP_HLP_MIN_186();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
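-    /* The byte immediate is sign-extended to the effective operand size
-       before the multiply. */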
-
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* register operand */
-                uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
-                IEM_MC_ARG_CONST(uint16_t,  u16Src,/*=*/ (int8_t)u8Imm, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-                IEM_MC_LOCAL(uint16_t,      u16Tmp);
-
-                IEM_MC_FETCH_GREG_U16(u16Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_LOCAL(pu16Dst, u16Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u16, pu16Dst, u16Src, pEFlags);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* memory operand */
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
-                IEM_MC_ARG(uint16_t,        u16Src,                     1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-                IEM_MC_LOCAL(uint16_t,      u16Tmp);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16Imm);
-                IEM_MC_ASSIGN(u16Src, u16Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U16(u16Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_LOCAL(pu16Dst, u16Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u16, pu16Dst, u16Src, pEFlags);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* register operand */
-                uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
-                IEM_MC_ARG_CONST(uint32_t,  u32Src,/*=*/ (int8_t)u8Imm, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-                IEM_MC_LOCAL(uint32_t,      u32Tmp);
-
-                IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_LOCAL(pu32Dst, u32Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u32, pu32Dst, u32Src, pEFlags);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* memory operand */
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
-                IEM_MC_ARG(uint32_t,        u32Src,                     1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-                IEM_MC_LOCAL(uint32_t,      u32Tmp);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_S8_SX_U32(&u32Imm);
-                IEM_MC_ASSIGN(u32Src, u32Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_LOCAL(pu32Dst, u32Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u32, pu32Dst, u32Src, pEFlags);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* register operand */
-                uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
-                IEM_MC_ARG_CONST(uint64_t,  u64Src,/*=*/ (int8_t)u8Imm, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-                IEM_MC_LOCAL(uint64_t,      u64Tmp);
-
-                IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_LOCAL(pu64Dst, u64Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u64, pu64Dst, u64Src, pEFlags);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* memory operand */
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
-                IEM_MC_ARG(uint64_t,        u64Src,                     1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-                IEM_MC_LOCAL(uint64_t,      u64Tmp);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S8_SX_U64(&u64Imm);
-                IEM_MC_ASSIGN(u64Src, u64Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_LOCAL(pu64Dst, u64Tmp);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u64, pu64Dst, u64Src, pEFlags);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-    }
-    AssertFailedReturn(VERR_IEM_IPE_8);
-}
-
-
-/** Opcode 0x6c. */
-FNIEMOP_DEF(iemOp_insb_Yb_DX)
-{
-    IEMOP_HLP_MIN_186();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
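-    /* For string I/O both F3 (REP) and F2 (REPNE) prefixes behave as REP. */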
-    if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        IEMOP_MNEMONIC("rep ins Yb,DX");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_ins_op8_addr16, false);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_ins_op8_addr32, false);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_ins_op8_addr64, false);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        IEMOP_MNEMONIC("ins Yb,DX");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_ins_op8_addr16, false);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_ins_op8_addr32, false);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_ins_op8_addr64, false);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x6d. */
-FNIEMOP_DEF(iemOp_inswd_Yv_DX)
-{
-    IEMOP_HLP_MIN_186();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-    {
-        IEMOP_MNEMONIC("rep ins Yv,DX");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_ins_op16_addr16, false);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_ins_op16_addr32, false);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_ins_op16_addr64, false);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_64BIT:
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_ins_op32_addr16, false);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_ins_op32_addr32, false);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_ins_op32_addr64, false);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        IEMOP_MNEMONIC("ins Yv,DX");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_ins_op16_addr16, false);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_ins_op16_addr32, false);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_ins_op16_addr64, false);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_64BIT:
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_ins_op32_addr16, false);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_ins_op32_addr32, false);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_ins_op32_addr64, false);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x6e. */
-FNIEMOP_DEF(iemOp_outsb_Yb_DX)
-{
-    IEMOP_HLP_MIN_186();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        IEMOP_MNEMONIC("rep outs DX,Yb");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_rep_outs_op8_addr16, pVCpu->iem.s.iEffSeg, false);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_rep_outs_op8_addr32, pVCpu->iem.s.iEffSeg, false);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_rep_outs_op8_addr64, pVCpu->iem.s.iEffSeg, false);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        IEMOP_MNEMONIC("outs DX,Yb");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_outs_op8_addr16, pVCpu->iem.s.iEffSeg, false);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_outs_op8_addr32, pVCpu->iem.s.iEffSeg, false);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_outs_op8_addr64, pVCpu->iem.s.iEffSeg, false);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x6f. */
-FNIEMOP_DEF(iemOp_outswd_Yv_DX)
-{
-    IEMOP_HLP_MIN_186();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-    {
-        IEMOP_MNEMONIC("rep outs DX,Yv");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_rep_outs_op16_addr16, pVCpu->iem.s.iEffSeg, false);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_rep_outs_op16_addr32, pVCpu->iem.s.iEffSeg, false);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_rep_outs_op16_addr64, pVCpu->iem.s.iEffSeg, false);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_64BIT:
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_rep_outs_op32_addr16, pVCpu->iem.s.iEffSeg, false);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_rep_outs_op32_addr32, pVCpu->iem.s.iEffSeg, false);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_rep_outs_op32_addr64, pVCpu->iem.s.iEffSeg, false);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        IEMOP_MNEMONIC("outs DX,Yv");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_outs_op16_addr16, pVCpu->iem.s.iEffSeg, false);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_outs_op16_addr32, pVCpu->iem.s.iEffSeg, false);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_outs_op16_addr64, pVCpu->iem.s.iEffSeg, false);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_64BIT:
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_outs_op32_addr16, pVCpu->iem.s.iEffSeg, false);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_outs_op32_addr32, pVCpu->iem.s.iEffSeg, false);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_outs_op32_addr64, pVCpu->iem.s.iEffSeg, false);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x70. */
-FNIEMOP_DEF(iemOp_jo_Jb)
-{
-    IEMOP_MNEMONIC("jo  Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ELSE() {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x71. */
-FNIEMOP_DEF(iemOp_jno_Jb)
-{
-    IEMOP_MNEMONIC("jno Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ELSE() {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x72. */
-FNIEMOP_DEF(iemOp_jc_Jb)
-{
-    IEMOP_MNEMONIC("jc/jnae Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ELSE() {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x73. */
-FNIEMOP_DEF(iemOp_jnc_Jb)
-{
-    IEMOP_MNEMONIC("jnc/jnb Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ELSE() {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x74. */
-FNIEMOP_DEF(iemOp_je_Jb)
-{
-    IEMOP_MNEMONIC("je/jz   Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ELSE() {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x75. */
-FNIEMOP_DEF(iemOp_jne_Jb)
-{
-    IEMOP_MNEMONIC("jne/jnz Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ELSE() {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x76. */
-FNIEMOP_DEF(iemOp_jbe_Jb)
-{
-    IEMOP_MNEMONIC("jbe/jna Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
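-    /* Unsigned 'below or equal': CF or ZF set. */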
-    IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ELSE() {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x77. */
-FNIEMOP_DEF(iemOp_jnbe_Jb)
-{
-    IEMOP_MNEMONIC("jnbe/ja Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ELSE() {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x78. */
-FNIEMOP_DEF(iemOp_js_Jb)
-{
-    IEMOP_MNEMONIC("js  Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ELSE() {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x79. */
-FNIEMOP_DEF(iemOp_jns_Jb)
-{
-    IEMOP_MNEMONIC("jns Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ELSE() {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x7a. */
-FNIEMOP_DEF(iemOp_jp_Jb)
-{
-    IEMOP_MNEMONIC("jp  Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ELSE() {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x7b. */
-FNIEMOP_DEF(iemOp_jnp_Jb)
-{
-    IEMOP_MNEMONIC("jnp Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ELSE() {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x7c. */
-FNIEMOP_DEF(iemOp_jl_Jb)
-{
-    IEMOP_MNEMONIC("jl/jnge Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
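-    /* Signed 'less': SF != OF. */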
-    IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ELSE() {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x7d. */
-FNIEMOP_DEF(iemOp_jnl_Jb)
-{
-    IEMOP_MNEMONIC("jnl/jge Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ELSE() {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x7e. */
-FNIEMOP_DEF(iemOp_jle_Jb)
-{
-    IEMOP_MNEMONIC("jle/jng Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ELSE() {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x7f. */
-FNIEMOP_DEF(iemOp_jnle_Jb)
-{
-    IEMOP_MNEMONIC("jnle/jg Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-        IEM_MC_ADVANCE_RIP();
-    } IEM_MC_ELSE() {
-        IEM_MC_REL_JMP_S8(i8Imm);
-    } IEM_MC_ENDIF();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x80. */
-FNIEMOP_DEF(iemOp_Grp1_Eb_Ib_80)
-{
-    uint8_t bRm;   IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_MNEMONIC2("add\0or\0\0adc\0sbb\0and\0sub\0xor\0cmp" + ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)*4, "Eb,Ib");
-    PCIEMOPBINSIZES pImpl = g_apIemImplGrp1[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK];
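-    /* The ModR/M /reg field selects the operation; the packed string above
-       is indexed in four-byte steps to pick the matching mnemonic. */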
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(3, 0);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,                 0);
-        IEM_MC_ARG_CONST(uint8_t,   u8Src, /*=*/ u8Imm,     1);
-        IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
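-        /* CMP only reads its operand, so it has no locked variant and the
-           memory mapping can be read-only. */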
-        uint32_t fAccess;
-        if (pImpl->pfnLockedU8)
-            fAccess = IEM_ACCESS_DATA_RW;
-        else /* CMP */
-            fAccess = IEM_ACCESS_DATA_R;
-        IEM_MC_BEGIN(3, 2);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,                 0);
-        IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        2);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-        IEM_MC_ARG_CONST(uint8_t,   u8Src, /*=*/ u8Imm,     1);
-        if (pImpl->pfnLockedU8)
-            IEMOP_HLP_DONE_DECODING();
-        else
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_MEM_MAP(pu8Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
-        else
-            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU8, pu8Dst, u8Src, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, fAccess);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x81. */
-FNIEMOP_DEF(iemOp_Grp1_Ev_Iz)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_MNEMONIC2("add\0or\0\0adc\0sbb\0and\0sub\0xor\0cmp" + ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)*4, "Ev,Iz");
-    PCIEMOPBINSIZES pImpl = g_apIemImplGrp1[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK];
-
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-        {
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* register target */
-                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG_CONST(uint16_t,  u16Src, /*=*/ u16Imm,   1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* memory target */
-                uint32_t fAccess;
-                if (pImpl->pfnLockedU16)
-                    fAccess = IEM_ACCESS_DATA_RW;
-                else /* CMP, TEST */
-                    fAccess = IEM_ACCESS_DATA_R;
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,        u16Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 2);
-                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-                IEM_MC_ASSIGN(u16Src, u16Imm);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu16Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, fAccess);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            break;
-        }
-
-        case IEMMODE_32BIT:
-        {
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* register target */
-                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG_CONST(uint32_t,  u32Src, /*=*/ u32Imm,   1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* memory target */
-                uint32_t fAccess;
-                if (pImpl->pfnLockedU32)
-                    fAccess = IEM_ACCESS_DATA_RW;
-                else /* CMP, TEST */
-                    fAccess = IEM_ACCESS_DATA_R;
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,        u32Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 4);
-                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-                IEM_MC_ASSIGN(u32Src, u32Imm);
-                if (pImpl->pfnLockedU32)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu32Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, fAccess);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            break;
-        }
-
-        case IEMMODE_64BIT:
-        {
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* register target */
-                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG_CONST(uint64_t,  u64Src, /*=*/ u64Imm,   1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* memory target */
-                uint32_t fAccess;
-                if (pImpl->pfnLockedU64)
-                    fAccess = IEM_ACCESS_DATA_RW;
-                else /* CMP */
-                    fAccess = IEM_ACCESS_DATA_R;
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,        u64Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 4);
-                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-                if (pImpl->pfnLockedU64)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_ASSIGN(u64Src, u64Imm);
-                IEM_MC_MEM_MAP(pu64Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, fAccess);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            break;
-        }
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x82. */
-FNIEMOP_DEF(iemOp_Grp1_Eb_Ib_82)
-{
-    IEMOP_HLP_NO_64BIT(); /** @todo do we need to decode the whole instruction or is this ok? */
-    return FNIEMOP_CALL(iemOp_Grp1_Eb_Ib_80);
-}
-
-
-/** Opcode 0x83. */
-FNIEMOP_DEF(iemOp_Grp1_Ev_Ib)
-{
-    uint8_t bRm;   IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_MNEMONIC2("add\0or\0\0adc\0sbb\0and\0sub\0xor\0cmp" + ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)*4, "Ev,Ib");
-    /* Note! The OR, AND, and XOR forms seem to be present on CPUs prior to
-             the 386 even though they are absent from the Intel reference
-             manuals and some 3rd party opcode listings. */
-    PCIEMOPBINSIZES pImpl = g_apIemImplGrp1[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK];
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /*
-         * Register target
-         */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-            {
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
-                IEM_MC_ARG_CONST(uint16_t,  u16Src, /*=*/ (int8_t)u8Imm,1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-            }
-
-            case IEMMODE_32BIT:
-            {
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
-                IEM_MC_ARG_CONST(uint32_t,  u32Src, /*=*/ (int8_t)u8Imm,1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-            }
-
-            case IEMMODE_64BIT:
-            {
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
-                IEM_MC_ARG_CONST(uint64_t,  u64Src, /*=*/ (int8_t)u8Imm,1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-            }
-        }
-    }
-    else
-    {
-        /*
-         * Memory target.
-         */
-        uint32_t fAccess;
-        if (pImpl->pfnLockedU16)
-            fAccess = IEM_ACCESS_DATA_RW;
-        else /* CMP */
-            fAccess = IEM_ACCESS_DATA_R;
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-            {
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
-                IEM_MC_ARG(uint16_t,        u16Src,                     1);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,            2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-                IEM_MC_ASSIGN(u16Src, (int8_t)u8Imm);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu16Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, fAccess);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-            }
-
-            case IEMMODE_32BIT:
-            {
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
-                IEM_MC_ARG(uint32_t,        u32Src,                     1);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,            2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-                IEM_MC_ASSIGN(u32Src, (int8_t)u8Imm);
-                if (pImpl->pfnLockedU32)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu32Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, fAccess);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-            }
-
-            case IEMMODE_64BIT:
-            {
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
-                IEM_MC_ARG(uint64_t,        u64Src,                     1);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,            2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-                IEM_MC_ASSIGN(u64Src, (int8_t)u8Imm);
-                if (pImpl->pfnLockedU64)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu64Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, fAccess);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-            }
-        }
-    }
-    return VINF_SUCCESS;
-}
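
The distinguishing feature of opcode 0x83 relative to 0x81 is the imm8 source: it is sign-extended to the effective operand size before the operation, which is what the (int8_t)u8Imm casts above implement. A minimal stand-alone restatement of that widening (hypothetical helper names):

    #include <stdint.h>

    /* Sign-extend an imm8 the way the 0x83 decoder does for each operand size. */
    static uint16_t ImmU8ToU16(uint8_t u8Imm) { return (uint16_t)(int16_t)(int8_t)u8Imm; }
    static uint32_t ImmU8ToU32(uint8_t u8Imm) { return (uint32_t)(int32_t)(int8_t)u8Imm; }
    static uint64_t ImmU8ToU64(uint8_t u8Imm) { return (uint64_t)(int64_t)(int8_t)u8Imm; }

    int main(void)
    {
        /* 0xff is -1, so 'add eax, -1' encoded via 0x83 really adds 0xffffffff. */
        return ImmU8ToU32(0xff) == UINT32_C(0xffffffff) ? 0 : 1;
    }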
-
-
-/** Opcode 0x84. */
-FNIEMOP_DEF(iemOp_test_Eb_Gb)
-{
-    IEMOP_MNEMONIC("test Eb,Gb");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_test);
-}
-
-
-/** Opcode 0x85. */
-FNIEMOP_DEF(iemOp_test_Ev_Gv)
-{
-    IEMOP_MNEMONIC("test Ev,Gv");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_test);
-}
-
-
-/** Opcode 0x86. */
-FNIEMOP_DEF(iemOp_xchg_Eb_Gb)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_MNEMONIC("xchg Eb,Gb");
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint8_t, uTmp1);
-        IEM_MC_LOCAL(uint8_t, uTmp2);
-
-        IEM_MC_FETCH_GREG_U8(uTmp1, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_FETCH_GREG_U8(uTmp2, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_STORE_GREG_U8((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,                              uTmp1);
-        IEM_MC_STORE_GREG_U8(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uTmp2);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * We're accessing memory.
-         */
-/** @todo the register must be committed separately! */
-        IEM_MC_BEGIN(2, 2);
-        IEM_MC_ARG(uint8_t *,  pu8Mem,           0);
-        IEM_MC_ARG(uint8_t *,  pu8Reg,           1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEM_MC_MEM_MAP(pu8Mem, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_REF_GREG_U8(pu8Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_xchg_u8, pu8Mem, pu8Reg);
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Mem, IEM_ACCESS_DATA_RW);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x87. */
-FNIEMOP_DEF(iemOp_xchg_Ev_Gv)
-{
-    IEMOP_MNEMONIC("xchg Ev,Gv");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint16_t, uTmp1);
-                IEM_MC_LOCAL(uint16_t, uTmp2);
-
-                IEM_MC_FETCH_GREG_U16(uTmp1, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U16(uTmp2, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,                              uTmp1);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uTmp2);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint32_t, uTmp1);
-                IEM_MC_LOCAL(uint32_t, uTmp2);
-
-                IEM_MC_FETCH_GREG_U32(uTmp1, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U32(uTmp2, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,                              uTmp1);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uTmp2);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, uTmp1);
-                IEM_MC_LOCAL(uint64_t, uTmp2);
-
-                IEM_MC_FETCH_GREG_U64(uTmp1, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U64(uTmp2, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,                              uTmp1);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uTmp2);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /*
-         * We're accessing memory.
-         */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-/** @todo the register must be committed separately! */
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint16_t *,  pu16Mem, 0);
-                IEM_MC_ARG(uint16_t *,  pu16Reg, 1);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEM_MC_MEM_MAP(pu16Mem, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_REF_GREG_U16(pu16Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_xchg_u16, pu16Mem, pu16Reg);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Mem, IEM_ACCESS_DATA_RW);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint32_t *,  pu32Mem, 0);
-                IEM_MC_ARG(uint32_t *,  pu32Reg, 1);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEM_MC_MEM_MAP(pu32Mem, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_REF_GREG_U32(pu32Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_xchg_u32, pu32Mem, pu32Reg);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Mem, IEM_ACCESS_DATA_RW);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Reg);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint64_t *,  pu64Mem, 0);
-                IEM_MC_ARG(uint64_t *,  pu64Reg, 1);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEM_MC_MEM_MAP(pu64Mem, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_REF_GREG_U64(pu64Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_xchg_u64, pu64Mem, pu64Reg);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Mem, IEM_ACCESS_DATA_RW);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
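
Architecturally, XCHG with a memory operand is implicitly locked whether or not an F0 prefix is present, which is why these memory paths map the operand for read/write and swap through the iemAImpl_xchg_uXX helpers instead of doing separate loads and stores. The swap itself is a plain three-move exchange; a sketch of what the 16-bit helper computes:

    #include <stdint.h>

    /* What the xchg helper boils down to once the memory operand is mapped:
       swap the mapped memory word with the referenced register word. */
    static void XchgU16(uint16_t *puMem, uint16_t *puReg)
    {
        uint16_t const uTmp = *puMem;
        *puMem = *puReg;
        *puReg = uTmp;
    }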
-
-
-/** Opcode 0x88. */
-FNIEMOP_DEF(iemOp_mov_Eb_Gb)
-{
-    IEMOP_MNEMONIC("mov Eb,Gb");
-
-    uint8_t bRm;
-    IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(uint8_t, u8Value);
-        IEM_MC_FETCH_GREG_U8(u8Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_STORE_GREG_U8((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u8Value);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * We're writing a register to memory.
-         */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint8_t, u8Value);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_GREG_U8(u8Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_STORE_MEM_U8(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u8Value);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x89. */
-FNIEMOP_DEF(iemOp_mov_Ev_Gv)
-{
-    IEMOP_MNEMONIC("mov Ev,Gv");
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_FETCH_GREG_U16(u16Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_FETCH_GREG_U32(u32Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_FETCH_GREG_U64(u64Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-        }
-    }
-    else
-    {
-        /*
-         * We're writing a register to memory.
-         */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U16(u16Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U32(u32Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U64(u64Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-        }
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x8a. */
-FNIEMOP_DEF(iemOp_mov_Gb_Eb)
-{
-    IEMOP_MNEMONIC("mov Gb,Eb");
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(uint8_t, u8Value);
-        IEM_MC_FETCH_GREG_U8(u8Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_STORE_GREG_U8(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u8Value);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint8_t, u8Value);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_MEM_U8(u8Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-        IEM_MC_STORE_GREG_U8(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u8Value);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x8b. */
-FNIEMOP_DEF(iemOp_mov_Gv_Ev)
-{
-    IEMOP_MNEMONIC("mov Gv,Ev");
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_FETCH_GREG_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_FETCH_GREG_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_FETCH_GREG_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-        }
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-        }
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x63. */
-FNIEMOP_DEF(iemOp_arpl_Ew_Gw_movsx_Gv_Ev)
-{
-    if (pVCpu->iem.s.enmCpuMode != IEMMODE_64BIT)
-        return FNIEMOP_CALL(iemOp_arpl_Ew_Gw);
-    if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
-        return FNIEMOP_CALL(iemOp_mov_Gv_Ev);
-    return FNIEMOP_CALL(iemOp_movsxd_Gv_Ev);
-}
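
Outside 64-bit mode, 0x63 decodes as ARPL; in 64-bit mode it is MOVSXD, and when the effective operand size is not 64-bit the handler above simply reuses the plain MOV path. With a 64-bit operand the result is the 32-bit source sign-extended, as in this sketch:

    #include <stdint.h>

    /* MOVSXD Gv,Ev with REX.W: sign-extend the 32-bit source to 64 bits. */
    static uint64_t MovSxdU64(uint32_t u32Src)
    {
        return (uint64_t)(int64_t)(int32_t)u32Src;
    }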
-
-
-/** Opcode 0x8c. */
-FNIEMOP_DEF(iemOp_mov_Ev_Sw)
-{
-    IEMOP_MNEMONIC("mov Ev,Sw");
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * Check that the source segment register exists.  The REX.R prefix is ignored.
-     */
-    uint8_t const iSegReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-    if (iSegReg > X86_SREG_GS)
-        return IEMOP_RAISE_INVALID_OPCODE(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     * In that case, the operand size is respected and the upper bits are
-     * cleared (starting with some Pentium models).
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_FETCH_SREG_U16(u16Value, iSegReg);
-                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_FETCH_SREG_ZX_U32(u32Value, iSegReg);
-                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_FETCH_SREG_ZX_U64(u64Value, iSegReg);
-                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-        }
-    }
-    else
-    {
-        /*
-         * We're saving the register to memory.  The access is word-sized
-         * regardless of operand size prefixes.
-         */
-#if 0 /* not necessary */
-        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_16BIT;
-#endif
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint16_t,  u16Value);
-        IEM_MC_LOCAL(RTGCPTR,   GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_SREG_U16(u16Value, iSegReg);
-        IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Value);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x8d. */
-FNIEMOP_DEF(iemOp_lea_Gv_M)
-{
-    IEMOP_MNEMONIC("lea Gv,M");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE(); /* no register form */
-
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
-            IEM_MC_LOCAL(uint16_t, u16Cast);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_ASSIGN_TO_SMALLER(u16Cast, GCPtrEffSrc);
-            IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Cast);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-            IEM_MC_LOCAL(uint32_t, u32Cast);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_ASSIGN_TO_SMALLER(u32Cast, GCPtrEffSrc);
-            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Cast);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, GCPtrEffSrc);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-    }
-    AssertFailedReturn(VERR_IEM_IPE_7);
-}
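
Note how the 16-bit and 32-bit LEA forms truncate the computed effective address to the operand size via IEM_MC_ASSIGN_TO_SMALLER, while the 64-bit form stores it untouched; no memory is accessed at any point. The truncation is an ordinary narrowing cast (using uint64_t here as a stand-in for RTGCPTR):

    #include <stdint.h>

    /* LEA with a 16-bit operand size keeps only the low word of the address. */
    static uint16_t LeaU16(uint64_t uGCPtrEffSrc)
    {
        return (uint16_t)uGCPtrEffSrc;
    }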
-
-
-/** Opcode 0x8e. */
-FNIEMOP_DEF(iemOp_mov_Sw_Ev)
-{
-    IEMOP_MNEMONIC("mov Sw,Ev");
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * The practical operand size is 16-bit.
-     */
-#if 0 /* not necessary */
-    pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_16BIT;
-#endif
-
-    /*
-     * Check that the destination register exists and can be used with this
-     * instruction.  The REX.R prefix is ignored.
-     */
-    uint8_t const iSegReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-    if (   iSegReg == X86_SREG_CS
-        || iSegReg > X86_SREG_GS)
-        return IEMOP_RAISE_INVALID_OPCODE(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(2, 0);
-        IEM_MC_ARG_CONST(uint8_t, iSRegArg, iSegReg, 0);
-        IEM_MC_ARG(uint16_t,      u16Value,          1);
-        IEM_MC_FETCH_GREG_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_CALL_CIMPL_2(iemCImpl_load_SReg, iSRegArg, u16Value);
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * We're loading the register from memory.  The access is word-sized
-         * regardless of operand size prefixes.
-         */
-        IEM_MC_BEGIN(2, 1);
-        IEM_MC_ARG_CONST(uint8_t, iSRegArg, iSegReg, 0);
-        IEM_MC_ARG(uint16_t,      u16Value,          1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_MEM_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-        IEM_MC_CALL_CIMPL_2(iemCImpl_load_SReg, iSRegArg, u16Value);
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
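
Rejecting CS matches the architecture: loading CS with MOV raises #UD, since changing CS outside a control transfer would be meaningless. The validity test above reduces to the following predicate (constant values as in the standard ModR/M sreg encoding):

    #include <stdbool.h>
    #include <stdint.h>

    #define X86_SREG_CS 1   /* ES=0, CS=1, SS=2, DS=3, FS=4, GS=5 */
    #define X86_SREG_GS 5

    /* 'mov Sw,Ev' accepts any segment register except CS; reg values 6 and 7
       are unassigned and also raise #UD. */
    static bool IsValidMovSwDest(uint8_t iSegReg)
    {
        return iSegReg != X86_SREG_CS && iSegReg <= X86_SREG_GS;
    }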
-
-
-/** Opcode 0x8f /0. */
-FNIEMOP_DEF_1(iemOp_pop_Ev, uint8_t, bRm)
-{
-    /* This bugger is rather annoying as it requires rSP to be updated before
-       doing the effective address calculations.  Will eventually require a
-       split between the R/M+SIB decoding and the effective address
-       calculation - something that any attempt at reusing this code for a
-       recompiler will require.  It may also be useful if we need to delay
-       the #UD exception caused by invalid lock prefixes.
-
-       For now, we'll do a mostly safe interpreter-only implementation here. */
-    /** @todo What's the deal with the 'reg' field and pop Ev?  Ignoring it for
-     *        now until tests show it's checked. */
-    IEMOP_MNEMONIC("pop Ev");
-
-    /* Register access is relatively easy and can share code. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        return FNIEMOP_CALL_1(iemOpCommonPopGReg, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-
-    /*
-     * Memory target.
-     *
-     * Intel says that RSP is incremented before it's used in any effective
-     * address calculations.  This means some serious extra annoyance here since
-     * we decode and calculate the effective address in one step and like to
-     * delay committing registers till everything is done.
-     *
-     * So, we'll decode and calculate the effective address twice.  This will
-     * require some recoding if turned into a recompiler.
-     */
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE(); /* The common code does this differently. */
-
-#ifndef TST_IEM_CHECK_MC
-    /* Calc effective address with modified ESP. */
-/** @todo testcase */
-    PCPUMCTX        pCtx     = IEM_GET_CTX(pVCpu);
-    RTGCPTR         GCPtrEff;
-    VBOXSTRICTRC    rcStrict;
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT: rcStrict = iemOpHlpCalcRmEffAddrEx(pVCpu, bRm, 0, &GCPtrEff, 2); break;
-        case IEMMODE_32BIT: rcStrict = iemOpHlpCalcRmEffAddrEx(pVCpu, bRm, 0, &GCPtrEff, 4); break;
-        case IEMMODE_64BIT: rcStrict = iemOpHlpCalcRmEffAddrEx(pVCpu, bRm, 0, &GCPtrEff, 8); break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    if (rcStrict != VINF_SUCCESS)
-        return rcStrict;
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /* Perform the operation - this should be CImpl. */
-    RTUINT64U TmpRsp;
-    TmpRsp.u = pCtx->rsp;
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-        {
-            uint16_t u16Value;
-            rcStrict = iemMemStackPopU16Ex(pVCpu, &u16Value, &TmpRsp);
-            if (rcStrict == VINF_SUCCESS)
-                rcStrict = iemMemStoreDataU16(pVCpu, pVCpu->iem.s.iEffSeg, GCPtrEff, u16Value);
-            break;
-        }
-
-        case IEMMODE_32BIT:
-        {
-            uint32_t u32Value;
-            rcStrict = iemMemStackPopU32Ex(pVCpu, &u32Value, &TmpRsp);
-            if (rcStrict == VINF_SUCCESS)
-                rcStrict = iemMemStoreDataU32(pVCpu, pVCpu->iem.s.iEffSeg, GCPtrEff, u32Value);
-            break;
-        }
-
-        case IEMMODE_64BIT:
-        {
-            uint64_t u64Value;
-            rcStrict = iemMemStackPopU64Ex(pVCpu, &u64Value, &TmpRsp);
-            if (rcStrict == VINF_SUCCESS)
-                rcStrict = iemMemStoreDataU64(pVCpu, pVCpu->iem.s.iEffSeg, GCPtrEff, u64Value);
-            break;
-        }
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    if (rcStrict == VINF_SUCCESS)
-    {
-        pCtx->rsp = TmpRsp.u;
-        iemRegUpdateRipAndClearRF(pVCpu);
-    }
-    return rcStrict;
-
-#else
-    return VERR_IEM_IPE_2;
-#endif
-}
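
The ordering subtlety documented above is easy to get wrong: for 'pop [rsp+disp]' the value is read at the old stack pointer, but the destination address is computed with the incremented one. A toy byte-addressed model of that rule (hypothetical names, no segmentation or fault handling):

    #include <stdint.h>
    #include <string.h>

    /* Toy model of 'pop qword [rsp + disp]': Intel specifies that RSP is
       incremented before the destination effective address is evaluated. */
    static void PopQwordRspRel(uint8_t *pbMem, uint64_t *puRsp, int64_t offDisp)
    {
        uint64_t uValue;
        memcpy(&uValue, pbMem + *puRsp, sizeof(uValue));           /* read at the old RSP */
        *puRsp += 8;                                               /* pop adjusts RSP first... */
        memcpy(pbMem + *puRsp + offDisp, &uValue, sizeof(uValue)); /* ...then the EA uses the new RSP */
    }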
-
-
-/** Opcode 0x8f. */
-FNIEMOP_DEF(iemOp_Grp1A)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_REG_MASK) == (0 << X86_MODRM_REG_SHIFT)) /* /0 */
-        return FNIEMOP_CALL_1(iemOp_pop_Ev, bRm);
-
-    /* AMD has defined /1 thru /7 as the XOP prefix (similar to three-byte VEX). */
-    /** @todo XOP decoding. */
-    IEMOP_MNEMONIC("3-byte-xop");
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/**
- * Common 'xchg reg,rAX' helper.
- */
-FNIEMOP_DEF_1(iemOpCommonXchgGRegRax, uint8_t, iReg)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    iReg |= pVCpu->iem.s.uRexB;
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint16_t, u16Tmp1);
-            IEM_MC_LOCAL(uint16_t, u16Tmp2);
-            IEM_MC_FETCH_GREG_U16(u16Tmp1, iReg);
-            IEM_MC_FETCH_GREG_U16(u16Tmp2, X86_GREG_xAX);
-            IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Tmp1);
-            IEM_MC_STORE_GREG_U16(iReg,         u16Tmp2);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint32_t, u32Tmp1);
-            IEM_MC_LOCAL(uint32_t, u32Tmp2);
-            IEM_MC_FETCH_GREG_U32(u32Tmp1, iReg);
-            IEM_MC_FETCH_GREG_U32(u32Tmp2, X86_GREG_xAX);
-            IEM_MC_STORE_GREG_U32(X86_GREG_xAX, u32Tmp1);
-            IEM_MC_STORE_GREG_U32(iReg,         u32Tmp2);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint64_t, u64Tmp1);
-            IEM_MC_LOCAL(uint64_t, u64Tmp2);
-            IEM_MC_FETCH_GREG_U64(u64Tmp1, iReg);
-            IEM_MC_FETCH_GREG_U64(u64Tmp2, X86_GREG_xAX);
-            IEM_MC_STORE_GREG_U64(X86_GREG_xAX, u64Tmp1);
-            IEM_MC_STORE_GREG_U64(iReg,         u64Tmp2);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0x90. */
-FNIEMOP_DEF(iemOp_nop)
-{
-    /* R8/R8D and RAX/EAX can be exchanged. */
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REX_B)
-    {
-        IEMOP_MNEMONIC("xchg r8,rAX");
-        return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xAX);
-    }
-
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPZ) /* PAUSE is encoded as F3 90. */
-        IEMOP_MNEMONIC("pause");
-    else
-        IEMOP_MNEMONIC("nop");
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
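
So a bare 0x90 is NOP, F3 90 is PAUSE, and 41 90 (REX.B) is a genuine 'xchg r8,rAX' rather than a NOP; the handler keys this off the accumulated prefix flags. A sketch of the three-way classification (stand-in flag values, hypothetical enum):

    #include <stdint.h>

    typedef enum { OP90_NOP, OP90_PAUSE, OP90_XCHG_R8_RAX } OP90KIND;

    #define PRF_REPZ  UINT32_C(0x01) /* stand-ins for the IEM_OP_PRF_* bits */
    #define PRF_REX_B UINT32_C(0x02)

    /* Classify opcode 0x90 the way iemOp_nop does: REX.B first, then REPZ. */
    static OP90KIND ClassifyOp90(uint32_t fPrefixes)
    {
        if (fPrefixes & PRF_REX_B)
            return OP90_XCHG_R8_RAX;
        return (fPrefixes & PRF_REPZ) ? OP90_PAUSE : OP90_NOP;
    }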
-
-
-/** Opcode 0x91. */
-FNIEMOP_DEF(iemOp_xchg_eCX_eAX)
-{
-    IEMOP_MNEMONIC("xchg rCX,rAX");
-    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xCX);
-}
-
-
-/** Opcode 0x92. */
-FNIEMOP_DEF(iemOp_xchg_eDX_eAX)
-{
-    IEMOP_MNEMONIC("xchg rDX,rAX");
-    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xDX);
-}
-
-
-/** Opcode 0x93. */
-FNIEMOP_DEF(iemOp_xchg_eBX_eAX)
-{
-    IEMOP_MNEMONIC("xchg rBX,rAX");
-    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xBX);
-}
-
-
-/** Opcode 0x94. */
-FNIEMOP_DEF(iemOp_xchg_eSP_eAX)
-{
-    IEMOP_MNEMONIC("xchg rSX,rAX");
-    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xSP);
-}
-
-
-/** Opcode 0x95. */
-FNIEMOP_DEF(iemOp_xchg_eBP_eAX)
-{
-    IEMOP_MNEMONIC("xchg rBP,rAX");
-    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xBP);
-}
-
-
-/** Opcode 0x96. */
-FNIEMOP_DEF(iemOp_xchg_eSI_eAX)
-{
-    IEMOP_MNEMONIC("xchg rSI,rAX");
-    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xSI);
-}
-
-
-/** Opcode 0x97. */
-FNIEMOP_DEF(iemOp_xchg_eDI_eAX)
-{
-    IEMOP_MNEMONIC("xchg rDI,rAX");
-    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xDI);
-}
-
-
-/** Opcode 0x98. */
-FNIEMOP_DEF(iemOp_cbw)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEMOP_MNEMONIC("cbw");
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 7) {
-                IEM_MC_OR_GREG_U16(X86_GREG_xAX, UINT16_C(0xff00));
-            } IEM_MC_ELSE() {
-                IEM_MC_AND_GREG_U16(X86_GREG_xAX, UINT16_C(0x00ff));
-            } IEM_MC_ENDIF();
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEMOP_MNEMONIC("cwde");
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 15) {
-                IEM_MC_OR_GREG_U32(X86_GREG_xAX, UINT32_C(0xffff0000));
-            } IEM_MC_ELSE() {
-                IEM_MC_AND_GREG_U32(X86_GREG_xAX, UINT32_C(0x0000ffff));
-            } IEM_MC_ENDIF();
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEMOP_MNEMONIC("cdqe");
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 31) {
-                IEM_MC_OR_GREG_U64(X86_GREG_xAX, UINT64_C(0xffffffff00000000));
-            } IEM_MC_ELSE() {
-                IEM_MC_AND_GREG_U64(X86_GREG_xAX, UINT64_C(0x00000000ffffffff));
-            } IEM_MC_ENDIF();
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
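
The bit-test plus mask sequence is just an in-place sign extension of the low half of rAX; for CBW it is equivalent to the cast form below, and the same identity scales to CWDE and CDQE:

    #include <stdint.h>

    /* CBW two ways: the OR/AND mask form used by the decoder above, and the
       arithmetic cast form.  They agree for every 16-bit AX value. */
    static uint16_t CbwViaMask(uint16_t uAx)
    {
        return (uAx & 0x80) ? (uint16_t)(uAx | UINT16_C(0xff00))
                            : (uint16_t)(uAx & UINT16_C(0x00ff));
    }

    static uint16_t CbwViaCast(uint16_t uAx)
    {
        return (uint16_t)(int16_t)(int8_t)(uint8_t)uAx;
    }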
-
-
-/** Opcode 0x99. */
-FNIEMOP_DEF(iemOp_cwd)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEMOP_MNEMONIC("cwd");
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 15) {
-                IEM_MC_STORE_GREG_U16_CONST(X86_GREG_xDX, UINT16_C(0xffff));
-            } IEM_MC_ELSE() {
-                IEM_MC_STORE_GREG_U16_CONST(X86_GREG_xDX, 0);
-            } IEM_MC_ENDIF();
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEMOP_MNEMONIC("cdq");
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 31) {
-                IEM_MC_STORE_GREG_U32_CONST(X86_GREG_xDX, UINT32_C(0xffffffff));
-            } IEM_MC_ELSE() {
-                IEM_MC_STORE_GREG_U32_CONST(X86_GREG_xDX, 0);
-            } IEM_MC_ENDIF();
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEMOP_MNEMONIC("cqo");
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 63) {
-                IEM_MC_STORE_GREG_U64_CONST(X86_GREG_xDX, UINT64_C(0xffffffffffffffff));
-            } IEM_MC_ELSE() {
-                IEM_MC_STORE_GREG_U64_CONST(X86_GREG_xDX, 0);
-            } IEM_MC_ENDIF();
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
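
CWD/CDQ/CQO replicate the sign bit of rAX into every bit of rDX, which the decoder expresses as a bit test selecting an all-ones or all-zero constant:

    #include <stdint.h>

    /* CDQ: EDX becomes the sign of EAX replicated into 32 bits, matching the
       IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 31) test above. */
    static uint32_t CdqEdx(uint32_t uEax)
    {
        return (uEax & UINT32_C(0x80000000)) ? UINT32_C(0xffffffff) : UINT32_C(0);
    }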
-
-
-/** Opcode 0x9a. */
-FNIEMOP_DEF(iemOp_call_Ap)
-{
-    IEMOP_MNEMONIC("call Ap");
-    IEMOP_HLP_NO_64BIT();
-
-    /* Decode the far pointer address and pass it on to the far call C implementation. */
-    uint32_t offSeg;
-    if (pVCpu->iem.s.enmEffOpSize != IEMMODE_16BIT)
-        IEM_OPCODE_GET_NEXT_U32(&offSeg);
-    else
-        IEM_OPCODE_GET_NEXT_U16_ZX_U32(&offSeg);
-    uint16_t uSel;  IEM_OPCODE_GET_NEXT_U16(&uSel);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_callf, uSel, offSeg, pVCpu->iem.s.enmEffOpSize);
-}
-
-
-/** Opcode 0x9b. (aka fwait) */
-FNIEMOP_DEF(iemOp_wait)
-{
-    IEMOP_MNEMONIC("wait");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x9c. */
-FNIEMOP_DEF(iemOp_pushf_Fv)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_pushf, pVCpu->iem.s.enmEffOpSize);
-}
-
-
-/** Opcode 0x9d. */
-FNIEMOP_DEF(iemOp_popf_Fv)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_popf, pVCpu->iem.s.enmEffOpSize);
-}
-
-
-/** Opcode 0x9e. */
-FNIEMOP_DEF(iemOp_sahf)
-{
-    IEMOP_MNEMONIC("sahf");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (   pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT
-        && !IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fLahfSahf)
-        return IEMOP_RAISE_INVALID_OPCODE();
-    IEM_MC_BEGIN(0, 2);
-    IEM_MC_LOCAL(uint32_t, u32Flags);
-    IEM_MC_LOCAL(uint32_t, EFlags);
-    IEM_MC_FETCH_EFLAGS(EFlags);
-    IEM_MC_FETCH_GREG_U8_ZX_U32(u32Flags, X86_GREG_xSP/*=AH*/);
-    IEM_MC_AND_LOCAL_U32(u32Flags, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF);
-    IEM_MC_AND_LOCAL_U32(EFlags, UINT32_C(0xffffff00));
-    IEM_MC_OR_LOCAL_U32(u32Flags, X86_EFL_1);
-    IEM_MC_OR_2LOCS_U32(EFlags, u32Flags);
-    IEM_MC_COMMIT_EFLAGS(EFlags);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x9f. */
-FNIEMOP_DEF(iemOp_lahf)
-{
-    IEMOP_MNEMONIC("lahf");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (   pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT
-        && !IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fLahfSahf)
-        return IEMOP_RAISE_INVALID_OPCODE();
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(uint8_t, u8Flags);
-    IEM_MC_FETCH_EFLAGS_U8(u8Flags);
-    IEM_MC_STORE_GREG_U8(X86_GREG_xSP/*=AH*/, u8Flags);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
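
SAHF lets AH overwrite only the five status flags SF/ZF/AF/PF/CF and forces the reserved bit 1 to one, which is what the AND/OR mask dance in iemOp_sahf above performs; LAHF is the inverse load. A flat restatement of the SAHF merge (EFLAGS bit values as defined in iprt/x86.h):

    #include <stdint.h>

    #define X86_EFL_CF UINT32_C(0x00000001)
    #define X86_EFL_1  UINT32_C(0x00000002) /* reserved bit, always one */
    #define X86_EFL_PF UINT32_C(0x00000004)
    #define X86_EFL_AF UINT32_C(0x00000010)
    #define X86_EFL_ZF UINT32_C(0x00000040)
    #define X86_EFL_SF UINT32_C(0x00000080)

    /* SAHF: merge AH into the low byte of EFLAGS, leaving bits 8+ untouched. */
    static uint32_t SahfMerge(uint32_t fEFlags, uint8_t uAh)
    {
        uint32_t const fFromAh = uAh & (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF
                                        | X86_EFL_PF | X86_EFL_CF);
        return (fEFlags & UINT32_C(0xffffff00)) | fFromAh | X86_EFL_1;
    }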
-
-
-/**
- * Macro used by iemOp_mov_Al_Ob, iemOp_mov_rAX_Ov, iemOp_mov_Ob_AL and
- * iemOp_mov_Ov_rAX to fetch the moffsXX part of the opcode and fend off lock
- * prefixes.  Will return on failures.
- * @param   a_GCPtrMemOff   The variable to store the offset in.
- */
-#define IEMOP_FETCH_MOFFS_XX(a_GCPtrMemOff) \
-    do \
-    { \
-        switch (pVCpu->iem.s.enmEffAddrMode) \
-        { \
-            case IEMMODE_16BIT: \
-                IEM_OPCODE_GET_NEXT_U16_ZX_U64(&(a_GCPtrMemOff)); \
-                break; \
-            case IEMMODE_32BIT: \
-                IEM_OPCODE_GET_NEXT_U32_ZX_U64(&(a_GCPtrMemOff)); \
-                break; \
-            case IEMMODE_64BIT: \
-                IEM_OPCODE_GET_NEXT_U64(&(a_GCPtrMemOff)); \
-                break; \
-            IEM_NOT_REACHED_DEFAULT_CASE_RET(); \
-        } \
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX(); \
-    } while (0)
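
The macro reads a 2, 4 or 8 byte absolute offset according to the effective address size and zero-extends it to 64 bits; in 16-bit code, for instance, 'a0 34 12' is 'mov al,[0x1234]'. A sketch of the width-dependent fetch (hypothetical buffer-based reader):

    #include <stdint.h>

    /* Fetch a moffs displacement of cbWidth (2, 4 or 8) little-endian bytes
       and zero-extend it to 64 bits. */
    static uint64_t FetchMoffs(const uint8_t *pbCode, unsigned cbWidth)
    {
        uint64_t off = 0;
        for (unsigned i = 0; i < cbWidth; i++)
            off |= (uint64_t)pbCode[i] << (8 * i);
        return off;
    }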
-
-/** Opcode 0xa0. */
-FNIEMOP_DEF(iemOp_mov_Al_Ob)
-{
-    /*
-     * Get the offset and fend off lock prefixes.
-     */
-    RTGCPTR GCPtrMemOff;
-    IEMOP_FETCH_MOFFS_XX(GCPtrMemOff);
-
-    /*
-     * Fetch AL.
-     */
-    IEM_MC_BEGIN(0,1);
-    IEM_MC_LOCAL(uint8_t, u8Tmp);
-    IEM_MC_FETCH_MEM_U8(u8Tmp, pVCpu->iem.s.iEffSeg, GCPtrMemOff);
-    IEM_MC_STORE_GREG_U8(X86_GREG_xAX, u8Tmp);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xa1. */
-FNIEMOP_DEF(iemOp_mov_rAX_Ov)
-{
-    /*
-     * Get the offset and fend off lock prefixes.
-     */
-    IEMOP_MNEMONIC("mov rAX,Ov");
-    RTGCPTR GCPtrMemOff;
-    IEMOP_FETCH_MOFFS_XX(GCPtrMemOff);
-
-    /*
-     * Fetch rAX.
-     */
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0,1);
-            IEM_MC_LOCAL(uint16_t, u16Tmp);
-            IEM_MC_FETCH_MEM_U16(u16Tmp, pVCpu->iem.s.iEffSeg, GCPtrMemOff);
-            IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Tmp);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0,1);
-            IEM_MC_LOCAL(uint32_t, u32Tmp);
-            IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrMemOff);
-            IEM_MC_STORE_GREG_U32(X86_GREG_xAX, u32Tmp);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0,1);
-            IEM_MC_LOCAL(uint64_t, u64Tmp);
-            IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrMemOff);
-            IEM_MC_STORE_GREG_U64(X86_GREG_xAX, u64Tmp);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0xa2. */
-FNIEMOP_DEF(iemOp_mov_Ob_AL)
-{
-    /*
-     * Get the offset and fend off lock prefixes.
-     */
-    RTGCPTR GCPtrMemOff;
-    IEMOP_FETCH_MOFFS_XX(GCPtrMemOff);
-
-    /*
-     * Store AL.
-     */
-    IEM_MC_BEGIN(0,1);
-    IEM_MC_LOCAL(uint8_t, u8Tmp);
-    IEM_MC_FETCH_GREG_U8(u8Tmp, X86_GREG_xAX);
-    IEM_MC_STORE_MEM_U8(pVCpu->iem.s.iEffSeg, GCPtrMemOff, u8Tmp);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xa3. */
-FNIEMOP_DEF(iemOp_mov_Ov_rAX)
-{
-    /*
-     * Get the offset and fend off lock prefixes.
-     */
-    RTGCPTR GCPtrMemOff;
-    IEMOP_FETCH_MOFFS_XX(GCPtrMemOff);
-
-    /*
-     * Store rAX.
-     */
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0,1);
-            IEM_MC_LOCAL(uint16_t, u16Tmp);
-            IEM_MC_FETCH_GREG_U16(u16Tmp, X86_GREG_xAX);
-            IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrMemOff, u16Tmp);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0,1);
-            IEM_MC_LOCAL(uint32_t, u32Tmp);
-            IEM_MC_FETCH_GREG_U32(u32Tmp, X86_GREG_xAX);
-            IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrMemOff, u32Tmp);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0,1);
-            IEM_MC_LOCAL(uint64_t, u64Tmp);
-            IEM_MC_FETCH_GREG_U64(u64Tmp, X86_GREG_xAX);
-            IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrMemOff, u64Tmp);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-/** Macro used by iemOp_movsb_Xb_Yb and iemOp_movswd_Xv_Yv */
-#define IEM_MOVS_CASE(ValBits, AddrBits) \
-        IEM_MC_BEGIN(0, 2); \
-        IEM_MC_LOCAL(uint##ValBits##_t, uValue); \
-        IEM_MC_LOCAL(RTGCPTR,           uAddr); \
-        IEM_MC_FETCH_GREG_U##AddrBits##_ZX_U64(uAddr, X86_GREG_xSI); \
-        IEM_MC_FETCH_MEM_U##ValBits(uValue, pVCpu->iem.s.iEffSeg, uAddr); \
-        IEM_MC_FETCH_GREG_U##AddrBits##_ZX_U64(uAddr, X86_GREG_xDI); \
-        IEM_MC_STORE_MEM_U##ValBits(X86_SREG_ES, uAddr, uValue); \
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_DF) { \
-            IEM_MC_SUB_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
-            IEM_MC_SUB_GREG_U##AddrBits(X86_GREG_xSI, ValBits / 8); \
-        } IEM_MC_ELSE() { \
-            IEM_MC_ADD_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
-            IEM_MC_ADD_GREG_U##AddrBits(X86_GREG_xSI, ValBits / 8); \
-        } IEM_MC_ENDIF(); \
-        IEM_MC_ADVANCE_RIP(); \
-        IEM_MC_END();
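
After each element is copied, rSI and rDI both step by the element size, downward when EFLAGS.DF is set and upward when it is clear, exactly as the branch at the end of IEM_MOVS_CASE encodes. In plain C:

    #include <stdbool.h>
    #include <stdint.h>

    /* Post-copy index update shared by MOVSB/W/D/Q: advance both string
       pointers by the element size, in the direction selected by DF. */
    static void MovsAdvance(uint64_t *puSi, uint64_t *puDi, unsigned cbElem, bool fDf)
    {
        if (fDf)
        {
            *puSi -= cbElem;
            *puDi -= cbElem;
        }
        else
        {
            *puSi += cbElem;
            *puDi += cbElem;
        }
    }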
-
-/** Opcode 0xa4. */
-FNIEMOP_DEF(iemOp_movsb_Xb_Yb)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeat prefix is encountered.
-     */
-    if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        IEMOP_MNEMONIC("rep movsb Xb,Yb");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op8_addr16, pVCpu->iem.s.iEffSeg);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op8_addr32, pVCpu->iem.s.iEffSeg);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op8_addr64, pVCpu->iem.s.iEffSeg);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    IEMOP_MNEMONIC("movsb Xb,Yb");
-
-    /*
-     * Sharing case implementation with movs[wdq] below.
-     */
-    switch (pVCpu->iem.s.enmEffAddrMode)
-    {
-        case IEMMODE_16BIT: IEM_MOVS_CASE(8, 16); break;
-        case IEMMODE_32BIT: IEM_MOVS_CASE(8, 32); break;
-        case IEMMODE_64BIT: IEM_MOVS_CASE(8, 64); break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xa5. */
-FNIEMOP_DEF(iemOp_movswd_Xv_Yv)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeat prefix is encountered.
-     */
-    if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        IEMOP_MNEMONIC("rep movs Xv,Yv");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op16_addr16, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op16_addr32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op16_addr64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op32_addr16, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op32_addr32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op32_addr64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            case IEMMODE_64BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_6);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op64_addr32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op64_addr64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    IEMOP_MNEMONIC("movs Xv,Yv");
-
-    /*
-     * Annoying double switch here.
-     * Using ugly macro for implementing the cases, sharing it with movsb.
-     */
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: IEM_MOVS_CASE(16, 16); break;
-                case IEMMODE_32BIT: IEM_MOVS_CASE(16, 32); break;
-                case IEMMODE_64BIT: IEM_MOVS_CASE(16, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-
-        case IEMMODE_32BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: IEM_MOVS_CASE(32, 16); break;
-                case IEMMODE_32BIT: IEM_MOVS_CASE(32, 32); break;
-                case IEMMODE_64BIT: IEM_MOVS_CASE(32, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-
-        case IEMMODE_64BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_1); /* cannot be encoded */ break;
-                case IEMMODE_32BIT: IEM_MOVS_CASE(64, 32); break;
-                case IEMMODE_64BIT: IEM_MOVS_CASE(64, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    return VINF_SUCCESS;
-}
-
-#undef IEM_MOVS_CASE
-
-/** Macro used by iemOp_cmpsb_Xb_Yb and iemOp_cmpswd_Xv_Yv */
-#define IEM_CMPS_CASE(ValBits, AddrBits) \
-        IEM_MC_BEGIN(3, 3); \
-        IEM_MC_ARG(uint##ValBits##_t *, puValue1, 0); \
-        IEM_MC_ARG(uint##ValBits##_t,   uValue2,  1); \
-        IEM_MC_ARG(uint32_t *,          pEFlags,  2); \
-        IEM_MC_LOCAL(uint##ValBits##_t, uValue1); \
-        IEM_MC_LOCAL(RTGCPTR,           uAddr); \
-        \
-        IEM_MC_FETCH_GREG_U##AddrBits##_ZX_U64(uAddr, X86_GREG_xSI); \
-        IEM_MC_FETCH_MEM_U##ValBits(uValue1, pVCpu->iem.s.iEffSeg, uAddr); \
-        IEM_MC_FETCH_GREG_U##AddrBits##_ZX_U64(uAddr, X86_GREG_xDI); \
-        IEM_MC_FETCH_MEM_U##ValBits(uValue2, X86_SREG_ES, uAddr); \
-        IEM_MC_REF_LOCAL(puValue1, uValue1); \
-        IEM_MC_REF_EFLAGS(pEFlags); \
-        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_cmp_u##ValBits, puValue1, uValue2, pEFlags); \
-        \
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_DF) { \
-            IEM_MC_SUB_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
-            IEM_MC_SUB_GREG_U##AddrBits(X86_GREG_xSI, ValBits / 8); \
-        } IEM_MC_ELSE() { \
-            IEM_MC_ADD_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
-            IEM_MC_ADD_GREG_U##AddrBits(X86_GREG_xSI, ValBits / 8); \
-        } IEM_MC_ENDIF(); \
-        IEM_MC_ADVANCE_RIP(); \
-        IEM_MC_END(); \
-
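-/* Semantics sketch (informal): CMPS fetches the first operand from
-   DS(or seg-prefix):xSI and the second from ES:xDI, then runs the regular
-   CMP worker on them.  IEM_MC_REF_LOCAL points the pointer argument puValue1
-   at the local uValue1, so iemAImpl_cmp_u<N> keeps the same three-argument
-   shape as the other binary-op workers even though CMP only updates EFLAGS.
-   In plain assembly terms:
-
-        cmpsb           ; flags as if by: cmp byte [ds:si], byte [es:di]
-                        ; then si, di += 1 (DF=0) or -= 1 (DF=1)
-*/
-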
-/** Opcode 0xa6. */
-FNIEMOP_DEF(iemOp_cmpsb_Xb_Yb)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeat prefix is encountered.
-     */
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPZ)
-    {
-        IEMOP_MNEMONIC("repe cmps Xb,Yb");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op8_addr16, pVCpu->iem.s.iEffSeg);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op8_addr32, pVCpu->iem.s.iEffSeg);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op8_addr64, pVCpu->iem.s.iEffSeg);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPNZ)
-    {
-        IEMOP_MNEMONIC("repe cmps Xb,Yb");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op8_addr16, pVCpu->iem.s.iEffSeg);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op8_addr32, pVCpu->iem.s.iEffSeg);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op8_addr64, pVCpu->iem.s.iEffSeg);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    IEMOP_MNEMONIC("cmps Xb,Yb");
-
-    /*
-     * Sharing case implementation with cmps[wdq] below.
-     */
-    switch (pVCpu->iem.s.enmEffAddrMode)
-    {
-        case IEMMODE_16BIT: IEM_CMPS_CASE(8, 16); break;
-        case IEMMODE_32BIT: IEM_CMPS_CASE(8, 32); break;
-        case IEMMODE_64BIT: IEM_CMPS_CASE(8, 64); break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xa7. */
-FNIEMOP_DEF(iemOp_cmpswd_Xv_Yv)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeat prefix is encountered.
-     */
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPZ)
-    {
-        IEMOP_MNEMONIC("repe cmps Xv,Yv");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op16_addr16, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op16_addr32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op16_addr64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op32_addr16, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op32_addr32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op32_addr64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            case IEMMODE_64BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_4);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op64_addr32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op64_addr64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPNZ)
-    {
-        IEMOP_MNEMONIC("repne cmps Xv,Yv");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op16_addr16, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op16_addr32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op16_addr64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op32_addr16, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op32_addr32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op32_addr64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            case IEMMODE_64BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_2);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op64_addr32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op64_addr64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-
-    IEMOP_MNEMONIC("cmps Xv,Yv");
-
-    /*
-     * Annoying double switch here.
-     * Using ugly macro for implementing the cases, sharing it with cmpsb.
-     */
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: IEM_CMPS_CASE(16, 16); break;
-                case IEMMODE_32BIT: IEM_CMPS_CASE(16, 32); break;
-                case IEMMODE_64BIT: IEM_CMPS_CASE(16, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-
-        case IEMMODE_32BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: IEM_CMPS_CASE(32, 16); break;
-                case IEMMODE_32BIT: IEM_CMPS_CASE(32, 32); break;
-                case IEMMODE_64BIT: IEM_CMPS_CASE(32, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-
-        case IEMMODE_64BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_1); /* cannot be encoded */ break;
-                case IEMMODE_32BIT: IEM_CMPS_CASE(64, 32); break;
-                case IEMMODE_64BIT: IEM_CMPS_CASE(64, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    return VINF_SUCCESS;
-}
-
-#undef IEM_CMPS_CASE
-
-/** Opcode 0xa8. */
-FNIEMOP_DEF(iemOp_test_AL_Ib)
-{
-    IEMOP_MNEMONIC("test al,Ib");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_test);
-}
-
-
-/** Opcode 0xa9. */
-FNIEMOP_DEF(iemOp_test_eAX_Iz)
-{
-    IEMOP_MNEMONIC("test rAX,Iz");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_test);
-}
-
-
-/** Macro used by iemOp_stosb_Yb_AL and iemOp_stoswd_Yv_eAX */
-#define IEM_STOS_CASE(ValBits, AddrBits) \
-        IEM_MC_BEGIN(0, 2); \
-        IEM_MC_LOCAL(uint##ValBits##_t, uValue); \
-        IEM_MC_LOCAL(RTGCPTR, uAddr); \
-        IEM_MC_FETCH_GREG_U##ValBits(uValue, X86_GREG_xAX); \
-        IEM_MC_FETCH_GREG_U##AddrBits##_ZX_U64(uAddr,  X86_GREG_xDI); \
-        IEM_MC_STORE_MEM_U##ValBits(X86_SREG_ES, uAddr, uValue); \
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_DF) { \
-            IEM_MC_SUB_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
-        } IEM_MC_ELSE() { \
-            IEM_MC_ADD_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
-        } IEM_MC_ENDIF(); \
-        IEM_MC_ADVANCE_RIP(); \
-        IEM_MC_END(); \
-
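-/* Usage note (informal): REP STOS is the classic memset idiom.  Assuming flat
-   32-bit code with DF=0, for instance:
-
-        xor     eax, eax        ; fill byte 0x00 in AL
-        mov     ecx, 64         ; byte count
-        rep stosb               ; store AL at es:[edi] 64 times, edi += 64
-
-   zero-fills 64 bytes at ES:EDI.  The repeat-prefixed forms below are routed
-   to the C implementations (iemCImpl_stos_*) instead of being expanded via
-   this macro. */
-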
-/** Opcode 0xaa. */
-FNIEMOP_DEF(iemOp_stosb_Yb_AL)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeat prefix is encountered.
-     */
-    if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        IEMOP_MNEMONIC("rep stos Yb,al");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_al_m16);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_al_m32);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_al_m64);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    IEMOP_MNEMONIC("stos Yb,al");
-
-    /*
-     * Sharing case implementation with stos[wdq] below.
-     */
-    switch (pVCpu->iem.s.enmEffAddrMode)
-    {
-        case IEMMODE_16BIT: IEM_STOS_CASE(8, 16); break;
-        case IEMMODE_32BIT: IEM_STOS_CASE(8, 32); break;
-        case IEMMODE_64BIT: IEM_STOS_CASE(8, 64); break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xab. */
-FNIEMOP_DEF(iemOp_stoswd_Yv_eAX)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeat prefix is encountered.
-     */
-    if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        IEMOP_MNEMONIC("rep stos Yv,rAX");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_ax_m16);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_ax_m32);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_ax_m64);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_eax_m16);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_eax_m32);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_eax_m64);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            case IEMMODE_64BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_9);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_rax_m32);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_rax_m64);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    IEMOP_MNEMONIC("stos Yv,rAX");
-
-    /*
-     * Annoying double switch here.
-     * Using ugly macro for implementing the cases, sharing it with stosb.
-     */
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: IEM_STOS_CASE(16, 16); break;
-                case IEMMODE_32BIT: IEM_STOS_CASE(16, 32); break;
-                case IEMMODE_64BIT: IEM_STOS_CASE(16, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-
-        case IEMMODE_32BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: IEM_STOS_CASE(32, 16); break;
-                case IEMMODE_32BIT: IEM_STOS_CASE(32, 32); break;
-                case IEMMODE_64BIT: IEM_STOS_CASE(32, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-
-        case IEMMODE_64BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_1); /* cannot be encoded */ break;
-                case IEMMODE_32BIT: IEM_STOS_CASE(64, 32); break;
-                case IEMMODE_64BIT: IEM_STOS_CASE(64, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    return VINF_SUCCESS;
-}
-
-#undef IEM_STOS_CASE
-
-/** Macro used by iemOp_lodsb_AL_Xb and iemOp_lodswd_eAX_Xv */
-#define IEM_LODS_CASE(ValBits, AddrBits) \
-        IEM_MC_BEGIN(0, 2); \
-        IEM_MC_LOCAL(uint##ValBits##_t, uValue); \
-        IEM_MC_LOCAL(RTGCPTR, uAddr); \
-        IEM_MC_FETCH_GREG_U##AddrBits##_ZX_U64(uAddr, X86_GREG_xSI); \
-        IEM_MC_FETCH_MEM_U##ValBits(uValue, pVCpu->iem.s.iEffSeg, uAddr); \
-        IEM_MC_STORE_GREG_U##ValBits(X86_GREG_xAX, uValue); \
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_DF) { \
-            IEM_MC_SUB_GREG_U##AddrBits(X86_GREG_xSI, ValBits / 8); \
-        } IEM_MC_ELSE() { \
-            IEM_MC_ADD_GREG_U##AddrBits(X86_GREG_xSI, ValBits / 8); \
-        } IEM_MC_ENDIF(); \
-        IEM_MC_ADVANCE_RIP(); \
-        IEM_MC_END();
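-
-/* Informal note: LODS loads from DS(or seg-prefix):xSI into AL/AX/EAX/RAX and
-   steps xSI by the operand size in the direction given by EFLAGS.DF; unlike
-   MOVS and STOS it touches neither xDI nor ES.  A bare 'rep lodsb' is rarely
-   useful since every iteration overwrites AL, so the REP forms below simply
-   defer to the C implementation. */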
-
-/** Opcode 0xac. */
-FNIEMOP_DEF(iemOp_lodsb_AL_Xb)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeat prefix is encountered.
-     */
-    if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        IEMOP_MNEMONIC("rep lodsb al,Xb");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_al_m16, pVCpu->iem.s.iEffSeg);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_al_m32, pVCpu->iem.s.iEffSeg);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_al_m64, pVCpu->iem.s.iEffSeg);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    IEMOP_MNEMONIC("lodsb al,Xb");
-
-    /*
-     * Sharing case implementation with lods[wdq] below.
-     */
-    switch (pVCpu->iem.s.enmEffAddrMode)
-    {
-        case IEMMODE_16BIT: IEM_LODS_CASE(8, 16); break;
-        case IEMMODE_32BIT: IEM_LODS_CASE(8, 32); break;
-        case IEMMODE_64BIT: IEM_LODS_CASE(8, 64); break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xad. */
-FNIEMOP_DEF(iemOp_lodswd_eAX_Xv)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeat prefix is encountered.
-     */
-    if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        IEMOP_MNEMONIC("rep lods rAX,Xv");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_ax_m16, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_ax_m32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_ax_m64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_eax_m16, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_eax_m32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_eax_m64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            case IEMMODE_64BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_7);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_rax_m32, pVCpu->iem.s.iEffSeg);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_rax_m64, pVCpu->iem.s.iEffSeg);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    IEMOP_MNEMONIC("lods rAX,Xv");
-
-    /*
-     * Annoying double switch here.
-     * Using ugly macro for implementing the cases, sharing it with lodsb.
-     */
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: IEM_LODS_CASE(16, 16); break;
-                case IEMMODE_32BIT: IEM_LODS_CASE(16, 32); break;
-                case IEMMODE_64BIT: IEM_LODS_CASE(16, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-
-        case IEMMODE_32BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: IEM_LODS_CASE(32, 16); break;
-                case IEMMODE_32BIT: IEM_LODS_CASE(32, 32); break;
-                case IEMMODE_64BIT: IEM_LODS_CASE(32, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-
-        case IEMMODE_64BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_1); /* cannot be encoded */ break;
-                case IEMMODE_32BIT: IEM_LODS_CASE(64, 32); break;
-                case IEMMODE_64BIT: IEM_LODS_CASE(64, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    return VINF_SUCCESS;
-}
-
-#undef IEM_LODS_CASE
-
-/** Macro used by iemOp_scasb_AL_Xb and iemOp_scaswd_eAX_Xv */
-#define IEM_SCAS_CASE(ValBits, AddrBits) \
-        IEM_MC_BEGIN(3, 2); \
-        IEM_MC_ARG(uint##ValBits##_t *, puRax,   0); \
-        IEM_MC_ARG(uint##ValBits##_t,   uValue,  1); \
-        IEM_MC_ARG(uint32_t *,          pEFlags, 2); \
-        IEM_MC_LOCAL(RTGCPTR,           uAddr); \
-        \
-        IEM_MC_FETCH_GREG_U##AddrBits##_ZX_U64(uAddr, X86_GREG_xDI); \
-        IEM_MC_FETCH_MEM_U##ValBits(uValue, X86_SREG_ES, uAddr); \
-        IEM_MC_REF_GREG_U##ValBits(puRax, X86_GREG_xAX); \
-        IEM_MC_REF_EFLAGS(pEFlags); \
-        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_cmp_u##ValBits, puRax, uValue, pEFlags); \
-        \
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_DF) { \
-            IEM_MC_SUB_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
-        } IEM_MC_ELSE() { \
-            IEM_MC_ADD_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
-        } IEM_MC_ENDIF(); \
-        IEM_MC_ADVANCE_RIP(); \
-        IEM_MC_END();
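-
-/* Usage note (informal): REPNE SCASB is the classic strlen idiom.  Assuming
-   flat code with DF=0, AL=0 and ECX=-1:
-
-        repne scasb             ; scan es:[edi] for AL, decrementing ecx
-        not     ecx
-        dec     ecx             ; ecx = string length
-
-   The repeat-prefixed forms below defer to iemCImpl_repe_scas_* and
-   iemCImpl_repne_scas_* respectively. */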
-
-/** Opcode 0xae. */
-FNIEMOP_DEF(iemOp_scasb_AL_Xb)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeat prefix is encountered.
-     */
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPZ)
-    {
-        IEMOP_MNEMONIC("repe scasb al,Xb");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_al_m16);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_al_m32);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_al_m64);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPNZ)
-    {
-        IEMOP_MNEMONIC("repne scasb al,Xb");
-        switch (pVCpu->iem.s.enmEffAddrMode)
-        {
-            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_al_m16);
-            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_al_m32);
-            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_al_m64);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    IEMOP_MNEMONIC("scasb al,Xb");
-
-    /*
-     * Sharing case implementation with scas[wdq] below.
-     */
-    switch (pVCpu->iem.s.enmEffAddrMode)
-    {
-        case IEMMODE_16BIT: IEM_SCAS_CASE(8, 16); break;
-        case IEMMODE_32BIT: IEM_SCAS_CASE(8, 32); break;
-        case IEMMODE_64BIT: IEM_SCAS_CASE(8, 64); break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xaf. */
-FNIEMOP_DEF(iemOp_scaswd_eAX_Xv)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeat prefix is encountered.
-     */
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPZ)
-    {
-        IEMOP_MNEMONIC("repe scas rAX,Xv");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_ax_m16);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_ax_m32);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_ax_m64);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_eax_m16);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_eax_m32);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_eax_m64);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            case IEMMODE_64BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_6); /** @todo Is this wrong? We can do 32-bit addressing in 64-bit mode, but not 16-bit, right? */
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_rax_m32);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_rax_m64);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPNZ)
-    {
-        IEMOP_MNEMONIC("repne scas rAX,Xv");
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_ax_m16);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_ax_m32);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_ax_m64);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case IEMMODE_32BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_eax_m16);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_eax_m32);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_eax_m64);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            case IEMMODE_64BIT:
-                switch (pVCpu->iem.s.enmEffAddrMode)
-                {
-                    case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_5);
-                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_rax_m32);
-                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_rax_m64);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    IEMOP_MNEMONIC("scas rAX,Xv");
-
-    /*
-     * Annoying double switch here.
-     * Using ugly macro for implementing the cases, sharing it with scasb.
-     */
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: IEM_SCAS_CASE(16, 16); break;
-                case IEMMODE_32BIT: IEM_SCAS_CASE(16, 32); break;
-                case IEMMODE_64BIT: IEM_SCAS_CASE(16, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-
-        case IEMMODE_32BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: IEM_SCAS_CASE(32, 16); break;
-                case IEMMODE_32BIT: IEM_SCAS_CASE(32, 32); break;
-                case IEMMODE_64BIT: IEM_SCAS_CASE(32, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-
-        case IEMMODE_64BIT:
-            switch (pVCpu->iem.s.enmEffAddrMode)
-            {
-                case IEMMODE_16BIT: AssertFailedReturn(VERR_IEM_IPE_1); /* cannot be encoded */ break;
-                case IEMMODE_32BIT: IEM_SCAS_CASE(64, 32); break;
-                case IEMMODE_64BIT: IEM_SCAS_CASE(64, 64); break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    return VINF_SUCCESS;
-}
-
-#undef IEM_SCAS_CASE
-
-/**
- * Common 'mov r8, imm8' helper.
- */
-FNIEMOP_DEF_1(iemOpCommonMov_r8_Ib, uint8_t, iReg)
-{
-    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL_CONST(uint8_t, u8Value,/*=*/ u8Imm);
-    IEM_MC_STORE_GREG_U8(iReg, u8Value);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-
-    return VINF_SUCCESS;
-}
-
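-/* Encoding note (informal): opcodes 0xb0..0xb7 carry the destination register
-   in their low three bits, extended by REX.B.  For example B3 42 is
-   'mov bl, 0x42', while 41 B3 42 (REX.B prefix) is 'mov r11b, 0x42'. */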
-
-/** Opcode 0xb0. */
-FNIEMOP_DEF(iemOp_mov_AL_Ib)
-{
-    IEMOP_MNEMONIC("mov AL,Ib");
-    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xAX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xb1. */
-FNIEMOP_DEF(iemOp_CL_Ib)
-{
-    IEMOP_MNEMONIC("mov CL,Ib");
-    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xCX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xb2. */
-FNIEMOP_DEF(iemOp_DL_Ib)
-{
-    IEMOP_MNEMONIC("mov DL,Ib");
-    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xDX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xb3. */
-FNIEMOP_DEF(iemOp_BL_Ib)
-{
-    IEMOP_MNEMONIC("mov BL,Ib");
-    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xBX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xb4. */
-FNIEMOP_DEF(iemOp_mov_AH_Ib)
-{
-    IEMOP_MNEMONIC("mov AH,Ib");
-    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xSP | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xb5. */
-FNIEMOP_DEF(iemOp_CH_Ib)
-{
-    IEMOP_MNEMONIC("mov CH,Ib");
-    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xBP | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xb6. */
-FNIEMOP_DEF(iemOp_DH_Ib)
-{
-    IEMOP_MNEMONIC("mov DH,Ib");
-    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xSI | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xb7. */
-FNIEMOP_DEF(iemOp_BH_Ib)
-{
-    IEMOP_MNEMONIC("mov BH,Ib");
-    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xDI | pVCpu->iem.s.uRexB);
-}
-
-
-/**
- * Common 'mov regX,immX' helper.
- */
-FNIEMOP_DEF_1(iemOpCommonMov_Rv_Iv, uint8_t, iReg)
-{
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-        {
-            uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL_CONST(uint16_t, u16Value,/*=*/ u16Imm);
-            IEM_MC_STORE_GREG_U16(iReg, u16Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-        }
-
-        case IEMMODE_32BIT:
-        {
-            uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL_CONST(uint32_t, u32Value,/*=*/ u32Imm);
-            IEM_MC_STORE_GREG_U32(iReg, u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-        }
-        case IEMMODE_64BIT:
-        {
-            uint64_t u64Imm; IEM_OPCODE_GET_NEXT_U64(&u64Imm); /* 64-bit immediate! */
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL_CONST(uint64_t, u64Value,/*=*/ u64Imm);
-            IEM_MC_STORE_GREG_U64(iReg, u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-        }
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-
-    return VINF_SUCCESS;
-}
-
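-/* Encoding note (informal): with REX.W, 0xb8+r takes the only full 64-bit
-   immediate in the instruction set (the form assemblers often call movabs),
-   e.g. 48 B8 88 77 66 55 44 33 22 11 is 'mov rax, 0x1122334455667788'; the
-   16- and 32-bit cases above use the correspondingly sized immediates. */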
-
-/** Opcode 0xb8. */
-FNIEMOP_DEF(iemOp_eAX_Iv)
-{
-    IEMOP_MNEMONIC("mov rAX,IV");
-    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xAX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xb9. */
-FNIEMOP_DEF(iemOp_eCX_Iv)
-{
-    IEMOP_MNEMONIC("mov rCX,IV");
-    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xCX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xba. */
-FNIEMOP_DEF(iemOp_eDX_Iv)
-{
-    IEMOP_MNEMONIC("mov rDX,IV");
-    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xDX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xbb. */
-FNIEMOP_DEF(iemOp_eBX_Iv)
-{
-    IEMOP_MNEMONIC("mov rBX,IV");
-    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xBX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xbc. */
-FNIEMOP_DEF(iemOp_eSP_Iv)
-{
-    IEMOP_MNEMONIC("mov rSP,IV");
-    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xSP | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xbd. */
-FNIEMOP_DEF(iemOp_eBP_Iv)
-{
-    IEMOP_MNEMONIC("mov rBP,IV");
-    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xBP | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xbe. */
-FNIEMOP_DEF(iemOp_eSI_Iv)
-{
-    IEMOP_MNEMONIC("mov rSI,IV");
-    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xSI | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xbf. */
-FNIEMOP_DEF(iemOp_eDI_Iv)
-{
-    IEMOP_MNEMONIC("mov rDI,IV");
-    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xDI | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0xc0. */
-FNIEMOP_DEF(iemOp_Grp2_Eb_Ib)
-{
-    IEMOP_HLP_MIN_186();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    PCIEMOPSHIFTSIZES pImpl;
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Eb,Ib"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Eb,Ib"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Eb,Ib"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Eb,Ib"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Eb,Ib"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Eb,Ib"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Eb,Ib"); break;
-        case 6: return IEMOP_RAISE_INVALID_OPCODE();
-        IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc maybe stupid */
-    }
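-    /* Decoding sketch (informal): the ModRM reg field selects the operation
-       for this group.  E.g. the byte sequence C0 F8 03 ('sar al, 3') has
-       bRm = 0xF8, i.e. mod=11b, reg=111b (SAR) and rm=000b (AL). */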
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_AF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register */
-        uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(3, 0);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,            0);
-        IEM_MC_ARG_CONST(uint8_t,   cShiftArg, cShift, 1);
-        IEM_MC_ARG(uint32_t *,      pEFlags,           2);
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory */
-        IEM_MC_BEGIN(3, 2);
-        IEM_MC_ARG(uint8_t *,   pu8Dst,    0);
-        IEM_MC_ARG(uint8_t,     cShiftArg,  1);
-        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-        uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-        IEM_MC_ASSIGN(cShiftArg, cShift);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xc1. */
-FNIEMOP_DEF(iemOp_Grp2_Ev_Ib)
-{
-    IEMOP_HLP_MIN_186();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    PCIEMOPSHIFTSIZES pImpl;
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Ev,Ib"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Ev,Ib"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,Ib"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,Ib"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,Ib"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,Ib"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,Ib"); break;
-        case 6: return IEMOP_RAISE_INVALID_OPCODE();
-        IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc maybe stupid */
-    }
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_AF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register */
-        uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,           0);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, cShift, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,           0);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, cShift, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,           0);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, cShift, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* memory */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *,  pu16Dst,    0);
-                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-                IEM_MC_ASSIGN(cShiftArg, cShift);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *,  pu32Dst,    0);
-                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-                IEM_MC_ASSIGN(cShiftArg, cShift);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,  pu64Dst,    0);
-                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-                IEM_MC_ASSIGN(cShiftArg, cShift);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0xc2. */
-FNIEMOP_DEF(iemOp_retn_Iw)
-{
-    IEMOP_MNEMONIC("retn Iw");
-    uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retn, pVCpu->iem.s.enmEffOpSize, u16Imm);
-}
-
-
-/** Opcode 0xc3. */
-FNIEMOP_DEF(iemOp_retn)
-{
-    IEMOP_MNEMONIC("retn");
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retn, pVCpu->iem.s.enmEffOpSize, 0);
-}
-
-
-/** Opcode 0xc4. */
-FNIEMOP_DEF(iemOp_les_Gv_Mp_vex2)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if (   pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT
-        || (bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_MNEMONIC("2-byte-vex");
-        /* The LES instruction is invalid in 64-bit mode. In legacy and
-           compatibility mode it is invalid with MOD=3.
-           The use as a VEX prefix is made possible by assigning the inverted
-           REX.R to the top MOD bit, and the top bit in the inverted register
-           specifier to the bottom MOD bit, thereby effectively limiting 32-bit
-           code to accessing registers 0..7 in this VEX form. */
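-        /* Rough 2-byte VEX layout sketch (per the architecture manuals, not
-           derived from this file): C5 is followed by one byte carrying ~REX.R
-           in bit 7, the inverted vvvv register specifier in bits 6:3, VEX.L
-           in bit 2 and the implied-prefix pp field in bits 1:0. */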
-        /** @todo VEX: Just use new tables for it. */
-        return IEMOP_RAISE_INVALID_OPCODE();
-    }
-    IEMOP_MNEMONIC("les Gv,Mp");
-    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_ES, bRm);
-}
-
-
-/** Opcode 0xc5. */
-FNIEMOP_DEF(iemOp_lds_Gv_Mp_vex3)
-{
-    /* The LDS instruction is invalid in 64-bit mode. In legacy and
-       compatibility mode it is invalid with MOD=3.
-       The use as a VEX prefix is made possible by assigning the inverted
-       REX.R and REX.X to the two MOD bits, since the REX bits are ignored
-       outside of 64-bit mode.  VEX is not available in real or v86 mode. */
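-    /* Rough 3-byte VEX layout sketch (per the architecture manuals, not
-       derived from this file): C4 is followed by a byte with ~REX.R, ~REX.X
-       and ~REX.B in bits 7:5 plus the m-mmmm opcode-map field in bits 4:0,
-       then a byte with VEX.W in bit 7, inverted vvvv in bits 6:3, VEX.L in
-       bit 2 and pp in bits 1:0. */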
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if (pVCpu->iem.s.enmCpuMode != IEMMODE_64BIT)
-    {
-        if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-        {
-            IEMOP_MNEMONIC("lds Gv,Mp");
-            return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_DS, bRm);
-        }
-        IEMOP_HLP_NO_REAL_OR_V86_MODE();
-    }
-
-    IEMOP_MNEMONIC("3-byte-vex");
-    /** @todo Test when exactly the VEX conformance checks kick in during
-     * instruction decoding and fetching (using \#PF). */
-    uint8_t bVex1;   IEM_OPCODE_GET_NEXT_U8(&bVex1);
-    uint8_t bVex2;   IEM_OPCODE_GET_NEXT_U8(&bVex2);
-    uint8_t bOpcode; IEM_OPCODE_GET_NEXT_U8(&bOpcode);
-#if 0 /* will make sense of this next week... */
-    if (   !(pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPZ | IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REX))
-        &&
-        )
-    {
-
-    }
-#endif
-
-    /** @todo VEX: Just use new tables for it. */
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0xc6. */
-FNIEMOP_DEF(iemOp_Grp11_Eb_Ib)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_REG_MASK) != (0 << X86_MODRM_REG_SHIFT)) /* only mov Eb,Ib in this group. */
-        return IEMOP_RAISE_INVALID_OPCODE();
-    IEMOP_MNEMONIC("mov Eb,Ib");
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register access */
-        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_STORE_GREG_U8((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u8Imm);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory access. */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_STORE_MEM_U8(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u8Imm);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xc7. */
-FNIEMOP_DEF(iemOp_Grp11_Ev_Iz)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_REG_MASK) != (0 << X86_MODRM_REG_SHIFT)) /* only mov Eb,Ib in this group. */
-        return IEMOP_RAISE_INVALID_OPCODE();
-    IEMOP_MNEMONIC("mov Ev,Iz");
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register access */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 0);
-                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Imm);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 0);
-                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Imm);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 0);
-                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Imm);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* memory access. */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 2);
-                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Imm);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 4);
-                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u32Imm);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 4);
-                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u64Imm);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0xc8. */
-FNIEMOP_DEF(iemOp_enter_Iw_Ib)
-{
-    IEMOP_MNEMONIC("enter Iw,Ib");
-    IEMOP_HLP_MIN_186();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    uint16_t cbFrame;        IEM_OPCODE_GET_NEXT_U16(&cbFrame);
-    uint8_t  u8NestingLevel; IEM_OPCODE_GET_NEXT_U8(&u8NestingLevel);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_enter, pVCpu->iem.s.enmEffOpSize, cbFrame, u8NestingLevel);
-}
-
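-/* Semantics sketch (informal): 'enter cbFrame, 0' is roughly equivalent to
-   the classic prologue
-
-        push    xBP
-        mov     xBP, xSP
-        sub     xSP, cbFrame
-
-   while nesting levels > 0 additionally copy outer frame pointers; the
-   details live in iemCImpl_enter. */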
-
-/** Opcode 0xc9. */
-FNIEMOP_DEF(iemOp_leave)
-{
-    IEMOP_MNEMONIC("retn");
-    IEMOP_HLP_MIN_186();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_leave, pVCpu->iem.s.enmEffOpSize);
-}
-
-
-/** Opcode 0xca. */
-FNIEMOP_DEF(iemOp_retf_Iw)
-{
-    IEMOP_MNEMONIC("retf Iw");
-    uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retf, pVCpu->iem.s.enmEffOpSize, u16Imm);
-}
-
-
-/** Opcode 0xcb. */
-FNIEMOP_DEF(iemOp_retf)
-{
-    IEMOP_MNEMONIC("retf");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retf, pVCpu->iem.s.enmEffOpSize, 0);
-}
-
-
-/** Opcode 0xcc. */
-FNIEMOP_DEF(iemOp_int_3)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_int, X86_XCPT_BP, true /*fIsBpInstr*/);
-}
-
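-/* Note (informal): 0xcc is the single-byte encoding of 'int 3', which is why
-   debuggers patch it over the first byte of an instruction to plant software
-   breakpoints; the fIsBpInstr=true argument lets the C implementation treat
-   it differently from the two-byte 'cd 03' form where the checks differ. */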
-
-/** Opcode 0xcd. */
-FNIEMOP_DEF(iemOp_int_Ib)
-{
-    uint8_t u8Int; IEM_OPCODE_GET_NEXT_U8(&u8Int);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_int, u8Int, false /*fIsBpInstr*/);
-}
-
-
-/** Opcode 0xce. */
-FNIEMOP_DEF(iemOp_into)
-{
-    IEMOP_MNEMONIC("into");
-    IEMOP_HLP_NO_64BIT();
-
-    IEM_MC_BEGIN(2, 0);
-    IEM_MC_ARG_CONST(uint8_t,   u8Int,      /*=*/ X86_XCPT_OF, 0);
-    IEM_MC_ARG_CONST(bool,      fIsBpInstr, /*=*/ false, 1);
-    IEM_MC_CALL_CIMPL_2(iemCImpl_int, u8Int, fIsBpInstr);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xcf. */
-FNIEMOP_DEF(iemOp_iret)
-{
-    IEMOP_MNEMONIC("iret");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_iret, pVCpu->iem.s.enmEffOpSize);
-}
-
-
-/** Opcode 0xd0. */
-FNIEMOP_DEF(iemOp_Grp2_Eb_1)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    PCIEMOPSHIFTSIZES pImpl;
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Eb,1"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Eb,1"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Eb,1"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Eb,1"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Eb,1"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Eb,1"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Eb,1"); break;
-        case 6: return IEMOP_RAISE_INVALID_OPCODE();
-        IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc maybe, well... */
-    }
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_AF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(3, 0);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,             0);
-        IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=*/1,   1);
-        IEM_MC_ARG(uint32_t *,      pEFlags,            2);
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory */
-        IEM_MC_BEGIN(3, 2);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,             0);
-        IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=*/1,   1);
-        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags,        2);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd1. */
-FNIEMOP_DEF(iemOp_Grp2_Ev_1)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    PCIEMOPSHIFTSIZES pImpl;
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Ev,1"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Ev,1"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,1"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,1"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,1"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,1"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,1"); break;
-        case 6: return IEMOP_RAISE_INVALID_OPCODE();
-        IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc maybe, well... */
-    }
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_AF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,           0);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,           0);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,           0);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* memory */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,            0);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1,  1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,            0);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1,  1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,            0);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1,  1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0xd2. */
-FNIEMOP_DEF(iemOp_Grp2_Eb_CL)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    PCIEMOPSHIFTSIZES pImpl;
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Eb,CL"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Eb,CL"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Eb,CL"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Eb,CL"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Eb,CL"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Eb,CL"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Eb,CL"); break;
-        case 6: return IEMOP_RAISE_INVALID_OPCODE();
-        IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc, grr. */
-    }
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_AF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(3, 0);
-        IEM_MC_ARG(uint8_t *,   pu8Dst,     0);
-        IEM_MC_ARG(uint8_t,     cShiftArg,  1);
-        IEM_MC_ARG(uint32_t *,  pEFlags,    2);
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory */
-        IEM_MC_BEGIN(3, 2);
-        IEM_MC_ARG(uint8_t *,   pu8Dst,          0);
-        IEM_MC_ARG(uint8_t,     cShiftArg,       1);
-        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd3. */
-FNIEMOP_DEF(iemOp_Grp2_Ev_CL)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    PCIEMOPSHIFTSIZES pImpl;
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Ev,CL"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Ev,CL"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,CL"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,CL"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,CL"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,CL"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,CL"); break;
-        case 6: return IEMOP_RAISE_INVALID_OPCODE();
-        IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc maybe stupid */
-    }
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_AF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,    0);
-                IEM_MC_ARG(uint8_t,         cShiftArg,  1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,    2);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,    0);
-                IEM_MC_ARG(uint8_t,         cShiftArg,  1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,    2);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,    0);
-                IEM_MC_ARG(uint8_t,         cShiftArg,  1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,    2);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* memory */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *,  pu16Dst,    0);
-                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *,  pu32Dst,    0);
-                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,  pu64Dst,    0);
-                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-/** Opcode 0xd4. */
-FNIEMOP_DEF(iemOp_aam_Ib)
-{
-    IEMOP_MNEMONIC("aam Ib");
-    uint8_t bImm; IEM_OPCODE_GET_NEXT_U8(&bImm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_NO_64BIT();
-    if (!bImm)
-        return IEMOP_RAISE_DIVIDE_ERROR();
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_aam, bImm);
-}
-
-
-/** Opcode 0xd5. */
-FNIEMOP_DEF(iemOp_aad_Ib)
-{
-    IEMOP_MNEMONIC("aad Ib");
-    uint8_t bImm; IEM_OPCODE_GET_NEXT_U8(&bImm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_NO_64BIT();
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_aad, bImm);
-}
-
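-
-/*
- * Editor's illustrative sketch (not part of the upstream sources): the two
- * iemCImpl_* workers deferred to above implement the classic ASCII-adjust
- * arithmetic.  In plain C, with ad hoc names and the EFLAGS updates (SF, ZF,
- * PF) omitted:
- */
-static uint16_t ExampleAam(uint16_t uAx, uint8_t bImm) /* bImm == 0 raises #DE instead. */
-{
-    uint8_t const uAl = (uint8_t)uAx;
-    return (uint16_t)(((unsigned)(uAl / bImm) << 8) | (uAl % bImm)); /* AH = AL / imm; AL = AL % imm. */
-}
-
-static uint16_t ExampleAad(uint16_t uAx, uint8_t bImm)
-{
-    return (uint8_t)((uAx >> 8) * bImm + (uAx & 0xff)); /* AL = AH * imm + AL; AH = 0. */
-}
-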
-
-/** Opcode 0xd6. */
-FNIEMOP_DEF(iemOp_salc)
-{
-    IEMOP_MNEMONIC("salc");
-    IEMOP_HLP_MIN_286(); /* (undocumented at the time) */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_NO_64BIT();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-        IEM_MC_STORE_GREG_U8_CONST(X86_GREG_xAX, 0xff);
-    } IEM_MC_ELSE() {
-        IEM_MC_STORE_GREG_U8_CONST(X86_GREG_xAX, 0x00);
-    } IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd7. */
-FNIEMOP_DEF(iemOp_xlat)
-{
-    IEMOP_MNEMONIC("xlat");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    switch (pVCpu->iem.s.enmEffAddrMode)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint8_t,  u8Tmp);
-            IEM_MC_LOCAL(uint16_t, u16Addr);
-            IEM_MC_FETCH_GREG_U8_ZX_U16(u16Addr, X86_GREG_xAX);
-            IEM_MC_ADD_GREG_U16_TO_LOCAL(u16Addr, X86_GREG_xBX);
-            IEM_MC_FETCH_MEM16_U8(u8Tmp, pVCpu->iem.s.iEffSeg, u16Addr);
-            IEM_MC_STORE_GREG_U8(X86_GREG_xAX, u8Tmp);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint8_t,  u8Tmp);
-            IEM_MC_LOCAL(uint32_t, u32Addr);
-            IEM_MC_FETCH_GREG_U8_ZX_U32(u32Addr, X86_GREG_xAX);
-            IEM_MC_ADD_GREG_U32_TO_LOCAL(u32Addr, X86_GREG_xBX);
-            IEM_MC_FETCH_MEM32_U8(u8Tmp, pVCpu->iem.s.iEffSeg, u32Addr);
-            IEM_MC_STORE_GREG_U8(X86_GREG_xAX, u8Tmp);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint8_t,  u8Tmp);
-            IEM_MC_LOCAL(uint64_t, u64Addr);
-            IEM_MC_FETCH_GREG_U8_ZX_U64(u64Addr, X86_GREG_xAX);
-            IEM_MC_ADD_GREG_U64_TO_LOCAL(u64Addr, X86_GREG_xBX);
-            IEM_MC_FETCH_MEM_U8(u8Tmp, pVCpu->iem.s.iEffSeg, u64Addr);
-            IEM_MC_STORE_GREG_U8(X86_GREG_xAX, u8Tmp);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
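-
-/*
- * Editor's illustrative sketch (not part of the upstream sources): all three
- * XLAT cases above compute AL = mem[seg:(rBX + ZeroExt(AL))], truncating the
- * sum to the effective address width.  The 16-bit case in plain C, where
- * pbSegBase is a hypothetical flat pointer to the segment:
- */
-static uint8_t ExampleXlat16(uint8_t const *pbSegBase, uint16_t uBx, uint8_t uAl)
-{
-    uint16_t const uAddr = (uint16_t)(uBx + uAl); /* Wraps at 64K, matching IEM_MC_ADD_GREG_U16_TO_LOCAL. */
-    return pbSegBase[uAddr];
-}
-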
-
-/**
- * Common worker for FPU instructions working on ST0 and STn, and storing the
- * result in ST0.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_2(iemOpHlpFpu_st0_stN, uint8_t, bRm, PFNIEMAIMPLFPUR80, pfnAImpl)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,        FpuRes,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value2,                 2);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80(pr80Value1, 0, pr80Value2, bRm & X86_MODRM_RM_MASK)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pFpuRes, pr80Value1, pr80Value2);
-        IEM_MC_STORE_FPU_RESULT(FpuRes, 0);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
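-
-/*
- * Editor's illustrative sketch (not part of the upstream sources): the stack
- * register numbers passed to IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80 above are
- * relative to the current top of stack.  The physical x87 register backing
- * ST(iStReg) follows from the TOP field, bits 11..13 of the status word:
- */
-static unsigned ExampleStRegToPhysical(uint16_t u16Fsw, unsigned iStReg)
-{
-    unsigned const iTop = (u16Fsw >> 11) & 7;
-    return (iTop + iStReg) & 7; /* ST(0) is the TOP register itself. */
-}
-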
-
-/**
- * Common worker for FPU instructions working on ST0 and STn, and only affecting
- * flags.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_2(iemOpHlpFpuNoStore_st0_stN, uint8_t, bRm, PFNIEMAIMPLFPUR80FSW, pfnAImpl)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value2,                 2);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80(pr80Value1, 0, pr80Value2, bRm & X86_MODRM_RM_MASK)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pu16Fsw, pr80Value1, pr80Value2);
-        IEM_MC_UPDATE_FSW(u16Fsw);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(UINT8_MAX);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/**
- * Common worker for FPU instructions working on ST0 and STn, only affecting
- * flags, and popping when done.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_2(iemOpHlpFpuNoStore_st0_stN_pop, uint8_t, bRm, PFNIEMAIMPLFPUR80FSW, pfnAImpl)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value2,                 2);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80(pr80Value1, 0, pr80Value2, bRm & X86_MODRM_RM_MASK)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pu16Fsw, pr80Value1, pr80Value2);
-        IEM_MC_UPDATE_FSW_THEN_POP(u16Fsw);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP(UINT8_MAX);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd8 11/0. */
-FNIEMOP_DEF_1(iemOp_fadd_stN,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fadd st0,stN");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fadd_r80_by_r80);
-}
-
-
-/** Opcode 0xd8 11/1. */
-FNIEMOP_DEF_1(iemOp_fmul_stN,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fmul st0,stN");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fmul_r80_by_r80);
-}
-
-
-/** Opcode 0xd8 11/2. */
-FNIEMOP_DEF_1(iemOp_fcom_stN,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcom st0,stN");
-    return FNIEMOP_CALL_2(iemOpHlpFpuNoStore_st0_stN, bRm, iemAImpl_fcom_r80_by_r80);
-}
-
-
-/** Opcode 0xd8 11/3. */
-FNIEMOP_DEF_1(iemOp_fcomp_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcomp st0,stN");
-    return FNIEMOP_CALL_2(iemOpHlpFpuNoStore_st0_stN_pop, bRm, iemAImpl_fcom_r80_by_r80);
-}
-
-
-/** Opcode 0xd8 11/4. */
-FNIEMOP_DEF_1(iemOp_fsub_stN,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fsub st0,stN");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fsub_r80_by_r80);
-}
-
-
-/** Opcode 0xd8 11/5. */
-FNIEMOP_DEF_1(iemOp_fsubr_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fsubr st0,stN");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fsubr_r80_by_r80);
-}
-
-
-/** Opcode 0xd8 11/6. */
-FNIEMOP_DEF_1(iemOp_fdiv_stN,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fdiv st0,stN");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fdiv_r80_by_r80);
-}
-
-
-/** Opcode 0xd8 11/7. */
-FNIEMOP_DEF_1(iemOp_fdivr_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fdivr st0,stN");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fdivr_r80_by_r80);
-}
-
-
-/**
- * Common worker for FPU instructions working on ST0 and an m32r, and storing
- * the result in ST0.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_2(iemOpHlpFpu_st0_m32r, uint8_t, bRm, PFNIEMAIMPLFPUR32, pfnAImpl)
-{
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_LOCAL(RTFLOAT32U,            r32Val2);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,        FpuRes,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(PCRTFLOAT32U,  pr32Val2,       r32Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_R32(r32Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pFpuRes, pr80Value1, pr32Val2);
-        IEM_MC_STORE_FPU_RESULT(FpuRes, 0);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd8 !11/0. */
-FNIEMOP_DEF_1(iemOp_fadd_m32r,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fadd st0,m32r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fadd_r80_by_r32);
-}
-
-
-/** Opcode 0xd8 !11/1. */
-FNIEMOP_DEF_1(iemOp_fmul_m32r,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fmul st0,m32r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fmul_r80_by_r32);
-}
-
-
-/** Opcode 0xd8 !11/2. */
-FNIEMOP_DEF_1(iemOp_fcom_m32r,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcom st0,m32r");
-
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_LOCAL(RTFLOAT32U,            r32Val2);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(PCRTFLOAT32U,  pr32Val2,       r32Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_R32(r32Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fcom_r80_by_r32, pu16Fsw, pr80Value1, pr32Val2);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd8 !11/3. */
-FNIEMOP_DEF_1(iemOp_fcomp_m32r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcomp st0,m32r");
-
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_LOCAL(RTFLOAT32U,            r32Val2);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(PCRTFLOAT32U,  pr32Val2,       r32Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_R32(r32Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fcom_r80_by_r32, pu16Fsw, pr80Value1, pr32Val2);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd8 !11/4. */
-FNIEMOP_DEF_1(iemOp_fsub_m32r,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fsub st0,m32r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fsub_r80_by_r32);
-}
-
-
-/** Opcode 0xd8 !11/5. */
-FNIEMOP_DEF_1(iemOp_fsubr_m32r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fsubr st0,m32r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fsubr_r80_by_r32);
-}
-
-
-/** Opcode 0xd8 !11/6. */
-FNIEMOP_DEF_1(iemOp_fdiv_m32r,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fdiv st0,m32r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fdiv_r80_by_r32);
-}
-
-
-/** Opcode 0xd8 !11/7. */
-FNIEMOP_DEF_1(iemOp_fdivr_m32r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fdivr st0,m32r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fdivr_r80_by_r32);
-}
-
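-
-/*
- * Editor's note (not from the upstream sources): the 'r' forms above reverse
- * the operand order: FSUBR computes ST(0) = m32r - ST(0), where FSUB computes
- * ST(0) = ST(0) - m32r, and FDIV/FDIVR pair up the same way.
- */
-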
-
-/** Opcode 0xd8. */
-FNIEMOP_DEF(iemOp_EscF0)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    pVCpu->iem.s.uFpuOpcode = RT_MAKE_U16(bRm, 0xd8 & 0x7);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fadd_stN,  bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fmul_stN,  bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_fcom_stN,  bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_fcomp_stN, bRm);
-            case 4: return FNIEMOP_CALL_1(iemOp_fsub_stN,  bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_fsubr_stN, bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fdiv_stN,  bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_fdivr_stN, bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fadd_m32r,  bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fmul_m32r,  bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_fcom_m32r,  bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_fcomp_m32r, bRm);
-            case 4: return FNIEMOP_CALL_1(iemOp_fsub_m32r,  bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_fsubr_m32r, bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fdiv_m32r,  bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_fdivr_m32r, bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0xd9 /0 mem32real
- * @sa  iemOp_fld_m64r */
-FNIEMOP_DEF_1(iemOp_fld_m32r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fld m32r");
-
-    IEM_MC_BEGIN(2, 3);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_LOCAL(RTFLOAT32U,            r32Val);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,    FpuRes, 0);
-    IEM_MC_ARG_LOCAL_REF(PCRTFLOAT32U,  pr32Val,    r32Val, 1);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_R32(r32Val, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_IS_EMPTY(7)
-        IEM_MC_CALL_FPU_AIMPL_2(iemAImpl_fld_r32_to_r80, pFpuRes, pr32Val);
-        IEM_MC_PUSH_FPU_RESULT_MEM_OP(FpuRes, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP(pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
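-
-/*
- * Editor's illustrative sketch (not part of the upstream sources): a push
- * decrements TOP first, so the register that becomes the new ST(0) is ST(7)
- * relative to the old TOP.  That is the slot IEM_MC_IF_FPUREG_IS_EMPTY(7)
- * checks above before the load may succeed:
- */
-static unsigned ExampleFpuPushTarget(unsigned iOldTop)
-{
-    return (iOldTop + 7) & 7; /* == (iOldTop - 1) & 7, the slot FLD pushes into. */
-}
-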
-
-/** Opcode 0xd9 !11/2 mem32real */
-FNIEMOP_DEF_1(iemOp_fst_m32r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fst m32r");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(PRTFLOAT32U,             pr32Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pr32Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fst_r80_to_r32, pu16Fsw, pr32Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pr32Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF(pr32Dst);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pr32Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 !11/3 */
-FNIEMOP_DEF_1(iemOp_fstp_m32r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fstp m32r");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(PRTFLOAT32U,             pr32Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pr32Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fst_r80_to_r32, pu16Fsw, pr32Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pr32Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF(pr32Dst);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pr32Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 !11/4 */
-FNIEMOP_DEF_1(iemOp_fldenv, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fldenv m14/28byte");
-    IEM_MC_BEGIN(3, 0);
-    IEM_MC_ARG_CONST(IEMMODE,           enmEffOpSize, /*=*/ pVCpu->iem.s.enmEffOpSize,  0);
-    IEM_MC_ARG(uint8_t,                 iEffSeg,                                    1);
-    IEM_MC_ARG(RTGCPTR,                 GCPtrEffSrc,                                2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_fldenv, enmEffOpSize, iEffSeg, GCPtrEffSrc);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 !11/5 */
-FNIEMOP_DEF_1(iemOp_fldcw, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fldcw m2byte");
-    IEM_MC_BEGIN(1, 1);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
-    IEM_MC_ARG(uint16_t,                u16Fcw,                                     0);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-    IEM_MC_FETCH_MEM_U16(u16Fcw, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_CALL_CIMPL_1(iemCImpl_fldcw, u16Fcw);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 !11/6 */
-FNIEMOP_DEF_1(iemOp_fnstenv, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fstenv m14/m28byte");
-    IEM_MC_BEGIN(3, 0);
-    IEM_MC_ARG_CONST(IEMMODE,           enmEffOpSize, /*=*/ pVCpu->iem.s.enmEffOpSize,  0);
-    IEM_MC_ARG(uint8_t,                 iEffSeg,                                    1);
-    IEM_MC_ARG(RTGCPTR,                 GCPtrEffDst,                                2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_fnstenv, enmEffOpSize, iEffSeg, GCPtrEffDst);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 !11/7 */
-FNIEMOP_DEF_1(iemOp_fnstcw, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fnstcw m2byte");
-    IEM_MC_BEGIN(0, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fcw);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-    IEM_MC_FETCH_FCW(u16Fcw);
-    IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Fcw);
-    IEM_MC_ADVANCE_RIP(); /* C0-C3 are documented as undefined, we leave them unmodified. */
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 0xd0, 0xd9 0xd8-0xdf, ++?.  */
-FNIEMOP_DEF(iemOp_fnop)
-{
-    IEMOP_MNEMONIC("fnop");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-    /** @todo Testcase: looks like FNOP leaves FOP alone but updates FPUIP. Could be
-     *        an Intel optimization. Investigate. */
-    IEM_MC_UPDATE_FPU_OPCODE_IP();
-    IEM_MC_ADVANCE_RIP(); /* C0-C3 are documented as undefined, we leave them unmodified. */
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 11/0 stN */
-FNIEMOP_DEF_1(iemOp_fld_stN, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fld stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /** @todo Testcase: Check if this raises \#MF?  Intel does not mention it,
-     *        but AMD indicates that it does. */
-    IEM_MC_BEGIN(0, 2);
-    IEM_MC_LOCAL(PCRTFLOAT80U,          pr80Value);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, bRm & X86_MODRM_RM_MASK)
-        IEM_MC_SET_FPU_RESULT(FpuRes, 0 /*FSW*/, pr80Value);
-        IEM_MC_PUSH_FPU_RESULT(FpuRes);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_PUSH_UNDERFLOW();
-    IEM_MC_ENDIF();
-
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 11/3 stN */
-FNIEMOP_DEF_1(iemOp_fxch_stN, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fxch stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /** @todo Testcase: Check if this raises \#MF?  Intel does not mention it,
-     *        but AMD indicates that it does. */
-    IEM_MC_BEGIN(1, 3);
-    IEM_MC_LOCAL(PCRTFLOAT80U,          pr80Value1);
-    IEM_MC_LOCAL(PCRTFLOAT80U,          pr80Value2);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_ARG_CONST(uint8_t,           iStReg, /*=*/ bRm & X86_MODRM_RM_MASK, 0);
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80(pr80Value1, 0, pr80Value2, bRm & X86_MODRM_RM_MASK)
-        IEM_MC_SET_FPU_RESULT(FpuRes, X86_FSW_C1, pr80Value2);
-        IEM_MC_STORE_FPUREG_R80_SRC_REF(bRm & X86_MODRM_RM_MASK, pr80Value1);
-        IEM_MC_STORE_FPU_RESULT(FpuRes, 0);
-    IEM_MC_ELSE()
-        IEM_MC_CALL_CIMPL_1(iemCImpl_fxch_underflow, iStReg);
-    IEM_MC_ENDIF();
-
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 11/4, 0xdd 11/2. */
-FNIEMOP_DEF_1(iemOp_fstp_stN, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fstp st0,stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    /* fstp st0, st0 is frequently used as an official 'ffreep st0' sequence. */
-    uint8_t const iDstReg = bRm & X86_MODRM_RM_MASK;
-    if (!iDstReg)
-    {
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL_CONST(uint16_t,        u16Fsw, /*=*/ 0);
-        IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-        IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-        IEM_MC_PREPARE_FPU_USAGE();
-        IEM_MC_IF_FPUREG_NOT_EMPTY(0)
-            IEM_MC_UPDATE_FSW_THEN_POP(u16Fsw);
-        IEM_MC_ELSE()
-            IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP(0);
-        IEM_MC_ENDIF();
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(PCRTFLOAT80U,          pr80Value);
-        IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-        IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-        IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-        IEM_MC_PREPARE_FPU_USAGE();
-        IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-            IEM_MC_SET_FPU_RESULT(FpuRes, 0 /*FSW*/, pr80Value);
-            IEM_MC_STORE_FPU_RESULT_THEN_POP(FpuRes, iDstReg);
-        IEM_MC_ELSE()
-            IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP(iDstReg);
-        IEM_MC_ENDIF();
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
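-
-/*
- * Editor's illustrative sketch (not part of the upstream sources): the
- * iDstReg == 0 special case above reduces FSTP ST(0) to a plain stack pop,
- * i.e. tag the current top register empty and increment TOP.  With the full
- * 16-bit tag word (two bits per register, 11b = empty):
- */
-static void ExampleFpuPop(uint16_t *pu16Fsw, uint16_t *pu16Ftw)
-{
-    unsigned const iTop = (*pu16Fsw >> 11) & 7;
-    *pu16Ftw = (uint16_t)(*pu16Ftw | (3u << (iTop * 2)));                                  /* Mark old ST(0) empty. */
-    *pu16Fsw = (uint16_t)((*pu16Fsw & ~(7u << 11)) | ((unsigned)((iTop + 1) & 7) << 11));  /* TOP++. */
-}
-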
-
-/**
- * Common worker for FPU instructions working on ST0 and replaces it with the
- * result, i.e. unary operators.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpFpu_st0, PFNIEMAIMPLFPUR80UNARY, pfnAImpl)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(2, 1);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,    FpuRes, 0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          1);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_2(pfnAImpl, pFpuRes, pr80Value);
-        IEM_MC_STORE_FPU_RESULT(FpuRes, 0);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 0xe0. */
-FNIEMOP_DEF(iemOp_fchs)
-{
-    IEMOP_MNEMONIC("fchs st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fchs_r80);
-}
-
-
-/** Opcode 0xd9 0xe1. */
-FNIEMOP_DEF(iemOp_fabs)
-{
-    IEMOP_MNEMONIC("fabs st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fabs_r80);
-}
-
-
-/**
- * Common worker for FPU instructions working on ST0 and only returns FSW.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpFpuNoStore_st0, PFNIEMAIMPLFPUR80UNARYFSW, pfnAImpl)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(2, 1);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          1);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_2(pfnAImpl, pu16Fsw, pr80Value);
-        IEM_MC_UPDATE_FSW(u16Fsw);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(UINT8_MAX);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 0xe4. */
-FNIEMOP_DEF(iemOp_ftst)
-{
-    IEMOP_MNEMONIC("ftst st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpuNoStore_st0, iemAImpl_ftst_r80);
-}
-
-
-/** Opcode 0xd9 0xe5. */
-FNIEMOP_DEF(iemOp_fxam)
-{
-    IEMOP_MNEMONIC("fxam st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpuNoStore_st0, iemAImpl_fxam_r80);
-}
-
-
-/**
- * Common worker for FPU instructions pushing a constant onto the FPU stack.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpFpuPushConstant, PFNIEMAIMPLFPUR80LDCONST, pfnAImpl)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(1, 1);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,    FpuRes, 0);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_IS_EMPTY(7)
-        IEM_MC_CALL_FPU_AIMPL_1(pfnAImpl, pFpuRes);
-        IEM_MC_PUSH_FPU_RESULT(FpuRes);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_PUSH_OVERFLOW();
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 0xe8. */
-FNIEMOP_DEF(iemOp_fld1)
-{
-    IEMOP_MNEMONIC("fld1");
-    return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fld1);
-}
-
-
-/** Opcode 0xd9 0xe9. */
-FNIEMOP_DEF(iemOp_fldl2t)
-{
-    IEMOP_MNEMONIC("fldl2t");
-    return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldl2t);
-}
-
-
-/** Opcode 0xd9 0xea. */
-FNIEMOP_DEF(iemOp_fldl2e)
-{
-    IEMOP_MNEMONIC("fldl2e");
-    return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldl2e);
-}
-
-/** Opcode 0xd9 0xeb. */
-FNIEMOP_DEF(iemOp_fldpi)
-{
-    IEMOP_MNEMONIC("fldpi");
-    return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldpi);
-}
-
-
-/** Opcode 0xd9 0xec. */
-FNIEMOP_DEF(iemOp_fldlg2)
-{
-    IEMOP_MNEMONIC("fldlg2");
-    return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldlg2);
-}
-
-/** Opcode 0xd9 0xed. */
-FNIEMOP_DEF(iemOp_fldln2)
-{
-    IEMOP_MNEMONIC("fldln2");
-    return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldln2);
-}
-
-
-/** Opcode 0xd9 0xee. */
-FNIEMOP_DEF(iemOp_fldz)
-{
-    IEMOP_MNEMONIC("fldz");
-    return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldz);
-}
-
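-
-/*
- * Editor's note (summary, not from the upstream sources): the seven constant
- * loaders above push, in opcode order 0xe8..0xee: +1.0, log2(10), log2(e),
- * pi, log10(2), ln(2) and +0.0, each in the full 80-bit format.
- */
-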
-
-/** Opcode 0xd9 0xf0. */
-FNIEMOP_DEF(iemOp_f2xm1)
-{
-    IEMOP_MNEMONIC("f2xm1 st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_f2xm1_r80);
-}
-
-
-/** Opcode 0xd9 0xf1. */
-FNIEMOP_DEF(iemOp_fyl2x)
-{
-    IEMOP_MNEMONIC("fyl2x st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fyl2x_r80);
-}
-
-
-/**
- * Common worker for FPU instructions working on ST0 and having two outputs, one
- * replacing ST0 and one pushed onto the stack.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpFpuReplace_st0_push, PFNIEMAIMPLFPUR80UNARYTWO, pfnAImpl)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(2, 1);
-    IEM_MC_LOCAL(IEMFPURESULTTWO,           FpuResTwo);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULTTWO,  pFpuResTwo, FpuResTwo,  0);
-    IEM_MC_ARG(PCRTFLOAT80U,                pr80Value,              1);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_2(pfnAImpl, pFpuResTwo, pr80Value);
-        IEM_MC_PUSH_FPU_RESULT_TWO(FpuResTwo);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO();
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 0xf2. */
-FNIEMOP_DEF(iemOp_fptan)
-{
-    IEMOP_MNEMONIC("fptan st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpuReplace_st0_push, iemAImpl_fptan_r80_r80);
-}
-
-
-/**
- * Common worker for FPU instructions working on STn and ST0, storing the result
- * in STn, and popping the stack unless IE, DE or ZE was raised.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_2(iemOpHlpFpu_stN_st0_pop, uint8_t, bRm, PFNIEMAIMPLFPUR80, pfnAImpl)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,        FpuRes,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value2,                 2);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80(pr80Value1, bRm & X86_MODRM_RM_MASK, pr80Value2, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pFpuRes, pr80Value1, pr80Value2);
-        IEM_MC_STORE_FPU_RESULT_THEN_POP(FpuRes, bRm & X86_MODRM_RM_MASK);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP(bRm & X86_MODRM_RM_MASK);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 0xf3. */
-FNIEMOP_DEF(iemOp_fpatan)
-{
-    IEMOP_MNEMONIC("fpatan st1,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, 1, iemAImpl_fpatan_r80_by_r80);
-}
-
-
-/** Opcode 0xd9 0xf4. */
-FNIEMOP_DEF(iemOp_fxtract)
-{
-    IEMOP_MNEMONIC("fxtract st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpuReplace_st0_push, iemAImpl_fxtract_r80_r80);
-}
-
-
-/** Opcode 0xd9 0xf5. */
-FNIEMOP_DEF(iemOp_fprem1)
-{
-    IEMOP_MNEMONIC("fprem1 st0, st1");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, 1, iemAImpl_fprem1_r80_by_r80);
-}
-
-
-/** Opcode 0xd9 0xf6. */
-FNIEMOP_DEF(iemOp_fdecstp)
-{
-    IEMOP_MNEMONIC("fdecstp");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    /* Note! C0, C2 and C3 are documented as undefined, we clear them. */
-    /** @todo Testcase: Check whether FOP, FPUIP and FPUCS are affected by
-     *        FINCSTP and FDECSTP. */
-
-    IEM_MC_BEGIN(0, 0);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-    IEM_MC_FPU_STACK_DEC_TOP();
-    IEM_MC_UPDATE_FSW_CONST(0);
-
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xd9 0xf7. */
-FNIEMOP_DEF(iemOp_fincstp)
-{
-    IEMOP_MNEMONIC("fincstp");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    /* Note! C0, C2 and C3 are documented as undefined, we clear them. */
-    /** @todo Testcase: Check whether FOP, FPUIP and FPUCS are affected by
-     *        FINCSTP and FDECSTP. */
-
-    IEM_MC_BEGIN(0, 0);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-    IEM_MC_FPU_STACK_INC_TOP();
-    IEM_MC_UPDATE_FSW_CONST(0);
-
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
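-
-/*
- * Editor's illustrative sketch (not part of the upstream sources): FINCSTP
- * and FDECSTP only rotate the TOP field; no tag changes and no data is moved,
- * which is why the blocks above merely adjust TOP and clear the condition
- * codes:
- */
-static uint16_t ExampleRotateTop(uint16_t u16Fsw, int iDelta) /* +1 = FINCSTP, -1 = FDECSTP. */
-{
-    unsigned const iTop = (((u16Fsw >> 11) & 7) + (unsigned)(8 + iDelta)) & 7;
-    return (uint16_t)((u16Fsw & ~(7u << 11)) | (iTop << 11));
-}
-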
-
-/** Opcode 0xd9 0xf8. */
-FNIEMOP_DEF(iemOp_fprem)
-{
-    IEMOP_MNEMONIC("fprem st0, st1");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, 1, iemAImpl_fprem_r80_by_r80);
-}
-
-
-/** Opcode 0xd9 0xf9. */
-FNIEMOP_DEF(iemOp_fyl2xp1)
-{
-    IEMOP_MNEMONIC("fyl2xp1 st1,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, 1, iemAImpl_fyl2xp1_r80_by_r80);
-}
-
-
-/** Opcode 0xd9 0xfa. */
-FNIEMOP_DEF(iemOp_fsqrt)
-{
-    IEMOP_MNEMONIC("fsqrt st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fsqrt_r80);
-}
-
-
-/** Opcode 0xd9 0xfb. */
-FNIEMOP_DEF(iemOp_fsincos)
-{
-    IEMOP_MNEMONIC("fsincos st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpuReplace_st0_push, iemAImpl_fsincos_r80_r80);
-}
-
-
-/** Opcode 0xd9 0xfc. */
-FNIEMOP_DEF(iemOp_frndint)
-{
-    IEMOP_MNEMONIC("frndint st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_frndint_r80);
-}
-
-
-/** Opcode 0xd9 0xfd. */
-FNIEMOP_DEF(iemOp_fscale)
-{
-    IEMOP_MNEMONIC("fscale st0, st1");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, 1, iemAImpl_fscale_r80_by_r80);
-}
-
-
-/** Opcode 0xd9 0xfe. */
-FNIEMOP_DEF(iemOp_fsin)
-{
-    IEMOP_MNEMONIC("fsin st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fsin_r80);
-}
-
-
-/** Opcode 0xd9 0xff. */
-FNIEMOP_DEF(iemOp_fcos)
-{
-    IEMOP_MNEMONIC("fcos st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fcos_r80);
-}
-
-
-/** Used by iemOp_EscF1. */
-IEM_STATIC const PFNIEMOP g_apfnEscF1_E0toFF[32] =
-{
-    /* 0xe0 */  iemOp_fchs,
-    /* 0xe1 */  iemOp_fabs,
-    /* 0xe2 */  iemOp_Invalid,
-    /* 0xe3 */  iemOp_Invalid,
-    /* 0xe4 */  iemOp_ftst,
-    /* 0xe5 */  iemOp_fxam,
-    /* 0xe6 */  iemOp_Invalid,
-    /* 0xe7 */  iemOp_Invalid,
-    /* 0xe8 */  iemOp_fld1,
-    /* 0xe9 */  iemOp_fldl2t,
-    /* 0xea */  iemOp_fldl2e,
-    /* 0xeb */  iemOp_fldpi,
-    /* 0xec */  iemOp_fldlg2,
-    /* 0xed */  iemOp_fldln2,
-    /* 0xee */  iemOp_fldz,
-    /* 0xef */  iemOp_Invalid,
-    /* 0xf0 */  iemOp_f2xm1,
-    /* 0xf1 */  iemOp_fyl2x,
-    /* 0xf2 */  iemOp_fptan,
-    /* 0xf3 */  iemOp_fpatan,
-    /* 0xf4 */  iemOp_fxtract,
-    /* 0xf5 */  iemOp_fprem1,
-    /* 0xf6 */  iemOp_fdecstp,
-    /* 0xf7 */  iemOp_fincstp,
-    /* 0xf8 */  iemOp_fprem,
-    /* 0xf9 */  iemOp_fyl2xp1,
-    /* 0xfa */  iemOp_fsqrt,
-    /* 0xfb */  iemOp_fsincos,
-    /* 0xfc */  iemOp_frndint,
-    /* 0xfd */  iemOp_fscale,
-    /* 0xfe */  iemOp_fsin,
-    /* 0xff */  iemOp_fcos
-};
-
-
-/** Opcode 0xd9. */
-FNIEMOP_DEF(iemOp_EscF1)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    pVCpu->iem.s.uFpuOpcode = RT_MAKE_U16(bRm, 0xd9 & 0x7);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fld_stN, bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fxch_stN, bRm);
-            case 2:
-                if (bRm == 0xd0)
-                    return FNIEMOP_CALL(iemOp_fnop);
-                return IEMOP_RAISE_INVALID_OPCODE();
-            case 3: return FNIEMOP_CALL_1(iemOp_fstp_stN, bRm); /* Reserved. Intel behavior seems to be FSTP ST(i) though. */
-            case 4:
-            case 5:
-            case 6:
-            case 7:
-                Assert((unsigned)bRm - 0xe0U < RT_ELEMENTS(g_apfnEscF1_E0toFF));
-                return FNIEMOP_CALL(g_apfnEscF1_E0toFF[bRm - 0xe0]);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fld_m32r,  bRm);
-            case 1: return IEMOP_RAISE_INVALID_OPCODE();
-            case 2: return FNIEMOP_CALL_1(iemOp_fst_m32r,  bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_fstp_m32r, bRm);
-            case 4: return FNIEMOP_CALL_1(iemOp_fldenv,    bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_fldcw,     bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fnstenv,   bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_fnstcw,    bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0xda 11/0. */
-FNIEMOP_DEF_1(iemOp_fcmovb_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcmovb st0,stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(PCRTFLOAT80U,      pr80ValueN);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST(pr80ValueN, bRm & X86_MODRM_RM_MASK, 0)
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF)
-            IEM_MC_STORE_FPUREG_R80_SRC_REF(0, pr80ValueN);
-        IEM_MC_ENDIF();
-        IEM_MC_UPDATE_FPU_OPCODE_IP();
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xda 11/1. */
-FNIEMOP_DEF_1(iemOp_fcmove_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcmove st0,stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(PCRTFLOAT80U,      pr80ValueN);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST(pr80ValueN, bRm & X86_MODRM_RM_MASK, 0)
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF)
-            IEM_MC_STORE_FPUREG_R80_SRC_REF(0, pr80ValueN);
-        IEM_MC_ENDIF();
-        IEM_MC_UPDATE_FPU_OPCODE_IP();
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xda 11/2. */
-FNIEMOP_DEF_1(iemOp_fcmovbe_stN, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcmovbe st0,stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(PCRTFLOAT80U,      pr80ValueN);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST(pr80ValueN, bRm & X86_MODRM_RM_MASK, 0)
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF)
-            IEM_MC_STORE_FPUREG_R80_SRC_REF(0, pr80ValueN);
-        IEM_MC_ENDIF();
-        IEM_MC_UPDATE_FPU_OPCODE_IP();
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xda 11/3. */
-FNIEMOP_DEF_1(iemOp_fcmovu_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcmovu st0,stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(PCRTFLOAT80U,      pr80ValueN);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST(pr80ValueN, bRm & X86_MODRM_RM_MASK, 0)
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF)
-            IEM_MC_STORE_FPUREG_R80_SRC_REF(0, pr80ValueN);
-        IEM_MC_ENDIF();
-        IEM_MC_UPDATE_FPU_OPCODE_IP();
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
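-
-/*
- * Editor's note (summary, not from the upstream sources): the four FCMOV
- * forms above copy ST(n) to ST(0) when their integer-compare condition holds:
- * FCMOVB tests CF, FCMOVE tests ZF, FCMOVBE tests CF | ZF, and FCMOVU tests
- * PF (unordered).
- */
-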
-
-/**
- * Common worker for FPU instructions working on ST0 and STn, only affecting
- * flags, and popping twice when done.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpFpuNoStore_st0_stN_pop_pop, PFNIEMAIMPLFPUR80FSW, pfnAImpl)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value2,                 2);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80(pr80Value1, 0, pr80Value2, 1)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pu16Fsw, pr80Value1, pr80Value2);
-        IEM_MC_UPDATE_FSW_THEN_POP_POP(u16Fsw);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP();
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xda 0xe9. */
-FNIEMOP_DEF(iemOp_fucompp)
-{
-    IEMOP_MNEMONIC("fucompp st0,stN");
-    return FNIEMOP_CALL_1(iemOpHlpFpuNoStore_st0_stN_pop_pop, iemAImpl_fucom_r80_by_r80);
-}
-
-
-/**
- * Common worker for FPU instructions working on ST0 and an m32i, and storing
- * the result in ST0.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_2(iemOpHlpFpu_st0_m32i, uint8_t, bRm, PFNIEMAIMPLFPUI32, pfnAImpl)
-{
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-    IEM_MC_LOCAL(IEMFPURESULT,              FpuRes);
-    IEM_MC_LOCAL(int32_t,                   i32Val2);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT,     pFpuRes,        FpuRes,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,                pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(int32_t const *,   pi32Val2,       i32Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
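-    /* Note: the memory operand is fetched before IEM_MC_PREPARE_FPU_USAGE(),
-       so a #PF/#GP on the access is raised before any FPU state changes. */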
-    IEM_MC_FETCH_MEM_I32(i32Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pFpuRes, pr80Value1, pi32Val2);
-        IEM_MC_STORE_FPU_RESULT(FpuRes, 0);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xda !11/0. */
-FNIEMOP_DEF_1(iemOp_fiadd_m32i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fiadd m32i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fiadd_r80_by_i32);
-}
-
-
-/** Opcode 0xda !11/1. */
-FNIEMOP_DEF_1(iemOp_fimul_m32i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fimul m32i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fimul_r80_by_i32);
-}
-
-
-/** Opcode 0xda !11/2. */
-FNIEMOP_DEF_1(iemOp_ficom_m32i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("ficom st0,m32i");
-
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-    IEM_MC_LOCAL(uint16_t,                  u16Fsw);
-    IEM_MC_LOCAL(int32_t,                   i32Val2);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,        pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,                pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(int32_t const *,   pi32Val2,       i32Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_I32(i32Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_ficom_r80_by_i32, pu16Fsw, pr80Value1, pi32Val2);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xda !11/3. */
-FNIEMOP_DEF_1(iemOp_ficomp_m32i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("ficomp st0,m32i");
-
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-    IEM_MC_LOCAL(uint16_t,                  u16Fsw);
-    IEM_MC_LOCAL(int32_t,                   i32Val2);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,        pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,                pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(int32_t const *,   pi32Val2,       i32Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_I32(i32Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_ficom_r80_by_i32, pu16Fsw, pr80Value1, pi32Val2);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xda !11/4. */
-FNIEMOP_DEF_1(iemOp_fisub_m32i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fisub m32i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fisub_r80_by_i32);
-}
-
-
-/** Opcode 0xda !11/5. */
-FNIEMOP_DEF_1(iemOp_fisubr_m32i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fisubr m32i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fisubr_r80_by_i32);
-}
-
-
-/** Opcode 0xda !11/6. */
-FNIEMOP_DEF_1(iemOp_fidiv_m32i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fidiv m32i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fidiv_r80_by_i32);
-}
-
-
-/** Opcode 0xda !11/7. */
-FNIEMOP_DEF_1(iemOp_fidivr_m32i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fidivr m32i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fidivr_r80_by_i32);
-}
-
-
-/** Opcode 0xda. */
-FNIEMOP_DEF(iemOp_EscF2)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    pVCpu->iem.s.uFpuOpcode = RT_MAKE_U16(bRm, 0xda & 0x7);
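-    /* The 11-bit x87 FOP is composed of the low three opcode bits (0xda & 7)
-       in the high byte and the ModR/M byte in the low byte; mod=11b selects
-       the register forms, everything else the m32i memory forms. */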
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fcmovb_stN, bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fcmove_stN, bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_fcmovbe_stN, bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_fcmovu_stN, bRm);
-            case 4: return IEMOP_RAISE_INVALID_OPCODE();
-            case 5:
-                if (bRm == 0xe9)
-                    return FNIEMOP_CALL(iemOp_fucompp);
-                return IEMOP_RAISE_INVALID_OPCODE();
-            case 6: return IEMOP_RAISE_INVALID_OPCODE();
-            case 7: return IEMOP_RAISE_INVALID_OPCODE();
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fiadd_m32i,  bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fimul_m32i,  bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_ficom_m32i,  bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_ficomp_m32i, bRm);
-            case 4: return FNIEMOP_CALL_1(iemOp_fisub_m32i,  bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_fisubr_m32i, bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fidiv_m32i,  bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_fidivr_m32i, bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0xdb !11/0. */
-FNIEMOP_DEF_1(iemOp_fild_m32i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fild m32i");
-
-    IEM_MC_BEGIN(2, 3);
-    IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-    IEM_MC_LOCAL(IEMFPURESULT,              FpuRes);
-    IEM_MC_LOCAL(int32_t,                   i32Val);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT,     pFpuRes,    FpuRes, 0);
-    IEM_MC_ARG_LOCAL_REF(int32_t const *,   pi32Val,    i32Val, 1);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_I32(i32Val, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
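-    /* ST(7) is the slot the push will land in; if it is not empty this is a
-       stack overflow rather than a load. */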
-    IEM_MC_IF_FPUREG_IS_EMPTY(7)
-        IEM_MC_CALL_FPU_AIMPL_2(iemAImpl_fild_i32_to_r80, pFpuRes, pi32Val);
-        IEM_MC_PUSH_FPU_RESULT_MEM_OP(FpuRes, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP(pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb !11/1. */
-FNIEMOP_DEF_1(iemOp_fisttp_m32i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fisttp m32i");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(int32_t *,               pi32Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pi32Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fistt_r80_to_i32, pu16Fsw, pi32Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pi32Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
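-        /* Stack underflow: the integer indefinite (INT32_MIN) is only written
-           when the invalid-operation exception is masked (FCW.IM=1). */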
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_I32_CONST_BY_REF(pi32Dst, INT32_MIN /* (integer indefinite) */);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pi32Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb !11/2. */
-FNIEMOP_DEF_1(iemOp_fist_m32i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fist m32i");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(int32_t *,               pi32Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pi32Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fist_r80_to_i32, pu16Fsw, pi32Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pi32Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_I32_CONST_BY_REF(pi32Dst, INT32_MIN /* (integer indefinite) */);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pi32Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb !11/3. */
-FNIEMOP_DEF_1(iemOp_fistp_m32i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fisttp m32i");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(int32_t *,               pi32Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pi32Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fist_r80_to_i32, pu16Fsw, pi32Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pi32Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_I32_CONST_BY_REF(pi32Dst, INT32_MIN /* (integer indefinite) */);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pi32Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb !11/5. */
-FNIEMOP_DEF_1(iemOp_fld_m80r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fld m80r");
-
-    IEM_MC_BEGIN(2, 3);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_LOCAL(RTFLOAT80U,            r80Val);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,    FpuRes, 0);
-    IEM_MC_ARG_LOCAL_REF(PCRTFLOAT80U,  pr80Val,    r80Val, 1);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_R80(r80Val, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_IS_EMPTY(7)
-        IEM_MC_CALL_FPU_AIMPL_2(iemAImpl_fld_r80_from_r80, pFpuRes, pr80Val);
-        IEM_MC_PUSH_FPU_RESULT_MEM_OP(FpuRes, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP(pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb !11/7. */
-FNIEMOP_DEF_1(iemOp_fstp_m80r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fstp m80r");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(PRTFLOAT80U,             pr80Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pr80Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fst_r80_to_r80, pu16Fsw, pr80Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pr80Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF(pr80Dst);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pr80Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb 11/0. */
-FNIEMOP_DEF_1(iemOp_fcmovnb_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcmovnb st0,stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(PCRTFLOAT80U,      pr80ValueN);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST(pr80ValueN, bRm & X86_MODRM_RM_MASK, 0)
-        IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_CF)
-            IEM_MC_STORE_FPUREG_R80_SRC_REF(0, pr80ValueN);
-        IEM_MC_ENDIF();
-        IEM_MC_UPDATE_FPU_OPCODE_IP();
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb 11/1. */
-FNIEMOP_DEF_1(iemOp_fcmovne_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcmovne st0,stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(PCRTFLOAT80U,      pr80ValueN);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST(pr80ValueN, bRm & X86_MODRM_RM_MASK, 0)
-        IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_ZF)
-            IEM_MC_STORE_FPUREG_R80_SRC_REF(0, pr80ValueN);
-        IEM_MC_ENDIF();
-        IEM_MC_UPDATE_FPU_OPCODE_IP();
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb 11/2. */
-FNIEMOP_DEF_1(iemOp_fcmovnbe_stN, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcmovnbe st0,stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(PCRTFLOAT80U,      pr80ValueN);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST(pr80ValueN, bRm & X86_MODRM_RM_MASK, 0)
-        IEM_MC_IF_EFL_NO_BITS_SET(X86_EFL_CF | X86_EFL_ZF)
-            IEM_MC_STORE_FPUREG_R80_SRC_REF(0, pr80ValueN);
-        IEM_MC_ENDIF();
-        IEM_MC_UPDATE_FPU_OPCODE_IP();
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb 11/3. */
-FNIEMOP_DEF_1(iemOp_fcmovnnu_stN, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcmovnnu st0,stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(PCRTFLOAT80U,      pr80ValueN);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST(pr80ValueN, bRm & X86_MODRM_RM_MASK, 0)
-        IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_PF)
-            IEM_MC_STORE_FPUREG_R80_SRC_REF(0, pr80ValueN);
-        IEM_MC_ENDIF();
-        IEM_MC_UPDATE_FPU_OPCODE_IP();
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
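-/* FNENI, FNDISI and FNSETPM are 8087/80287 leftovers that later CPUs execute
-   as no-ops; the emulations below therefore only check for #NM and advance
-   RIP. */
-
-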
-/** Opcode 0xdb 0xe0. */
-FNIEMOP_DEF(iemOp_fneni)
-{
-    IEMOP_MNEMONIC("fneni (8087/ign)");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0,0);
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb 0xe1. */
-FNIEMOP_DEF(iemOp_fndisi)
-{
-    IEMOP_MNEMONIC("fndisi (8087/ign)");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0,0);
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb 0xe2. */
-FNIEMOP_DEF(iemOp_fnclex)
-{
-    IEMOP_MNEMONIC("fnclex");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0,0);
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-    IEM_MC_CLEAR_FSW_EX();
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb 0xe3. */
-FNIEMOP_DEF(iemOp_fninit)
-{
-    IEMOP_MNEMONIC("fninit");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
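-    /* Deferred to a C implementation; fCheckXcpts is false because the
-       no-wait form (FNINIT) does not check for pending FPU exceptions. */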
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_finit, false /*fCheckXcpts*/);
-}
-
-
-/** Opcode 0xdb 0xe4. */
-FNIEMOP_DEF(iemOp_fnsetpm)
-{
-    IEMOP_MNEMONIC("fnsetpm (80287/ign)");   /* set protected mode on fpu. */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0,0);
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdb 0xe5. */
-FNIEMOP_DEF(iemOp_frstpm)
-{
-    IEMOP_MNEMONIC("frstpm (80287XL/ign)"); /* reset pm, back to real mode. */
-#if 0 /* #UDs on newer CPUs */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0,0);
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-#else
-    return IEMOP_RAISE_INVALID_OPCODE();
-#endif
-}
-
-
-/** Opcode 0xdb 11/5. */
-FNIEMOP_DEF_1(iemOp_fucomi_stN, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fucomi st0,stN");
-    return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_fcomi_fucomi, bRm & X86_MODRM_RM_MASK, iemAImpl_fucomi_r80_by_r80, false /*fPop*/);
-}
-
-
-/** Opcode 0xdb 11/6. */
-FNIEMOP_DEF_1(iemOp_fcomi_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcomi st0,stN");
-    return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_fcomi_fucomi, bRm & X86_MODRM_RM_MASK, iemAImpl_fcomi_r80_by_r80, false /*fPop*/);
-}
-
-
-/** Opcode 0xdb. */
-FNIEMOP_DEF(iemOp_EscF3)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    pVCpu->iem.s.uFpuOpcode = RT_MAKE_U16(bRm, 0xdb & 0x7);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fcmovnb_stN,  bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fcmovne_stN,  bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_fcmovnbe_stN, bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_fcmovnnu_stN, bRm);
-            case 4:
-                switch (bRm)
-                {
-                    case 0xe0:  return FNIEMOP_CALL(iemOp_fneni);
-                    case 0xe1:  return FNIEMOP_CALL(iemOp_fndisi);
-                    case 0xe2:  return FNIEMOP_CALL(iemOp_fnclex);
-                    case 0xe3:  return FNIEMOP_CALL(iemOp_fninit);
-                    case 0xe4:  return FNIEMOP_CALL(iemOp_fnsetpm);
-                    case 0xe5:  return FNIEMOP_CALL(iemOp_frstpm);
-                    case 0xe6:  return IEMOP_RAISE_INVALID_OPCODE();
-                    case 0xe7:  return IEMOP_RAISE_INVALID_OPCODE();
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-            case 5: return FNIEMOP_CALL_1(iemOp_fucomi_stN, bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fcomi_stN,  bRm);
-            case 7: return IEMOP_RAISE_INVALID_OPCODE();
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fild_m32i,  bRm);
            case 1: return FNIEMOP_CALL_1(iemOp_fisttp_m32i, bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_fist_m32i,  bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_fistp_m32i, bRm);
-            case 4: return IEMOP_RAISE_INVALID_OPCODE();
-            case 5: return FNIEMOP_CALL_1(iemOp_fld_m80r,   bRm);
-            case 6: return IEMOP_RAISE_INVALID_OPCODE();
-            case 7: return FNIEMOP_CALL_1(iemOp_fstp_m80r,  bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/**
- * Common worker for FPU instructions working on STn and ST0, and storing the
- * result in STn unless IE, DE or ZE was raised.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_2(iemOpHlpFpu_stN_st0, uint8_t, bRm, PFNIEMAIMPLFPUR80, pfnAImpl)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,        FpuRes,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value2,                 2);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80(pr80Value1, bRm & X86_MODRM_RM_MASK, pr80Value2, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pFpuRes, pr80Value1, pr80Value2);
-        IEM_MC_STORE_FPU_RESULT(FpuRes, bRm & X86_MODRM_RM_MASK);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(bRm & X86_MODRM_RM_MASK);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdc 11/0. */
-FNIEMOP_DEF_1(iemOp_fadd_stN_st0,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fadd stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fadd_r80_by_r80);
-}
-
-
-/** Opcode 0xdc 11/1. */
-FNIEMOP_DEF_1(iemOp_fmul_stN_st0,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fmul stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fmul_r80_by_r80);
-}
-
-
-/** Opcode 0xdc 11/4. */
-FNIEMOP_DEF_1(iemOp_fsubr_stN_st0,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fsubr stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fsubr_r80_by_r80);
-}
-
-
-/** Opcode 0xdc 11/5. */
-FNIEMOP_DEF_1(iemOp_fsub_stN_st0,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fsub stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fsub_r80_by_r80);
-}
-
-
-/** Opcode 0xdc 11/6. */
-FNIEMOP_DEF_1(iemOp_fdivr_stN_st0,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fdivr stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fdivr_r80_by_r80);
-}
-
-
-/** Opcode 0xdc 11/7. */
-FNIEMOP_DEF_1(iemOp_fdiv_stN_st0,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fdiv stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fdiv_r80_by_r80);
-}
-
-
-/**
- * Common worker for FPU instructions working on ST0 and a 64-bit floating point
- * memory operand, and storing the result in ST0.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_2(iemOpHlpFpu_ST0_m64r, uint8_t, bRm, PFNIEMAIMPLFPUR64, pfnAImpl)
-{
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_LOCAL(RTFLOAT64U,            r64Factor2);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,        FpuRes,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Factor1,                1);
-    IEM_MC_ARG_LOCAL_REF(PRTFLOAT64U,   pr64Factor2,    r64Factor2, 2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_FETCH_MEM_R64(r64Factor2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Factor1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pFpuRes, pr80Factor1, pr64Factor2);
-        IEM_MC_STORE_FPU_RESULT_MEM_OP(FpuRes, 0, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP(0, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdc !11/0. */
-FNIEMOP_DEF_1(iemOp_fadd_m64r,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fadd m64r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fadd_r80_by_r64);
-}
-
-
-/** Opcode 0xdc !11/1. */
-FNIEMOP_DEF_1(iemOp_fmul_m64r,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fmul m64r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fmul_r80_by_r64);
-}
-
-
-/** Opcode 0xdc !11/2. */
-FNIEMOP_DEF_1(iemOp_fcom_m64r,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcom st0,m64r");
-
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_LOCAL(RTFLOAT64U,            r64Val2);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(PCRTFLOAT64U,  pr64Val2,       r64Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_R64(r64Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fcom_r80_by_r64, pu16Fsw, pr80Value1, pr64Val2);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdc !11/3. */
-FNIEMOP_DEF_1(iemOp_fcomp_m64r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcomp st0,m64r");
-
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_LOCAL(RTFLOAT64U,            r64Val2);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(PCRTFLOAT64U,  pr64Val2,       r64Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_R64(r64Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fcom_r80_by_r64, pu16Fsw, pr80Value1, pr64Val2);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdc !11/4. */
-FNIEMOP_DEF_1(iemOp_fsub_m64r,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fsub m64r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fsub_r80_by_r64);
-}
-
-
-/** Opcode 0xdc !11/5. */
-FNIEMOP_DEF_1(iemOp_fsubr_m64r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fsubr m64r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fsubr_r80_by_r64);
-}
-
-
-/** Opcode 0xdc !11/6. */
-FNIEMOP_DEF_1(iemOp_fdiv_m64r,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fdiv m64r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fdiv_r80_by_r64);
-}
-
-
-/** Opcode 0xdc !11/7. */
-FNIEMOP_DEF_1(iemOp_fdivr_m64r, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fdivr m64r");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fdivr_r80_by_r64);
-}
-
-
-/** Opcode 0xdc. */
-FNIEMOP_DEF(iemOp_EscF4)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    pVCpu->iem.s.uFpuOpcode = RT_MAKE_U16(bRm, 0xdc & 0x7);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
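-        /* Note: in the 0xdc register forms the subtract and divide encodings
-           are reversed relative to the memory forms below: /4=FSUBR, /5=FSUB,
-           /6=FDIVR, /7=FDIV. */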
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fadd_stN_st0,  bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fmul_stN_st0,  bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_fcom_stN,      bRm); /* Marked reserved, intel behavior is that of FCOM ST(i). */
-            case 3: return FNIEMOP_CALL_1(iemOp_fcomp_stN,     bRm); /* Marked reserved, intel behavior is that of FCOMP ST(i). */
-            case 4: return FNIEMOP_CALL_1(iemOp_fsubr_stN_st0, bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_fsub_stN_st0,  bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fdivr_stN_st0, bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_fdiv_stN_st0,  bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fadd_m64r,  bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fmul_m64r,  bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_fcom_m64r,  bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_fcomp_m64r, bRm);
-            case 4: return FNIEMOP_CALL_1(iemOp_fsub_m64r,  bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_fsubr_m64r, bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fdiv_m64r,  bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_fdivr_m64r, bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0xdd !11/0.
- * @sa iemOp_fld_m32r */
-FNIEMOP_DEF_1(iemOp_fld_m64r,    uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fld m64r");
-
-    IEM_MC_BEGIN(2, 3);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_LOCAL(RTFLOAT64U,            r64Val);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,    FpuRes, 0);
-    IEM_MC_ARG_LOCAL_REF(PCRTFLOAT64U,  pr64Val,    r64Val, 1);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_FETCH_MEM_R64(r64Val, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_IS_EMPTY(7)
-        IEM_MC_CALL_FPU_AIMPL_2(iemAImpl_fld_r64_to_r80, pFpuRes, pr64Val);
-        IEM_MC_PUSH_FPU_RESULT_MEM_OP(FpuRes, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP(pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdd !11/1. */
-FNIEMOP_DEF_1(iemOp_fisttp_m64i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fisttp m64i");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(int64_t *,               pi64Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pi64Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fistt_r80_to_i64, pu16Fsw, pi64Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pi64Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_I64_CONST_BY_REF(pi64Dst, INT64_MIN /* (integer indefinite) */);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pi64Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdd !11/2. */
-FNIEMOP_DEF_1(iemOp_fst_m64r,    uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fst m64r");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(PRTFLOAT64U,             pr64Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pr64Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fst_r80_to_r64, pu16Fsw, pr64Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pr64Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF(pr64Dst);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pr64Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdd !11/3. */
-FNIEMOP_DEF_1(iemOp_fstp_m64r,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fstp m64r");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(PRTFLOAT64U,             pr64Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pr64Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fst_r80_to_r64, pu16Fsw, pr64Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pr64Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF(pr64Dst);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pr64Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdd !11/4. */
-FNIEMOP_DEF_1(iemOp_frstor,      uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("frstor m94/108byte");
-    IEM_MC_BEGIN(3, 0);
-    IEM_MC_ARG_CONST(IEMMODE,           enmEffOpSize, /*=*/ pVCpu->iem.s.enmEffOpSize,  0);
-    IEM_MC_ARG(uint8_t,                 iEffSeg,                                    1);
-    IEM_MC_ARG(RTGCPTR,                 GCPtrEffSrc,                                2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
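-    /* The real work is done in iemCImpl_frstor; the state image is 94 or 108
-       bytes depending on the effective operand size. */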
-    IEM_MC_CALL_CIMPL_3(iemCImpl_frstor, enmEffOpSize, iEffSeg, GCPtrEffSrc);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdd !11/6. */
-FNIEMOP_DEF_1(iemOp_fnsave,      uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fnsave m94/108byte");
-    IEM_MC_BEGIN(3, 0);
-    IEM_MC_ARG_CONST(IEMMODE,           enmEffOpSize, /*=*/ pVCpu->iem.s.enmEffOpSize,  0);
-    IEM_MC_ARG(uint8_t,                 iEffSeg,                                    1);
-    IEM_MC_ARG(RTGCPTR,                 GCPtrEffDst,                                2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_fnsave, enmEffOpSize, iEffSeg, GCPtrEffDst);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdd !11/7. */
-FNIEMOP_DEF_1(iemOp_fnstsw,      uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fnstsw m16");
-
-    IEM_MC_BEGIN(0, 2);
-    IEM_MC_LOCAL(uint16_t, u16Tmp);
-    IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-
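-    /* The no-wait form stores the FSW as-is, without first checking for
-       pending unmasked FPU exceptions. */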
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-    IEM_MC_FETCH_FSW(u16Tmp);
-    IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Tmp);
-    IEM_MC_ADVANCE_RIP();
-
-/** @todo Debug / drop a hint to the verifier that things may differ
- * from REM. Seen 0x4020 (iem) vs 0x4000 (rem) at 0008:801c6b88 booting
- * NT4SP1. (X86_FSW_PE) */
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdd 11/0. */
-FNIEMOP_DEF_1(iemOp_ffree_stN,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("ffree stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    /* Note! C0, C1, C2 and C3 are documented as undefined, we leave them
-             unmodified. */
-
-    IEM_MC_BEGIN(0, 0);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
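-    /* FFREE only marks the tag-word entry for ST(i) as empty; TOP and the
-       register contents are left alone. */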
-    IEM_MC_FPU_STACK_FREE(bRm & X86_MODRM_RM_MASK);
-    IEM_MC_UPDATE_FPU_OPCODE_IP();
-
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdd 11/1. */
-FNIEMOP_DEF_1(iemOp_fst_stN,     uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fst st0,stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 2);
-    IEM_MC_LOCAL(PCRTFLOAT80U,          pr80Value);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_SET_FPU_RESULT(FpuRes, 0 /*FSW*/, pr80Value);
-        IEM_MC_STORE_FPU_RESULT(FpuRes, bRm & X86_MODRM_RM_MASK);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(bRm & X86_MODRM_RM_MASK);
-    IEM_MC_ENDIF();
-
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdd 11/3. */
-FNIEMOP_DEF_1(iemOp_fucom_stN_st0, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcom st0,stN");
-    return FNIEMOP_CALL_2(iemOpHlpFpuNoStore_st0_stN, bRm, iemAImpl_fucom_r80_by_r80);
-}
-
-
-/** Opcode 0xdd 11/4. */
-FNIEMOP_DEF_1(iemOp_fucomp_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcomp st0,stN");
-    return FNIEMOP_CALL_2(iemOpHlpFpuNoStore_st0_stN_pop, bRm, iemAImpl_fucom_r80_by_r80);
-}
-
-
-/** Opcode 0xdd. */
-FNIEMOP_DEF(iemOp_EscF5)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    pVCpu->iem.s.uFpuOpcode = RT_MAKE_U16(bRm, 0xdd & 0x7);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_ffree_stN,   bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fxch_stN,    bRm); /* Reserved, intel behavior is that of XCHG ST(i). */
-            case 2: return FNIEMOP_CALL_1(iemOp_fst_stN,     bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_fstp_stN,    bRm);
            case 4: return FNIEMOP_CALL_1(iemOp_fucom_stN_st0, bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_fucomp_stN,  bRm);
-            case 6: return IEMOP_RAISE_INVALID_OPCODE();
-            case 7: return IEMOP_RAISE_INVALID_OPCODE();
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fld_m64r,    bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fisttp_m64i, bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_fst_m64r,    bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_fstp_m64r,   bRm);
-            case 4: return FNIEMOP_CALL_1(iemOp_frstor,      bRm);
-            case 5: return IEMOP_RAISE_INVALID_OPCODE();
-            case 6: return FNIEMOP_CALL_1(iemOp_fnsave,      bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_fnstsw,      bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0xde 11/0. */
-FNIEMOP_DEF_1(iemOp_faddp_stN_st0, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("faddp stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fadd_r80_by_r80);
-}
-
-
-/** Opcode 0xde 11/1. */
-FNIEMOP_DEF_1(iemOp_fmulp_stN_st0, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fmulp stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fmul_r80_by_r80);
-}
-
-
-/** Opcode 0xde 0xd9. */
-FNIEMOP_DEF(iemOp_fcompp)
-{
-    IEMOP_MNEMONIC("fucompp st0,stN");
-    return FNIEMOP_CALL_1(iemOpHlpFpuNoStore_st0_stN_pop_pop, iemAImpl_fcom_r80_by_r80);
-}
-
-
-/** Opcode 0xde 11/4. */
-FNIEMOP_DEF_1(iemOp_fsubrp_stN_st0, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fsubrp stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fsubr_r80_by_r80);
-}
-
-
-/** Opcode 0xde 11/5. */
-FNIEMOP_DEF_1(iemOp_fsubp_stN_st0, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fsubp stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fsub_r80_by_r80);
-}
-
-
-/** Opcode 0xde 11/6. */
-FNIEMOP_DEF_1(iemOp_fdivrp_stN_st0, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fdivrp stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fdivr_r80_by_r80);
-}
-
-
-/** Opcode 0xde 11/7. */
-FNIEMOP_DEF_1(iemOp_fdivp_stN_st0, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fdivp stN,st0");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fdiv_r80_by_r80);
-}
-
-
-/**
- * Common worker for FPU instructions working on ST0 and an m16i, and storing
- * the result in ST0.
- *
- * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_2(iemOpHlpFpu_st0_m16i, uint8_t, bRm, PFNIEMAIMPLFPUI16, pfnAImpl)
-{
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-    IEM_MC_LOCAL(IEMFPURESULT,              FpuRes);
-    IEM_MC_LOCAL(int16_t,                   i16Val2);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT,     pFpuRes,        FpuRes,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,                pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(int16_t const *,   pi16Val2,       i16Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_I16(i16Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pFpuRes, pr80Value1, pi16Val2);
-        IEM_MC_STORE_FPU_RESULT(FpuRes, 0);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW(0);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xde !11/0. */
-FNIEMOP_DEF_1(iemOp_fiadd_m16i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fiadd m16i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fiadd_r80_by_i16);
-}
-
-
-/** Opcode 0xde !11/1. */
-FNIEMOP_DEF_1(iemOp_fimul_m16i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fimul m16i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fimul_r80_by_i16);
-}
-
-
-/** Opcode 0xde !11/2. */
-FNIEMOP_DEF_1(iemOp_ficom_m16i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("ficom st0,m16i");
-
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-    IEM_MC_LOCAL(uint16_t,                  u16Fsw);
-    IEM_MC_LOCAL(int16_t,                   i16Val2);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,        pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,                pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(int16_t const *,   pi16Val2,       i16Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_I16(i16Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_ficom_r80_by_i16, pu16Fsw, pr80Value1, pi16Val2);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xde !11/3. */
-FNIEMOP_DEF_1(iemOp_ficomp_m16i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("ficomp st0,m16i");
-
-    IEM_MC_BEGIN(3, 3);
-    IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-    IEM_MC_LOCAL(uint16_t,                  u16Fsw);
-    IEM_MC_LOCAL(int16_t,                   i16Val2);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,        pu16Fsw,        u16Fsw,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,                pr80Value1,                 1);
-    IEM_MC_ARG_LOCAL_REF(int16_t const *,   pi16Val2,       i16Val2,    2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_I16(i16Val2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value1, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_ficom_r80_by_i16, pu16Fsw, pr80Value1, pi16Val2);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xde !11/4. */
-FNIEMOP_DEF_1(iemOp_fisub_m16i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fisub m16i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fisub_r80_by_i16);
-}
-
-
-/** Opcode 0xde !11/5. */
-FNIEMOP_DEF_1(iemOp_fisubr_m16i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fisubr m16i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fisubr_r80_by_i16);
-}
-
-
-/** Opcode 0xde !11/6. */
-FNIEMOP_DEF_1(iemOp_fidiv_m16i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fiadd m16i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fidiv_r80_by_i16);
-}
-
-
-/** Opcode 0xde !11/7. */
-FNIEMOP_DEF_1(iemOp_fidivr_m16i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fiadd m16i");
-    return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fidivr_r80_by_i16);
-}
-
-
-/** Opcode 0xde. */
-FNIEMOP_DEF(iemOp_EscF6)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    pVCpu->iem.s.uFpuOpcode = RT_MAKE_U16(bRm, 0xde & 0x7);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_faddp_stN_st0, bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fmulp_stN_st0, bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_fcomp_stN, bRm);
-            case 3: if (bRm == 0xd9)
-                        return FNIEMOP_CALL(iemOp_fcompp);
-                    return IEMOP_RAISE_INVALID_OPCODE();
-            case 4: return FNIEMOP_CALL_1(iemOp_fsubrp_stN_st0, bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_fsubp_stN_st0, bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fdivrp_stN_st0, bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_fdivp_stN_st0, bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fiadd_m16i,  bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fimul_m16i,  bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_ficom_m16i,  bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_ficomp_m16i, bRm);
-            case 4: return FNIEMOP_CALL_1(iemOp_fisub_m16i,  bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_fisubr_m16i, bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fidiv_m16i,  bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_fidivr_m16i, bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0xdf 11/0.
- * Undocumented instruction, assumed to work like ffree + fincstp.  */
-FNIEMOP_DEF_1(iemOp_ffreep_stN, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("ffreep stN");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 0);
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-    IEM_MC_FPU_STACK_FREE(bRm & X86_MODRM_RM_MASK);
-    IEM_MC_FPU_STACK_INC_TOP();
-    IEM_MC_UPDATE_FPU_OPCODE_IP();
-
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdf 0xe0. */
-FNIEMOP_DEF(iemOp_fnstsw_ax)
-{
-    IEMOP_MNEMONIC("fnstsw ax");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(uint16_t, u16Tmp);
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
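-    /* FNSTSW AX always targets the full 16-bit AX, regardless of the
-       effective operand size. */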
-    IEM_MC_FETCH_FSW(u16Tmp);
-    IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Tmp);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdf 11/5. */
-FNIEMOP_DEF_1(iemOp_fucomip_st0_stN, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcomip st0,stN");
-    return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_fcomi_fucomi, bRm & X86_MODRM_RM_MASK, iemAImpl_fcomi_r80_by_r80, true /*fPop*/);
-}
-
-
-/** Opcode 0xdf 11/6. */
-FNIEMOP_DEF_1(iemOp_fcomip_st0_stN,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fcomip st0,stN");
-    return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_fcomi_fucomi, bRm & X86_MODRM_RM_MASK, iemAImpl_fcomi_r80_by_r80, true /*fPop*/);
-}
-
-
-/** Opcode 0xdf !11/0. */
-FNIEMOP_DEF_1(iemOp_fild_m16i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fild m16i");
-
-    IEM_MC_BEGIN(2, 3);
-    IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-    IEM_MC_LOCAL(IEMFPURESULT,              FpuRes);
-    IEM_MC_LOCAL(int16_t,                   i16Val);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT,     pFpuRes,    FpuRes, 0);
-    IEM_MC_ARG_LOCAL_REF(int16_t const *,   pi16Val,    i16Val, 1);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_I16(i16Val, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_IS_EMPTY(7)
-        IEM_MC_CALL_FPU_AIMPL_2(iemAImpl_fild_i16_to_r80, pFpuRes, pi16Val);
-        IEM_MC_PUSH_FPU_RESULT_MEM_OP(FpuRes, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP(pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdf !11/1. */
-FNIEMOP_DEF_1(iemOp_fisttp_m16i, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fisttp m16i");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(int16_t *,               pi16Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pi16Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fistt_r80_to_i16, pu16Fsw, pi16Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pi16Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_I16_CONST_BY_REF(pi16Dst, INT16_MIN /* (integer indefinite) */);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pi16Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdf !11/2. */
-FNIEMOP_DEF_1(iemOp_fist_m16i,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fistp m16i");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(int16_t *,               pi16Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pi16Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fist_r80_to_i16, pu16Fsw, pi16Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pi16Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_I16_CONST_BY_REF(pi16Dst, INT16_MIN /* (integer indefinite) */);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pi16Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdf !11/3. */
-FNIEMOP_DEF_1(iemOp_fistp_m16i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fistp m16i");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(int16_t *,               pi16Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pi16Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fist_r80_to_i16, pu16Fsw, pi16Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pi16Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_I16_CONST_BY_REF(pi16Dst, INT16_MIN /* (integer indefinite) */);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pi16Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdf !11/4. */
-FNIEMOP_STUB_1(iemOp_fbld_m80d,   uint8_t, bRm);
-
-
-/** Opcode 0xdf !11/5. */
-FNIEMOP_DEF_1(iemOp_fild_m64i,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fild m64i");
-
-    IEM_MC_BEGIN(2, 3);
-    IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-    IEM_MC_LOCAL(IEMFPURESULT,              FpuRes);
-    IEM_MC_LOCAL(int64_t,                   i64Val);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT,     pFpuRes,    FpuRes, 0);
-    IEM_MC_ARG_LOCAL_REF(int64_t const *,   pi64Val,    i64Val, 1);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-    IEM_MC_FETCH_MEM_I64(i64Val, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_IS_EMPTY(7)
-        IEM_MC_CALL_FPU_AIMPL_2(iemAImpl_fild_i64_to_r80, pFpuRes, pi64Val);
-        IEM_MC_PUSH_FPU_RESULT_MEM_OP(FpuRes, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP(pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdf !11/6. */
-FNIEMOP_STUB_1(iemOp_fbstp_m80d,  uint8_t, bRm);
-
-
-/** Opcode 0xdf !11/7. */
-FNIEMOP_DEF_1(iemOp_fistp_m64i,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fistp m64i");
-    IEM_MC_BEGIN(3, 2);
-    IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-    IEM_MC_LOCAL(uint16_t,              u16Fsw);
-    IEM_MC_ARG_LOCAL_REF(uint16_t *,    pu16Fsw,    u16Fsw, 0);
-    IEM_MC_ARG(int64_t *,               pi64Dst,            1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value,          2);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
-    IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
-    IEM_MC_MEM_MAP(pi64Dst, IEM_ACCESS_DATA_W, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1 /*arg*/);
-    IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(iemAImpl_fist_r80_to_i64, pu16Fsw, pi64Dst, pr80Value);
-        IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE(pi64Dst, IEM_ACCESS_DATA_W, u16Fsw);
-        IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP(u16Fsw, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ELSE()
-        IEM_MC_IF_FCW_IM()
-            IEM_MC_STORE_MEM_I64_CONST_BY_REF(pi64Dst, INT64_MIN /* (integer indefinite) */);
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pi64Dst, IEM_ACCESS_DATA_W);
-        IEM_MC_ENDIF();
-        IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP(UINT8_MAX, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xdf. */
-FNIEMOP_DEF(iemOp_EscF7)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_ffreep_stN, bRm); /* ffree + pop afterwards, since forever according to AMD. */
-            case 1: return FNIEMOP_CALL_1(iemOp_fxch_stN,   bRm); /* Reserved, behaves like FXCH ST(i) on Intel. */
-            case 2: return FNIEMOP_CALL_1(iemOp_fstp_stN,   bRm); /* Reserved, behaves like FSTP ST(i) on Intel. */
-            case 3: return FNIEMOP_CALL_1(iemOp_fstp_stN,   bRm); /* Reserved, behaves like FSTP ST(i) on Intel. */
-            case 4: if (bRm == 0xe0)
-                        return FNIEMOP_CALL(iemOp_fnstsw_ax);
-                    return IEMOP_RAISE_INVALID_OPCODE();
-            case 5: return FNIEMOP_CALL_1(iemOp_fucomip_st0_stN, bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fcomip_st0_stN,  bRm);
-            case 7: return IEMOP_RAISE_INVALID_OPCODE();
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_fild_m16i,   bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_fisttp_m16i, bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_fist_m16i,   bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_fistp_m16i,  bRm);
-            case 4: return FNIEMOP_CALL_1(iemOp_fbld_m80d,   bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_fild_m64i,   bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_fbstp_m80d,  bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_fistp_m64i,  bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
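The dispatcher above carves the ModRM byte with the X86_MODRM_* masks; the split is fixed by the x86 encoding (mod in bits 7:6, reg in bits 5:3, rm in bits 2:0), and for escape opcodes like 0xdf the reg field selects the row in the tables above. A self-contained restatement with local macros standing in for the iprt/x86.h ones:

    #include <stdint.h>
    #include <stdio.h>

    #define MODRM_MOD(b)    (((b) >> 6) & 0x3)  /* 3 means register operand */
    #define MODRM_REG(b)    (((b) >> 3) & 0x7)  /* opcode extension for groups */
    #define MODRM_RM(b)     ( (b)       & 0x7)

    int main(void)
    {
        uint8_t bRm = 0xe0;  /* the fnstsw ax encoding special-cased above */
        printf("mod=%u reg=%u rm=%u\n",
               MODRM_MOD(bRm), MODRM_REG(bRm), MODRM_RM(bRm)); /* 3, 4, 0 */
        return 0;
    }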
-
-/** Opcode 0xe0. */
-FNIEMOP_DEF(iemOp_loopne_Jb)
-{
-    IEMOP_MNEMONIC("loopne Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    switch (pVCpu->iem.s.enmEffAddrMode)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_SUB_GREG_U16(X86_GREG_xCX, 1);
-            IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(X86_EFL_ZF) {
-                IEM_MC_REL_JMP_S8(i8Imm);
-            } IEM_MC_ELSE() {
-                IEM_MC_ADVANCE_RIP();
-            } IEM_MC_ENDIF();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_SUB_GREG_U32(X86_GREG_xCX, 1);
-            IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(X86_EFL_ZF) {
-                IEM_MC_REL_JMP_S8(i8Imm);
-            } IEM_MC_ELSE() {
-                IEM_MC_ADVANCE_RIP();
-            } IEM_MC_ENDIF();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_SUB_GREG_U64(X86_GREG_xCX, 1);
-            IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(X86_EFL_ZF) {
-                IEM_MC_REL_JMP_S8(i8Imm);
-            } IEM_MC_ELSE() {
-                IEM_MC_ADVANCE_RIP();
-            } IEM_MC_ENDIF();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0xe1. */
-FNIEMOP_DEF(iemOp_loope_Jb)
-{
-    IEMOP_MNEMONIC("loope Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    switch (pVCpu->iem.s.enmEffAddrMode)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_SUB_GREG_U16(X86_GREG_xCX, 1);
-            IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(X86_EFL_ZF) {
-                IEM_MC_REL_JMP_S8(i8Imm);
-            } IEM_MC_ELSE() {
-                IEM_MC_ADVANCE_RIP();
-            } IEM_MC_ENDIF();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_SUB_GREG_U32(X86_GREG_xCX, 1);
-            IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(X86_EFL_ZF) {
-                IEM_MC_REL_JMP_S8(i8Imm);
-            } IEM_MC_ELSE() {
-                IEM_MC_ADVANCE_RIP();
-            } IEM_MC_ENDIF();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_SUB_GREG_U64(X86_GREG_xCX, 1);
-            IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(X86_EFL_ZF) {
-                IEM_MC_REL_JMP_S8(i8Imm);
-            } IEM_MC_ELSE() {
-                IEM_MC_ADVANCE_RIP();
-            } IEM_MC_ENDIF();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0xe2. */
-FNIEMOP_DEF(iemOp_loop_Jb)
-{
-    IEMOP_MNEMONIC("loop Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    /** @todo Check out the #GP case if EIP < CS.Base or EIP > CS.Limit when
-     * using the 32-bit operand size override.  How can that be restarted?  See
-     * weird pseudo code in the Intel manual. */
-    switch (pVCpu->iem.s.enmEffAddrMode)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0,0);
-            if (-(int8_t)IEM_GET_INSTR_LEN(pVCpu) != i8Imm)
-            {
-                IEM_MC_SUB_GREG_U16(X86_GREG_xCX, 1);
-                IEM_MC_IF_CX_IS_NZ() {
-                    IEM_MC_REL_JMP_S8(i8Imm);
-                } IEM_MC_ELSE() {
-                    IEM_MC_ADVANCE_RIP();
-                } IEM_MC_ENDIF();
-            }
-            else
-            {
-                IEM_MC_STORE_GREG_U16_CONST(X86_GREG_xCX, 0);
-                IEM_MC_ADVANCE_RIP();
-            }
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0,0);
-            if (-(int8_t)IEM_GET_INSTR_LEN(pVCpu) != i8Imm)
-            {
-                IEM_MC_SUB_GREG_U32(X86_GREG_xCX, 1);
-                IEM_MC_IF_ECX_IS_NZ() {
-                    IEM_MC_REL_JMP_S8(i8Imm);
-                } IEM_MC_ELSE() {
-                    IEM_MC_ADVANCE_RIP();
-                } IEM_MC_ENDIF();
-            }
-            else
-            {
-                IEM_MC_STORE_GREG_U32_CONST(X86_GREG_xCX, 0);
-                IEM_MC_ADVANCE_RIP();
-            }
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0,0);
-            if (-(int8_t)IEM_GET_INSTR_LEN(pVCpu) != i8Imm)
-            {
-                IEM_MC_SUB_GREG_U64(X86_GREG_xCX, 1);
-                IEM_MC_IF_RCX_IS_NZ() {
-                    IEM_MC_REL_JMP_S8(i8Imm);
-                } IEM_MC_ELSE() {
-                    IEM_MC_ADVANCE_RIP();
-                } IEM_MC_ENDIF();
-            }
-            else
-            {
-                IEM_MC_STORE_GREG_U64_CONST(X86_GREG_xCX, 0);
-                IEM_MC_ADVANCE_RIP();
-            }
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
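Two details worth spelling out for the three loop handlers: the counter register follows the effective address size (CX/ECX/RCX), not the operand size, and none of them touch the arithmetic flags; the `-(int8_t)IEM_GET_INSTR_LEN(pVCpu) != i8Imm` guard in iemOp_loop_Jb catches a `loop $` that jumps to itself and replaces the spin with clearing the counter. A compact behavioural model of the decrement-and-branch decision, with hypothetical names:

    #include <stdint.h>
    #include <stdbool.h>

    /* Sketch: LOOP family. enmKind: 0 = loop, 1 = loope (needs ZF=1),
     * 2 = loopne (needs ZF=0). fAddrMask selects the CX/ECX/RCX width. */
    static bool loopSketchTaken(uint64_t *puCounter, uint64_t fAddrMask,
                                int enmKind, bool fZF)
    {
        uint64_t uNew = (*puCounter - 1) & fAddrMask;       /* no flags changed */
        *puCounter = (*puCounter & ~fAddrMask) | uNew;      /* upper bits kept */
        if (uNew == 0)
            return false;
        if (enmKind == 1) return fZF;
        if (enmKind == 2) return !fZF;
        return true;
    }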
-
-/** Opcode 0xe3. */
-FNIEMOP_DEF(iemOp_jecxz_Jb)
-{
-    IEMOP_MNEMONIC("jecxz Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    switch (pVCpu->iem.s.enmEffAddrMode)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_IF_CX_IS_NZ() {
-                IEM_MC_ADVANCE_RIP();
-            } IEM_MC_ELSE() {
-                IEM_MC_REL_JMP_S8(i8Imm);
-            } IEM_MC_ENDIF();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_IF_ECX_IS_NZ() {
-                IEM_MC_ADVANCE_RIP();
-            } IEM_MC_ELSE() {
-                IEM_MC_REL_JMP_S8(i8Imm);
-            } IEM_MC_ENDIF();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0,0);
-            IEM_MC_IF_RCX_IS_NZ() {
-                IEM_MC_ADVANCE_RIP();
-            } IEM_MC_ELSE() {
-                IEM_MC_REL_JMP_S8(i8Imm);
-            } IEM_MC_ENDIF();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0xe4 */
-FNIEMOP_DEF(iemOp_in_AL_Ib)
-{
-    IEMOP_MNEMONIC("in eAX,Ib");
-    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_in, u8Imm, 1);
-}
-
-
-/** Opcode 0xe5 */
-FNIEMOP_DEF(iemOp_in_eAX_Ib)
-{
-    IEMOP_MNEMONIC("in eAX,Ib");
-    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_in, u8Imm, pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
-}
-
-
-/** Opcode 0xe6 */
-FNIEMOP_DEF(iemOp_out_Ib_AL)
-{
-    IEMOP_MNEMONIC("out Ib,AL");
-    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_out, u8Imm, 1);
-}
-
-
-/** Opcode 0xe7 */
-FNIEMOP_DEF(iemOp_out_Ib_eAX)
-{
-    IEMOP_MNEMONIC("out Ib,eAX");
-    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_out, u8Imm, pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
-}
-
-
-/** Opcode 0xe8. */
-FNIEMOP_DEF(iemOp_call_Jv)
-{
-    IEMOP_MNEMONIC("call Jv");
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-        {
-            uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-            return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_call_rel_16, (int16_t)u16Imm);
-        }
-
-        case IEMMODE_32BIT:
-        {
-            uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-            return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_call_rel_32, (int32_t)u32Imm);
-        }
-
-        case IEMMODE_64BIT:
-        {
-            uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-            return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_call_rel_64, u64Imm);
-        }
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0xe9. */
-FNIEMOP_DEF(iemOp_jmp_Jv)
-{
-    IEMOP_MNEMONIC("jmp Jv");
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-        {
-            int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-            IEM_MC_BEGIN(0, 0);
-            IEM_MC_REL_JMP_S16(i16Imm);
-            IEM_MC_END();
-            return VINF_SUCCESS;
-        }
-
-        case IEMMODE_64BIT:
-        case IEMMODE_32BIT:
-        {
-            int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-            IEM_MC_BEGIN(0, 0);
-            IEM_MC_REL_JMP_S32(i32Imm);
-            IEM_MC_END();
-            return VINF_SUCCESS;
-        }
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0xea. */
-FNIEMOP_DEF(iemOp_jmp_Ap)
-{
-    IEMOP_MNEMONIC("jmp Ap");
-    IEMOP_HLP_NO_64BIT();
-
-    /* Decode the far pointer address and pass it on to the far call C implementation. */
-    uint32_t offSeg;
-    if (pVCpu->iem.s.enmEffOpSize != IEMMODE_16BIT)
-        IEM_OPCODE_GET_NEXT_U32(&offSeg);
-    else
-        IEM_OPCODE_GET_NEXT_U16_ZX_U32(&offSeg);
-    uint16_t uSel;  IEM_OPCODE_GET_NEXT_U16(&uSel);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_FarJmp, uSel, offSeg, pVCpu->iem.s.enmEffOpSize);
-}
-
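The Ap operand decoded above is an immediate far pointer laid out offset-first: 16 or 32 bits of offset depending on the operand size, then the 16-bit selector, which is why offSeg is fetched before uSel. A hedged little-endian decode of the same ptr16:16/ptr16:32 layout from a raw byte buffer:

    #include <stdint.h>

    /* Sketch: pull apart an immediate far pointer (ptr16:16 or ptr16:32). */
    static void farPtrSketchDecode(const uint8_t *pb, int fOpSize32,
                                   uint32_t *poffSeg, uint16_t *puSel)
    {
        if (fOpSize32)
        {
            *poffSeg = (uint32_t)pb[0]         | ((uint32_t)pb[1] << 8)
                     | ((uint32_t)pb[2] << 16) | ((uint32_t)pb[3] << 24);
            pb += 4;
        }
        else
        {
            *poffSeg = (uint32_t)pb[0] | ((uint32_t)pb[1] << 8); /* zero extended */
            pb += 2;
        }
        *puSel = (uint16_t)(pb[0] | ((uint16_t)pb[1] << 8));
    }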
-
-/** Opcode 0xeb. */
-FNIEMOP_DEF(iemOp_jmp_Jb)
-{
-    IEMOP_MNEMONIC("jmp Jb");
-    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_REL_JMP_S8(i8Imm);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xec */
-FNIEMOP_DEF(iemOp_in_AL_DX)
-{
-    IEMOP_MNEMONIC("in  AL,DX");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_in_eAX_DX, 1);
-}
-
-
-/** Opcode 0xed */
-FNIEMOP_DEF(iemOp_eAX_DX)
-{
-    IEMOP_MNEMONIC("in  eAX,DX");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_in_eAX_DX, pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
-}
-
-
-/** Opcode 0xee */
-FNIEMOP_DEF(iemOp_out_DX_AL)
-{
-    IEMOP_MNEMONIC("out DX,AL");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_out_DX_eAX, 1);
-}
-
-
-/** Opcode 0xef */
-FNIEMOP_DEF(iemOp_out_DX_eAX)
-{
-    IEMOP_MNEMONIC("out DX,eAX");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_out_DX_eAX, pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
-}
-
-
-/** Opcode 0xf0. */
-FNIEMOP_DEF(iemOp_lock)
-{
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("lock");
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_LOCK;
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
-
-/** Opcode 0xf1. */
-FNIEMOP_DEF(iemOp_int_1)
-{
-    IEMOP_MNEMONIC("int1"); /* icebp */
-    IEMOP_HLP_MIN_386(); /** @todo does not generate #UD on 286, or so they say... */
-    /** @todo testcase! */
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_int, X86_XCPT_DB, false /*fIsBpInstr*/);
-}
-
-
-/** Opcode 0xf2. */
-FNIEMOP_DEF(iemOp_repne)
-{
-    /* This overrides any previous REPE prefix. */
-    pVCpu->iem.s.fPrefixes &= ~IEM_OP_PRF_REPZ;
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("repne");
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REPNZ;
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
-
-/** Opcode 0xf3. */
-FNIEMOP_DEF(iemOp_repe)
-{
-    /* This overrides any previous REPNE prefix. */
-    pVCpu->iem.s.fPrefixes &= ~IEM_OP_PRF_REPNZ;
-    IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("repe");
-    pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REPZ;
-
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
-}
-
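Note how the two REP handlers keep REPE and REPNE mutually exclusive: each clears the other's bit before setting its own, so when both prefixes appear the last one in the byte stream wins, and decoding then recurses through g_apfnOneByteMap for the byte that follows. A minimal sketch of that last-one-wins accumulation (local flag values, not the IEM_OP_PRF_* ones):

    #include <stdint.h>

    #define PRF_REPZ    (1u << 0)   /* stand-ins for illustration only */
    #define PRF_REPNZ   (1u << 1)

    static uint32_t repPrefixSketchAdd(uint32_t fPrefixes, int fIsRepne)
    {
        if (fIsRepne)
            return (fPrefixes & ~PRF_REPZ)  | PRF_REPNZ;
        return     (fPrefixes & ~PRF_REPNZ) | PRF_REPZ;
    }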
-
-/** Opcode 0xf4. */
-FNIEMOP_DEF(iemOp_hlt)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_hlt);
-}
-
-
-/** Opcode 0xf5. */
-FNIEMOP_DEF(iemOp_cmc)
-{
-    IEMOP_MNEMONIC("cmc");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_FLIP_EFL_BIT(X86_EFL_CF);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/**
- * Common implementation of 'inc/dec/not/neg Eb'.
- *
- * @param   bRm             The RM byte.
- * @param   pImpl           The instruction implementation.
- */
-FNIEMOP_DEF_2(iemOpCommonUnaryEb, uint8_t, bRm, PCIEMOPUNARYSIZES, pImpl)
-{
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register access */
-        IEM_MC_BEGIN(2, 0);
-        IEM_MC_ARG(uint8_t *,   pu8Dst, 0);
-        IEM_MC_ARG(uint32_t *,  pEFlags, 1);
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU8, pu8Dst, pEFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory access. */
-        IEM_MC_BEGIN(2, 2);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,          0);
-        IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-            IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU8, pu8Dst, pEFlags);
-        else
-            IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnLockedU8, pu8Dst, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
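The worker above only routes operands; the arithmetic itself lives behind pImpl in the g_iemAImpl_* tables. As one concrete instance of what such an 8-bit implementation computes, here is a hedged model of NEG r/m8: the result is the two's complement, and CF is set exactly when the operand was non-zero (SF/OF/AF/PF follow the usual 0 - src subtraction rules and are elided here):

    #include <stdint.h>

    #define EFL_CF  0x0001U
    #define EFL_ZF  0x0040U

    /* Sketch of an 8-bit NEG updating two of the flags. */
    static void negSketchU8(uint8_t *pu8Dst, uint32_t *pfEFlags)
    {
        uint8_t uResult = (uint8_t)(0 - *pu8Dst);
        *pfEFlags &= ~(EFL_CF | EFL_ZF);
        if (*pu8Dst != 0)
            *pfEFlags |= EFL_CF;        /* CF = 1 unless the operand was zero */
        if (uResult == 0)
            *pfEFlags |= EFL_ZF;
        *pu8Dst = uResult;
    }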
-
-/**
- * Common implementation of 'inc/dec/not/neg Ev'.
- *
- * @param   bRm             The RM byte.
- * @param   pImpl           The instruction implementation.
- */
-FNIEMOP_DEF_2(iemOpCommonUnaryEv, uint8_t, bRm, PCIEMOPUNARYSIZES, pImpl)
-{
-    /* Registers are handled by a common worker. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, pImpl, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-
-    /* Memory operands we handle here. */
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(2, 2);
-            IEM_MC_ARG(uint16_t *,      pu16Dst,         0);
-            IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags, 1);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-            IEM_MC_FETCH_EFLAGS(EFlags);
-            if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU16, pu16Dst, pEFlags);
-            else
-                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnLockedU16, pu16Dst, pEFlags);
-
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-            IEM_MC_COMMIT_EFLAGS(EFlags);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(2, 2);
-            IEM_MC_ARG(uint32_t *,      pu32Dst,         0);
-            IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags, 1);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-            IEM_MC_FETCH_EFLAGS(EFlags);
-            if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU32, pu32Dst, pEFlags);
-            else
-                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnLockedU32, pu32Dst, pEFlags);
-
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-            IEM_MC_COMMIT_EFLAGS(EFlags);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(2, 2);
-            IEM_MC_ARG(uint64_t *,      pu64Dst,         0);
-            IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags, 1);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-            IEM_MC_FETCH_EFLAGS(EFlags);
-            if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU64, pu64Dst, pEFlags);
-            else
-                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnLockedU64, pu64Dst, pEFlags);
-
-            IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-            IEM_MC_COMMIT_EFLAGS(EFlags);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0xf6 /0. */
-FNIEMOP_DEF_1(iemOp_grp3_test_Eb, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("test Eb,Ib");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register access */
-        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(3, 0);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,             0);
-        IEM_MC_ARG_CONST(uint8_t,   u8Src,/*=*/u8Imm,   1);
-        IEM_MC_ARG(uint32_t *,      pEFlags,            2);
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u8, pu8Dst, u8Src, pEFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory access. */
-        IEM_MC_BEGIN(3, 2);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,             0);
-        IEM_MC_ARG(uint8_t,         u8Src,              1);
-        IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,    2);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-        IEM_MC_ASSIGN(u8Src, u8Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_R, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u8, pu8Dst, u8Src, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_R);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
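TEST maps the destination read-only (IEM_ACCESS_DATA_R) and never writes the result back; it ANDs the operands, clears CF and OF, sets SF/ZF/PF from the result, and leaves AF undefined, which is what the IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF) line declares. A hedged 8-bit model (the parity helper is a GCC/Clang builtin):

    #include <stdint.h>

    #define EFL_CF  0x0001U
    #define EFL_PF  0x0004U
    #define EFL_ZF  0x0040U
    #define EFL_SF  0x0080U
    #define EFL_OF  0x0800U

    /* Sketch of TEST r/m8,imm8 flag behaviour; AF deliberately untouched. */
    static void testSketchU8(uint8_t u8Dst, uint8_t u8Src, uint32_t *pfEFlags)
    {
        uint8_t uResult = u8Dst & u8Src;
        *pfEFlags &= ~(EFL_CF | EFL_PF | EFL_ZF | EFL_SF | EFL_OF);
        if (!__builtin_parity(uResult))     /* PF = even number of set bits */
            *pfEFlags |= EFL_PF;
        if (uResult == 0)
            *pfEFlags |= EFL_ZF;
        if (uResult & 0x80)
            *pfEFlags |= EFL_SF;
    }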
-
-/** Opcode 0xf7 /0. */
-FNIEMOP_DEF_1(iemOp_grp3_test_Ev, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("test Ev,Iv");
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register access */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-            {
-                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG_CONST(uint16_t,  u16Src,/*=*/u16Imm,     1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u16, pu16Dst, u16Src, pEFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_32BIT:
-            {
-                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG_CONST(uint32_t,  u32Src,/*=*/u32Imm,     1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u32, pu32Dst, u32Src, pEFlags);
-                /* No clearing of the high dword here: TEST doesn't write back the result. */
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_64BIT:
-            {
-                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG_CONST(uint64_t,  u64Src,/*=*/u64Imm,     1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u64, pu64Dst, u64Src, pEFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* memory access. */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-            {
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,            0);
-                IEM_MC_ARG(uint16_t,        u16Src,             1);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,    2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 2);
-                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-                IEM_MC_ASSIGN(u16Src, u16Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_R, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_R);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_32BIT:
-            {
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,            0);
-                IEM_MC_ARG(uint32_t,        u32Src,             1);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,    2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 4);
-                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-                IEM_MC_ASSIGN(u32Src, u32Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_R, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u32, pu32Dst, u32Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_R);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_64BIT:
-            {
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,            0);
-                IEM_MC_ARG(uint64_t,        u64Src,             1);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,    2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 4);
-                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-                IEM_MC_ASSIGN(u64Src, u64Imm);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_R, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_R);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0xf6 /4, /5, /6 and /7. */
-FNIEMOP_DEF_2(iemOpCommonGrp3MulDivEb, uint8_t, bRm, PFNIEMAIMPLMULDIVU8, pfnU8)
-{
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register access */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(3, 1);
-        IEM_MC_ARG(uint16_t *,      pu16AX,     0);
-        IEM_MC_ARG(uint8_t,         u8Value,    1);
-        IEM_MC_ARG(uint32_t *,      pEFlags,    2);
-        IEM_MC_LOCAL(int32_t,       rc);
-
-        IEM_MC_FETCH_GREG_U8(u8Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_GREG_U16(pu16AX, X86_GREG_xAX);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_AIMPL_3(rc, pfnU8, pu16AX, u8Value, pEFlags);
-        IEM_MC_IF_LOCAL_IS_Z(rc) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_RAISE_DIVIDE_ERROR();
-        } IEM_MC_ENDIF();
-
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory access. */
-        IEM_MC_BEGIN(3, 2);
-        IEM_MC_ARG(uint16_t *,      pu16AX,     0);
-        IEM_MC_ARG(uint8_t,         u8Value,    1);
-        IEM_MC_ARG(uint32_t *,      pEFlags,    2);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_LOCAL(int32_t,       rc);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_MEM_U8(u8Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-        IEM_MC_REF_GREG_U16(pu16AX, X86_GREG_xAX);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_AIMPL_3(rc, pfnU8, pu16AX, u8Value, pEFlags);
-        IEM_MC_IF_LOCAL_IS_Z(rc) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_RAISE_DIVIDE_ERROR();
-        } IEM_MC_ENDIF();
-
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
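The pfnU8 workers report failure through their return value, and the IEM_MC_IF_LOCAL_IS_Z(rc)/IEM_MC_RAISE_DIVIDE_ERROR() pair above turns any non-zero rc into #DE. A hedged model of the unsigned 8-bit divide under that convention: AX divided by the operand, quotient to AL, remainder to AH, #DE on a zero divisor or a quotient that does not fit:

    #include <stdint.h>

    /* Sketch of DIV r/m8: returns 0 on success, -1 to request #DE. */
    static int divSketchU8(uint16_t *pu16AX, uint8_t u8Divisor)
    {
        if (u8Divisor == 0)
            return -1;                          /* #DE: divide by zero */
        uint16_t uQuotient  = (uint16_t)(*pu16AX / u8Divisor);
        uint16_t uRemainder = (uint16_t)(*pu16AX % u8Divisor);
        if (uQuotient > 0xff)
            return -1;                          /* #DE: quotient overflows AL */
        *pu16AX = (uint16_t)((uRemainder << 8) | uQuotient);
        return 0;
    }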
-
-/** Opcode 0xf7 /4, /5, /6 and /7. */
-FNIEMOP_DEF_2(iemOpCommonGrp3MulDivEv, uint8_t, bRm, PCIEMOPMULDIVSIZES, pImpl)
-{
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register access */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-            {
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(4, 1);
-                IEM_MC_ARG(uint16_t *,      pu16AX,     0);
-                IEM_MC_ARG(uint16_t *,      pu16DX,     1);
-                IEM_MC_ARG(uint16_t,        u16Value,   2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
-                IEM_MC_LOCAL(int32_t,       rc);
-
-                IEM_MC_FETCH_GREG_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U16(pu16AX, X86_GREG_xAX);
-                IEM_MC_REF_GREG_U16(pu16DX, X86_GREG_xDX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU16, pu16AX, pu16DX, u16Value, pEFlags);
-                IEM_MC_IF_LOCAL_IS_Z(rc) {
-                    IEM_MC_ADVANCE_RIP();
-                } IEM_MC_ELSE() {
-                    IEM_MC_RAISE_DIVIDE_ERROR();
-                } IEM_MC_ENDIF();
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_32BIT:
-            {
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(4, 1);
-                IEM_MC_ARG(uint32_t *,      pu32AX,     0);
-                IEM_MC_ARG(uint32_t *,      pu32DX,     1);
-                IEM_MC_ARG(uint32_t,        u32Value,   2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
-                IEM_MC_LOCAL(int32_t,       rc);
-
-                IEM_MC_FETCH_GREG_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U32(pu32AX, X86_GREG_xAX);
-                IEM_MC_REF_GREG_U32(pu32DX, X86_GREG_xDX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU32, pu32AX, pu32DX, u32Value, pEFlags);
-                IEM_MC_IF_LOCAL_IS_Z(rc) {
-                    IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32AX);
-                    IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32DX);
-                    IEM_MC_ADVANCE_RIP();
-                } IEM_MC_ELSE() {
-                    IEM_MC_RAISE_DIVIDE_ERROR();
-                } IEM_MC_ENDIF();
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_64BIT:
-            {
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(4, 1);
-                IEM_MC_ARG(uint64_t *,      pu64AX,     0);
-                IEM_MC_ARG(uint64_t *,      pu64DX,     1);
-                IEM_MC_ARG(uint64_t,        u64Value,   2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
-                IEM_MC_LOCAL(int32_t,       rc);
-
-                IEM_MC_FETCH_GREG_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U64(pu64AX, X86_GREG_xAX);
-                IEM_MC_REF_GREG_U64(pu64DX, X86_GREG_xDX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU64, pu64AX, pu64DX, u64Value, pEFlags);
-                IEM_MC_IF_LOCAL_IS_Z(rc) {
-                    IEM_MC_ADVANCE_RIP();
-                } IEM_MC_ELSE() {
-                    IEM_MC_RAISE_DIVIDE_ERROR();
-                } IEM_MC_ENDIF();
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* memory access. */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-            {
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint16_t *,      pu16AX,     0);
-                IEM_MC_ARG(uint16_t *,      pu16DX,     1);
-                IEM_MC_ARG(uint16_t,        u16Value,   2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_LOCAL(int32_t,       rc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_GREG_U16(pu16AX, X86_GREG_xAX);
-                IEM_MC_REF_GREG_U16(pu16DX, X86_GREG_xDX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU16, pu16AX, pu16DX, u16Value, pEFlags);
-                IEM_MC_IF_LOCAL_IS_Z(rc) {
-                    IEM_MC_ADVANCE_RIP();
-                } IEM_MC_ELSE() {
-                    IEM_MC_RAISE_DIVIDE_ERROR();
-                } IEM_MC_ENDIF();
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_32BIT:
-            {
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint32_t *,      pu32AX,     0);
-                IEM_MC_ARG(uint32_t *,      pu32DX,     1);
-                IEM_MC_ARG(uint32_t,        u32Value,   2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_LOCAL(int32_t,       rc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_GREG_U32(pu32AX, X86_GREG_xAX);
-                IEM_MC_REF_GREG_U32(pu32DX, X86_GREG_xDX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU32, pu32AX, pu32DX, u32Value, pEFlags);
-                IEM_MC_IF_LOCAL_IS_Z(rc) {
-                    IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32AX);
-                    IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32DX);
-                    IEM_MC_ADVANCE_RIP();
-                } IEM_MC_ELSE() {
-                    IEM_MC_RAISE_DIVIDE_ERROR();
-                } IEM_MC_ENDIF();
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_64BIT:
-            {
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint64_t *,      pu64AX,     0);
-                IEM_MC_ARG(uint64_t *,      pu64DX,     1);
-                IEM_MC_ARG(uint64_t,        u64Value,   2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_LOCAL(int32_t,       rc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_GREG_U64(pu64AX, X86_GREG_xAX);
-                IEM_MC_REF_GREG_U64(pu64DX, X86_GREG_xDX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU64, pu64AX, pu64DX, u64Value, pEFlags);
-                IEM_MC_IF_LOCAL_IS_Z(rc) {
-                    IEM_MC_ADVANCE_RIP();
-                } IEM_MC_ELSE() {
-                    IEM_MC_RAISE_DIVIDE_ERROR();
-                } IEM_MC_ENDIF();
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-/** Opcode 0xf6. */
-FNIEMOP_DEF(iemOp_Grp3_Eb)
+/** Invalid with RM byte where both AMD and Intel decode any additional
+ *  address encoding bytes. */
+FNIEMOPRM_DEF(iemOp_InvalidWithRMAllNeeded)
 {
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    IEMOP_MNEMONIC(InvalidWithRMAllNeeded, "InvalidWithRMAllNeeded");
+#ifndef TST_IEM_CHECK_MC
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
     {
-        case 0:
-            return FNIEMOP_CALL_1(iemOp_grp3_test_Eb, bRm);
-        case 1:
-/** @todo testcase: Present on <=386, most 486 (not early), Pentiums, and current CPUs too. CPUUNDOC.EXE */
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 2:
-            IEMOP_MNEMONIC("not Eb");
-            return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_not);
-        case 3:
-            IEMOP_MNEMONIC("neg Eb");
-            return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_neg);
-        case 4:
-            IEMOP_MNEMONIC("mul Eb");
-            IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
-            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, iemAImpl_mul_u8);
-        case 5:
-            IEMOP_MNEMONIC("imul Eb");
-            IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
-            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, iemAImpl_imul_u8);
-        case 6:
-            IEMOP_MNEMONIC("div Eb");
-            IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_OF | X86_EFL_CF);
-            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, iemAImpl_div_u8);
-        case 7:
-            IEMOP_MNEMONIC("idiv Eb");
-            IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_OF | X86_EFL_CF);
-            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, iemAImpl_idiv_u8);
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        RTGCPTR      GCPtrEff;
+        VBOXSTRICTRC rcStrict = iemOpHlpCalcRmEffAddr(pVCpu, bRm, 0, &GCPtrEff);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
     }
+#endif
+    IEMOP_HLP_DONE_DECODING();
+    return IEMOP_RAISE_INVALID_OPCODE();
 }
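The reason this helper and the variants that follow still run iemOpHlpCalcRmEffAddr is instruction length: the real CPUs parse the ModRM byte plus whatever SIB and displacement it implies before raising #UD, so the emulation must consume the same bytes to keep RIP and any restart in sync. A hedged sketch of how many bytes follow a ModRM under 32-bit addressing (the SIB base=EBP special case, which adds its own displacement, is omitted for brevity):

    #include <stdint.h>

    /* Sketch: extra addressing bytes implied by a ModRM, 32-bit mode. */
    static unsigned modRmSketchTail32(uint8_t bRm)
    {
        unsigned uMod = bRm >> 6, uRm = bRm & 7;
        if (uMod == 3)
            return 0;                      /* register operand, nothing follows */
        unsigned cb = uRm == 4 ? 1 : 0;    /* SIB byte present */
        if (uMod == 1)
            cb += 1;                       /* disp8 */
        else if (uMod == 2 || (uMod == 0 && uRm == 5))
            cb += 4;                       /* disp32 */
        return cb;
    }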
 
 
-/** Opcode 0xf7. */
-FNIEMOP_DEF(iemOp_Grp3_Ev)
+/** Invalid with RM byte where Intel requires an 8-bit immediate.
+ * Intel will also need SIB and displacement if bRm indicates memory. */
+FNIEMOPRM_DEF(iemOp_InvalidWithRMNeedImm8)
 {
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    IEMOP_MNEMONIC(InvalidWithRMNeedImm8, "InvalidWithRMNeedImm8");
+    if (pVCpu->iem.s.enmCpuVendor == CPUMCPUVENDOR_INTEL)
     {
-        case 0:
-            return FNIEMOP_CALL_1(iemOp_grp3_test_Ev, bRm);
-        case 1:
-/** @todo testcase: Present on <=386, most 486 (not early), Pentiums, and current CPUs too. CPUUNDOC.EXE */
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 2:
-            IEMOP_MNEMONIC("not Ev");
-            return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_not);
-        case 3:
-            IEMOP_MNEMONIC("neg Ev");
-            return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_neg);
-        case 4:
-            IEMOP_MNEMONIC("mul Ev");
-            IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
-            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_mul);
-        case 5:
-            IEMOP_MNEMONIC("imul Ev");
-            IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
-            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_imul);
-        case 6:
-            IEMOP_MNEMONIC("div Ev");
-            IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_OF | X86_EFL_CF);
-            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_div);
-        case 7:
-            IEMOP_MNEMONIC("idiv Ev");
-            IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_OF | X86_EFL_CF);
-            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_idiv);
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+#ifndef TST_IEM_CHECK_MC
+        if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+        {
+            RTGCPTR      GCPtrEff;
+            VBOXSTRICTRC rcStrict = iemOpHlpCalcRmEffAddr(pVCpu, bRm, 0, &GCPtrEff);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+        }
+#endif
+        uint8_t bImm8;  IEM_OPCODE_GET_NEXT_U8(&bImm8);  RT_NOREF(bRm);
     }
+    IEMOP_HLP_DONE_DECODING();
+    return IEMOP_RAISE_INVALID_OPCODE();
 }
 
 
-/** Opcode 0xf8. */
-FNIEMOP_DEF(iemOp_clc)
-{
-    IEMOP_MNEMONIC("clc");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_CLEAR_EFL_BIT(X86_EFL_CF);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xf9. */
-FNIEMOP_DEF(iemOp_stc)
-{
-    IEMOP_MNEMONIC("stc");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_SET_EFL_BIT(X86_EFL_CF);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xfa. */
-FNIEMOP_DEF(iemOp_cli)
-{
-    IEMOP_MNEMONIC("cli");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_cli);
-}
-
-
-/** Opcode 0xfb. */
-FNIEMOP_DEF(iemOp_sti)
-{
-    IEMOP_MNEMONIC("sti");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_sti);
-}
-
-
-/** Opcode 0xfc. */
-FNIEMOP_DEF(iemOp_cld)
-{
-    IEMOP_MNEMONIC("cld");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_CLEAR_EFL_BIT(X86_EFL_DF);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xfd. */
-FNIEMOP_DEF(iemOp_std)
-{
-    IEMOP_MNEMONIC("std");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_SET_EFL_BIT(X86_EFL_DF);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xfe. */
-FNIEMOP_DEF(iemOp_Grp4)
+/** Invalid with RM byte where Intel requires an 8-bit immediate.
+ * Both AMD and Intel also need SIB and displacement bytes according to bRm. */
+FNIEMOPRM_DEF(iemOp_InvalidWithRMAllNeedImm8)
 {
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    IEMOP_MNEMONIC(InvalidWithRMAllNeedImm8, "InvalidWithRMAllNeedImm8");
+#ifndef TST_IEM_CHECK_MC
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
     {
-        case 0:
-            IEMOP_MNEMONIC("inc Ev");
-            return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_inc);
-        case 1:
-            IEMOP_MNEMONIC("dec Ev");
-            return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_dec);
-        default:
-            IEMOP_MNEMONIC("grp4-ud");
-            return IEMOP_RAISE_INVALID_OPCODE();
+        RTGCPTR      GCPtrEff;
+        VBOXSTRICTRC rcStrict = iemOpHlpCalcRmEffAddr(pVCpu, bRm, 0, &GCPtrEff);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
     }
+#endif
+    uint8_t bImm8;  IEM_OPCODE_GET_NEXT_U8(&bImm8);  RT_NOREF(bRm);
+    IEMOP_HLP_DONE_DECODING();
+    return IEMOP_RAISE_INVALID_OPCODE();
 }
 
 
-/**
- * Opcode 0xff /2.
- * @param   bRm             The RM byte.
- */
-FNIEMOP_DEF_1(iemOp_Grp5_calln_Ev, uint8_t, bRm)
+/** Invalid opcode where Intel requires a Mod R/M sequence. */
+FNIEMOP_DEF(iemOp_InvalidNeedRM)
 {
-    IEMOP_MNEMONIC("calln Ev");
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    IEMOP_MNEMONIC(InvalidNeedRM, "InvalidNeedRM");
+    if (pVCpu->iem.s.enmCpuVendor == CPUMCPUVENDOR_INTEL)
     {
-        /* The new RIP is taken from a register. */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(1, 0);
-                IEM_MC_ARG(uint16_t, u16Target, 0);
-                IEM_MC_FETCH_GREG_U16(u16Target, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_CIMPL_1(iemCImpl_call_16, u16Target);
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(1, 0);
-                IEM_MC_ARG(uint32_t, u32Target, 0);
-                IEM_MC_FETCH_GREG_U32(u32Target, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_CIMPL_1(iemCImpl_call_32, u32Target);
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(1, 0);
-                IEM_MC_ARG(uint64_t, u64Target, 0);
-                IEM_MC_FETCH_GREG_U64(u64Target, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_CIMPL_1(iemCImpl_call_64, u64Target);
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* The new RIP is taken from a memory location. */
-        switch (pVCpu->iem.s.enmEffOpSize)
+        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm); RT_NOREF(bRm);
+#ifndef TST_IEM_CHECK_MC
+        if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
         {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(1, 1);
-                IEM_MC_ARG(uint16_t,  u16Target, 0);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U16(u16Target, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_CALL_CIMPL_1(iemCImpl_call_16, u16Target);
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(1, 1);
-                IEM_MC_ARG(uint32_t,  u32Target, 0);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U32(u32Target, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_CALL_CIMPL_1(iemCImpl_call_32, u32Target);
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(1, 1);
-                IEM_MC_ARG(uint64_t,  u64Target, 0);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U64(u64Target, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_CALL_CIMPL_1(iemCImpl_call_64, u64Target);
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+            RTGCPTR      GCPtrEff;
+            VBOXSTRICTRC rcStrict = iemOpHlpCalcRmEffAddr(pVCpu, bRm, 0, &GCPtrEff);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
         }
+#endif
     }
+    IEMOP_HLP_DONE_DECODING();
+    return IEMOP_RAISE_INVALID_OPCODE();
 }
 
-typedef IEM_CIMPL_DECL_TYPE_3(FNIEMCIMPLFARBRANCH, uint16_t, uSel, uint64_t, offSeg, IEMMODE, enmOpSize);
 
-FNIEMOP_DEF_2(iemOpHlp_Grp5_far_Ep, uint8_t, bRm, FNIEMCIMPLFARBRANCH *, pfnCImpl)
+/** Invalid opcode where both AMD and Intel require a Mod R/M sequence. */
+FNIEMOP_DEF(iemOp_InvalidAllNeedRM)
 {
-    /* Registers? How?? */
-    if (RT_LIKELY((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT)))
-    { /* likely */ }
-    else
-        return IEMOP_RAISE_INVALID_OPCODE(); /* callf eax is not legal */
-
-    /* Far pointer loaded from memory. */
-    switch (pVCpu->iem.s.enmEffOpSize)
+    IEMOP_MNEMONIC(InvalidAllNeedRM, "InvalidAllNeedRM");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm); RT_NOREF(bRm);
+#ifndef TST_IEM_CHECK_MC
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
     {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(3, 1);
-            IEM_MC_ARG(uint16_t,        u16Sel,                         0);
-            IEM_MC_ARG(uint16_t,        offSeg,                         1);
-            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize, IEMMODE_16BIT,    2);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-            IEM_MC_FETCH_MEM_U16_DISP(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc, 2);
-            IEM_MC_CALL_CIMPL_3(pfnCImpl, u16Sel, offSeg, enmEffOpSize);
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            /** @todo testcase: AMD does not seem to believe in the case (see bs-cpu-xcpt-1)
-             *        and will apparently ignore REX.W, at least for the jmp far qword [rsp]
-             *        and call far qword [rsp] encodings. */
-            if (!IEM_IS_GUEST_CPU_AMD(pVCpu))
-            {
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint16_t,        u16Sel,                         0);
-                IEM_MC_ARG(uint64_t,        offSeg,                         1);
-                IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize, IEMMODE_64BIT,    2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U64(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_FETCH_MEM_U16_DISP(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc, 8);
-                IEM_MC_CALL_CIMPL_3(pfnCImpl, u16Sel, offSeg, enmEffOpSize);
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-            /* AMD falls thru. */
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(3, 1);
-            IEM_MC_ARG(uint16_t,        u16Sel,                         0);
-            IEM_MC_ARG(uint32_t,        offSeg,                         1);
-            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize, IEMMODE_32BIT,    2);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U32(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-            IEM_MC_FETCH_MEM_U16_DISP(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc, 4);
-            IEM_MC_CALL_CIMPL_3(pfnCImpl, u16Sel, offSeg, enmEffOpSize);
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        RTGCPTR      GCPtrEff;
+        VBOXSTRICTRC rcStrict = iemOpHlpCalcRmEffAddr(pVCpu, bRm, 0, &GCPtrEff);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
     }
+#endif
+    IEMOP_HLP_DONE_DECODING();
+    return IEMOP_RAISE_INVALID_OPCODE();
 }
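
The stub above shows the pattern shared by all these invalid-opcode handlers: when the ModR/M mod field is not 3, the effective address is still calculated so that decoding faults from the address calculation (the rcStrict check) are delivered ahead of the final #UD. A minimal standalone sketch of that mod-field test, using demo names but the same bit layout as the X86_MODRM_* macros:

#include <stdbool.h>
#include <stdint.h>

#define DEMO_MODRM_MOD_SHIFT 6
#define DEMO_MODRM_MOD_MASK  (3 << DEMO_MODRM_MOD_SHIFT)

/* True when the ModR/M byte addresses memory (mod != 3), i.e. when SIB and
   displacement bytes may follow and an effective address must be computed
   even though the opcode itself is invalid. */
static bool demoModRmIsMemory(uint8_t bRm)
{
    return (bRm & DEMO_MODRM_MOD_MASK) != (3 << DEMO_MODRM_MOD_SHIFT);
}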
 
 
-/**
- * Opcode 0xff /3.
- * @param   bRm             The RM byte.
- */
-FNIEMOP_DEF_1(iemOp_Grp5_callf_Ep, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("callf Ep");
-    return FNIEMOP_CALL_2(iemOpHlp_Grp5_far_Ep, bRm, iemCImpl_callf);
-}
-
-
-/**
- * Opcode 0xff /4.
- * @param   bRm             The RM byte.
- */
-FNIEMOP_DEF_1(iemOp_Grp5_jmpn_Ev, uint8_t, bRm)
+/** Invalid opcode where Intel requires a Mod R/M sequence and an 8-bit
+ *  immediate. */
+FNIEMOP_DEF(iemOp_InvalidNeedRMImm8)
 {
-    IEMOP_MNEMONIC("jmpn Ev");
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* The new RIP is taken from a register. */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Target);
-                IEM_MC_FETCH_GREG_U16(u16Target, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_SET_RIP_U16(u16Target);
-                IEM_MC_END()
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Target);
-                IEM_MC_FETCH_GREG_U32(u32Target, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_SET_RIP_U32(u32Target);
-                IEM_MC_END()
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Target);
-                IEM_MC_FETCH_GREG_U64(u64Target, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_SET_RIP_U64(u64Target);
-                IEM_MC_END()
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
+    IEMOP_MNEMONIC(InvalidNeedRMImm8, "InvalidNeedRMImm8");
+    if (pVCpu->iem.s.enmCpuVendor == CPUMCPUVENDOR_INTEL)
     {
-        /* The new RIP is taken from a memory location. */
-        switch (pVCpu->iem.s.enmEffOpSize)
+        uint8_t bRm;  IEM_OPCODE_GET_NEXT_U8(&bRm);  RT_NOREF(bRm);
+#ifndef TST_IEM_CHECK_MC
+        if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
         {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint16_t, u16Target);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U16(u16Target, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_SET_RIP_U16(u16Target);
-                IEM_MC_END()
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint32_t, u32Target);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U32(u32Target, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_SET_RIP_U32(u32Target);
-                IEM_MC_END()
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Target);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U64(u64Target, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_SET_RIP_U64(u64Target);
-                IEM_MC_END()
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+            RTGCPTR      GCPtrEff;
+            VBOXSTRICTRC rcStrict = iemOpHlpCalcRmEffAddr(pVCpu, bRm, 0, &GCPtrEff);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
         }
+#endif
+        uint8_t bImm; IEM_OPCODE_GET_NEXT_U8(&bImm); RT_NOREF(bImm);
     }
+    IEMOP_HLP_DONE_DECODING();
+    return IEMOP_RAISE_INVALID_OPCODE();
 }
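
As implemented above, only the Intel path consumes the ModR/M byte (plus any memory-operand bytes) and the trailing byte immediate before raising #UD; on AMD decoding stops right after the opcode. A standalone model of the bytes consumed after the opcode (simplified: register form only, no SIB/displacement accounting):

#include <stdbool.h>

/* Hypothetical helper, not part of the patch. */
static unsigned demoInvalidNeedRMImm8Len(bool fIntel)
{
    if (!fIntel)
        return 0;                   /* AMD: #UD immediately after the opcode */
    return 1 /* ModR/M */ + 1 /* imm8 */;
}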
 
 
-/**
- * Opcode 0xff /5.
- * @param   bRm             The RM byte.
- */
-FNIEMOP_DEF_1(iemOp_Grp5_jmpf_Ep, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("jmpf Ep");
-    return FNIEMOP_CALL_2(iemOpHlp_Grp5_far_Ep, bRm, iemCImpl_FarJmp);
-}
-
-
-/**
- * Opcode 0xff /6.
- * @param   bRm             The RM byte.
- */
-FNIEMOP_DEF_1(iemOp_Grp5_push_Ev, uint8_t, bRm)
+/** Invalid opcode where Intel requires a 3rd escape byte and a Mod R/M
+ *  sequence. */
+FNIEMOP_DEF(iemOp_InvalidNeed3ByteEscRM)
 {
-    IEMOP_MNEMONIC("push Ev");
-
-    /* Registers are handled by a common worker. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        return FNIEMOP_CALL_1(iemOpCommonPushGReg, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-
-    /* Memory we do here. */
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    switch (pVCpu->iem.s.enmEffOpSize)
+    IEMOP_MNEMONIC(InvalidNeed3ByteEscRM, "InvalidNeed3ByteEscRM");
+    if (pVCpu->iem.s.enmCpuVendor == CPUMCPUVENDOR_INTEL)
     {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint16_t,  u16Src);
-            IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16(u16Src, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-            IEM_MC_PUSH_U16(u16Src);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint32_t,  u32Src);
-            IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U32(u32Src, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-            IEM_MC_PUSH_U32(u32Src);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint64_t,  u64Src);
-            IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U64(u64Src, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-            IEM_MC_PUSH_U64(u64Src);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        uint8_t b3rd; IEM_OPCODE_GET_NEXT_U8(&b3rd); RT_NOREF(b3rd);
+        uint8_t bRm;  IEM_OPCODE_GET_NEXT_U8(&bRm);  RT_NOREF(bRm);
+#ifndef TST_IEM_CHECK_MC
+        if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+        {
+            RTGCPTR      GCPtrEff;
+            VBOXSTRICTRC rcStrict = iemOpHlpCalcRmEffAddr(pVCpu, bRm, 0, &GCPtrEff);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+        }
+#endif
     }
+    IEMOP_HLP_DONE_DECODING();
+    return IEMOP_RAISE_INVALID_OPCODE();
 }
 
 
-/** Opcode 0xff. */
-FNIEMOP_DEF(iemOp_Grp5)
+/** Invalid opcode where Intel requires a 3rd escape byte, a Mod R/M
+ *  sequence, and an 8-bit immediate. */
+FNIEMOP_DEF(iemOp_InvalidNeed3ByteEscRMImm8)
 {
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    IEMOP_MNEMONIC(InvalidNeed3ByteEscRMImm8, "InvalidNeed3ByteEscRMImm8");
+    if (pVCpu->iem.s.enmCpuVendor == CPUMCPUVENDOR_INTEL)
     {
-        case 0:
-            IEMOP_MNEMONIC("inc Ev");
-            return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_inc);
-        case 1:
-            IEMOP_MNEMONIC("dec Ev");
-            return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_dec);
-        case 2:
-            return FNIEMOP_CALL_1(iemOp_Grp5_calln_Ev, bRm);
-        case 3:
-            return FNIEMOP_CALL_1(iemOp_Grp5_callf_Ep, bRm);
-        case 4:
-            return FNIEMOP_CALL_1(iemOp_Grp5_jmpn_Ev, bRm);
-        case 5:
-            return FNIEMOP_CALL_1(iemOp_Grp5_jmpf_Ep, bRm);
-        case 6:
-            return FNIEMOP_CALL_1(iemOp_Grp5_push_Ev, bRm);
-        case 7:
-            IEMOP_MNEMONIC("grp5-ud");
-            return IEMOP_RAISE_INVALID_OPCODE();
+        uint8_t b3rd; IEM_OPCODE_GET_NEXT_U8(&b3rd); RT_NOREF(b3rd);
+        uint8_t bRm;  IEM_OPCODE_GET_NEXT_U8(&bRm);  RT_NOREF(bRm);
+#ifndef TST_IEM_CHECK_MC
+        if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+        {
+            RTGCPTR      GCPtrEff;
+            VBOXSTRICTRC rcStrict = iemOpHlpCalcRmEffAddr(pVCpu, bRm, 1, &GCPtrEff);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+        }
+#endif
+        uint8_t bImm; IEM_OPCODE_GET_NEXT_U8(&bImm); RT_NOREF(bImm);
+        IEMOP_HLP_DONE_DECODING();
     }
-    AssertFailedReturn(VERR_IEM_IPE_3);
+    return IEMOP_RAISE_INVALID_OPCODE();
 }
 
 
+/** Repeats a_fn four times.  For decoding tables. */
+#define IEMOP_X4(a_fn) a_fn, a_fn, a_fn, a_fn
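
A decode-table row can use this macro to stamp one handler into four consecutive columns, presumably the no-prefix/0x66/0xF3/0xF2 variants used by the new opcode maps. A self-contained demonstration of the expansion (all demo names are made up):

#include <stdio.h>

#define DEMO_X4(a_fn) a_fn, a_fn, a_fn, a_fn   /* same shape as IEMOP_X4 */

static int demoHandler(void) { return 42; }

/* One table row: all four prefix columns dispatch to the same handler. */
static int (* const g_apfnDemoRow[4])(void) = { DEMO_X4(demoHandler) };

int main(void)
{
    printf("%d\n", g_apfnDemoRow[0] == g_apfnDemoRow[3]);   /* prints 1 */
    return 0;
}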
 
-const PFNIEMOP g_apfnOneByteMap[256] =
-{
-    /* 0x00 */  iemOp_add_Eb_Gb,        iemOp_add_Ev_Gv,        iemOp_add_Gb_Eb,        iemOp_add_Gv_Ev,
-    /* 0x04 */  iemOp_add_Al_Ib,        iemOp_add_eAX_Iz,       iemOp_push_ES,          iemOp_pop_ES,
-    /* 0x08 */  iemOp_or_Eb_Gb,         iemOp_or_Ev_Gv,         iemOp_or_Gb_Eb,         iemOp_or_Gv_Ev,
-    /* 0x0c */  iemOp_or_Al_Ib,         iemOp_or_eAX_Iz,        iemOp_push_CS,          iemOp_2byteEscape,
-    /* 0x10 */  iemOp_adc_Eb_Gb,        iemOp_adc_Ev_Gv,        iemOp_adc_Gb_Eb,        iemOp_adc_Gv_Ev,
-    /* 0x14 */  iemOp_adc_Al_Ib,        iemOp_adc_eAX_Iz,       iemOp_push_SS,          iemOp_pop_SS,
-    /* 0x18 */  iemOp_sbb_Eb_Gb,        iemOp_sbb_Ev_Gv,        iemOp_sbb_Gb_Eb,        iemOp_sbb_Gv_Ev,
-    /* 0x1c */  iemOp_sbb_Al_Ib,        iemOp_sbb_eAX_Iz,       iemOp_push_DS,          iemOp_pop_DS,
-    /* 0x20 */  iemOp_and_Eb_Gb,        iemOp_and_Ev_Gv,        iemOp_and_Gb_Eb,        iemOp_and_Gv_Ev,
-    /* 0x24 */  iemOp_and_Al_Ib,        iemOp_and_eAX_Iz,       iemOp_seg_ES,           iemOp_daa,
-    /* 0x28 */  iemOp_sub_Eb_Gb,        iemOp_sub_Ev_Gv,        iemOp_sub_Gb_Eb,        iemOp_sub_Gv_Ev,
-    /* 0x2c */  iemOp_sub_Al_Ib,        iemOp_sub_eAX_Iz,       iemOp_seg_CS,           iemOp_das,
-    /* 0x30 */  iemOp_xor_Eb_Gb,        iemOp_xor_Ev_Gv,        iemOp_xor_Gb_Eb,        iemOp_xor_Gv_Ev,
-    /* 0x34 */  iemOp_xor_Al_Ib,        iemOp_xor_eAX_Iz,       iemOp_seg_SS,           iemOp_aaa,
-    /* 0x38 */  iemOp_cmp_Eb_Gb,        iemOp_cmp_Ev_Gv,        iemOp_cmp_Gb_Eb,        iemOp_cmp_Gv_Ev,
-    /* 0x3c */  iemOp_cmp_Al_Ib,        iemOp_cmp_eAX_Iz,       iemOp_seg_DS,           iemOp_aas,
-    /* 0x40 */  iemOp_inc_eAX,          iemOp_inc_eCX,          iemOp_inc_eDX,          iemOp_inc_eBX,
-    /* 0x44 */  iemOp_inc_eSP,          iemOp_inc_eBP,          iemOp_inc_eSI,          iemOp_inc_eDI,
-    /* 0x48 */  iemOp_dec_eAX,          iemOp_dec_eCX,          iemOp_dec_eDX,          iemOp_dec_eBX,
-    /* 0x4c */  iemOp_dec_eSP,          iemOp_dec_eBP,          iemOp_dec_eSI,          iemOp_dec_eDI,
-    /* 0x50 */  iemOp_push_eAX,         iemOp_push_eCX,         iemOp_push_eDX,         iemOp_push_eBX,
-    /* 0x54 */  iemOp_push_eSP,         iemOp_push_eBP,         iemOp_push_eSI,         iemOp_push_eDI,
-    /* 0x58 */  iemOp_pop_eAX,          iemOp_pop_eCX,          iemOp_pop_eDX,          iemOp_pop_eBX,
-    /* 0x5c */  iemOp_pop_eSP,          iemOp_pop_eBP,          iemOp_pop_eSI,          iemOp_pop_eDI,
-    /* 0x60 */  iemOp_pusha,            iemOp_popa,             iemOp_bound_Gv_Ma_evex, iemOp_arpl_Ew_Gw_movsx_Gv_Ev,
-    /* 0x64 */  iemOp_seg_FS,           iemOp_seg_GS,           iemOp_op_size,          iemOp_addr_size,
-    /* 0x68 */  iemOp_push_Iz,          iemOp_imul_Gv_Ev_Iz,    iemOp_push_Ib,          iemOp_imul_Gv_Ev_Ib,
-    /* 0x6c */  iemOp_insb_Yb_DX,       iemOp_inswd_Yv_DX,      iemOp_outsb_Yb_DX,      iemOp_outswd_Yv_DX,
-    /* 0x70 */  iemOp_jo_Jb,            iemOp_jno_Jb,           iemOp_jc_Jb,            iemOp_jnc_Jb,
-    /* 0x74 */  iemOp_je_Jb,            iemOp_jne_Jb,           iemOp_jbe_Jb,           iemOp_jnbe_Jb,
-    /* 0x78 */  iemOp_js_Jb,            iemOp_jns_Jb,           iemOp_jp_Jb,            iemOp_jnp_Jb,
-    /* 0x7c */  iemOp_jl_Jb,            iemOp_jnl_Jb,           iemOp_jle_Jb,           iemOp_jnle_Jb,
-    /* 0x80 */  iemOp_Grp1_Eb_Ib_80,    iemOp_Grp1_Ev_Iz,       iemOp_Grp1_Eb_Ib_82,    iemOp_Grp1_Ev_Ib,
-    /* 0x84 */  iemOp_test_Eb_Gb,       iemOp_test_Ev_Gv,       iemOp_xchg_Eb_Gb,       iemOp_xchg_Ev_Gv,
-    /* 0x88 */  iemOp_mov_Eb_Gb,        iemOp_mov_Ev_Gv,        iemOp_mov_Gb_Eb,        iemOp_mov_Gv_Ev,
-    /* 0x8c */  iemOp_mov_Ev_Sw,        iemOp_lea_Gv_M,         iemOp_mov_Sw_Ev,        iemOp_Grp1A,
-    /* 0x90 */  iemOp_nop,              iemOp_xchg_eCX_eAX,     iemOp_xchg_eDX_eAX,     iemOp_xchg_eBX_eAX,
-    /* 0x94 */  iemOp_xchg_eSP_eAX,     iemOp_xchg_eBP_eAX,     iemOp_xchg_eSI_eAX,     iemOp_xchg_eDI_eAX,
-    /* 0x98 */  iemOp_cbw,              iemOp_cwd,              iemOp_call_Ap,          iemOp_wait,
-    /* 0x9c */  iemOp_pushf_Fv,         iemOp_popf_Fv,          iemOp_sahf,             iemOp_lahf,
-    /* 0xa0 */  iemOp_mov_Al_Ob,        iemOp_mov_rAX_Ov,       iemOp_mov_Ob_AL,        iemOp_mov_Ov_rAX,
-    /* 0xa4 */  iemOp_movsb_Xb_Yb,      iemOp_movswd_Xv_Yv,     iemOp_cmpsb_Xb_Yb,      iemOp_cmpswd_Xv_Yv,
-    /* 0xa8 */  iemOp_test_AL_Ib,       iemOp_test_eAX_Iz,      iemOp_stosb_Yb_AL,      iemOp_stoswd_Yv_eAX,
-    /* 0xac */  iemOp_lodsb_AL_Xb,      iemOp_lodswd_eAX_Xv,    iemOp_scasb_AL_Xb,      iemOp_scaswd_eAX_Xv,
-    /* 0xb0 */  iemOp_mov_AL_Ib,        iemOp_CL_Ib,            iemOp_DL_Ib,            iemOp_BL_Ib,
-    /* 0xb4 */  iemOp_mov_AH_Ib,        iemOp_CH_Ib,            iemOp_DH_Ib,            iemOp_BH_Ib,
-    /* 0xb8 */  iemOp_eAX_Iv,           iemOp_eCX_Iv,           iemOp_eDX_Iv,           iemOp_eBX_Iv,
-    /* 0xbc */  iemOp_eSP_Iv,           iemOp_eBP_Iv,           iemOp_eSI_Iv,           iemOp_eDI_Iv,
-    /* 0xc0 */  iemOp_Grp2_Eb_Ib,       iemOp_Grp2_Ev_Ib,       iemOp_retn_Iw,          iemOp_retn,
-    /* 0xc4 */  iemOp_les_Gv_Mp_vex2,   iemOp_lds_Gv_Mp_vex3,   iemOp_Grp11_Eb_Ib,      iemOp_Grp11_Ev_Iz,
-    /* 0xc8 */  iemOp_enter_Iw_Ib,      iemOp_leave,            iemOp_retf_Iw,          iemOp_retf,
-    /* 0xcc */  iemOp_int_3,            iemOp_int_Ib,           iemOp_into,             iemOp_iret,
-    /* 0xd0 */  iemOp_Grp2_Eb_1,        iemOp_Grp2_Ev_1,        iemOp_Grp2_Eb_CL,       iemOp_Grp2_Ev_CL,
-    /* 0xd4 */  iemOp_aam_Ib,           iemOp_aad_Ib,           iemOp_salc,             iemOp_xlat,
-    /* 0xd8 */  iemOp_EscF0,            iemOp_EscF1,            iemOp_EscF2,            iemOp_EscF3,
-    /* 0xdc */  iemOp_EscF4,            iemOp_EscF5,            iemOp_EscF6,            iemOp_EscF7,
-    /* 0xe0 */  iemOp_loopne_Jb,        iemOp_loope_Jb,         iemOp_loop_Jb,          iemOp_jecxz_Jb,
-    /* 0xe4 */  iemOp_in_AL_Ib,         iemOp_in_eAX_Ib,        iemOp_out_Ib_AL,        iemOp_out_Ib_eAX,
-    /* 0xe8 */  iemOp_call_Jv,          iemOp_jmp_Jv,           iemOp_jmp_Ap,           iemOp_jmp_Jb,
-    /* 0xec */  iemOp_in_AL_DX,         iemOp_eAX_DX,           iemOp_out_DX_AL,        iemOp_out_DX_eAX,
-    /* 0xf0 */  iemOp_lock,             iemOp_int_1,            iemOp_repne,            iemOp_repe,
-    /* 0xf4 */  iemOp_hlt,              iemOp_cmc,              iemOp_Grp3_Eb,          iemOp_Grp3_Ev,
-    /* 0xf8 */  iemOp_clc,              iemOp_stc,              iemOp_cli,              iemOp_sti,
-    /* 0xfc */  iemOp_cld,              iemOp_std,              iemOp_Grp4,             iemOp_Grp5,
-};
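
The 256-entry one-byte map deleted here evidently moves to IEMAllInstructionsOneByte.cpp.h, which the include block just below pulls in. Consuming such a map is a single indirect call per opcode byte; a hypothetical standalone sketch:

#include <stdint.h>
#include <stdio.h>

typedef int (*DEMOPFNOP)(void);             /* stand-in for PFNIEMOP */

static int demoUd(void)  { return -1; }
static int demoNop(void) { return  0; }

int main(void)
{
    static DEMOPFNOP s_apfnMap[256];
    for (unsigned i = 0; i < 256; i++)
        s_apfnMap[i] = demoUd;              /* default every entry to #UD */
    s_apfnMap[0x90] = demoNop;              /* wire up a single opcode */
    printf("%d\n", s_apfnMap[0x90]());      /* dispatch one byte: prints 0 */
    return 0;
}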
+/*
+ * Include the tables.
+ */
+#ifdef IEM_WITH_3DNOW
+# include "IEMAllInstructions3DNow.cpp.h"
+#endif
+#ifdef IEM_WITH_THREE_0F_38
+# include "IEMAllInstructionsThree0f38.cpp.h"
+#endif
+#ifdef IEM_WITH_THREE_0F_3A
+# include "IEMAllInstructionsThree0f3a.cpp.h"
+#endif
+#include "IEMAllInstructionsTwoByte0f.cpp.h"
+#ifdef IEM_WITH_VEX
+# include "IEMAllInstructionsVexMap1.cpp.h"
+# include "IEMAllInstructionsVexMap2.cpp.h"
+# include "IEMAllInstructionsVexMap3.cpp.h"
+#endif
+#include "IEMAllInstructionsOneByte.cpp.h"
 
 
-/** @} */
 
 #ifdef _MSC_VER
 # pragma warning(pop)
 #endif
+
diff --git a/src/VBox/VMM/VMMAll/IEMAllInstructions3DNow.cpp.h b/src/VBox/VMM/VMMAll/IEMAllInstructions3DNow.cpp.h
new file mode 100644
index 0000000..4d1d9f3
--- /dev/null
+++ b/src/VBox/VMM/VMMAll/IEMAllInstructions3DNow.cpp.h
@@ -0,0 +1,133 @@
+/* $Id: IEMAllInstructions3DNow.cpp.h $ */
+/** @file
+ * IEM - Instruction Decoding and Emulation, 3DNow!.
+ */
+
+/*
+ * Copyright (C) 2011-2017 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @name 3DNow! instructions (0x0f 0x0f)
+ *
+ * @{
+ */
+
+/** Opcode 0x0f 0x0f 0x0c. */
+FNIEMOP_STUB(iemOp_3Dnow_pi2fw_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x0d. */
+FNIEMOP_STUB(iemOp_3Dnow_pi2fd_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x1c. */
+FNIEMOP_STUB(iemOp_3Dnow_pf2fw_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x1d. */
+FNIEMOP_STUB(iemOp_3Dnow_pf2fd_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x8a. */
+FNIEMOP_STUB(iemOp_3Dnow_pfnacc_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x8e. */
+FNIEMOP_STUB(iemOp_3Dnow_pfpnacc_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x90. */
+FNIEMOP_STUB(iemOp_3Dnow_pfcmpge_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x94. */
+FNIEMOP_STUB(iemOp_3Dnow_pfmin_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x96. */
+FNIEMOP_STUB(iemOp_3Dnow_pfrcp_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x97. */
+FNIEMOP_STUB(iemOp_3Dnow_pfrsqrt_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x9a. */
+FNIEMOP_STUB(iemOp_3Dnow_pfsub_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0x9e. */
+FNIEMOP_STUB(iemOp_3Dnow_pfadd_PQ_Qq);
+
+/** Opcode 0x0f 0x0f 0xa0. */
+FNIEMOP_STUB(iemOp_3Dnow_pfcmpgt_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0xa4. */
+FNIEMOP_STUB(iemOp_3Dnow_pfmax_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0xa6. */
+FNIEMOP_STUB(iemOp_3Dnow_pfrcpit1_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0xa7. */
+FNIEMOP_STUB(iemOp_3Dnow_pfrsqit1_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0xaa. */
+FNIEMOP_STUB(iemOp_3Dnow_pfsubr_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0xae. */
+FNIEMOP_STUB(iemOp_3Dnow_pfacc_PQ_Qq);
+
+/** Opcode 0x0f 0x0f 0xb0. */
+FNIEMOP_STUB(iemOp_3Dnow_pfcmpeq_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0xb4. */
+FNIEMOP_STUB(iemOp_3Dnow_pfmul_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0xb6. */
+FNIEMOP_STUB(iemOp_3Dnow_pfrcpit2_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0xb7. */
+FNIEMOP_STUB(iemOp_3Dnow_pmulhrw_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0xbb. */
+FNIEMOP_STUB(iemOp_3Dnow_pswapd_Pq_Qq);
+
+/** Opcode 0x0f 0x0f 0xbf. */
+FNIEMOP_STUB(iemOp_3Dnow_pavgusb_PQ_Qq);
+
+
+/** Opcode 0x0f 0x0f. */
+FNIEMOP_DEF_1(iemOp_3DNowDispatcher, uint8_t, b)
+{
+    /* This map is pretty sparse, so use a switch instead of a table. */
+    switch (b)
+    {
+        case 0x0c: return FNIEMOP_CALL(iemOp_3Dnow_pi2fw_Pq_Qq);
+        case 0x0d: return FNIEMOP_CALL(iemOp_3Dnow_pi2fd_Pq_Qq);
+        case 0x1c: return FNIEMOP_CALL(iemOp_3Dnow_pf2fw_Pq_Qq);
+        case 0x1d: return FNIEMOP_CALL(iemOp_3Dnow_pf2fd_Pq_Qq);
+        case 0x8a: return FNIEMOP_CALL(iemOp_3Dnow_pfnacc_Pq_Qq);
+        case 0x8e: return FNIEMOP_CALL(iemOp_3Dnow_pfpnacc_Pq_Qq);
+        case 0x90: return FNIEMOP_CALL(iemOp_3Dnow_pfcmpge_Pq_Qq);
+        case 0x94: return FNIEMOP_CALL(iemOp_3Dnow_pfmin_Pq_Qq);
+        case 0x96: return FNIEMOP_CALL(iemOp_3Dnow_pfrcp_Pq_Qq);
+        case 0x97: return FNIEMOP_CALL(iemOp_3Dnow_pfrsqrt_Pq_Qq);
+        case 0x9a: return FNIEMOP_CALL(iemOp_3Dnow_pfsub_Pq_Qq);
+        case 0x9e: return FNIEMOP_CALL(iemOp_3Dnow_pfadd_PQ_Qq);
+        case 0xa0: return FNIEMOP_CALL(iemOp_3Dnow_pfcmpgt_Pq_Qq);
+        case 0xa4: return FNIEMOP_CALL(iemOp_3Dnow_pfmax_Pq_Qq);
+        case 0xa6: return FNIEMOP_CALL(iemOp_3Dnow_pfrcpit1_Pq_Qq);
+        case 0xa7: return FNIEMOP_CALL(iemOp_3Dnow_pfrsqit1_Pq_Qq);
+        case 0xaa: return FNIEMOP_CALL(iemOp_3Dnow_pfsubr_Pq_Qq);
+        case 0xae: return FNIEMOP_CALL(iemOp_3Dnow_pfacc_PQ_Qq);
+        case 0xb0: return FNIEMOP_CALL(iemOp_3Dnow_pfcmpeq_Pq_Qq);
+        case 0xb4: return FNIEMOP_CALL(iemOp_3Dnow_pfmul_Pq_Qq);
+        case 0xb6: return FNIEMOP_CALL(iemOp_3Dnow_pfrcpit2_Pq_Qq);
+        case 0xb7: return FNIEMOP_CALL(iemOp_3Dnow_pmulhrw_Pq_Qq);
+        case 0xbb: return FNIEMOP_CALL(iemOp_3Dnow_pswapd_Pq_Qq);
+        case 0xbf: return FNIEMOP_CALL(iemOp_3Dnow_pavgusb_PQ_Qq);
+        default:
+            return IEMOP_RAISE_INVALID_OPCODE();
+    }
+}
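
3DNow! encodes its real opcode as a suffix byte after the ModR/M and displacement bytes, which is why the dispatcher above keys on the last byte: pfadd mm0, mm1, for instance, encodes as 0F 0F C1 9E. A standalone sketch of suffix-byte dispatch (subset of mnemonics only):

#include <stdint.h>
#include <stdio.h>

static const char *demo3DNowName(uint8_t bSuffix)
{
    switch (bSuffix)                    /* sparse, like the dispatcher above */
    {
        case 0x9e: return "pfadd";
        case 0xb4: return "pfmul";
        case 0xbf: return "pavgusb";
        default:   return "#UD";
    }
}

int main(void)
{
    static const uint8_t s_abInstr[] = { 0x0f, 0x0f, 0xc1, 0x9e }; /* pfadd mm0, mm1 */
    printf("%s\n", demo3DNowName(s_abInstr[3]));                   /* prints "pfadd" */
    return 0;
}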
+
+/** @} */
+
diff --git a/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h b/src/VBox/VMM/VMMAll/IEMAllInstructionsOneByte.cpp.h
similarity index 53%
copy from src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h
copy to src/VBox/VMM/VMMAll/IEMAllInstructionsOneByte.cpp.h
index 1aec818..8c0fe32 100644
--- a/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h
+++ b/src/VBox/VMM/VMMAll/IEMAllInstructionsOneByte.cpp.h
@@ -1,4 +1,4 @@
-/* $Id: IEMAllInstructions.cpp.h $ */
+/* $Id: IEMAllInstructionsOneByte.cpp.h $ */
 /** @file
  * IEM - Instruction Decoding and Emulation.
  */
@@ -21,7830 +21,636 @@
 *******************************************************************************/
 extern const PFNIEMOP g_apfnOneByteMap[256]; /* not static since we need to forward declare it. */
 
-#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable: 4702) /* Unreachable code like return in iemOp_Grp6_lldt. */
-#endif
-
-
-/**
- * Common worker for instructions like ADD, AND, OR, ++ with a byte
- * memory/register as the destination.
- *
- * @param   pImpl       Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpBinaryOperator_rm_r8, PCIEMOPBINSIZES, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(3, 0);
-        IEM_MC_ARG(uint8_t *,  pu8Dst,  0);
-        IEM_MC_ARG(uint8_t,    u8Src,   1);
-        IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-        IEM_MC_FETCH_GREG_U8(u8Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * We're accessing memory.
-         * Note! We're putting the eflags on the stack here so we can commit them
-         *       after the memory.
-         */
-        uint32_t const fAccess = pImpl->pfnLockedU8 ? IEM_ACCESS_DATA_RW : IEM_ACCESS_DATA_R; /* CMP,TEST */
-        IEM_MC_BEGIN(3, 2);
-        IEM_MC_ARG(uint8_t *,  pu8Dst,           0);
-        IEM_MC_ARG(uint8_t,    u8Src,            1);
-        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        if (!pImpl->pfnLockedU8)
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_MEM_MAP(pu8Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_FETCH_GREG_U8(u8Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
-        else
-            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU8, pu8Dst, u8Src, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, fAccess);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
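
The note in the worker above is the key ordering constraint: EFLAGS live in a local until the mapped memory operand has been modified and unmapped, and a LOCK prefix selects the atomic flavour of the operation. A loose standalone analogue using C11 atomics (illustration only; the real code goes through the IEM_MC_* macros):

#include <stdatomic.h>
#include <stdint.h>

/* ADD-style byte read-modify-write: LOCK semantics use an atomic
   fetch-add, otherwise a plain load/op/store pair suffices. */
static uint8_t demoAdd8(_Atomic uint8_t *pu8Dst, uint8_t u8Src, int fLocked)
{
    if (fLocked)
        return (uint8_t)(atomic_fetch_add(pu8Dst, u8Src) + u8Src);
    uint8_t u8Result = (uint8_t)(atomic_load_explicit(pu8Dst, memory_order_relaxed) + u8Src);
    atomic_store_explicit(pu8Dst, u8Result, memory_order_relaxed);
    return u8Result;   /* flags would be derived from this and committed last */
}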
-
-
-/**
- * Common worker for word/dword/qword instructions like ADD, AND, OR, ++ with
- * memory/register as the destination.
- *
- * @param   pImpl       Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpBinaryOperator_rm_rv, PCIEMOPBINSIZES, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *, pu16Dst, 0);
-                IEM_MC_ARG(uint16_t,   u16Src,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *, pu32Dst, 0);
-                IEM_MC_ARG(uint32_t,   u32Src,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-
-                if (pImpl != &g_iemAImpl_test)
-                    IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *, pu64Dst, 0);
-                IEM_MC_ARG(uint64_t,   u64Src,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-        }
-    }
-    else
-    {
-        /*
-         * We're accessing memory.
-         * Note! We're putting the eflags on the stack here so we can commit them
-         *       after the memory.
-         */
-        uint32_t const fAccess = pImpl->pfnLockedU8 ? IEM_ACCESS_DATA_RW : IEM_ACCESS_DATA_R /* CMP,TEST */;
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *, pu16Dst,          0);
-                IEM_MC_ARG(uint16_t,   u16Src,           1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                if (!pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu16Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, fAccess);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *, pu32Dst,          0);
-                IEM_MC_ARG(uint32_t,   u32Src,           1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                if (!pImpl->pfnLockedU32)
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu32Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, fAccess);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *, pu64Dst,          0);
-                IEM_MC_ARG(uint64_t,   u64Src,           1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                if (!pImpl->pfnLockedU64)
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MEM_MAP(pu64Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, fAccess);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-        }
-    }
-    return VINF_SUCCESS;
-}
-
-
-/**
- * Common worker for byte instructions like ADD, AND, OR, ++ with a register as
- * the destination.
- *
- * @param   pImpl       Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpBinaryOperator_r8_rm, PCIEMOPBINSIZES, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(3, 0);
-        IEM_MC_ARG(uint8_t *,  pu8Dst,  0);
-        IEM_MC_ARG(uint8_t,    u8Src,   1);
-        IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-        IEM_MC_FETCH_GREG_U8(u8Src, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_GREG_U8(pu8Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * We're accessing memory.
-         */
-        IEM_MC_BEGIN(3, 1);
-        IEM_MC_ARG(uint8_t *,  pu8Dst,  0);
-        IEM_MC_ARG(uint8_t,    u8Src,   1);
-        IEM_MC_ARG(uint32_t *, pEFlags, 2);
-        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_MEM_U8(u8Src, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-        IEM_MC_REF_GREG_U8(pu8Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/**
- * Common worker for word/dword/qword instructions like ADD, AND, OR, ++ with a
- * register as the destination.
- *
- * @param   pImpl       Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpBinaryOperator_rv_rm, PCIEMOPBINSIZES, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *, pu16Dst, 0);
-                IEM_MC_ARG(uint16_t,   u16Src,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-                IEM_MC_FETCH_GREG_U16(u16Src, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U16(pu16Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *, pu32Dst, 0);
-                IEM_MC_ARG(uint32_t,   u32Src,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-                IEM_MC_FETCH_GREG_U32(u32Src, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U32(pu32Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *, pu64Dst, 0);
-                IEM_MC_ARG(uint64_t,   u64Src,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-                IEM_MC_FETCH_GREG_U64(u64Src, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U64(pu64Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-        }
-    }
-    else
-    {
-        /*
-         * We're accessing memory.
-         */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint16_t *, pu16Dst, 0);
-                IEM_MC_ARG(uint16_t,   u16Src,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U16(u16Src, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_GREG_U16(pu16Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint32_t *, pu32Dst, 0);
-                IEM_MC_ARG(uint32_t,   u32Src,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U32(u32Src, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_GREG_U32(pu32Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint64_t *, pu64Dst, 0);
-                IEM_MC_ARG(uint64_t,   u64Src,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags, 2);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U64(u64Src, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_REF_GREG_U64(pu64Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-        }
-    }
-    return VINF_SUCCESS;
-}
-
-
-/**
- * Common worker for instructions like ADD, AND, OR, ++ with working on AL with
- * a byte immediate.
- *
- * @param   pImpl       Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpBinaryOperator_AL_Ib, PCIEMOPBINSIZES, pImpl)
-{
-    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-    IEM_MC_BEGIN(3, 0);
-    IEM_MC_ARG(uint8_t *,       pu8Dst,             0);
-    IEM_MC_ARG_CONST(uint8_t,   u8Src,/*=*/ u8Imm,  1);
-    IEM_MC_ARG(uint32_t *,      pEFlags,            2);
-
-    IEM_MC_REF_GREG_U8(pu8Dst, X86_GREG_xAX);
-    IEM_MC_REF_EFLAGS(pEFlags);
-    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
-
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/**
- * Common worker for instructions like ADD, AND, OR, ++ with working on
- * AX/EAX/RAX with a word/dword immediate.
- *
- * @param   pImpl       Pointer to the instruction implementation (assembly).
- */
-FNIEMOP_DEF_1(iemOpHlpBinaryOperator_rAX_Iz, PCIEMOPBINSIZES, pImpl)
-{
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-        {
-            uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-            IEM_MC_BEGIN(3, 0);
-            IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-            IEM_MC_ARG_CONST(uint16_t,  u16Src,/*=*/ u16Imm,    1);
-            IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-            IEM_MC_REF_GREG_U16(pu16Dst, X86_GREG_xAX);
-            IEM_MC_REF_EFLAGS(pEFlags);
-            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-        }
-
-        case IEMMODE_32BIT:
-        {
-            uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(&u32Imm);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-            IEM_MC_BEGIN(3, 0);
-            IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-            IEM_MC_ARG_CONST(uint32_t,  u32Src,/*=*/ u32Imm,    1);
-            IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-            IEM_MC_REF_GREG_U32(pu32Dst, X86_GREG_xAX);
-            IEM_MC_REF_EFLAGS(pEFlags);
-            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-
-            if (pImpl != &g_iemAImpl_test)
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-        }
-
-        case IEMMODE_64BIT:
-        {
-            uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64Imm);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-            IEM_MC_BEGIN(3, 0);
-            IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-            IEM_MC_ARG_CONST(uint64_t,  u64Src,/*=*/ u64Imm,    1);
-            IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-            IEM_MC_REF_GREG_U64(pu64Dst, X86_GREG_xAX);
-            IEM_MC_REF_EFLAGS(pEFlags);
-            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-        }
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
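
Note the 64-bit case above: IEM_OPCODE_GET_NEXT_S32_SX_U64 fetches only 32 immediate bits and sign-extends them, because the Iz form never carries a full 64-bit immediate. A worked example of that extension:

#include <assert.h>
#include <stdint.h>

static uint64_t demoSignExtendS32ToU64(uint32_t u32Imm)
{
    return (uint64_t)(int64_t)(int32_t)u32Imm;
}

int main(void)
{
    assert(demoSignExtendS32ToU64(0xffffffffu) == UINT64_MAX);   /* -1 stays -1 */
    assert(demoSignExtendS32ToU64(0x7fffffffu) == UINT64_C(0x7fffffff));
    return 0;
}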
-
-
-/** Opcodes 0xf1, 0xd6. */
-FNIEMOP_DEF(iemOp_Invalid)
-{
-    IEMOP_MNEMONIC("Invalid");
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Invalid with RM byte. */
-FNIEMOPRM_DEF(iemOp_InvalidWithRM)
-{
-    RT_NOREF_PV(bRm);
-    IEMOP_MNEMONIC("InvalidWithRM");
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-
-/** @name ..... opcodes.
- *
- * @{
- */
-
-/** @}  */
-
-
-/** @name Two byte opcodes (first byte 0x0f).
- *
- * @{
- */
-
-/** Opcode 0x0f 0x00 /0. */
-FNIEMOPRM_DEF(iemOp_Grp6_sldt)
-{
-    IEMOP_MNEMONIC("sldt Rv/Mw");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DECODED_NL_1(OP_SLDT, IEMOPFORM_M_REG, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Ldtr);
-                IEM_MC_FETCH_LDTR_U16(u16Ldtr);
-                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Ldtr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Ldtr);
-                IEM_MC_FETCH_LDTR_U32(u32Ldtr);
-                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Ldtr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Ldtr);
-                IEM_MC_FETCH_LDTR_U64(u64Ldtr);
-                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Ldtr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint16_t, u16Ldtr);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DECODED_NL_1(OP_SLDT, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        IEM_MC_FETCH_LDTR_U16(u16Ldtr);
-        IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Ldtr);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x00 /1. */
-FNIEMOPRM_DEF(iemOp_Grp6_str)
-{
-    IEMOP_MNEMONIC("str Rv/Mw");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DECODED_NL_1(OP_STR, IEMOPFORM_M_REG, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Tr);
-                IEM_MC_FETCH_TR_U16(u16Tr);
-                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Tr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Tr);
-                IEM_MC_FETCH_TR_U32(u32Tr);
-                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Tr);
-                IEM_MC_FETCH_TR_U64(u64Tr);
-                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tr);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint16_t, u16Tr);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DECODED_NL_1(OP_STR, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        IEM_MC_FETCH_TR_U16(u16Tr);
-        IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Tr);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x00 /2. */
-FNIEMOPRM_DEF(iemOp_Grp6_lldt)
-{
-    IEMOP_MNEMONIC("lldt Ew");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DECODED_NL_1(OP_LLDT, IEMOPFORM_M_REG, OP_PARM_Ew, DISOPTYPE_DANGEROUS);
-        IEM_MC_BEGIN(1, 0);
-        IEM_MC_ARG(uint16_t, u16Sel, 0);
-        IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_lldt, u16Sel);
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(1, 1);
-        IEM_MC_ARG(uint16_t, u16Sel, 0);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DECODED_NL_1(OP_LLDT, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS);
-        IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO(); /** @todo test order */
-        IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_lldt, u16Sel);
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x00 /3. */
-FNIEMOPRM_DEF(iemOp_Grp6_ltr)
-{
-    IEMOP_MNEMONIC("ltr Ew");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(1, 0);
-        IEM_MC_ARG(uint16_t, u16Sel, 0);
-        IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_ltr, u16Sel);
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(1, 1);
-        IEM_MC_ARG(uint16_t, u16Sel, 0);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO(); /** @todo test order */
-        IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_ltr, u16Sel);
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Common worker for Grp6 verr/verw (0x0f 0x00 /4 and /5). */
-FNIEMOP_DEF_2(iemOpCommonGrp6VerX, uint8_t, bRm, bool, fWrite)
-{
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DECODED_NL_1(fWrite ? OP_VERW : OP_VERR, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        IEM_MC_BEGIN(2, 0);
-        IEM_MC_ARG(uint16_t,    u16Sel,            0);
-        IEM_MC_ARG_CONST(bool,  fWriteArg, fWrite, 1);
-        IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_CALL_CIMPL_2(iemCImpl_VerX, u16Sel, fWriteArg);
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(2, 1);
-        IEM_MC_ARG(uint16_t,    u16Sel,            0);
-        IEM_MC_ARG_CONST(bool,  fWriteArg, fWrite, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DECODED_NL_1(fWrite ? OP_VERW : OP_VERR, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-        IEM_MC_CALL_CIMPL_2(iemCImpl_VerX, u16Sel, fWriteArg);
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x00 /4. */
-FNIEMOPRM_DEF(iemOp_Grp6_verr)
-{
-    IEMOP_MNEMONIC("verr Ew");
-    IEMOP_HLP_MIN_286();
-    return FNIEMOP_CALL_2(iemOpCommonGrp6VerX, bRm, false);
-}
-
-
-/** Opcode 0x0f 0x00 /5. */
-FNIEMOPRM_DEF(iemOp_Grp6_verw)
-{
-    IEMOP_MNEMONIC("verr Ew");
-    IEMOP_HLP_MIN_286();
-    return FNIEMOP_CALL_2(iemOpCommonGrp6VerX, bRm, true);
-}
-
-
-/**
- * Group 6 jump table.
- */
-IEM_STATIC const PFNIEMOPRM g_apfnGroup6[8] =
-{
-    iemOp_Grp6_sldt,
-    iemOp_Grp6_str,
-    iemOp_Grp6_lldt,
-    iemOp_Grp6_ltr,
-    iemOp_Grp6_verr,
-    iemOp_Grp6_verw,
-    iemOp_InvalidWithRM,
-    iemOp_InvalidWithRM
-};
-
-/** Opcode 0x0f 0x00. */
-FNIEMOP_DEF(iemOp_Grp6)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    return FNIEMOP_CALL_1(g_apfnGroup6[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK], bRm);
-}
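
Group dispatch, as above, selects one of eight handlers using the reg field (bits 5:3) of the ModR/M byte. The extraction, as a standalone sketch with demo names:

#include <stdint.h>

#define DEMO_MODRM_REG_SHIFT 3
#define DEMO_MODRM_REG_SMASK 7

/* The /0../7 digit of a group opcode: ModR/M bits 5:3. */
static unsigned demoModRmRegField(uint8_t bRm)
{
    return (bRm >> DEMO_MODRM_REG_SHIFT) & DEMO_MODRM_REG_SMASK;
}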
-
-
-/** Opcode 0x0f 0x01 /0. */
-FNIEMOP_DEF_1(iemOp_Grp7_sgdt, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("sgdt Ms");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_64BIT_OP_SIZE();
-    IEM_MC_BEGIN(2, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                                    0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                                1);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_2(iemCImpl_sgdt, iEffSeg, GCPtrEffSrc);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 /0. */
-FNIEMOP_DEF(iemOp_Grp7_vmcall)
-{
-    IEMOP_BITCH_ABOUT_STUB();
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 /0. */
-FNIEMOP_DEF(iemOp_Grp7_vmlaunch)
-{
-    IEMOP_BITCH_ABOUT_STUB();
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 /0. */
-FNIEMOP_DEF(iemOp_Grp7_vmresume)
-{
-    IEMOP_BITCH_ABOUT_STUB();
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 /0. */
-FNIEMOP_DEF(iemOp_Grp7_vmxoff)
-{
-    IEMOP_BITCH_ABOUT_STUB();
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 /1. */
-FNIEMOP_DEF_1(iemOp_Grp7_sidt, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("sidt Ms");
-    IEMOP_HLP_MIN_286();
-    IEMOP_HLP_64BIT_OP_SIZE();
-    IEM_MC_BEGIN(2, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                                    0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                                1);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_2(iemCImpl_sidt, iEffSeg, GCPtrEffSrc);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 /1. */
-FNIEMOP_DEF(iemOp_Grp7_monitor)
-{
-    IEMOP_MNEMONIC("monitor");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX(); /** @todo Verify that monitor is allergic to lock prefixes. */
-    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_monitor, pVCpu->iem.s.iEffSeg);
-}
-
-
-/** Opcode 0x0f 0x01 /1. */
-FNIEMOP_DEF(iemOp_Grp7_mwait)
-{
-    IEMOP_MNEMONIC("mwait"); /** @todo Verify that mwait is allergic to lock prefixes. */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_mwait);
-}
-
-
-/** Opcode 0x0f 0x01 /2. */
-FNIEMOP_DEF_1(iemOp_Grp7_lgdt, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("lgdt");
-    IEMOP_HLP_64BIT_OP_SIZE();
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                                    0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                                1);
-    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSizeArg,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_lgdt, iEffSeg, GCPtrEffSrc, enmEffOpSizeArg);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 0xd0. */
-FNIEMOP_DEF(iemOp_Grp7_xgetbv)
-{
-    IEMOP_MNEMONIC("xgetbv");
-    if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fXSaveRstor)
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_xgetbv);
-    }
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 0xd1. */
-FNIEMOP_DEF(iemOp_Grp7_xsetbv)
-{
-    IEMOP_MNEMONIC("xsetbv");
-    if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fXSaveRstor)
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_xsetbv);
-    }
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x01 /3. */
-FNIEMOP_DEF_1(iemOp_Grp7_lidt, uint8_t, bRm)
-{
-    IEMMODE enmEffOpSize = pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT
-                         ? IEMMODE_64BIT
-                         : pVCpu->iem.s.enmEffOpSize;
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                            0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                        1);
-    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSizeArg,/*=*/enmEffOpSize,  2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_lidt, iEffSeg, GCPtrEffSrc, enmEffOpSizeArg);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 0xd8. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmrun);
-
-/** Opcode 0x0f 0x01 0xd9. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmmcall);
-
-/** Opcode 0x0f 0x01 0xda. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmload);
-
-/** Opcode 0x0f 0x01 0xdb. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmsave);
-
-/** Opcode 0x0f 0x01 0xdc. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_stgi);
-
-/** Opcode 0x0f 0x01 0xdd. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_clgi);
-
-/** Opcode 0x0f 0x01 0xde. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_skinit);
-
-/** Opcode 0x0f 0x01 0xdf. */
-FNIEMOP_UD_STUB(iemOp_Grp7_Amd_invlpga);
-
-/** Opcode 0x0f 0x01 /4. */
-FNIEMOP_DEF_1(iemOp_Grp7_smsw, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("smsw");
-    IEMOP_HLP_MIN_286();
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Tmp);
-                IEM_MC_FETCH_CR0_U16(u16Tmp);
-                if (IEM_GET_TARGET_CPU(pVCpu) > IEMTARGETCPU_386)
-                { /* likely */ }
-                else if (IEM_GET_TARGET_CPU(pVCpu) >= IEMTARGETCPU_386)
-                    IEM_MC_OR_LOCAL_U16(u16Tmp, 0xffe0);
-                else
-                    IEM_MC_OR_LOCAL_U16(u16Tmp, 0xfff0);
-                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Tmp);
-                IEM_MC_FETCH_CR0_U32(u32Tmp);
-                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                IEM_MC_FETCH_CR0_U64(u64Tmp);
-                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* Ignore operand size here, memory refs are always 16-bit. */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint16_t, u16Tmp);
-        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_CR0_U16(u16Tmp);
-        if (IEM_GET_TARGET_CPU(pVCpu) > IEMTARGETCPU_386)
-        { /* likely */ }
-        else if (IEM_GET_TARGET_CPU(pVCpu) >= IEMTARGETCPU_386)
-            IEM_MC_OR_LOCAL_U16(u16Tmp, 0xffe0);
-        else
-            IEM_MC_OR_LOCAL_U16(u16Tmp, 0xfff0);
-        IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Tmp);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-        return VINF_SUCCESS;
-    }
-}
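-
-
-/* Illustrative model of the OR masks applied above (an assumption based on
-   the documented behaviour of reserved MSW bits reading as ones on older
-   CPUs; the helper name is made up and not referenced by the decoder): */
-static uint16_t iemExampleSmswValue(uint16_t u16Cr0, unsigned uTargetCpu)
-{
-    if (uTargetCpu < 386)
-        return u16Cr0 | UINT16_C(0xfff0);   /* 286: bits 4..15 read as set. */
-    if (uTargetCpu == 386)
-        return u16Cr0 | UINT16_C(0xffe0);   /* 386: bits 5..15 set, bit 4 is ET. */
-    return u16Cr0;                          /* 486+: plain CR0 bits. */
-}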
-
-
-/** Opcode 0x0f 0x01 /6. */
-FNIEMOP_DEF_1(iemOp_Grp7_lmsw, uint8_t, bRm)
-{
-    /* The operand size is effectively ignored, all is 16-bit and only the
-       lower 4 bits are used. */
-    IEMOP_MNEMONIC("lmsw");
-    IEMOP_HLP_MIN_286();
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(1, 0);
-        IEM_MC_ARG(uint16_t, u16Tmp, 0);
-        IEM_MC_FETCH_GREG_U16(u16Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_lmsw, u16Tmp);
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(1, 1);
-        IEM_MC_ARG(uint16_t, u16Tmp, 0);
-        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_FETCH_MEM_U16(u16Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-        IEM_MC_CALL_CIMPL_1(iemCImpl_lmsw, u16Tmp);
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 /7. */
-FNIEMOP_DEF_1(iemOp_Grp7_invlpg, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("invlpg");
-    IEMOP_HLP_MIN_486();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(1, 1);
-    IEM_MC_ARG(RTGCPTR, GCPtrEffDst, 0);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEM_MC_CALL_CIMPL_1(iemCImpl_invlpg, GCPtrEffDst);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x01 /7. */
-FNIEMOP_DEF(iemOp_Grp7_swapgs)
-{
-    IEMOP_MNEMONIC("swapgs");
-    IEMOP_HLP_ONLY_64BIT();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_swapgs);
-}
-
-
-/** Opcode 0x0f 0x01 /7. */
-FNIEMOP_DEF(iemOp_Grp7_rdtscp)
-{
-    NOREF(pVCpu);
-    IEMOP_BITCH_ABOUT_STUB();
-    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
-}
-
-
-/** Opcode 0x0f 0x01. */
-FNIEMOP_DEF(iemOp_Grp7)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0:
-            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp7_sgdt, bRm);
-            switch (bRm & X86_MODRM_RM_MASK)
-            {
-                case 1: return FNIEMOP_CALL(iemOp_Grp7_vmcall);
-                case 2: return FNIEMOP_CALL(iemOp_Grp7_vmlaunch);
-                case 3: return FNIEMOP_CALL(iemOp_Grp7_vmresume);
-                case 4: return FNIEMOP_CALL(iemOp_Grp7_vmxoff);
-            }
-            return IEMOP_RAISE_INVALID_OPCODE();
-
-        case 1:
-            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp7_sidt, bRm);
-            switch (bRm & X86_MODRM_RM_MASK)
-            {
-                case 0: return FNIEMOP_CALL(iemOp_Grp7_monitor);
-                case 1: return FNIEMOP_CALL(iemOp_Grp7_mwait);
-            }
-            return IEMOP_RAISE_INVALID_OPCODE();
-
-        case 2:
-            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp7_lgdt, bRm);
-            switch (bRm & X86_MODRM_RM_MASK)
-            {
-                case 0: return FNIEMOP_CALL(iemOp_Grp7_xgetbv);
-                case 1: return FNIEMOP_CALL(iemOp_Grp7_xsetbv);
-            }
-            return IEMOP_RAISE_INVALID_OPCODE();
-
-        case 3:
-            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp7_lidt, bRm);
-            switch (bRm & X86_MODRM_RM_MASK)
-            {
-                case 0: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmrun);
-                case 1: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmmcall);
-                case 2: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmload);
-                case 3: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmsave);
-                case 4: return FNIEMOP_CALL(iemOp_Grp7_Amd_stgi);
-                case 5: return FNIEMOP_CALL(iemOp_Grp7_Amd_clgi);
-                case 6: return FNIEMOP_CALL(iemOp_Grp7_Amd_skinit);
-                case 7: return FNIEMOP_CALL(iemOp_Grp7_Amd_invlpga);
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-
-        case 4:
-            return FNIEMOP_CALL_1(iemOp_Grp7_smsw, bRm);
-
-        case 5:
-            return IEMOP_RAISE_INVALID_OPCODE();
-
-        case 6:
-            return FNIEMOP_CALL_1(iemOp_Grp7_lmsw, bRm);
-
-        case 7:
-            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp7_invlpg, bRm);
-            switch (bRm & X86_MODRM_RM_MASK)
-            {
-                case 0: return FNIEMOP_CALL(iemOp_Grp7_swapgs);
-                case 1: return FNIEMOP_CALL(iemOp_Grp7_rdtscp);
-            }
-            return IEMOP_RAISE_INVALID_OPCODE();
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
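-
-
-/* Illustrative helper showing how the X86_MODRM_* masks used by the
-   dispatcher above split a ModR/M byte (standard x86 layout: mod in bits
-   7:6, reg in bits 5:3, rm in bits 2:0; the helper name is made up). */
-static void iemExampleSplitModRm(uint8_t bRm, uint8_t *pbMod, uint8_t *pbReg, uint8_t *pbRm)
-{
-    *pbMod = (bRm >> 6) & 3;    /* 3 == X86_MODRM_MOD_MASK >> X86_MODRM_MOD_SHIFT */
-    *pbReg = (bRm >> 3) & 7;    /* 7 == X86_MODRM_REG_SMASK */
-    *pbRm  =  bRm       & 7;    /* 7 == X86_MODRM_RM_MASK */
-}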
-
-/** Opcode 0x0f 0x00 /3. */
-FNIEMOP_DEF_1(iemOpCommonLarLsl_Gv_Ew, bool, fIsLar)
-{
-    IEMOP_HLP_NO_REAL_OR_V86_MODE();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DECODED_NL_2(fIsLar ? OP_LAR : OP_LSL, IEMOPFORM_RM_REG, OP_PARM_Gv, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-            {
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,  pu16Dst,           0);
-                IEM_MC_ARG(uint16_t,    u16Sel,            1);
-                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
-
-                IEM_MC_REF_GREG_U16(pu16Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u16, pu16Dst, u16Sel, fIsLarArg);
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_32BIT:
-            case IEMMODE_64BIT:
-            {
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,  pu64Dst,           0);
-                IEM_MC_ARG(uint16_t,    u16Sel,            1);
-                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
-
-                IEM_MC_REF_GREG_U64(pu64Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u64, pu64Dst, u16Sel, fIsLarArg);
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-            {
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint16_t *,  pu16Dst,           0);
-                IEM_MC_ARG(uint16_t,    u16Sel,            1);
-                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
-                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DECODED_NL_2(fIsLar ? OP_LAR : OP_LSL, IEMOPFORM_RM_MEM, OP_PARM_Gv, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-
-                IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_REF_GREG_U16(pu16Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u16, pu16Dst, u16Sel, fIsLarArg);
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            case IEMMODE_32BIT:
-            case IEMMODE_64BIT:
-            {
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint64_t *,  pu64Dst,           0);
-                IEM_MC_ARG(uint16_t,    u16Sel,            1);
-                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
-                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DECODED_NL_2(fIsLar ? OP_LAR : OP_LSL, IEMOPFORM_RM_MEM, OP_PARM_Gv, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
-/** @todo testcase: make sure it's a 16-bit read. */
-
-                IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_REF_GREG_U64(pu64Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u64, pu64Dst, u16Sel, fIsLarArg);
-
-                IEM_MC_END();
-                return VINF_SUCCESS;
-            }
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-
-/** Opcode 0x0f 0x02. */
-FNIEMOP_DEF(iemOp_lar_Gv_Ew)
-{
-    IEMOP_MNEMONIC("lar Gv,Ew");
-    return FNIEMOP_CALL_1(iemOpCommonLarLsl_Gv_Ew, true);
-}
-
-
-/** Opcode 0x0f 0x03. */
-FNIEMOP_DEF(iemOp_lsl_Gv_Ew)
-{
-    IEMOP_MNEMONIC("lsl Gv,Ew");
-    return FNIEMOP_CALL_1(iemOpCommonLarLsl_Gv_Ew, false);
-}
-
-
-/** Opcode 0x0f 0x05. */
-FNIEMOP_DEF(iemOp_syscall)
-{
-    IEMOP_MNEMONIC("syscall"); /** @todo 286 LOADALL   */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_syscall);
-}
-
-
-/** Opcode 0x0f 0x06. */
-FNIEMOP_DEF(iemOp_clts)
-{
-    IEMOP_MNEMONIC("clts");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_clts);
-}
-
-
-/** Opcode 0x0f 0x07. */
-FNIEMOP_DEF(iemOp_sysret)
-{
-    IEMOP_MNEMONIC("sysret");  /** @todo 386 LOADALL   */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_sysret);
-}
-
-
-/** Opcode 0x0f 0x08. */
-FNIEMOP_STUB(iemOp_invd);
-// IEMOP_HLP_MIN_486();
-
-
-/** Opcode 0x0f 0x09. */
-FNIEMOP_DEF(iemOp_wbinvd)
-{
-    IEMOP_MNEMONIC("wbinvd");
-    IEMOP_HLP_MIN_486();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_BEGIN(0, 0);
-    IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO();
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS; /* ignore for now */
-}
-
-
-/** Opcode 0x0f 0x0b. */
-FNIEMOP_DEF(iemOp_ud2)
-{
-    IEMOP_MNEMONIC("ud2");
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-/** Opcode 0x0f 0x0d. */
-FNIEMOP_DEF(iemOp_nop_Ev_GrpP)
-{
-    /* AMD prefetch group, Intel implements this as NOP Ev (and so do we). */
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->f3DNowPrefetch)
-    {
-        IEMOP_MNEMONIC("GrpP");
-        return IEMOP_RAISE_INVALID_OPCODE();
-    }
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_MNEMONIC("GrpP");
-        return IEMOP_RAISE_INVALID_OPCODE();
-    }
-
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 2: /* Aliased to /0 for the time being. */
-        case 4: /* Aliased to /0 for the time being. */
-        case 5: /* Aliased to /0 for the time being. */
-        case 6: /* Aliased to /0 for the time being. */
-        case 7: /* Aliased to /0 for the time being. */
-        case 0: IEMOP_MNEMONIC("prefetch"); break;
-        case 1: IEMOP_MNEMONIC("prefetchw"); break;
-        case 3: IEMOP_MNEMONIC("prefetchw"); break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-
-    IEM_MC_BEGIN(0, 1);
-    IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    /* Currently a NOP. */
-    NOREF(GCPtrEffSrc);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x0e. */
-FNIEMOP_STUB(iemOp_femms);
-
-
-/** Opcode 0x0f 0x0f 0x0c. */
-FNIEMOP_STUB(iemOp_3Dnow_pi2fw_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x0d. */
-FNIEMOP_STUB(iemOp_3Dnow_pi2fd_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x1c. */
-FNIEMOP_STUB(iemOp_3Dnow_pf2fw_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x1d. */
-FNIEMOP_STUB(iemOp_3Dnow_pf2fd_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x8a. */
-FNIEMOP_STUB(iemOp_3Dnow_pfnacc_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x8e. */
-FNIEMOP_STUB(iemOp_3Dnow_pfpnacc_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x90. */
-FNIEMOP_STUB(iemOp_3Dnow_pfcmpge_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x94. */
-FNIEMOP_STUB(iemOp_3Dnow_pfmin_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x96. */
-FNIEMOP_STUB(iemOp_3Dnow_pfrcp_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x97. */
-FNIEMOP_STUB(iemOp_3Dnow_pfrsqrt_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x9a. */
-FNIEMOP_STUB(iemOp_3Dnow_pfsub_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0x9e. */
-FNIEMOP_STUB(iemOp_3Dnow_pfadd_PQ_Qq);
-
-/** Opcode 0x0f 0x0f 0xa0. */
-FNIEMOP_STUB(iemOp_3Dnow_pfcmpgt_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xa4. */
-FNIEMOP_STUB(iemOp_3Dnow_pfmax_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xa6. */
-FNIEMOP_STUB(iemOp_3Dnow_pfrcpit1_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xa7. */
-FNIEMOP_STUB(iemOp_3Dnow_pfrsqit1_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xaa. */
-FNIEMOP_STUB(iemOp_3Dnow_pfsubr_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xae. */
-FNIEMOP_STUB(iemOp_3Dnow_pfacc_PQ_Qq);
-
-/** Opcode 0x0f 0x0f 0xb0. */
-FNIEMOP_STUB(iemOp_3Dnow_pfcmpeq_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xb4. */
-FNIEMOP_STUB(iemOp_3Dnow_pfmul_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xb6. */
-FNIEMOP_STUB(iemOp_3Dnow_pfrcpit2_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xb7. */
-FNIEMOP_STUB(iemOp_3Dnow_pmulhrw_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xbb. */
-FNIEMOP_STUB(iemOp_3Dnow_pswapd_Pq_Qq);
-
-/** Opcode 0x0f 0x0f 0xbf. */
-FNIEMOP_STUB(iemOp_3Dnow_pavgusb_PQ_Qq);
-
-
-/** Opcode 0x0f 0x0f. */
-FNIEMOP_DEF(iemOp_3Dnow)
-{
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->f3DNow)
-    {
-        IEMOP_MNEMONIC("3Dnow");
-        return IEMOP_RAISE_INVALID_OPCODE();
-    }
-
-    /* This is pretty sparse, use switch instead of table. */
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    switch (b)
-    {
-        case 0x0c: return FNIEMOP_CALL(iemOp_3Dnow_pi2fw_Pq_Qq);
-        case 0x0d: return FNIEMOP_CALL(iemOp_3Dnow_pi2fd_Pq_Qq);
-        case 0x1c: return FNIEMOP_CALL(iemOp_3Dnow_pf2fw_Pq_Qq);
-        case 0x1d: return FNIEMOP_CALL(iemOp_3Dnow_pf2fd_Pq_Qq);
-        case 0x8a: return FNIEMOP_CALL(iemOp_3Dnow_pfnacc_Pq_Qq);
-        case 0x8e: return FNIEMOP_CALL(iemOp_3Dnow_pfpnacc_Pq_Qq);
-        case 0x90: return FNIEMOP_CALL(iemOp_3Dnow_pfcmpge_Pq_Qq);
-        case 0x94: return FNIEMOP_CALL(iemOp_3Dnow_pfmin_Pq_Qq);
-        case 0x96: return FNIEMOP_CALL(iemOp_3Dnow_pfrcp_Pq_Qq);
-        case 0x97: return FNIEMOP_CALL(iemOp_3Dnow_pfrsqrt_Pq_Qq);
-        case 0x9a: return FNIEMOP_CALL(iemOp_3Dnow_pfsub_Pq_Qq);
-        case 0x9e: return FNIEMOP_CALL(iemOp_3Dnow_pfadd_PQ_Qq);
-        case 0xa0: return FNIEMOP_CALL(iemOp_3Dnow_pfcmpgt_Pq_Qq);
-        case 0xa4: return FNIEMOP_CALL(iemOp_3Dnow_pfmax_Pq_Qq);
-        case 0xa6: return FNIEMOP_CALL(iemOp_3Dnow_pfrcpit1_Pq_Qq);
-        case 0xa7: return FNIEMOP_CALL(iemOp_3Dnow_pfrsqit1_Pq_Qq);
-        case 0xaa: return FNIEMOP_CALL(iemOp_3Dnow_pfsubr_Pq_Qq);
-        case 0xae: return FNIEMOP_CALL(iemOp_3Dnow_pfacc_PQ_Qq);
-        case 0xb0: return FNIEMOP_CALL(iemOp_3Dnow_pfcmpeq_Pq_Qq);
-        case 0xb4: return FNIEMOP_CALL(iemOp_3Dnow_pfmul_Pq_Qq);
-        case 0xb6: return FNIEMOP_CALL(iemOp_3Dnow_pfrcpit2_Pq_Qq);
-        case 0xb7: return FNIEMOP_CALL(iemOp_3Dnow_pmulhrw_Pq_Qq);
-        case 0xbb: return FNIEMOP_CALL(iemOp_3Dnow_pswapd_Pq_Qq);
-        case 0xbf: return FNIEMOP_CALL(iemOp_3Dnow_pavgusb_PQ_Qq);
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
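-
-
-/* Illustrative byte layout of a 3DNow! instruction (0x0f 0x0f /r ib, per the
-   usual AMD encoding): the function opcode byte dispatched on above trails
-   the ModR/M/SIB/displacement bytes.  E.g. pfadd mm0, mm1 (the array name
-   is made up):                                                            */
-static const uint8_t g_abExamplePfadd[] = { 0x0f, 0x0f, 0xc1, 0x9e };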
-
-
-/** Opcode 0x0f 0x10. */
-FNIEMOP_STUB(iemOp_movups_Vps_Wps__movupd_Vpd_Wpd__movss_Vss_Wss__movsd_Vsd_Wsd);
-
-
-/** Opcode 0x0f 0x11. */
-FNIEMOP_DEF(iemOp_movups_Wps_Vps__movupd_Wpd_Vpd__movss_Wss_Vss__movsd_Vsd_Wsd)
-{
-    /* Quick hack. Need to restructure all of this sometime later. */
-    uint32_t const fRelevantPrefix = pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ);
-    if (fRelevantPrefix == 0)
-    {
-        IEMOP_MNEMONIC("movups Wps,Vps");
-        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-        if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        {
-            /*
-             * Register, register.
-             */
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-            IEM_MC_BEGIN(0, 0);
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-            IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
-                                  ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            /*
-             * Memory, register.
-             */
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint128_t,                 uSrc); /** @todo optimize this one day... */
-            IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES(); /** @todo check if this is delayed this long for REPZ/NZ - yes it generally is! */
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-            IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_MEM_U128(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    else if (fRelevantPrefix == IEM_OP_PRF_REPNZ)
-    {
-        IEMOP_MNEMONIC("movsd Wsd,Vsd");
-        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-        if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        {
-            /*
-             * Register, register.
-             */
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t,                  uSrc);
-
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-            IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_XREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            /*
-             * Memory, register.
-             */
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint64_t,                  uSrc);
-            IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-            IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    else if (fRelevantPrefix == IEM_OP_PRF_REPZ /*0xf3*/)
-    {
-        IEMOP_MNEMONIC("movss Wss,Vss");
-        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-        if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        {
-            /*
-             * Register, register.
-             */
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t,                  uSrc);
-
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-            IEM_MC_FETCH_XREG_U32(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_XREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            /*
-             * Memory, register.
-             */
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint32_t,                  uSrc);
-            IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-            IEM_MC_FETCH_XREG_U32(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    else
-    {
-        IEMOP_BITCH_ABOUT_STUB();
-        return VERR_IEM_INSTR_NOT_IMPLEMENTED;
-    }
-    return VINF_SUCCESS;
-}
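-
-
-/* Illustrative mapping of the prefix selection implemented above for opcode
-   0x0f 0x11 (standard SSE prefix rules; the helper name is made up):      */
-static const char *iemExampleOp0f11Variant(uint32_t fPrefixes)
-{
-    switch (fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case 0:                  return "movups Wps,Vps";
-        case IEM_OP_PRF_REPNZ:   return "movsd Wsd,Vsd";    /* 0xf2 */
-        case IEM_OP_PRF_REPZ:    return "movss Wss,Vss";    /* 0xf3 */
-        case IEM_OP_PRF_SIZE_OP: return "movupd Wpd,Vpd";   /* 0x66, still stubbed above */
-        default:                 return "invalid combination";
-    }
-}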
-
-
-/** Opcode 0x0f 0x12. */
-FNIEMOP_STUB(iemOp_movlps_Vq_Mq__movhlps_Vq_Uq__movlpd_Vq_Mq__movsldup_Vq_Wq__movddup_Vq_Wq); //NEXT
-
-
-/** Opcode 0x0f 0x13. */
-FNIEMOP_DEF(iemOp_movlps_Mq_Vq__movlpd_Mq_Vq)
-{
-    /* Quick hack. Need to restructure all of this sometime later. */
-    if (pVCpu->iem.s.fPrefixes == IEM_OP_PRF_SIZE_OP)
-    {
-        IEMOP_MNEMONIC("movlpd Mq,Vq");
-        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-        if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        {
-#if 0
-            /*
-             * Register, register.
-             */
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t,                  uSrc);
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-            IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_XREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-#else
-            return IEMOP_RAISE_INVALID_OPCODE();
-#endif
-        }
-        else
-        {
-            /*
-             * Memory, register.
-             */
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint64_t,                  uSrc);
-            IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES(); /** @todo check if this is delayed this long for REPZ/NZ - yes it generally is! */
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-            IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-            IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        return VINF_SUCCESS;
-    }
-
-    IEMOP_BITCH_ABOUT_STUB();
-    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
-}
-
-
-/** Opcode 0x0f 0x14. */
-FNIEMOP_STUB(iemOp_unpckhlps_Vps_Wq__unpcklpd_Vpd_Wq);
-/** Opcode 0x0f 0x15. */
-FNIEMOP_STUB(iemOp_unpckhps_Vps_Wq__unpckhpd_Vpd_Wq);
-/** Opcode 0x0f 0x16. */
-FNIEMOP_STUB(iemOp_movhps_Vq_Mq__movlhps_Vq_Uq__movhpd_Vq_Mq__movshdup_Vq_Wq); //NEXT
-/** Opcode 0x0f 0x17. */
-FNIEMOP_STUB(iemOp_movhps_Mq_Vq__movhpd_Mq_Vq); //NEXT
-
-
-/** Opcode 0x0f 0x18. */
-FNIEMOP_DEF(iemOp_prefetch_Grp16)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 4: /* Aliased to /0 for the time being according to AMD. */
-            case 5: /* Aliased to /0 for the time being according to AMD. */
-            case 6: /* Aliased to /0 for the time being according to AMD. */
-            case 7: /* Aliased to /0 for the time being according to AMD. */
-            case 0: IEMOP_MNEMONIC("prefetchNTA m8"); break;
-            case 1: IEMOP_MNEMONIC("prefetchT0  m8"); break;
-            case 2: IEMOP_MNEMONIC("prefetchT1  m8"); break;
-            case 3: IEMOP_MNEMONIC("prefetchT2  m8"); break;
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        /* Currently a NOP. */
-        NOREF(GCPtrEffSrc);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-        return VINF_SUCCESS;
-    }
-
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x19..0x1f. */
-FNIEMOP_DEF(iemOp_nop_Ev)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        /* Currently a NOP. */
-        NOREF(GCPtrEffSrc);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x20. */
-FNIEMOP_DEF(iemOp_mov_Rd_Cd)
-{
-    /* mod is ignored, as are operand-size overrides. */
-    IEMOP_MNEMONIC("mov Rd,Cd");
-    IEMOP_HLP_MIN_386();
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_64BIT;
-    else
-        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    uint8_t iCrReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg;
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK)
-    {
-        /* The lock prefix can be used to encode CR8 accesses on some CPUs. */
-        if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fMovCr8In32Bit)
-            return IEMOP_RAISE_INVALID_OPCODE(); /* #UD takes precedence over #GP(), see test. */
-        iCrReg |= 8;
-    }
-    switch (iCrReg)
-    {
-        case 0: case 2: case 3: case 4: case 8:
-            break;
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-    IEMOP_HLP_DONE_DECODING();
-
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Rd_Cd, (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB, iCrReg);
-}
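-
-
-/* Illustrative sketch of the CR index calculation above: on CPUs with the
-   fMovCr8In32Bit feature the LOCK prefix selects the high CR bank, so
-   "lock mov eax, cr0" decodes as "mov eax, cr8" (helper name made up). */
-static uint8_t iemExampleCrIndex(uint8_t iCrReg, bool fLockPrefix)
-{
-    return fLockPrefix ? (uint8_t)(iCrReg | 8) : iCrReg;
-}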
-
-
-/** Opcode 0x0f 0x21. */
-FNIEMOP_DEF(iemOp_mov_Rd_Dd)
-{
-    IEMOP_MNEMONIC("mov Rd,Dd");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REX_R)
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Rd_Dd,
-                                   (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB,
-                                   ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK));
-}
-
-
-/** Opcode 0x0f 0x22. */
-FNIEMOP_DEF(iemOp_mov_Cd_Rd)
-{
-    /* mod is ignored, as are operand-size overrides. */
-    IEMOP_MNEMONIC("mov Cd,Rd");
-    IEMOP_HLP_MIN_386();
-    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
-        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_64BIT;
-    else
-        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    uint8_t iCrReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg;
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK)
-    {
-        /* The lock prefix can be used to encode CR8 accesses on some CPUs. */
-        if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fMovCr8In32Bit)
-            return IEMOP_RAISE_INVALID_OPCODE(); /* #UD takes precedence over #GP(), see test. */
-        iCrReg |= 8;
-    }
-    switch (iCrReg)
-    {
-        case 0: case 2: case 3: case 4: case 8:
-            break;
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-    IEMOP_HLP_DONE_DECODING();
-
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Cd_Rd, iCrReg, (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0x23. */
-FNIEMOP_DEF(iemOp_mov_Dd_Rd)
-{
-    IEMOP_MNEMONIC("mov Dd,Rd");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REX_R)
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Dd_Rd,
-                                   ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK),
-                                   (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0x24. */
-FNIEMOP_DEF(iemOp_mov_Rd_Td)
-{
-    IEMOP_MNEMONIC("mov Rd,Td");
-    /** @todo works on 386 and 486. */
-    /* The RM byte is not considered, see testcase. */
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x26. */
-FNIEMOP_DEF(iemOp_mov_Td_Rd)
-{
-    IEMOP_MNEMONIC("mov Td,Rd");
-    /** @todo works on 386 and 486. */
-    /* The RM byte is not considered, see testcase. */
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0x28. */
-FNIEMOP_DEF(iemOp_movaps_Vps_Wps__movapd_Vpd_Wpd)
-{
-    IEMOP_MNEMONIC(!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? "movaps r,mr" : "movapd r,mr");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /*
-         * Register, register.
-         */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-        IEM_MC_BEGIN(0, 0);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP))
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-        else
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-        IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
-                              (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * Register, memory.
-         */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint128_t,                 uSrc); /** @todo optimize this one day... */
-        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES(); /** @todo check if this is delayed this long for REPZ/NZ */
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP))
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-        else
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-
-        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-        IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
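-
-
-/* Illustrative check behind IEM_MC_FETCH_MEM_U128_ALIGN_SSE above
-   (assumption: movaps/movapd raise #GP(0) on a misaligned 16-byte operand;
-   the helper name is made up):                                           */
-static bool iemExampleIsSseAligned(uint64_t GCPtrMem)
-{
-    return (GCPtrMem & 15) == 0;
-}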
-
-
-/** Opcode 0x0f 0x29. */
-FNIEMOP_DEF(iemOp_movaps_Wps_Vps__movapd_Wpd_Vpd)
-{
-    IEMOP_MNEMONIC(!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? "movaps mr,r" : "movapd mr,r");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /*
-         * Register, register.
-         */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
-        IEM_MC_BEGIN(0, 0);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP))
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-        else
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
-                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * Memory, register.
-         */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint128_t,                 uSrc); /** @todo optimize this one day... */
-        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES(); /** @todo check if this is delayed this long for REPZ/NZ */
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP))
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-        else
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x2a. */
-FNIEMOP_STUB(iemOp_cvtpi2ps_Vps_Qpi__cvtpi2pd_Vpd_Qpi__cvtsi2ss_Vss_Ey__cvtsi2sd_Vsd_Ey); //NEXT
-
-
-/** Opcode 0x0f 0x2b. */
-FNIEMOP_DEF(iemOp_movntps_Mps_Vps__movntpd_Mpd_Vpd)
-{
-    IEMOP_MNEMONIC(!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? "movntps mr,r" : "movntpd mr,r");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /*
-         * memory, register.
-         */
-        IEM_MC_BEGIN(0, 2);
-        IEM_MC_LOCAL(uint128_t,                 uSrc); /** @todo optimize this one day... */
-        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES(); /** @todo check if this is delayed this long for REPZ/NZ */
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP))
-            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
-        else
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-
-        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    /* The register, register encoding is invalid. */
-    else
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x2c. */
-FNIEMOP_STUB(iemOp_cvttps2pi_Ppi_Wps__cvttpd2pi_Ppi_Wpd__cvttss2si_Gy_Wss__cvttsd2si_Yu_Wsd); //NEXT
-/** Opcode 0x0f 0x2d. */
-FNIEMOP_STUB(iemOp_cvtps2pi_Ppi_Wps__cvtpd2pi_QpiWpd__cvtss2si_Gy_Wss__cvtsd2si_Gy_Wsd);
-/** Opcode 0x0f 0x2e. */
-FNIEMOP_STUB(iemOp_ucomiss_Vss_Wss__ucomisd_Vsd_Wsd); //NEXT
-/** Opcode 0x0f 0x2f. */
-FNIEMOP_STUB(iemOp_comiss_Vss_Wss__comisd_Vsd_Wsd);
-
-
-/** Opcode 0x0f 0x30. */
-FNIEMOP_DEF(iemOp_wrmsr)
-{
-    IEMOP_MNEMONIC("wrmsr");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_wrmsr);
-}
-
-
-/** Opcode 0x0f 0x31. */
-FNIEMOP_DEF(iemOp_rdtsc)
-{
-    IEMOP_MNEMONIC("rdtsc");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rdtsc);
-}
-
-
-/** Opcode 0x0f 0x32. */
-FNIEMOP_DEF(iemOp_rdmsr)
-{
-    IEMOP_MNEMONIC("rdmsr");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rdmsr);
-}
-
-
-/** Opcode 0x0f 0x33. */
-FNIEMOP_STUB(iemOp_rdpmc);
-/** Opcode 0x0f 0x34. */
-FNIEMOP_STUB(iemOp_sysenter);
-/** Opcode 0x0f 0x35. */
-FNIEMOP_STUB(iemOp_sysexit);
-/** Opcode 0x0f 0x37. */
-FNIEMOP_STUB(iemOp_getsec);
-/** Opcode 0x0f 0x38. */
-FNIEMOP_UD_STUB(iemOp_3byte_Esc_A4); /* Here there be dragons... */
-/** Opcode 0x0f 0x3a. */
-FNIEMOP_UD_STUB(iemOp_3byte_Esc_A5); /* Here there be dragons... */
-
-
-/**
- * Implements a conditional move.
- *
- * Wish there were an obvious way to do this where we could share and reduce
- * code bloat.
- *
- * @param   a_Cnd       The conditional "microcode" operation.
- */
-#define CMOV_X(a_Cnd) \
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm); \
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT)) \
-    { \
-        switch (pVCpu->iem.s.enmEffOpSize) \
-        { \
-            case IEMMODE_16BIT: \
-                IEM_MC_BEGIN(0, 1); \
-                IEM_MC_LOCAL(uint16_t, u16Tmp); \
-                a_Cnd { \
-                    IEM_MC_FETCH_GREG_U16(u16Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB); \
-                    IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Tmp); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            case IEMMODE_32BIT: \
-                IEM_MC_BEGIN(0, 1); \
-                IEM_MC_LOCAL(uint32_t, u32Tmp); \
-                a_Cnd { \
-                    IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB); \
-                    IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp); \
-                } IEM_MC_ELSE() { \
-                    IEM_MC_CLEAR_HIGH_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            case IEMMODE_64BIT: \
-                IEM_MC_BEGIN(0, 1); \
-                IEM_MC_LOCAL(uint64_t, u64Tmp); \
-                a_Cnd { \
-                    IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB); \
-                    IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            IEM_NOT_REACHED_DEFAULT_CASE_RET(); \
-        } \
-    } \
-    else \
-    { \
-        switch (pVCpu->iem.s.enmEffOpSize) \
-        { \
-            case IEMMODE_16BIT: \
-                IEM_MC_BEGIN(0, 2); \
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc); \
-                IEM_MC_LOCAL(uint16_t, u16Tmp); \
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
-                IEM_MC_FETCH_MEM_U16(u16Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
-                a_Cnd { \
-                    IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Tmp); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            case IEMMODE_32BIT: \
-                IEM_MC_BEGIN(0, 2); \
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc); \
-                IEM_MC_LOCAL(uint32_t, u32Tmp); \
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
-                IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
-                a_Cnd { \
-                    IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp); \
-                } IEM_MC_ELSE() { \
-                    IEM_MC_CLEAR_HIGH_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            case IEMMODE_64BIT: \
-                IEM_MC_BEGIN(0, 2); \
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc); \
-                IEM_MC_LOCAL(uint64_t, u64Tmp); \
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
-                IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
-                a_Cnd { \
-                    IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp); \
-                } IEM_MC_ENDIF(); \
-                IEM_MC_ADVANCE_RIP(); \
-                IEM_MC_END(); \
-                return VINF_SUCCESS; \
-    \
-            IEM_NOT_REACHED_DEFAULT_CASE_RET(); \
-        } \
-    } do {} while (0)
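-
-
-/* Illustrative model of the 32-bit quirk encoded in the IEM_MC_ELSE()
-   branches above (assumes standard CMOVcc semantics): in 64-bit mode a
-   32-bit cmov writes its destination even when the condition is false,
-   clearing the upper half.  The helper name is made up. */
-static uint64_t iemExampleCmov32(uint64_t uDst, uint32_t uSrc, bool fCond)
-{
-    return fCond ? (uint64_t)uSrc : (uint64_t)(uint32_t)uDst;
-}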
-
-
-
-/** Opcode 0x0f 0x40. */
-FNIEMOP_DEF(iemOp_cmovo_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovo Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF));
-}
-
-
-/** Opcode 0x0f 0x41. */
-FNIEMOP_DEF(iemOp_cmovno_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovno Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_OF));
-}
-
-
-/** Opcode 0x0f 0x42. */
-FNIEMOP_DEF(iemOp_cmovc_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovc Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF));
-}
-
-
-/** Opcode 0x0f 0x43. */
-FNIEMOP_DEF(iemOp_cmovnc_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovnc Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_CF));
-}
-
-
-/** Opcode 0x0f 0x44. */
-FNIEMOP_DEF(iemOp_cmove_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmove Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF));
-}
-
-
-/** Opcode 0x0f 0x45. */
-FNIEMOP_DEF(iemOp_cmovne_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovne Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_ZF));
-}
-
-
-/** Opcode 0x0f 0x46. */
-FNIEMOP_DEF(iemOp_cmovbe_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovbe Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF));
-}
-
-
-/** Opcode 0x0f 0x47. */
-FNIEMOP_DEF(iemOp_cmovnbe_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovnbe Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_NO_BITS_SET(X86_EFL_CF | X86_EFL_ZF));
-}
-
-
-/** Opcode 0x0f 0x48. */
-FNIEMOP_DEF(iemOp_cmovs_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovs Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF));
-}
-
-
-/** Opcode 0x0f 0x49. */
-FNIEMOP_DEF(iemOp_cmovns_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovns Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_SF));
-}
-
-
-/** Opcode 0x0f 0x4a. */
-FNIEMOP_DEF(iemOp_cmovp_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovp Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF));
-}
-
-
-/** Opcode 0x0f 0x4b. */
-FNIEMOP_DEF(iemOp_cmovnp_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovnp Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_PF));
-}
-
-
-/** Opcode 0x0f 0x4c. */
-FNIEMOP_DEF(iemOp_cmovl_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovl Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF));
-}
-
-
-/** Opcode 0x0f 0x4d. */
-FNIEMOP_DEF(iemOp_cmovnl_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovnl Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BITS_EQ(X86_EFL_SF, X86_EFL_OF));
-}
-
-
-/** Opcode 0x0f 0x4e. */
-FNIEMOP_DEF(iemOp_cmovle_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovle Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF));
-}
-
-
-/** Opcode 0x0f 0x4f. */
-FNIEMOP_DEF(iemOp_cmovnle_Gv_Ev)
-{
-    IEMOP_MNEMONIC("cmovnle Gv,Ev");
-    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF));
-}
-
-#undef CMOV_X
-
-/** Opcode 0x0f 0x50. */
-FNIEMOP_STUB(iemOp_movmskps_Gy_Ups__movmskpd_Gy_Upd);
-/** Opcode 0x0f 0x51. */
-FNIEMOP_STUB(iemOp_sqrtps_Wps_Vps__sqrtpd_Wpd_Vpd__sqrtss_Vss_Wss__sqrtsd_Vsd_Wsd);
-/** Opcode 0x0f 0x52. */
-FNIEMOP_STUB(iemOp_rsqrtps_Wps_Vps__rsqrtss_Vss_Wss);
-/** Opcode 0x0f 0x53. */
-FNIEMOP_STUB(iemOp_rcpps_Wps_Vps__rcpss_Vs_Wss);
-/** Opcode 0x0f 0x54. */
-FNIEMOP_STUB(iemOp_andps_Vps_Wps__andpd_Wpd_Vpd);
-/** Opcode 0x0f 0x55. */
-FNIEMOP_STUB(iemOp_andnps_Vps_Wps__andnpd_Wpd_Vpd);
-/** Opcode 0x0f 0x56. */
-FNIEMOP_STUB(iemOp_orps_Wpd_Vpd__orpd_Wpd_Vpd);
-/** Opcode 0x0f 0x57. */
-FNIEMOP_STUB(iemOp_xorps_Vps_Wps__xorpd_Wpd_Vpd);
-/** Opcode 0x0f 0x58. */
-FNIEMOP_STUB(iemOp_addps_Vps_Wps__addpd_Vpd_Wpd__addss_Vss_Wss__addsd_Vsd_Wsd); //NEXT
-/** Opcode 0x0f 0x59. */
-FNIEMOP_STUB(iemOp_mulps_Vps_Wps__mulpd_Vpd_Wpd__mulss_Vss__Wss__mulsd_Vsd_Wsd);//NEXT
-/** Opcode 0x0f 0x5a. */
-FNIEMOP_STUB(iemOp_cvtps2pd_Vpd_Wps__cvtpd2ps_Vps_Wpd__cvtss2sd_Vsd_Wss__cvtsd2ss_Vss_Wsd);
-/** Opcode 0x0f 0x5b. */
-FNIEMOP_STUB(iemOp_cvtdq2ps_Vps_Wdq__cvtps2dq_Vdq_Wps__cvtps2dq_Vdq_Wps);
-/** Opcode 0x0f 0x5c. */
-FNIEMOP_STUB(iemOp_subps_Vps_Wps__subpd_Vps_Wdp__subss_Vss_Wss__subsd_Vsd_Wsd);
-/** Opcode 0x0f 0x5d. */
-FNIEMOP_STUB(iemOp_minps_Vps_Wps__minpd_Vpd_Wpd__minss_Vss_Wss__minsd_Vsd_Wsd);
-/** Opcode 0x0f 0x5e. */
-FNIEMOP_STUB(iemOp_divps_Vps_Wps__divpd_Vpd_Wpd__divss_Vss_Wss__divsd_Vsd_Wsd);
-/** Opcode 0x0f 0x5f. */
-FNIEMOP_STUB(iemOp_maxps_Vps_Wps__maxpd_Vpd_Wpd__maxss_Vss_Wss__maxsd_Vsd_Wsd);
-
-
-/**
- * Common worker for SSE2 and MMX instructions on the forms:
- *      pxxxx xmm1, xmm2/mem128
- *      pxxxx mm1, mm2/mem32
- *
- * The 2nd operand is the first half of a register, which in the memory case
- * means a 32-bit memory access for MMX and 128-bit aligned 64-bit or 128-bit
- * memory access for SSE.
- *
- * Exceptions type 4.
- */
-FNIEMOP_DEF_1(iemOpCommonMmxSse_LowLow_To_Full, PCIEMOPMEDIAF1L1, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint128_t *,          pDst, 0);
-                IEM_MC_ARG(uint64_t const *,     pSrc, 1);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_XREG_U64_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint128_t *,                 pDst,       0);
-                IEM_MC_LOCAL(uint64_t,                  uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U64_ALIGN_U128(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            if (!pImpl->pfnU64)
-                return IEMOP_RAISE_INVALID_OPCODE();
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
-                /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint64_t *,          pDst, 0);
-                IEM_MC_ARG(uint32_t const *,    pSrc, 1);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_REF_MREG_U32_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint64_t *,                  pDst,       0);
-                IEM_MC_LOCAL(uint32_t,                  uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint32_t const *,  pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U32(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
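-
-
-/* Illustrative model of a "low half to full" operation handled by the worker
-   above, here the MMX punpcklbw (standard semantics; helper name made up):
-   the low four bytes of each operand are interleaved into the full result. */
-static uint64_t iemExamplePunpcklbw(uint64_t uDst, uint64_t uSrc)
-{
-    uint64_t uResult = 0;
-    for (unsigned iByte = 0; iByte < 4; iByte++)
-    {
-        uResult |= ((uDst >> (iByte * 8)) & 0xff) << (iByte * 16);
-        uResult |= ((uSrc >> (iByte * 8)) & 0xff) << (iByte * 16 + 8);
-    }
-    return uResult;
-}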
-
-
-/** Opcode 0x0f 0x60. */
-FNIEMOP_DEF(iemOp_punpcklbw_Pq_Qd__punpcklbw_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpcklbw");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_LowLow_To_Full, &g_iemAImpl_punpcklbw);
-}
-
-
-/** Opcode 0x0f 0x61. */
-FNIEMOP_DEF(iemOp_punpcklwd_Pq_Qd__punpcklwd_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpcklwd"); /** @todo AMD mark the MMX version as 3DNow!. Intel says MMX CPUID req. */
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_LowLow_To_Full, &g_iemAImpl_punpcklwd);
-}
-
-
-/** Opcode 0x0f 0x62. */
-FNIEMOP_DEF(iemOp_punpckldq_Pq_Qd__punpckldq_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpckldq");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_LowLow_To_Full, &g_iemAImpl_punpckldq);
-}
-
-
-/** Opcode 0x0f 0x63. */
-FNIEMOP_STUB(iemOp_packsswb_Pq_Qq__packsswb_Vdq_Wdq);
-/** Opcode 0x0f 0x64. */
-FNIEMOP_STUB(iemOp_pcmpgtb_Pq_Qq__pcmpgtb_Vdq_Wdq);
-/** Opcode 0x0f 0x65. */
-FNIEMOP_STUB(iemOp_pcmpgtw_Pq_Qq__pcmpgtw_Vdq_Wdq);
-/** Opcode 0x0f 0x66. */
-FNIEMOP_STUB(iemOp_pcmpgtd_Pq_Qq__pcmpgtd_Vdq_Wdq);
-/** Opcode 0x0f 0x67. */
-FNIEMOP_STUB(iemOp_packuswb_Pq_Qq__packuswb_Vdq_Wdq);
-
-
-/**
- * Common worker for SSE2 and MMX instructions on the forms:
- *      pxxxx xmm1, xmm2/mem128
- *      pxxxx mm1, mm2/mem64
- *
- * The 2nd operand is the second half of a register, which in the memory case
- * means a 64-bit memory access for MMX, and for SSE a 128-bit aligned access
- * where it may read the full 128 bits or only the upper 64 bits.
- *
- * Exceptions type 4.
- */
-FNIEMOP_DEF_1(iemOpCommonMmxSse_HighHigh_To_Full, PCIEMOPMEDIAF1H1, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint128_t *,          pDst, 0);
-                IEM_MC_ARG(uint128_t const *,    pSrc, 1);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint128_t *,                 pDst,       0);
-                IEM_MC_LOCAL(uint128_t,                 uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint128_t const *, pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); /* Most CPUs probably only read the high qword. */
-
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            if (!pImpl->pfnU64)
-                return IEMOP_RAISE_INVALID_OPCODE();
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
-                /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint64_t *,          pDst, 0);
-                IEM_MC_ARG(uint64_t const *,    pSrc, 1);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint64_t *,                  pDst,       0);
-                IEM_MC_LOCAL(uint64_t,                  uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
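
To make the "high half to full" behaviour concrete, here is a minimal standalone C sketch (the helper name punpckhbw_u64 is hypothetical; this is plain C, not the IEM macro language) of what the worker computes for punpckhbw on 64-bit MMX operands:

#include <stdint.h>
#include <stdio.h>

/* Interleave the high four bytes of uDst and uSrc: dst bytes land in the
   even result positions, src bytes in the odd ones (punpckhbw mm1,mm2). */
static uint64_t punpckhbw_u64(uint64_t uDst, uint64_t uSrc)
{
    uint64_t uResult = 0;
    for (unsigned i = 0; i < 4; i++)
    {
        uResult |= ((uDst >> (32 + 8 * i)) & 0xff) << (16 * i);
        uResult |= ((uSrc >> (32 + 8 * i)) & 0xff) << (16 * i + 8);
    }
    return uResult;
}

int main(void)
{
    /* High halves 07..04 and f7..f4 interleave to f707f606f505f404. */
    printf("%016llx\n", (unsigned long long)
           punpckhbw_u64(0x0706050403020100ULL, 0xf7f6f5f4f3f2f1f0ULL));
    return 0;
}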
-
-
-/** Opcode 0x0f 0x68. */
-FNIEMOP_DEF(iemOp_punpckhbw_Pq_Qq__punpckhbw_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpckhbw");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_HighHigh_To_Full, &g_iemAImpl_punpckhbw);
-}
-
-
-/** Opcode 0x0f 0x69. */
-FNIEMOP_DEF(iemOp_punpckhwd_Pq_Qd__punpckhwd_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpckhwd");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_HighHigh_To_Full, &g_iemAImpl_punpckhwd);
-}
-
-
-/** Opcode 0x0f 0x6a. */
-FNIEMOP_DEF(iemOp_punpckhdq_Pq_Qd__punpckhdq_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpckhdq");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_HighHigh_To_Full, &g_iemAImpl_punpckhdq);
-}
-
-/** Opcode 0x0f 0x6b. */
-FNIEMOP_STUB(iemOp_packssdw_Pq_Qd__packssdq_Vdq_Wdq);
-
-
-/** Opcode 0x0f 0x6c. */
-FNIEMOP_DEF(iemOp_punpcklqdq_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpcklqdq");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_LowLow_To_Full, &g_iemAImpl_punpcklqdq);
-}
-
-
-/** Opcode 0x0f 0x6d. */
-FNIEMOP_DEF(iemOp_punpckhqdq_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("punpckhqdq");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse_HighHigh_To_Full, &g_iemAImpl_punpckhqdq);
-}
-
-
-/** Opcode 0x0f 0x6e. */
-FNIEMOP_DEF(iemOp_movd_q_Pd_Ey__movd_q_Vy_Ey)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            IEMOP_MNEMONIC("movd/q Wd/q,Ed/q");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* XMM, greg*/
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                    IEM_MC_STORE_XREG_U64_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                    IEM_MC_STORE_XREG_U32_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* XMM, [mem] */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT(); /** @todo order */
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                    IEM_MC_STORE_XREG_U64_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                    IEM_MC_STORE_XREG_U32_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            IEMOP_MNEMONIC("movq/d Pd/q,Ed/q");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* MMX, greg */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                    IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                else
-                    IEM_MC_FETCH_GREG_U32_ZX_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* MMX, [mem] */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                    IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                    IEM_MC_STORE_MREG_U32_ZX_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
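
The handlers above keep repeating the same bit fiddling on bRm. A small standalone sketch of what the X86_MODRM_*_MASK/SHIFT masking and the uRexReg/uRexB ORs extract (DecodeModRm is a hypothetical name; the layout is the standard x86 ModR/M byte):

#include <stdint.h>
#include <stdio.h>

/* mod = bits 7:6 (3 selects the register form), reg = bits 5:3, rm = bits
   2:0; in 64-bit mode REX.R and REX.B contribute bit 3 of reg and rm. */
static void DecodeModRm(uint8_t bRm, unsigned fRexR, unsigned fRexB)
{
    unsigned uMod = bRm >> 6;
    unsigned uReg = ((bRm >> 3) & 7) | (fRexR << 3);
    unsigned uRm  = (bRm & 7)        | (fRexB << 3);
    printf("mod=%u reg=%u rm=%u (%s form)\n",
           uMod, uReg, uRm, uMod == 3 ? "register" : "memory");
}

int main(void)
{
    DecodeModRm(0xC1, 0, 0); /* mod=3 reg=0 rm=1: register, register      */
    DecodeModRm(0x05, 1, 0); /* mod=0 reg=8 rm=5: disp32/RIP-relative mem */
    return 0;
}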
-
-
-/** Opcode 0x0f 0x6f. */
-FNIEMOP_DEF(iemOp_movq_Pq_Qq__movdqa_Vdq_Wdq__movdqu_Vdq_Wdq)
-{
-    bool fAligned = false;
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE aligned */
-            fAligned = true;
-            /* fall thru */
-        case IEM_OP_PRF_REPZ: /* SSE unaligned */
-            if (fAligned)
-                IEMOP_MNEMONIC("movdqa Vdq,Wdq");
-            else
-                IEMOP_MNEMONIC("movdqu Vdq,Wdq");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 0);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-                IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
-                                      (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint128_t,  u128Tmp);
-                IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-                if (fAligned)
-                    IEM_MC_FETCH_MEM_U128_ALIGN_SSE(u128Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                else
-                    IEM_MC_FETCH_MEM_U128(u128Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u128Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            IEMOP_MNEMONIC("movq Pq,Qq");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
-                /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-                IEM_MC_FETCH_MREG_U64(u64Tmp, bRm & X86_MODRM_RM_MASK);
-                IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-                IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
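
A much-simplified sketch of the aligned/unaligned split the movdqa/movdqu paths make above (FetchU128 is hypothetical; the real IEM_MC_FETCH_MEM_U128_ALIGN_SSE raises #GP(0) through the exception machinery instead of returning a status):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Returns -1 where the emulator would raise #GP(0): movdqa requires a
   16-byte-aligned effective address, movdqu accepts anything. */
static int FetchU128(uint8_t abDst[16], const uint8_t *pbSrc, int fAligned)
{
    if (fAligned && ((uintptr_t)pbSrc & 15))
        return -1;
    memcpy(abDst, pbSrc, 16);
    return 0;
}

int main(void)
{
    _Alignas(16) uint8_t abSrc[32] = {0};
    uint8_t abDst[16];
    printf("movdqa, aligned:    %d\n", FetchU128(abDst, abSrc,     1)); /*  0 */
    printf("movdqa, misaligned: %d\n", FetchU128(abDst, abSrc + 1, 1)); /* -1 */
    printf("movdqu, misaligned: %d\n", FetchU128(abDst, abSrc + 1, 0)); /*  0 */
    return 0;
}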
-
-
-/** Opcode 0x0f 0x70. The immediate here is evil! */
-FNIEMOP_DEF(iemOp_pshufw_Pq_Qq_Ib__pshufd_Vdq_Wdq_Ib__pshufhw_Vdq_Wdq_Ib__pshuflq_Vdq_Wdq_Ib)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-        case IEM_OP_PRF_REPNZ:   /* SSE */
-        case IEM_OP_PRF_REPZ:    /* SSE */
-        {
-            PFNIEMAIMPLMEDIAPSHUF pfnAImpl;
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-            {
-                case IEM_OP_PRF_SIZE_OP:
-                    IEMOP_MNEMONIC("pshufd Vdq,Wdq,Ib");
-                    pfnAImpl = iemAImpl_pshufd;
-                    break;
-                case IEM_OP_PRF_REPNZ:
-                    IEMOP_MNEMONIC("pshuflw Vdq,Wdq,Ib");
-                    pfnAImpl = iemAImpl_pshuflw;
-                    break;
-                case IEM_OP_PRF_REPZ:
-                    IEMOP_MNEMONIC("pshufhw Vdq,Wdq,Ib");
-                    pfnAImpl = iemAImpl_pshufhw;
-                    break;
-                IEM_NOT_REACHED_DEFAULT_CASE_RET();
-            }
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint128_t *,         pDst, 0);
-                IEM_MC_ARG(uint128_t const *,   pSrc, 1);
-                IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_SSE_AIMPL_3(pfnAImpl, pDst, pSrc, bEvilArg);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint128_t *,                 pDst,       0);
-                IEM_MC_LOCAL(uint128_t,                 uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint128_t const *, pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
-                IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-
-                IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_SSE_AIMPL_3(pfnAImpl, pDst, pSrc, bEvilArg);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-        }
-
-        case 0: /* MMX Extension */
-            IEMOP_MNEMONIC("pshufw Pq,Qq,Ib");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,          pDst, 0);
-                IEM_MC_ARG(uint64_t const *,    pSrc, 1);
-                IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT();
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
-                IEM_MC_CALL_MMX_AIMPL_3(iemAImpl_pshufw, pDst, pSrc, bEvilArg);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,                  pDst,       0);
-                IEM_MC_LOCAL(uint64_t,                  uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
-                IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT();
-
-                IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_CALL_MMX_AIMPL_3(iemAImpl_pshufw, pDst, pSrc, bEvilArg);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
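
The memory form above fetches the "evil" immediate only after the effective address has been decoded, which is why bEvil is read mid-stream. The shuffle itself is simple; a standalone sketch (pshufw_u64 is a hypothetical name) of the pshufw case:

#include <stdint.h>
#include <stdio.h>

/* Each 2-bit field of the immediate selects which source word ends up in
   the corresponding destination word. */
static uint64_t pshufw_u64(uint64_t uSrc, uint8_t bImm)
{
    uint64_t uResult = 0;
    for (unsigned i = 0; i < 4; i++)
    {
        unsigned iSel = (bImm >> (2 * i)) & 3;
        uResult |= ((uSrc >> (16 * iSel)) & 0xffff) << (16 * i);
    }
    return uResult;
}

int main(void)
{
    /* 0x1b = 00 01 10 11b: reverses the word order. */
    printf("%016llx\n", (unsigned long long)
           pshufw_u64(0x0003000200010000ULL, 0x1b)); /* 0000000100020003 */
    return 0;
}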
-
-
-/** Opcode 0x0f 0x71 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp12_psrlw_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x71 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp12_psrlw_Udq_Ib, uint8_t, bRm);
-
-/** Opcode 0x0f 0x71 11/4. */
-FNIEMOP_STUB_1(iemOp_Grp12_psraw_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x71 11/4. */
-FNIEMOP_STUB_1(iemOp_Grp12_psraw_Udq_Ib, uint8_t, bRm);
-
-/** Opcode 0x0f 0x71 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp12_psllw_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x71 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp12_psllw_Udq_Ib, uint8_t, bRm);
-
-
-/** Opcode 0x0f 0x71. */
-FNIEMOP_DEF(iemOp_Grp12)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: case 1: case 3: case 5: case 7:
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 2:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp12_psrlw_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp12_psrlw_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 4:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp12_psraw_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp12_psraw_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 6:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp12_psllw_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp12_psllw_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
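
The same dispatch can be read as a table indexed by the ModR/M reg field and the operand-size prefix; a sketch under that assumption (all names hypothetical, only /2 filled in, /4 and /6 would slot in the same way):

#include <stdint.h>
#include <stdio.h>

typedef int (*PFNGRP12)(uint8_t bRm);
static int Psrlw_Nq_Ib(uint8_t bRm)  { (void)bRm; puts("psrlw mm,Ib");  return 0; }
static int Psrlw_Udq_Ib(uint8_t bRm) { (void)bRm; puts("psrlw xmm,Ib"); return 0; }
static int InvalidOp(uint8_t bRm)    { (void)bRm; puts("#UD");          return -1; }

static int DispatchGrp12(uint8_t bRm, int fOpSizePrefix)
{
    static PFNGRP12 const s_apfn[8][2] =
    {   /* no prefix    0x66 prefix  */
        { InvalidOp,    InvalidOp    },  /* /0 */
        { InvalidOp,    InvalidOp    },  /* /1 */
        { Psrlw_Nq_Ib,  Psrlw_Udq_Ib },  /* /2 */
        { InvalidOp,    InvalidOp    },  /* /3 */
        { InvalidOp,    InvalidOp    },  /* /4: psraw in the real table */
        { InvalidOp,    InvalidOp    },  /* /5 */
        { InvalidOp,    InvalidOp    },  /* /6: psllw in the real table */
        { InvalidOp,    InvalidOp    },  /* /7 */
    };
    if ((bRm >> 6) != 3)                 /* memory operands: invalid opcode */
        return InvalidOp(bRm);
    return s_apfn[(bRm >> 3) & 7][fOpSizePrefix ? 1 : 0](bRm);
}

int main(void)
{
    DispatchGrp12(0xD1, 1);  /* mod=3, /2, 0x66 prefix -> psrlw xmm,Ib */
    DispatchGrp12(0x11, 0);  /* mod=0 (memory)         -> #UD          */
    return 0;
}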
-
-
-/** Opcode 0x0f 0x72 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp13_psrld_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x72 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp13_psrld_Udq_Ib, uint8_t, bRm);
-
-/** Opcode 0x0f 0x72 11/4. */
-FNIEMOP_STUB_1(iemOp_Grp13_psrad_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x72 11/4. */
-FNIEMOP_STUB_1(iemOp_Grp13_psrad_Udq_Ib, uint8_t, bRm);
-
-/** Opcode 0x0f 0x72 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp13_pslld_Nq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x72 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp13_pslld_Udq_Ib, uint8_t, bRm);
-
-
-/** Opcode 0x0f 0x72. */
-FNIEMOP_DEF(iemOp_Grp13)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: case 1: case 3: case 5: case 7:
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 2:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp13_psrld_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp13_psrld_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 4:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp13_psrad_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp13_psrad_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 6:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp13_pslld_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp13_pslld_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0x0f 0x73 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp14_psrlq_Nq_Ib,   uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x73 11/2. */
-FNIEMOP_STUB_1(iemOp_Grp14_psrlq_Udq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x73 11/3. */
-FNIEMOP_STUB_1(iemOp_Grp14_psrldq_Udq_Ib, uint8_t, bRm); //NEXT
-
-/** Opcode 0x0f 0x73 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp14_psllq_Nq_Ib,   uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x73 11/6. */
-FNIEMOP_STUB_1(iemOp_Grp14_psllq_Udq_Ib,  uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0x73 11/7. */
-FNIEMOP_STUB_1(iemOp_Grp14_pslldq_Udq_Ib, uint8_t, bRm); //NEXT
-
-
-/** Opcode 0x0f 0x73. */
-FNIEMOP_DEF(iemOp_Grp14)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: case 1: case 4: case 5:
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 2:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp14_psrlq_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp14_psrlq_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 3:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp14_psrldq_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 6:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case 0:                     return FNIEMOP_CALL_1(iemOp_Grp14_psllq_Nq_Ib, bRm);
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp14_psllq_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 7:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
-            {
-                case IEM_OP_PRF_SIZE_OP:    return FNIEMOP_CALL_1(iemOp_Grp14_pslldq_Udq_Ib, bRm);
-                default:                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
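
Unlike the word/dword/qword shifts, the /3 and /7 encodings above (psrldq/pslldq, 0x66 only) shift the whole 128-bit register by bytes. A standalone sketch (Psrldq is a hypothetical name) of the right-shift case:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* abXmm[0] is the least significant byte; shifting "right" moves bytes
   toward index 0.  Counts of 16 or more clear the register. */
static void Psrldq(uint8_t abXmm[16], uint8_t cBytes)
{
    uint8_t abTmp[16] = {0};
    if (cBytes < 16)
        memcpy(abTmp, abXmm + cBytes, 16 - cBytes);
    memcpy(abXmm, abTmp, 16);
}

int main(void)
{
    uint8_t abXmm[16];
    for (unsigned i = 0; i < 16; i++)
        abXmm[i] = (uint8_t)i;
    Psrldq(abXmm, 4);
    for (unsigned i = 0; i < 16; i++)
        printf("%02x ", abXmm[i]);     /* 04 05 ... 0f 00 00 00 00 */
    printf("\n");
    return 0;
}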
-
-
-/**
- * Common worker for SSE2 and MMX instructions on the forms:
- *      pxxx    mm1, mm2/mem64
- *      pxxx    xmm1, xmm2/mem128
- *
- * Proper alignment of the 128-bit operand is enforced.
- * Exceptions type 4. SSE2 and MMX cpuid checks.
- */
-FNIEMOP_DEF_1(iemOpCommonMmxSse2_FullFull_To_Full, PCIEMOPMEDIAF2, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint128_t *,          pDst, 0);
-                IEM_MC_ARG(uint128_t const *,    pSrc, 1);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint128_t *,                 pDst,       0);
-                IEM_MC_LOCAL(uint128_t,                 uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint128_t const *, pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-                IEM_MC_PREPARE_SSE_USAGE();
-                IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
-                /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(2, 0);
-                IEM_MC_ARG(uint64_t *,          pDst, 0);
-                IEM_MC_ARG(uint64_t const *,    pSrc, 1);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(2, 2);
-                IEM_MC_ARG(uint64_t *,                  pDst,       0);
-                IEM_MC_LOCAL(uint64_t,                  uSrc);
-                IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
-
-                IEM_MC_PREPARE_FPU_USAGE();
-                IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
-
-/** Opcode 0x0f 0x74. */
-FNIEMOP_DEF(iemOp_pcmpeqb_Pq_Qq__pcmpeqb_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("pcmpeqb");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqb);
-}
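
For reference, a standalone sketch (pcmpeqb_u64 is a hypothetical name) of the per-byte compare this wrapper routes to the worker, shown on the 64-bit MMX form:

#include <stdint.h>
#include <stdio.h>

/* Equal bytes become 0xff, unequal bytes 0x00; the 128-bit SSE form is
   the same per-byte rule twice over. */
static uint64_t pcmpeqb_u64(uint64_t uDst, uint64_t uSrc)
{
    uint64_t uResult = 0;
    for (unsigned i = 0; i < 8; i++)
        if (((uDst >> (8 * i)) & 0xff) == ((uSrc >> (8 * i)) & 0xff))
            uResult |= 0xffULL << (8 * i);
    return uResult;
}

int main(void)
{
    printf("%016llx\n", (unsigned long long)
           pcmpeqb_u64(0x0011223344556677ULL, 0x00ff22ff44ff66ffULL));
    /* every other byte matches: ff00ff00ff00ff00 */
    return 0;
}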
-
-
-/** Opcode 0x0f 0x75. */
-FNIEMOP_DEF(iemOp_pcmpeqw_Pq_Qq__pcmpeqw_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("pcmpeqw");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqw);
-}
-
-
-/** Opcode 0x0f 0x76. */
-FNIEMOP_DEF(iemOp_pcmped_Pq_Qq__pcmpeqd_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("pcmpeqd");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqd);
-}
-
-
-/** Opcode 0x0f 0x77. */
-FNIEMOP_STUB(iemOp_emms);
-/** Opcode 0x0f 0x78. */
-FNIEMOP_UD_STUB(iemOp_vmread_AmdGrp17);
-/** Opcode 0x0f 0x79. */
-FNIEMOP_UD_STUB(iemOp_vmwrite);
-/** Opcode 0x0f 0x7c. */
-FNIEMOP_STUB(iemOp_haddpd_Vdp_Wpd__haddps_Vps_Wps);
-/** Opcode 0x0f 0x7d. */
-FNIEMOP_STUB(iemOp_hsubpd_Vpd_Wpd__hsubps_Vps_Wps);
-
-
-/** Opcode 0x0f 0x7e. */
-FNIEMOP_DEF(iemOp_movd_q_Ey_Pd__movd_q_Ey_Vy__movq_Vq_Wq)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            IEMOP_MNEMONIC("movd/q Ed/q,Wd/q");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* greg, XMM */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_XREG_U64(u64Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                    IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_XREG_U32(u32Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                    IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* [mem], XMM */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_XREG_U64(u64Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                    IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_XREG_U32(u32Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                    IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            IEMOP_MNEMONIC("movq/d Ed/q,Pd/q");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /* greg, MMX */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                    IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_MREG_U32(u32Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                    IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /* [mem], MMX */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-                if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                {
-                    IEM_MC_LOCAL(uint64_t, u64Tmp);
-                    IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                    IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u64Tmp);
-                }
-                else
-                {
-                    IEM_MC_LOCAL(uint32_t, u32Tmp);
-                    IEM_MC_FETCH_MREG_U32(u32Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                    IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u32Tmp);
-                }
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
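
Both movd/movq handlers make the same REX.W split: 64 bits with the prefix, 32 bits (zero-extended on register writes) without. A one-function sketch (MovdMovqValue is a hypothetical name):

#include <stdint.h>
#include <stdio.h>

/* Without REX.W only the low 32 bits travel, zero-extended; with REX.W
   the full 64 bits do. */
static uint64_t MovdMovqValue(uint64_t uSrc, int fRexW)
{
    return fRexW ? uSrc : (uint32_t)uSrc;
}

int main(void)
{
    printf("%016llx\n", (unsigned long long)MovdMovqValue(0x1122334455667788ULL, 0));
    printf("%016llx\n", (unsigned long long)MovdMovqValue(0x1122334455667788ULL, 1));
    return 0;
}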
-
-
-/** Opcode 0x0f 0x7f. */
-FNIEMOP_DEF(iemOp_movq_Qq_Pq__movq_movdqa_Wdq_Vdq__movdqu_Wdq_Vdq)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    bool fAligned = false;
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE aligned */
-            fAligned = true;
-            /* fall thru */
-        case IEM_OP_PRF_REPZ: /* SSE unaligned */
-            if (fAligned)
-                IEMOP_MNEMONIC("movdqa Wdq,Vdq");
-            else
-                IEMOP_MNEMONIC("movdqu Wdq,Vdq");
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 0);
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
-                IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
-                                      ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint128_t,  u128Tmp);
-                IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-                IEM_MC_FETCH_XREG_U128(u128Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                if (fAligned)
-                    IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u128Tmp);
-                else
-                    IEM_MC_STORE_MEM_U128(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u128Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            IEMOP_MNEMONIC("movq Qq,Pq");
-
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-            {
-                /*
-                 * Register, register.
-                 */
-                /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
-                /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
-                IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_STORE_MREG_U64(bRm & X86_MODRM_RM_MASK, u64Tmp);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            else
-            {
-                /*
-                 * Register, memory.
-                 */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Tmp);
-                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-
-                IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u64Tmp);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-            }
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
-
-
-/** Opcode 0x0f 0x80. */
-FNIEMOP_DEF(iemOp_jo_Jv)
-{
-    IEMOP_MNEMONIC("jo  Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
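
The Jcc handlers from here through 0x8f differ only in which EFLAGS test they wrap; the negated forms (jno, jnc, jne, ...) simply swap the taken and fall-through arms. A standalone sketch of the tests involved (hypothetical names; flag bit positions per the x86 EFLAGS layout):

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define F_CF 0x0001u    /* carry    */
#define F_PF 0x0004u    /* parity   */
#define F_ZF 0x0040u    /* zero     */
#define F_SF 0x0080u    /* sign     */
#define F_OF 0x0800u    /* overflow */

static bool CondJo (uint32_t fEfl) { return (fEfl & F_OF) != 0; }
static bool CondJc (uint32_t fEfl) { return (fEfl & F_CF) != 0; }
static bool CondJbe(uint32_t fEfl) { return (fEfl & (F_CF | F_ZF)) != 0; }
static bool CondJl (uint32_t fEfl) { return !(fEfl & F_SF) != !(fEfl & F_OF); }
static bool CondJle(uint32_t fEfl) { return (fEfl & F_ZF) != 0 || CondJl(fEfl); }

int main(void)
{
    uint32_t fEfl = F_SF;                     /* SF=1, OF=0: "less" */
    printf("jl taken: %d, jle taken: %d, ja taken: %d\n",
           CondJl(fEfl), CondJle(fEfl), !CondJbe(fEfl));
    return 0;
}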
-
-
-/** Opcode 0x0f 0x81. */
-FNIEMOP_DEF(iemOp_jno_Jv)
-{
-    IEMOP_MNEMONIC("jno Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x82. */
-FNIEMOP_DEF(iemOp_jc_Jv)
-{
-    IEMOP_MNEMONIC("jc/jb/jnae Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x83. */
-FNIEMOP_DEF(iemOp_jnc_Jv)
-{
-    IEMOP_MNEMONIC("jnc/jnb/jae Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x84. */
-FNIEMOP_DEF(iemOp_je_Jv)
-{
-    IEMOP_MNEMONIC("je/jz Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x85. */
-FNIEMOP_DEF(iemOp_jne_Jv)
-{
-    IEMOP_MNEMONIC("jne/jnz Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x86. */
-FNIEMOP_DEF(iemOp_jbe_Jv)
-{
-    IEMOP_MNEMONIC("jbe/jna Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x87. */
-FNIEMOP_DEF(iemOp_jnbe_Jv)
-{
-    IEMOP_MNEMONIC("jnbe/ja Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x88. */
-FNIEMOP_DEF(iemOp_js_Jv)
-{
-    IEMOP_MNEMONIC("js  Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x89. */
-FNIEMOP_DEF(iemOp_jns_Jv)
-{
-    IEMOP_MNEMONIC("jns Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8a. */
-FNIEMOP_DEF(iemOp_jp_Jv)
-{
-    IEMOP_MNEMONIC("jp  Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8b. */
-FNIEMOP_DEF(iemOp_jnp_Jv)
-{
-    IEMOP_MNEMONIC("jo  Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8c. */
-FNIEMOP_DEF(iemOp_jl_Jv)
-{
-    IEMOP_MNEMONIC("jl/jnge Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8d. */
-FNIEMOP_DEF(iemOp_jnl_Jv)
-{
-    IEMOP_MNEMONIC("jnl/jge Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8e. */
-FNIEMOP_DEF(iemOp_jle_Jv)
-{
-    IEMOP_MNEMONIC("jle/jng Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ELSE() {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x8f. */
-FNIEMOP_DEF(iemOp_jnle_Jv)
-{
-    IEMOP_MNEMONIC("jnle/jg Jv");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-    {
-        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S16(i16Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    else
-    {
-        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_ADVANCE_RIP();
-        } IEM_MC_ELSE() {
-            IEM_MC_REL_JMP_S32(i32Imm);
-        } IEM_MC_ENDIF();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x90. */
-FNIEMOP_DEF(iemOp_seto_Eb)
-{
-    IEMOP_MNEMONIC("seto Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
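
The SETcc handlers evaluate the same conditions as the Jcc family but store a whole byte, 1 or 0, instead of branching; the register and memory targets differ only in where the byte lands. A minimal sketch (SetccStoreU8 is a hypothetical name):

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* SETcc writes the whole destination byte, not just bit 0. */
static void SetccStoreU8(uint8_t *pbDst, bool fCond)
{
    *pbDst = fCond ? 1 : 0;
}

int main(void)
{
    uint8_t bAl = 0xff;
    SetccStoreU8(&bAl, false);   /* seto with OF clear */
    printf("al=%02x\n", bAl);    /* 00, the old contents are gone */
    return 0;
}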
-
-
-/** Opcode 0x0f 0x91. */
-FNIEMOP_DEF(iemOp_setno_Eb)
-{
-    IEMOP_MNEMONIC("setno Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x92. */
-FNIEMOP_DEF(iemOp_setc_Eb)
-{
-    IEMOP_MNEMONIC("setc Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x93. */
-FNIEMOP_DEF(iemOp_setnc_Eb)
-{
-    IEMOP_MNEMONIC("setnc Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x94. */
-FNIEMOP_DEF(iemOp_sete_Eb)
-{
-    IEMOP_MNEMONIC("sete Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x95. */
-FNIEMOP_DEF(iemOp_setne_Eb)
-{
-    IEMOP_MNEMONIC("setne Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x96. */
-FNIEMOP_DEF(iemOp_setbe_Eb)
-{
-    IEMOP_MNEMONIC("setbe Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x97. */
-FNIEMOP_DEF(iemOp_setnbe_Eb)
-{
-    IEMOP_MNEMONIC("setnbe Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x98. */
-FNIEMOP_DEF(iemOp_sets_Eb)
-{
-    IEMOP_MNEMONIC("sets Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x99. */
-FNIEMOP_DEF(iemOp_setns_Eb)
-{
-    IEMOP_MNEMONIC("setns Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9a. */
-FNIEMOP_DEF(iemOp_setp_Eb)
-{
-    IEMOP_MNEMONIC("setnp Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9b. */
-FNIEMOP_DEF(iemOp_setnp_Eb)
-{
-    IEMOP_MNEMONIC("setnp Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9c. */
-FNIEMOP_DEF(iemOp_setl_Eb)
-{
-    IEMOP_MNEMONIC("setl Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9d. */
-FNIEMOP_DEF(iemOp_setnl_Eb)
-{
-    IEMOP_MNEMONIC("setnl Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9e. */
-FNIEMOP_DEF(iemOp_setle_Eb)
-{
-    IEMOP_MNEMONIC("setle Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0x9f. */
-FNIEMOP_DEF(iemOp_setnle_Eb)
-{
-    IEMOP_MNEMONIC("setnle Eb");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
-     *        any way. AMD says it's "unused", whatever that means.  We're
-     *        ignoring it for now. */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register target */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_BEGIN(0, 0);
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /* memory target */
-        IEM_MC_BEGIN(0, 1);
-        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        } IEM_MC_ELSE() {
-            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
-        } IEM_MC_ENDIF();
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/**
- * Common 'push segment-register' helper.
- */
-FNIEMOP_DEF_1(iemOpCommonPushSReg, uint8_t, iReg)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
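-    /* Only FS and GS can be pushed in 64-bit mode; the ES/CS/SS/DS pushes
-     * (segment indexes below FS) are invalid opcodes there, hence the
-     * check below. */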
-    if (iReg < X86_SREG_FS)
-        IEMOP_HLP_NO_64BIT();
-    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint16_t, u16Value);
-            IEM_MC_FETCH_SREG_U16(u16Value, iReg);
-            IEM_MC_PUSH_U16(u16Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_FETCH_SREG_ZX_U32(u32Value, iReg);
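-            /* A dedicated SREG pusher is used here since at least some CPUs
-             * only write the low 16 bits of the 32-bit stack slot when
-             * pushing a segment register. */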
-            IEM_MC_PUSH_U32_SREG(u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_FETCH_SREG_ZX_U64(u64Value, iReg);
-            IEM_MC_PUSH_U64(u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            break;
-    }
-
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xa0. */
-FNIEMOP_DEF(iemOp_push_fs)
-{
-    IEMOP_MNEMONIC("push fs");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_FS);
-}
-
-
-/** Opcode 0x0f 0xa1. */
-FNIEMOP_DEF(iemOp_pop_fs)
-{
-    IEMOP_MNEMONIC("pop fs");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_FS, pVCpu->iem.s.enmEffOpSize);
-}
-
-
-/** Opcode 0x0f 0xa2. */
-FNIEMOP_DEF(iemOp_cpuid)
-{
-    IEMOP_MNEMONIC("cpuid");
-    IEMOP_HLP_MIN_486(); /* Not all 486s have CPUID. */
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_cpuid);
-}
-
-
-/**
- * Common worker for iemOp_bt_Ev_Gv, iemOp_btc_Ev_Gv, iemOp_btr_Ev_Gv and
- * iemOp_bts_Ev_Gv.
- */
-FNIEMOP_DEF_1(iemOpCommonBit_Ev_Gv, PCIEMOPBINSIZES, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register destination. */
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
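-        /* Register destination: the bit offset wraps modulo the operand
-         * width, which the 0xf/0x1f/0x3f masks below implement. */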
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,        u16Src,                 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_AND_LOCAL_U16(u16Src, 0xf);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,        u32Src,                 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_AND_LOCAL_U32(u32Src, 0x1f);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,        u64Src,                 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
-
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_AND_LOCAL_U64(u64Src, 0x3f);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* memory destination. */
-
-        uint32_t fAccess;
-        if (pImpl->pfnLockedU16)
-            fAccess = IEM_ACCESS_DATA_RW;
-        else /* BT */
-            fAccess = IEM_ACCESS_DATA_R;
-
-        NOREF(fAccess);
-
-        /** @todo test negative bit offsets! */
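-        /* Memory destination: the bit offset is signed and may reach beyond
-         * the addressed operand.  The high bits select the word (arithmetic
-         * shift right by log2(width in bits), scaled back up to bytes and
-         * added to the effective address); the low bits select the bit
-         * within that word. */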
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,                u16Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-                IEM_MC_LOCAL(int16_t,               i16AddrAdj);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_ASSIGN(i16AddrAdj, u16Src);
-                IEM_MC_AND_ARG_U16(u16Src, 0x0f);
-                IEM_MC_SAR_LOCAL_S16(i16AddrAdj, 4);
-                IEM_MC_SHL_LOCAL_S16(i16AddrAdj, 1);
-                IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(GCPtrEffDst, i16AddrAdj);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,                u32Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-                IEM_MC_LOCAL(int32_t,               i32AddrAdj);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_ASSIGN(i32AddrAdj, u32Src);
-                IEM_MC_AND_ARG_U32(u32Src, 0x1f);
-                IEM_MC_SAR_LOCAL_S32(i32AddrAdj, 5);
-                IEM_MC_SHL_LOCAL_S32(i32AddrAdj, 2);
-                IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(GCPtrEffDst, i32AddrAdj);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 2);
-                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,                u64Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-                IEM_MC_LOCAL(int64_t,               i64AddrAdj);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_ASSIGN(i64AddrAdj, u64Src);
-                IEM_MC_AND_ARG_U64(u64Src, 0x3f);
-                IEM_MC_SAR_LOCAL_S64(i64AddrAdj, 6);
-                IEM_MC_SHL_LOCAL_S64(i64AddrAdj, 3);
-                IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(GCPtrEffDst, i64AddrAdj);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xa3. */
-FNIEMOP_DEF(iemOp_bt_Ev_Gv)
-{
-    IEMOP_MNEMONIC("bt  Gv,Gv");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_bt);
-}
-
-
-/**
- * Common worker for iemOp_shrd_Ev_Gv_Ib and iemOp_shld_Ev_Gv_Ib.
- */
-FNIEMOP_DEF_1(iemOpCommonShldShrd_Ib, PCIEMOPSHIFTDBLSIZES, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF | X86_EFL_OF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,        u16Src,                 1);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, /*=*/cShift, 2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,        u32Src,                 1);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, /*=*/cShift, 2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,        u64Src,                 1);
-                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, /*=*/cShift, 2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,                u16Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
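-                /* The trailing '1' below accounts for the immediate
-                 * shift-count byte that still follows, which matters for
-                 * RIP-relative addressing. */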
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-                IEM_MC_ASSIGN(cShiftArg, cShift);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,                u32Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-                IEM_MC_ASSIGN(cShiftArg, cShift);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,                u64Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
-                IEM_MC_ASSIGN(cShiftArg, cShift);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/**
- * Common worker for iemOp_shrd_Ev_Gv_CL and iemOp_shld_Ev_Gv_CL.
- */
-FNIEMOP_DEF_1(iemOpCommonShldShrd_CL, PCIEMOPSHIFTDBLSIZES, pImpl)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF | X86_EFL_OF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,        u16Src,                 1);
-                IEM_MC_ARG(uint8_t,         cShiftArg,              2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
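-                /* (The byte-sized fetch of rCX reads CL, the shift count.) */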
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,        u32Src,                 1);
-                IEM_MC_ARG(uint8_t,         cShiftArg,              2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,        u64Src,                 1);
-                IEM_MC_ARG(uint8_t,         cShiftArg,              2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,                u16Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,                u32Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 2);
-                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,                u64Src,                 1);
-                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-
-/** Opcode 0x0f 0xa4. */
-FNIEMOP_DEF(iemOp_shld_Ev_Gv_Ib)
-{
-    IEMOP_MNEMONIC("shld Ev,Gv,Ib");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonShldShrd_Ib, &g_iemAImpl_shld);
-}
-
-
-/** Opcode 0x0f 0xa5. */
-FNIEMOP_DEF(iemOp_shld_Ev_Gv_CL)
-{
-    IEMOP_MNEMONIC("shld Ev,Gv,CL");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonShldShrd_CL, &g_iemAImpl_shld);
-}
-
-
-/** Opcode 0x0f 0xa8. */
-FNIEMOP_DEF(iemOp_push_gs)
-{
-    IEMOP_MNEMONIC("push gs");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_GS);
-}
-
-
-/** Opcode 0x0f 0xa9. */
-FNIEMOP_DEF(iemOp_pop_gs)
-{
-    IEMOP_MNEMONIC("pop gs");
-    IEMOP_HLP_MIN_386();
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_GS, pVCpu->iem.s.enmEffOpSize);
-}
-
-
-/** Opcode 0x0f 0xaa. */
-FNIEMOP_STUB(iemOp_rsm);
-//IEMOP_HLP_MIN_386();
-
-
-/** Opcode 0x0f 0xab. */
-FNIEMOP_DEF(iemOp_bts_Ev_Gv)
-{
-    IEMOP_MNEMONIC("bts Ev,Gv");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_bts);
-}
-
-
-/** Opcode 0x0f 0xac. */
-FNIEMOP_DEF(iemOp_shrd_Ev_Gv_Ib)
-{
-    IEMOP_MNEMONIC("shrd Ev,Gv,Ib");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonShldShrd_Ib, &g_iemAImpl_shrd);
-}
-
-
-/** Opcode 0x0f 0xad. */
-FNIEMOP_DEF(iemOp_shrd_Ev_Gv_CL)
-{
-    IEMOP_MNEMONIC("shrd Ev,Gv,CL");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonShldShrd_CL, &g_iemAImpl_shrd);
-}
-
-
-/** Opcode 0x0f 0xae mem/0. */
-FNIEMOP_DEF_1(iemOp_Grp15_fxsave,   uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fxsave m512");
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fFxSaveRstor)
-        return IEMOP_RAISE_INVALID_OPCODE();
-
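-    /* The MC block below only gathers the effective segment and address;
-     * operand checks and the actual 512-byte state save are done by the
-     * iemCImpl_fxsave implementation it defers to. */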
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
-    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_fxsave, iEffSeg, GCPtrEff, enmEffOpSize);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xae mem/1. */
-FNIEMOP_DEF_1(iemOp_Grp15_fxrstor,  uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("fxrstor m512");
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fFxSaveRstor)
-        return IEMOP_RAISE_INVALID_OPCODE();
-
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
-    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
-    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
-    IEM_MC_CALL_CIMPL_3(iemCImpl_fxrstor, iEffSeg, GCPtrEff, enmEffOpSize);
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xae mem/2. */
-FNIEMOP_STUB_1(iemOp_Grp15_ldmxcsr,  uint8_t, bRm);
-
-/** Opcode 0x0f 0xae mem/3. */
-FNIEMOP_STUB_1(iemOp_Grp15_stmxcsr,  uint8_t, bRm);
-
-/** Opcode 0x0f 0xae mem/4. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_xsave,    uint8_t, bRm);
-
-/** Opcode 0x0f 0xae mem/5. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_xrstor,   uint8_t, bRm);
-
-/** Opcode 0x0f 0xae mem/6. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_xsaveopt, uint8_t, bRm);
-
-/** Opcode 0x0f 0xae mem/7. */
-FNIEMOP_STUB_1(iemOp_Grp15_clflush,  uint8_t, bRm);
-
-
-/** Opcode 0x0f 0xae 11b/5. */
-FNIEMOP_DEF_1(iemOp_Grp15_lfence,   uint8_t, bRm)
-{
-    RT_NOREF_PV(bRm);
-    IEMOP_MNEMONIC("lfence");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
-        return IEMOP_RAISE_INVALID_OPCODE();
-
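-    /* Emulate with a real LFENCE when the host CPU has SSE2; otherwise fall
-     * back to a generic memory fence.  The same pattern is used for MFENCE
-     * and SFENCE below. */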
-    IEM_MC_BEGIN(0, 0);
-    if (IEM_GET_HOST_CPU_FEATURES(pVCpu)->fSse2)
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_lfence);
-    else
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_alt_mem_fence);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xae 11b/6. */
-FNIEMOP_DEF_1(iemOp_Grp15_mfence,   uint8_t, bRm)
-{
-    RT_NOREF_PV(bRm);
-    IEMOP_MNEMONIC("mfence");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
-        return IEMOP_RAISE_INVALID_OPCODE();
-
-    IEM_MC_BEGIN(0, 0);
-    if (IEM_GET_HOST_CPU_FEATURES(pVCpu)->fSse2)
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_mfence);
-    else
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_alt_mem_fence);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xae 11b/7. */
-FNIEMOP_DEF_1(iemOp_Grp15_sfence,   uint8_t, bRm)
-{
-    RT_NOREF_PV(bRm);
-    IEMOP_MNEMONIC("sfence");
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
-        return IEMOP_RAISE_INVALID_OPCODE();
-
-    IEM_MC_BEGIN(0, 0);
-    if (IEM_GET_HOST_CPU_FEATURES(pVCpu)->fSse2)
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_sfence);
-    else
-        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_alt_mem_fence);
-    IEM_MC_ADVANCE_RIP();
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0xf3 0x0f 0xae 11b/0. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_rdfsbase, uint8_t, bRm);
-
-/** Opcode 0xf3 0x0f 0xae 11b/1. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_rdgsbase, uint8_t, bRm);
-
-/** Opcode 0xf3 0x0f 0xae 11b/2. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_wrfsbase, uint8_t, bRm);
-
-/** Opcode 0xf3 0x0f 0xae 11b/3. */
-FNIEMOP_UD_STUB_1(iemOp_Grp15_wrgsbase, uint8_t, bRm);
-
-
-/** Opcode 0x0f 0xae. */
-FNIEMOP_DEF(iemOp_Grp15)
-{
-    IEMOP_HLP_MIN_586(); /* Not entirely accurate, nor strictly needed, but useful when debugging 286 code. */
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
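-    /* Memory forms (mod != 3) dispatch purely on the reg field; register
-     * forms also dispatch on the repeat prefixes: no prefix selects the
-     * fences, F3 selects the fs/gs base read/write instructions. */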
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-        {
-            case 0: return FNIEMOP_CALL_1(iemOp_Grp15_fxsave,  bRm);
-            case 1: return FNIEMOP_CALL_1(iemOp_Grp15_fxrstor, bRm);
-            case 2: return FNIEMOP_CALL_1(iemOp_Grp15_ldmxcsr, bRm);
-            case 3: return FNIEMOP_CALL_1(iemOp_Grp15_stmxcsr, bRm);
-            case 4: return FNIEMOP_CALL_1(iemOp_Grp15_xsave,   bRm);
-            case 5: return FNIEMOP_CALL_1(iemOp_Grp15_xrstor,  bRm);
-            case 6: return FNIEMOP_CALL_1(iemOp_Grp15_xsaveopt,bRm);
-            case 7: return FNIEMOP_CALL_1(iemOp_Grp15_clflush, bRm);
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ | IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_LOCK))
-        {
-            case 0:
-                switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-                {
-                    case 0: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 1: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 2: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 3: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 4: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 5: return FNIEMOP_CALL_1(iemOp_Grp15_lfence, bRm);
-                    case 6: return FNIEMOP_CALL_1(iemOp_Grp15_mfence, bRm);
-                    case 7: return FNIEMOP_CALL_1(iemOp_Grp15_sfence, bRm);
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-
-            case IEM_OP_PRF_REPZ:
-                switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-                {
-                    case 0: return FNIEMOP_CALL_1(iemOp_Grp15_rdfsbase, bRm);
-                    case 1: return FNIEMOP_CALL_1(iemOp_Grp15_rdgsbase, bRm);
-                    case 2: return FNIEMOP_CALL_1(iemOp_Grp15_wrfsbase, bRm);
-                    case 3: return FNIEMOP_CALL_1(iemOp_Grp15_wrgsbase, bRm);
-                    case 4: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 5: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 6: return IEMOP_RAISE_INVALID_OPCODE();
-                    case 7: return IEMOP_RAISE_INVALID_OPCODE();
-                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
-                }
-                break;
-
-            default:
-                return IEMOP_RAISE_INVALID_OPCODE();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xaf. */
-FNIEMOP_DEF(iemOp_imul_Gv_Ev)
-{
-    IEMOP_MNEMONIC("imul Gv,Ev");
-    IEMOP_HLP_MIN_386();
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
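-    /* CF and OF are the architecturally defined results of IMUL; SF, ZF, AF
-     * and PF are left undefined, hence the verification exclusion above. */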
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_imul_two);
-}
-
-
-/** Opcode 0x0f 0xb0. */
-FNIEMOP_DEF(iemOp_cmpxchg_Eb_Gb)
-{
-    IEMOP_MNEMONIC("cmpxchg Eb,Gb");
-    IEMOP_HLP_MIN_486();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING();
-        IEM_MC_BEGIN(4, 0);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,                 0);
-        IEM_MC_ARG(uint8_t *,       pu8Al,                  1);
-        IEM_MC_ARG(uint8_t,         u8Src,                  2);
-        IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-        IEM_MC_FETCH_GREG_U8(u8Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_GREG_U8(pu8Al, X86_GREG_xAX);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8, pu8Dst, pu8Al, u8Src, pEFlags);
-        else
-            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8_locked, pu8Dst, pu8Al, u8Src, pEFlags);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        IEM_MC_BEGIN(4, 3);
-        IEM_MC_ARG(uint8_t *,       pu8Dst,                 0);
-        IEM_MC_ARG(uint8_t *,       pu8Al,                  1);
-        IEM_MC_ARG(uint8_t,         u8Src,                  2);
-        IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
-        IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
-        IEM_MC_LOCAL(uint8_t,       u8Al);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEMOP_HLP_DONE_DECODING();
-        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-        IEM_MC_FETCH_GREG_U8(u8Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_FETCH_GREG_U8(u8Al, X86_GREG_xAX);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        IEM_MC_REF_LOCAL(pu8Al, u8Al);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8, pu8Dst, pu8Al, u8Src, pEFlags);
-        else
-            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8_locked, pu8Dst, pu8Al, u8Src, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_STORE_GREG_U8(X86_GREG_xAX, u8Al);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-/** Opcode 0x0f 0xb1. */
-FNIEMOP_DEF(iemOp_cmpxchg_Ev_Gv)
-{
-    IEMOP_MNEMONIC("cmpxchg Ev,Gv");
-    IEMOP_HLP_MIN_486();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t *,      pu16Ax,                 1);
-                IEM_MC_ARG(uint16_t,        u16Src,                 2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U16(pu16Ax, X86_GREG_xAX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16, pu16Dst, pu16Ax, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16_locked, pu16Dst, pu16Ax, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t *,      pu32Eax,                1);
-                IEM_MC_ARG(uint32_t,        u32Src,                 2);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U32(pu32Eax, X86_GREG_xAX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32, pu32Dst, pu32Eax, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32_locked, pu32Dst, pu32Eax, u32Src, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Eax);
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t *,      pu64Rax,                1);
-#ifdef RT_ARCH_X86
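-                /* On 32-bit hosts the 64-bit source is passed by reference,
-                   presumably so the assembly worker need not take a 64-bit
-                   value argument (assumption). */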
-                IEM_MC_ARG(uint64_t *,      pu64Src,                2);
-#else
-                IEM_MC_ARG(uint64_t,        u64Src,                 2);
-#endif
-                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
-
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U64(pu64Rax, X86_GREG_xAX);
-                IEM_MC_REF_EFLAGS(pEFlags);
-#ifdef RT_ARCH_X86
-                IEM_MC_REF_GREG_U64(pu64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, pu64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, pu64Src, pEFlags);
-#else
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, u64Src, pEFlags);
-#endif
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(4, 3);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
-                IEM_MC_ARG(uint16_t *,      pu16Ax,                 1);
-                IEM_MC_ARG(uint16_t,        u16Src,                 2);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
-                IEM_MC_LOCAL(uint16_t,      u16Ax);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING();
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U16(u16Ax, X86_GREG_xAX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_REF_LOCAL(pu16Ax, u16Ax);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16, pu16Dst, pu16Ax, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16_locked, pu16Dst, pu16Ax, u16Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Ax);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(4, 3);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
-                IEM_MC_ARG(uint32_t *,      pu32Eax,                1);
-                IEM_MC_ARG(uint32_t,        u32Src,                 2);
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
-                IEM_MC_LOCAL(uint32_t,      u32Eax);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING();
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_FETCH_GREG_U32(u32Eax, X86_GREG_xAX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_REF_LOCAL(pu32Eax, u32Eax);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32, pu32Dst, pu32Eax, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32_locked, pu32Dst, pu32Eax, u32Src, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U32(X86_GREG_xAX, u32Eax);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(4, 3);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
-                IEM_MC_ARG(uint64_t *,      pu64Rax,                1);
-#ifdef RT_ARCH_X86
-                IEM_MC_ARG(uint64_t *,      pu64Src,                2);
-#else
-                IEM_MC_ARG(uint64_t,        u64Src,                 2);
-#endif
-                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
-                IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
-                IEM_MC_LOCAL(uint64_t,      u64Rax);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING();
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                IEM_MC_FETCH_GREG_U64(u64Rax, X86_GREG_xAX);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_REF_LOCAL(pu64Rax, u64Rax);
-#ifdef RT_ARCH_X86
-                IEM_MC_REF_GREG_U64(pu64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, pu64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, pu64Src, pEFlags);
-#else
-                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, u64Src, pEFlags);
-#endif
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U64(X86_GREG_xAX, u64Rax);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
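-/**
- * Common worker for LSS, LFS and LGS: loads a far pointer from memory into a
- * segment register and a general register.  The memory operand holds the
- * offset first, followed by the 16-bit selector.
- */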
-FNIEMOP_DEF_2(iemOpCommonLoadSRegAndGreg, uint8_t, iSegReg, uint8_t, bRm)
-{
-    Assert((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT)); /* Caller checks this */
-    uint8_t const iGReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg;
-
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(5, 1);
-            IEM_MC_ARG(uint16_t,        uSel,                                    0);
-            IEM_MC_ARG(uint16_t,        offSeg,                                  1);
-            IEM_MC_ARG_CONST(uint8_t,   iSegRegArg,/*=*/iSegReg,                 2);
-            IEM_MC_ARG_CONST(uint8_t,   iGRegArg,  /*=*/iGReg,                   3);
-            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 4);
-            IEM_MC_LOCAL(RTGCPTR,       GCPtrEff);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
-            IEM_MC_FETCH_MEM_U16_DISP(uSel, pVCpu->iem.s.iEffSeg, GCPtrEff, 2);
-            IEM_MC_CALL_CIMPL_5(iemCImpl_load_SReg_Greg, uSel, offSeg, iSegRegArg, iGRegArg, enmEffOpSize);
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(5, 1);
-            IEM_MC_ARG(uint16_t,        uSel,                                    0);
-            IEM_MC_ARG(uint32_t,        offSeg,                                  1);
-            IEM_MC_ARG_CONST(uint8_t,   iSegRegArg,/*=*/iSegReg,                 2);
-            IEM_MC_ARG_CONST(uint8_t,   iGRegArg,  /*=*/iGReg,                   3);
-            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 4);
-            IEM_MC_LOCAL(RTGCPTR,       GCPtrEff);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U32(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
-            IEM_MC_FETCH_MEM_U16_DISP(uSel, pVCpu->iem.s.iEffSeg, GCPtrEff, 4);
-            IEM_MC_CALL_CIMPL_5(iemCImpl_load_SReg_Greg, uSel, offSeg, iSegRegArg, iGRegArg, enmEffOpSize);
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(5, 1);
-            IEM_MC_ARG(uint16_t,        uSel,                                    0);
-            IEM_MC_ARG(uint64_t,        offSeg,                                  1);
-            IEM_MC_ARG_CONST(uint8_t,   iSegRegArg,/*=*/iSegReg,                 2);
-            IEM_MC_ARG_CONST(uint8_t,   iGRegArg,  /*=*/iGReg,                   3);
-            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 4);
-            IEM_MC_LOCAL(RTGCPTR,       GCPtrEff);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            if (IEM_IS_GUEST_CPU_AMD(pVCpu)) /** @todo testcase: rev 3.15 of the AMD manuals claims it only loads a 32-bit greg. */
-                IEM_MC_FETCH_MEM_U32_SX_U64(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
-            else
-                IEM_MC_FETCH_MEM_U64(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
-            IEM_MC_FETCH_MEM_U16_DISP(uSel, pVCpu->iem.s.iEffSeg, GCPtrEff, 8);
-            IEM_MC_CALL_CIMPL_5(iemCImpl_load_SReg_Greg, uSel, offSeg, iSegRegArg, iGRegArg, enmEffOpSize);
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0x0f 0xb2. */
-FNIEMOP_DEF(iemOp_lss_Gv_Mp)
-{
-    IEMOP_MNEMONIC("lss Gv,Mp");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_SS, bRm);
-}
-
-
-/** Opcode 0x0f 0xb3. */
-FNIEMOP_DEF(iemOp_btr_Ev_Gv)
-{
-    IEMOP_MNEMONIC("btr Ev,Gv");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_btr);
-}
-
-
-/** Opcode 0x0f 0xb4. */
-FNIEMOP_DEF(iemOp_lfs_Gv_Mp)
-{
-    IEMOP_MNEMONIC("lfs Gv,Mp");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_FS, bRm);
-}
-
-
-/** Opcode 0x0f 0xb5. */
-FNIEMOP_DEF(iemOp_lgs_Gv_Mp)
-{
-    IEMOP_MNEMONIC("lgs Gv,Mp");
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_GS, bRm);
-}
-
-
-/** Opcode 0x0f 0xb6. */
-FNIEMOP_DEF(iemOp_movzx_Gv_Eb)
-{
-    IEMOP_MNEMONIC("movzx Gv,Eb");
-    IEMOP_HLP_MIN_386();
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_FETCH_GREG_U8_ZX_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_FETCH_GREG_U8_ZX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_FETCH_GREG_U8_ZX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_ZX_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_ZX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_ZX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xb7. */
-FNIEMOP_DEF(iemOp_movzx_Gv_Ew)
-{
-    IEMOP_MNEMONIC("movzx Gv,Ew");
-    IEMOP_HLP_MIN_386();
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Not entirely sure how the operand size prefix is handled here;
-     *        assuming it is ignored.  Would be nice to have a few tests for
-     *        this. */
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
-        {
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_FETCH_GREG_U16_ZX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_FETCH_GREG_U16_ZX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
-        {
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16_ZX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16_ZX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xb8. */
-FNIEMOP_STUB(iemOp_popcnt_Gv_Ev_jmpe);
-
-
-/** Opcode 0x0f 0xb9. */
-FNIEMOP_DEF(iemOp_Grp10)
-{
-    Log(("iemOp_Grp10 -> #UD\n"));
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0xba. */
-FNIEMOP_DEF(iemOp_Grp8)
-{
-    IEMOP_HLP_MIN_386();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
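-    /* Grp8 encodes BT/BTS/BTR/BTC with an immediate bit offset.  Unlike the
-       register-source forms, the immediate offset is taken modulo the operand
-       width, hence the u8Bit & 0x0f/0x1f/0x3f masking below. */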
-    PCIEMOPBINSIZES pImpl;
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: case 1: case 2: case 3:
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 4: pImpl = &g_iemAImpl_bt;  IEMOP_MNEMONIC("bt  Ev,Ib"); break;
-        case 5: pImpl = &g_iemAImpl_bts; IEMOP_MNEMONIC("bts Ev,Ib"); break;
-        case 6: pImpl = &g_iemAImpl_btr; IEMOP_MNEMONIC("btr Ev,Ib"); break;
-        case 7: pImpl = &g_iemAImpl_btc; IEMOP_MNEMONIC("btc Ev,Ib"); break;
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
-
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /* register destination. */
-        uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
-                IEM_MC_ARG_CONST(uint16_t,  u16Src, /*=*/ u8Bit & 0x0f, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
-                IEM_MC_ARG_CONST(uint32_t,  u32Src, /*=*/ u8Bit & 0x1f, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
-                IEM_MC_ARG_CONST(uint64_t,  u64Src, /*=*/ u8Bit & 0x3f, 1);
-                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
-
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /* memory destination. */
-
-        uint32_t fAccess;
-        if (pImpl->pfnLockedU16)
-            fAccess = IEM_ACCESS_DATA_RW;
-        else /* BT */
-            fAccess = IEM_ACCESS_DATA_R;
-
-        /** @todo test negative bit offsets! */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
-                IEM_MC_ARG(uint16_t,                u16Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
-                IEM_MC_ASSIGN(u16Src, u8Bit & 0x0f);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu16Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, fAccess);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
-                IEM_MC_ARG(uint32_t,                u32Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
-                IEM_MC_ASSIGN(u32Src, u8Bit & 0x1f);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu32Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, fAccess);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 1);
-                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
-                IEM_MC_ARG(uint64_t,                u64Src,                 1);
-                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
-                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
-                uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
-                IEM_MC_ASSIGN(u64Src, u8Bit & 0x3f);
-                if (pImpl->pfnLockedU16)
-                    IEMOP_HLP_DONE_DECODING();
-                else
-                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                IEM_MC_MEM_MAP(pu64Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, fAccess);
-
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xbb. */
-FNIEMOP_DEF(iemOp_btc_Ev_Gv)
-{
-    IEMOP_MNEMONIC("btc Ev,Gv");
-    IEMOP_HLP_MIN_386();
-    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_btc);
-}
-
-
-/** Opcode 0x0f 0xbc. */
-FNIEMOP_DEF(iemOp_bsf_Gv_Ev)
-{
-    IEMOP_MNEMONIC("bsf Gv,Ev");
-    IEMOP_HLP_MIN_386();
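-    /* If the source is zero, ZF is set and the destination is architecturally
-       undefined (AMD documents it as left unchanged). */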
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_bsf);
-}
-
-
-/** Opcode 0x0f 0xbd. */
-FNIEMOP_DEF(iemOp_bsr_Gv_Ev)
-{
-    IEMOP_MNEMONIC("bsr Gv,Ev");
-    IEMOP_HLP_MIN_386();
-    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF);
-    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_bsr);
-}
-
-
-/** Opcode 0x0f 0xbe. */
-FNIEMOP_DEF(iemOp_movsx_Gv_Eb)
-{
-    IEMOP_MNEMONIC("movsx Gv,Eb");
-    IEMOP_HLP_MIN_386();
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_FETCH_GREG_U8_SX_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_FETCH_GREG_U8_SX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 1);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_FETCH_GREG_U8_SX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint16_t, u16Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_SX_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_SX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_FETCH_MEM_U8_SX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xbf. */
-FNIEMOP_DEF(iemOp_movsx_Gv_Ew)
-{
-    IEMOP_MNEMONIC("movsx Gv,Ew");
-    IEMOP_HLP_MIN_386();
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /** @todo Not entirely sure how the operand size prefix is handled here;
-     *        assuming it is ignored.  Would be nice to have a few tests for
-     *        this. */
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
-        {
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_FETCH_GREG_U16_SX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_FETCH_GREG_U16_SX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    else
-    {
-        /*
-         * We're loading a register from memory.
-         */
-        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
-        {
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16_SX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-        else
-        {
-            IEM_MC_BEGIN(0, 2);
-            IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-            IEM_MC_FETCH_MEM_U16_SX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
-            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-        }
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xc0. */
-FNIEMOP_DEF(iemOp_xadd_Eb_Gb)
-{
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_HLP_MIN_486();
-    IEMOP_MNEMONIC("xadd Eb,Gb");
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        IEM_MC_BEGIN(3, 0);
-        IEM_MC_ARG(uint8_t *,  pu8Dst,  0);
-        IEM_MC_ARG(uint8_t *,  pu8Reg,  1);
-        IEM_MC_ARG(uint32_t *, pEFlags, 2);
-
-        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-        IEM_MC_REF_GREG_U8(pu8Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_REF_EFLAGS(pEFlags);
-        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u8, pu8Dst, pu8Reg, pEFlags);
-
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    else
-    {
-        /*
-         * We're accessing memory.
-         */
-        IEM_MC_BEGIN(3, 3);
-        IEM_MC_ARG(uint8_t *,   pu8Dst,          0);
-        IEM_MC_ARG(uint8_t *,   pu8Reg,          1);
-        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-        IEM_MC_LOCAL(uint8_t,  u8RegCopy);
-        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
-
-        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-        IEM_MC_FETCH_GREG_U8(u8RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-        IEM_MC_REF_LOCAL(pu8Reg, u8RegCopy);
-        IEM_MC_FETCH_EFLAGS(EFlags);
-        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-            IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u8, pu8Dst, pu8Reg, pEFlags);
-        else
-            IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u8_locked, pu8Dst, pu8Reg, pEFlags);
-
-        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
-        IEM_MC_COMMIT_EFLAGS(EFlags);
-        IEM_MC_STORE_GREG_U8(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u8RegCopy);
-        IEM_MC_ADVANCE_RIP();
-        IEM_MC_END();
-    }
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xc1. */
-FNIEMOP_DEF(iemOp_xadd_Ev_Gv)
-{
-    IEMOP_MNEMONIC("xadd Ev,Gv");
-    IEMOP_HLP_MIN_486();
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /*
-     * If rm is denoting a register, no more instruction bytes.
-     */
-    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint16_t *, pu16Dst,  0);
-                IEM_MC_ARG(uint16_t *, pu16Reg,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags,  2);
-
-                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U16(pu16Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u16, pu16Dst, pu16Reg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint32_t *, pu32Dst,  0);
-                IEM_MC_ARG(uint32_t *, pu32Reg,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags,  2);
-
-                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U32(pu32Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u32, pu32Dst, pu32Reg, pEFlags);
-
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Reg);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 0);
-                IEM_MC_ARG(uint64_t *, pu64Dst,  0);
-                IEM_MC_ARG(uint64_t *, pu64Reg,  1);
-                IEM_MC_ARG(uint32_t *, pEFlags,  2);
-
-                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-                IEM_MC_REF_GREG_U64(pu64Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_EFLAGS(pEFlags);
-                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u64, pu64Dst, pu64Reg, pEFlags);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-    else
-    {
-        /*
-         * We're accessing memory.
-         */
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_16BIT:
-                IEM_MC_BEGIN(3, 3);
-                IEM_MC_ARG(uint16_t *,  pu16Dst,         0);
-                IEM_MC_ARG(uint16_t *,  pu16Reg,         1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(uint16_t,  u16RegCopy);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_GREG_U16(u16RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_LOCAL(pu16Reg, u16RegCopy);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u16, pu16Dst, pu16Reg, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u16_locked, pu16Dst, pu16Reg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16RegCopy);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(3, 3);
-                IEM_MC_ARG(uint32_t *,  pu32Dst,         0);
-                IEM_MC_ARG(uint32_t *,  pu32Reg,         1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(uint32_t,  u32RegCopy);
-                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_GREG_U32(u32RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_LOCAL(pu32Reg, u32RegCopy);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u32, pu32Dst, pu32Reg, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u32_locked, pu32Dst, pu32Reg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32RegCopy);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(3, 3);
-                IEM_MC_ARG(uint64_t *,  pu64Dst,         0);
-                IEM_MC_ARG(uint64_t *,  pu64Reg,         1);
-                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
-                IEM_MC_LOCAL(uint64_t,  u64RegCopy);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-                IEM_MC_FETCH_GREG_U64(u64RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_REF_LOCAL(pu64Reg, u64RegCopy);
-                IEM_MC_FETCH_EFLAGS(EFlags);
-                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u64, pu64Dst, pu64Reg, pEFlags);
-                else
-                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u64_locked, pu64Dst, pu64Reg, pEFlags);
-
-                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
-                IEM_MC_COMMIT_EFLAGS(EFlags);
-                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64RegCopy);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                return VINF_SUCCESS;
-
-            IEM_NOT_REACHED_DEFAULT_CASE_RET();
-        }
-    }
-}
-
-
-/** Opcode 0x0f 0xc2. */
-FNIEMOP_STUB(iemOp_cmpps_Vps_Wps_Ib__cmppd_Vpd_Wpd_Ib__cmpss_Vss_Wss_Ib__cmpsd_Vsd_Wsd_Ib);
-
-
-/** Opcode 0x0f 0xc3. */
-FNIEMOP_DEF(iemOp_movnti_My_Gy)
-{
-    IEMOP_MNEMONIC("movnti My,Gy");
-
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-
-    /* Only the register -> memory form makes sense, assuming #UD for the other form. */
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-    {
-        switch (pVCpu->iem.s.enmEffOpSize)
-        {
-            case IEMMODE_32BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
-                    return IEMOP_RAISE_INVALID_OPCODE();
-
-                IEM_MC_FETCH_GREG_U32(u32Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u32Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_64BIT:
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
-                    return IEMOP_RAISE_INVALID_OPCODE();
-
-                IEM_MC_FETCH_GREG_U64(u64Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u64Value);
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case IEMMODE_16BIT:
-                /** @todo check this form.   */
-                return IEMOP_RAISE_INVALID_OPCODE();
-        }
-    }
-    else
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode 0x0f 0xc4. */
-FNIEMOP_STUB(iemOp_pinsrw_Pq_Ry_Mw_Ib__pinsrw_Vdq_Ry_Mw_Ib);
-
-/** Opcode 0x0f 0xc5. */
-FNIEMOP_STUB(iemOp_pextrw_Gd_Nq_Ib__pextrw_Gd_Udq_Ib);
-
-/** Opcode 0x0f 0xc6. */
-FNIEMOP_STUB(iemOp_shufps_Vps_Wps_Ib__shufdp_Vpd_Wpd_Ib);
-
-
-/** Opcode 0x0f 0xc7 !11/1. */
-FNIEMOP_DEF_1(iemOp_Grp9_cmpxchg8b_Mq, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("cmpxchg8b Mq");
-
-    IEM_MC_BEGIN(4, 3);
-    IEM_MC_ARG(uint64_t *, pu64MemDst,     0);
-    IEM_MC_ARG(PRTUINT64U, pu64EaxEdx,     1);
-    IEM_MC_ARG(PRTUINT64U, pu64EbxEcx,     2);
-    IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 3);
-    IEM_MC_LOCAL(RTUINT64U, u64EaxEdx);
-    IEM_MC_LOCAL(RTUINT64U, u64EbxEcx);
-    IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
-
-    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
-    IEMOP_HLP_DONE_DECODING();
-    IEM_MC_MEM_MAP(pu64MemDst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
-
-    IEM_MC_FETCH_GREG_U32(u64EaxEdx.s.Lo, X86_GREG_xAX);
-    IEM_MC_FETCH_GREG_U32(u64EaxEdx.s.Hi, X86_GREG_xDX);
-    IEM_MC_REF_LOCAL(pu64EaxEdx, u64EaxEdx);
-
-    IEM_MC_FETCH_GREG_U32(u64EbxEcx.s.Lo, X86_GREG_xBX);
-    IEM_MC_FETCH_GREG_U32(u64EbxEcx.s.Hi, X86_GREG_xCX);
-    IEM_MC_REF_LOCAL(pu64EbxEcx, u64EbxEcx);
-
-    IEM_MC_FETCH_EFLAGS(EFlags);
-    if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
-        IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg8b, pu64MemDst, pu64EaxEdx, pu64EbxEcx, pEFlags);
-    else
-        IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg8b_locked, pu64MemDst, pu64EaxEdx, pu64EbxEcx, pEFlags);
-
-    IEM_MC_MEM_COMMIT_AND_UNMAP(pu64MemDst, IEM_ACCESS_DATA_RW);
-    IEM_MC_COMMIT_EFLAGS(EFlags);
-    IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_ZF)
-        /** @todo Testcase: Check effect of cmpxchg8b on bits 63:32 in rax and rdx. */
-        IEM_MC_STORE_GREG_U32(X86_GREG_xAX, u64EaxEdx.s.Lo);
-        IEM_MC_STORE_GREG_U32(X86_GREG_xDX, u64EaxEdx.s.Hi);
-    IEM_MC_ENDIF();
-    IEM_MC_ADVANCE_RIP();
-
-    IEM_MC_END();
-    return VINF_SUCCESS;
-}
-
-
-/** Opcode REX.W 0x0f 0xc7 !11/1. */
-FNIEMOP_DEF_1(iemOp_Grp9_cmpxchg16b_Mdq, uint8_t, bRm)
-{
-    IEMOP_MNEMONIC("cmpxchg16b Mdq");
-    if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fMovCmpXchg16b)
-    {
-        RT_NOREF(bRm);
-        IEMOP_BITCH_ABOUT_STUB();
-        return VERR_IEM_INSTR_NOT_IMPLEMENTED;
-    }
-    Log(("cmpxchg16b -> #UD\n"));
-    return IEMOP_RAISE_INVALID_OPCODE();
-}
-
-
-/** Opcode 0x0f 0xc7 11/6. */
-FNIEMOP_UD_STUB_1(iemOp_Grp9_rdrand_Rv, uint8_t, bRm);
-
-/** Opcode 0x0f 0xc7 !11/6. */
-FNIEMOP_UD_STUB_1(iemOp_Grp9_vmptrld_Mq, uint8_t, bRm);
-
-/** Opcode 0x66 0x0f 0xc7 !11/6. */
-FNIEMOP_UD_STUB_1(iemOp_Grp9_vmclear_Mq, uint8_t, bRm);
-
-/** Opcode 0xf3 0x0f 0xc7 !11/6. */
-FNIEMOP_UD_STUB_1(iemOp_Grp9_vmxon_Mq, uint8_t, bRm);
-
-/** Opcode [0xf3] 0x0f 0xc7 !11/7. */
-FNIEMOP_UD_STUB_1(iemOp_Grp9_vmptrst_Mq, uint8_t, bRm);
-
-
-/** Opcode 0x0f 0xc7. */
-FNIEMOP_DEF(iemOp_Grp9)
-{
-    /** @todo Testcase: Check mixing 0x66 and 0xf3. Check the effect of 0xf2. */
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
-    {
-        case 0: case 2: case 3: case 4: case 5:
-            return IEMOP_RAISE_INVALID_OPCODE();
-        case 1:
-            /** @todo Testcase: Check prefix effects on cmpxchg8b/16b. */
-            if (   (bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT)
-                || (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ))) /** @todo Testcase: AMD seems to express a different idea here wrt prefixes. */
-                return IEMOP_RAISE_INVALID_OPCODE();
-            if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
-                return FNIEMOP_CALL_1(iemOp_Grp9_cmpxchg16b_Mdq, bRm);
-            return FNIEMOP_CALL_1(iemOp_Grp9_cmpxchg8b_Mq, bRm);
-        case 6:
-            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-                return FNIEMOP_CALL_1(iemOp_Grp9_rdrand_Rv, bRm);
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ))
-            {
-                case 0:
-                    return FNIEMOP_CALL_1(iemOp_Grp9_vmptrld_Mq, bRm);
-                case IEM_OP_PRF_SIZE_OP:
-                    return FNIEMOP_CALL_1(iemOp_Grp9_vmclear_Mq, bRm);
-                case IEM_OP_PRF_REPZ:
-                    return FNIEMOP_CALL_1(iemOp_Grp9_vmxon_Mq, bRm);
-                default:
-                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        case 7:
-            switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ))
-            {
-                case 0:
-                case IEM_OP_PRF_REPZ:
-                    return FNIEMOP_CALL_1(iemOp_Grp9_vmptrst_Mq, bRm);
-                default:
-                    return IEMOP_RAISE_INVALID_OPCODE();
-            }
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/**
- * Common 'bswap register' helper.
- */
-FNIEMOP_DEF_1(iemOpCommonBswapGReg, uint8_t, iReg)
-{
-    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
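-    /* Note: BSWAP with a 16-bit operand is architecturally undefined, hence
-       the dedicated u16 worker rather than reusing the 32-bit byte swap. */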
-    switch (pVCpu->iem.s.enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(1, 0);
-            IEM_MC_ARG(uint32_t *,  pu32Dst, 0);
-            IEM_MC_REF_GREG_U32(pu32Dst, iReg);     /* Don't clear the high dword! */
-            IEM_MC_CALL_VOID_AIMPL_1(iemAImpl_bswap_u16, pu32Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(1, 0);
-            IEM_MC_ARG(uint32_t *,  pu32Dst, 0);
-            IEM_MC_REF_GREG_U32(pu32Dst, iReg);
-            IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
-            IEM_MC_CALL_VOID_AIMPL_1(iemAImpl_bswap_u32, pu32Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(1, 0);
-            IEM_MC_ARG(uint64_t *,  pu64Dst, 0);
-            IEM_MC_REF_GREG_U64(pu64Dst, iReg);
-            IEM_MC_CALL_VOID_AIMPL_1(iemAImpl_bswap_u64, pu64Dst);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        IEM_NOT_REACHED_DEFAULT_CASE_RET();
-    }
-}
-
-
-/** Opcode 0x0f 0xc8. */
-FNIEMOP_DEF(iemOp_bswap_rAX_r8)
-{
-    IEMOP_MNEMONIC("bswap rAX/r8");
-    /* Note! The Intel manuals state that R8-R15 can be accessed by using a
-             REX.X prefix, but REX.B appears to be the correct one.  For a
-             parallel case, see iemOp_mov_AL_Ib and iemOp_mov_eAX_Iv. */
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xAX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xc9. */
-FNIEMOP_DEF(iemOp_bswap_rCX_r9)
-{
-    IEMOP_MNEMONIC("bswap rCX/r9");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xCX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xca. */
-FNIEMOP_DEF(iemOp_bswap_rDX_r10)
-{
-    IEMOP_MNEMONIC("bswap rDX/r9");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xDX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xcb. */
-FNIEMOP_DEF(iemOp_bswap_rBX_r11)
-{
-    IEMOP_MNEMONIC("bswap rBX/r9");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xBX | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xcc. */
-FNIEMOP_DEF(iemOp_bswap_rSP_r12)
-{
-    IEMOP_MNEMONIC("bswap rSP/r12");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xSP | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xcd. */
-FNIEMOP_DEF(iemOp_bswap_rBP_r13)
-{
-    IEMOP_MNEMONIC("bswap rBP/r13");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xBP | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xce. */
-FNIEMOP_DEF(iemOp_bswap_rSI_r14)
-{
-    IEMOP_MNEMONIC("bswap rSI/r14");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xSI | pVCpu->iem.s.uRexB);
-}
-
-
-/** Opcode 0x0f 0xcf. */
-FNIEMOP_DEF(iemOp_bswap_rDI_r15)
-{
-    IEMOP_MNEMONIC("bswap rDI/r15");
-    IEMOP_HLP_MIN_486();
-    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xDI | pVCpu->iem.s.uRexB);
-}
-
-
-
-/** Opcode 0x0f 0xd0. */
-FNIEMOP_STUB(iemOp_addsubpd_Vpd_Wpd__addsubps_Vps_Wps);
-/** Opcode 0x0f 0xd1. */
-FNIEMOP_STUB(iemOp_psrlw_Pp_Qp__psrlw_Vdp_Wdq);
-/** Opcode 0x0f 0xd2. */
-FNIEMOP_STUB(iemOp_psrld_Pq_Qq__psrld_Vdq_Wdq);
-/** Opcode 0x0f 0xd3. */
-FNIEMOP_STUB(iemOp_psrlq_Pq_Qq__psrlq_Vdq_Wdq);
-/** Opcode 0x0f 0xd4. */
-FNIEMOP_STUB(iemOp_paddq_Pq_Qq__paddq_Vdq_Wdq);
-/** Opcode 0x0f 0xd5. */
-FNIEMOP_STUB(iemOp_pmulq_Pq_Qq__pmullw_Vdq_Wdq);
-/** Opcode 0x0f 0xd6. */
-FNIEMOP_STUB(iemOp_movq_Wq_Vq__movq2dq_Vdq_Nq__movdq2q_Pq_Uq);
-
-
-/** Opcode 0x0f 0xd7. */
-FNIEMOP_DEF(iemOp_pmovmskb_Gd_Nq__pmovmskb_Gd_Udq)
-{
-    /* Docs say register only. */
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT)) /** @todo test that this is registers only. */
-        return IEMOP_RAISE_INVALID_OPCODE();
-
-    /* Note! Taking the lazy approach here wrt the high 32-bits of the GREG. */
-    /** @todo testcase: Check that the instruction implicitly clears the high
-     *        bits in 64-bit mode.  REX.W only becomes necessary when VLMAX > 256
-     *        and opcode modifications are made to work with the whole width (not
-     *        just 128 bits). */
-    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-    {
-        case IEM_OP_PRF_SIZE_OP: /* SSE */
-            IEMOP_MNEMONIC("pmovmskb Gd,Nq");
-            IEMOP_HLP_DECODED_NL_2(OP_PMOVMSKB, IEMOPFORM_RM_REG, OP_PARM_Gd, OP_PARM_Vdq, DISOPTYPE_SSE | DISOPTYPE_HARMLESS);
-            IEM_MC_BEGIN(2, 0);
-            IEM_MC_ARG(uint64_t *,           pDst, 0);
-            IEM_MC_ARG(uint128_t const *,    pSrc, 1);
-            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-            IEM_MC_PREPARE_SSE_USAGE();
-            IEM_MC_REF_GREG_U64(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-            IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
-            IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_pmovmskb_u128, pDst, pSrc);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        case 0: /* MMX */
-            IEMOP_MNEMONIC("pmovmskb Gd,Udq");
-            IEMOP_HLP_DECODED_NL_2(OP_PMOVMSKB, IEMOPFORM_RM_REG, OP_PARM_Gd, OP_PARM_Vdq, DISOPTYPE_MMX | DISOPTYPE_HARMLESS);
-            IEM_MC_BEGIN(2, 0);
-            IEM_MC_ARG(uint64_t *,          pDst, 0);
-            IEM_MC_ARG(uint64_t const *,    pSrc, 1);
-            IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT();
-            IEM_MC_PREPARE_FPU_USAGE();
-            IEM_MC_REF_GREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-            IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
-            IEM_MC_CALL_MMX_AIMPL_2(iemAImpl_pmovmskb_u64, pDst, pSrc);
-            IEM_MC_ADVANCE_RIP();
-            IEM_MC_END();
-            return VINF_SUCCESS;
-
-        default:
-            return IEMOP_RAISE_INVALID_OPCODE();
-    }
-}
-
-
-/** Opcode 0x0f 0xd8. */
-FNIEMOP_STUB(iemOp_psubusb_Pq_Qq__psubusb_Vdq_Wdq);
-/** Opcode 0x0f 0xd9. */
-FNIEMOP_STUB(iemOp_psubusw_Pq_Qq__psubusw_Vdq_Wdq);
-/** Opcode 0x0f 0xda. */
-FNIEMOP_STUB(iemOp_pminub_Pq_Qq__pminub_Vdq_Wdq);
-/** Opcode 0x0f 0xdb. */
-FNIEMOP_STUB(iemOp_pand_Pq_Qq__pand_Vdq_Wdq);
-/** Opcode 0x0f 0xdc. */
-FNIEMOP_STUB(iemOp_paddusb_Pq_Qq__paddusb_Vdq_Wdq);
-/** Opcode 0x0f 0xdd. */
-FNIEMOP_STUB(iemOp_paddusw_Pq_Qq__paddusw_Vdq_Wdq);
-/** Opcode 0x0f 0xde. */
-FNIEMOP_STUB(iemOp_pmaxub_Pq_Qq__pamxub_Vdq_Wdq);
-/** Opcode 0x0f 0xdf. */
-FNIEMOP_STUB(iemOp_pandn_Pq_Qq__pandn_Vdq_Wdq);
-/** Opcode 0x0f 0xe0. */
-FNIEMOP_STUB(iemOp_pavgb_Pq_Qq__pavgb_Vdq_Wdq);
-/** Opcode 0x0f 0xe1. */
-FNIEMOP_STUB(iemOp_psraw_Pq_Qq__psraw_Vdq_Wdq);
-/** Opcode 0x0f 0xe2. */
-FNIEMOP_STUB(iemOp_psrad_Pq_Qq__psrad_Vdq_Wdq);
-/** Opcode 0x0f 0xe3. */
-FNIEMOP_STUB(iemOp_pavgw_Pq_Qq__pavgw_Vdq_Wdq);
-/** Opcode 0x0f 0xe4. */
-FNIEMOP_STUB(iemOp_pmulhuw_Pq_Qq__pmulhuw_Vdq_Wdq);
-/** Opcode 0x0f 0xe5. */
-FNIEMOP_STUB(iemOp_pmulhw_Pq_Qq__pmulhw_Vdq_Wdq);
-/** Opcode 0x0f 0xe6. */
-FNIEMOP_STUB(iemOp_cvttpd2dq_Vdq_Wdp__cvtdq2pd_Vdq_Wpd__cvtpd2dq_Vdq_Wpd);
-
-
-/** Opcode 0x0f 0xe7. */
-FNIEMOP_DEF(iemOp_movntq_Mq_Pq__movntdq_Mdq_Vdq)
-{
-    IEMOP_MNEMONIC(!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_OP) ? "movntq mr,r" : "movntdq mr,r");
-    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
-    {
-        /*
-         * Register, memory.
-         */
-/** @todo check when the REPNZ/Z bits kick in. Same as lock, probably... */
-        switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
-        {
-
-            case IEM_OP_PRF_SIZE_OP: /* SSE */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint128_t,                 uSrc);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
-
-                IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
-                IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
-
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            case 0: /* MMX */
-                IEM_MC_BEGIN(0, 2);
-                IEM_MC_LOCAL(uint64_t,                  uSrc);
-                IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
-
-                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
-                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
-                IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
-                IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
-
-                IEM_MC_FETCH_MREG_U64(uSrc, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
-                IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+/* Instruction group definitions: */
 
-                IEM_MC_ADVANCE_RIP();
-                IEM_MC_END();
-                break;
-
-            default:
-                return IEMOP_RAISE_INVALID_OPCODE();
-        }
-    }
-    /* The register, register encoding is invalid. */
-    else
-        return IEMOP_RAISE_INVALID_OPCODE();
-    return VINF_SUCCESS;
-}
+/** @defgroup og_gen    General
+ * @{ */
+ /** @defgroup og_gen_arith     Arithmetic
+  * @{  */
+  /** @defgroup og_gen_arith_bin    Binary numbers */
+  /** @defgroup og_gen_arith_dec    Decimal numbers */
+ /** @} */
+/** @} */
 
+/** @defgroup og_stack Stack
+ * @{ */
+ /** @defgroup og_stack_sreg    Segment registers */
+/** @} */
 
-/** Opcode 0x0f 0xe8. */
-FNIEMOP_STUB(iemOp_psubsb_Pq_Qq__psubsb_Vdq_Wdq);
-/** Opcode 0x0f 0xe9. */
-FNIEMOP_STUB(iemOp_psubsw_Pq_Qq__psubsw_Vdq_Wdq);
-/** Opcode 0x0f 0xea. */
-FNIEMOP_STUB(iemOp_pminsw_Pq_Qq__pminsw_Vdq_Wdq);
-/** Opcode 0x0f 0xeb. */
-FNIEMOP_STUB(iemOp_por_Pq_Qq__por_Vdq_Wdq);
-/** Opcode 0x0f 0xec. */
-FNIEMOP_STUB(iemOp_paddsb_Pq_Qq__paddsb_Vdq_Wdq);
-/** Opcode 0x0f 0xed. */
-FNIEMOP_STUB(iemOp_paddsw_Pq_Qq__paddsw_Vdq_Wdq);
-/** Opcode 0x0f 0xee. */
-FNIEMOP_STUB(iemOp_pmaxsw_Pq_Qq__pmaxsw_Vdq_Wdq);
-
-
-/** Opcode 0x0f 0xef. */
-FNIEMOP_DEF(iemOp_pxor_Pq_Qq__pxor_Vdq_Wdq)
-{
-    IEMOP_MNEMONIC("pxor");
-    return FNIEMOP_CALL_1(iemOpCommonMmxSse2_FullFull_To_Full, &g_iemAImpl_pxor);
-}
-
-
-/** Opcode 0x0f 0xf0. */
-FNIEMOP_STUB(iemOp_lddqu_Vdq_Mdq);
-/** Opcode 0x0f 0xf1. */
-FNIEMOP_STUB(iemOp_psllw_Pq_Qq__pslw_Vdq_Wdq);
-/** Opcode 0x0f 0xf2. */
-FNIEMOP_STUB(iemOp_psld_Pq_Qq__pslld_Vdq_Wdq);
-/** Opcode 0x0f 0xf3. */
-FNIEMOP_STUB(iemOp_psllq_Pq_Qq__pslq_Vdq_Wdq);
-/** Opcode 0x0f 0xf4. */
-FNIEMOP_STUB(iemOp_pmuludq_Pq_Qq__pmuludq_Vdq_Wdq);
-/** Opcode 0x0f 0xf5. */
-FNIEMOP_STUB(iemOp_pmaddwd_Pq_Qq__pmaddwd_Vdq_Wdq);
-/** Opcode 0x0f 0xf6. */
-FNIEMOP_STUB(iemOp_psadbw_Pq_Qq__psadbw_Vdq_Wdq);
-/** Opcode 0x0f 0xf7. */
-FNIEMOP_STUB(iemOp_maskmovq_Pq_Nq__maskmovdqu_Vdq_Udq);
-/** Opcode 0x0f 0xf8. */
-FNIEMOP_STUB(iemOp_psubb_Pq_Qq_psubb_Vdq_Wdq); //NEXT
-/** Opcode 0x0f 0xf9. */
-FNIEMOP_STUB(iemOp_psubw_Pq_Qq__psubw_Vdq_Wdq);
-/** Opcode 0x0f 0xfa. */
-FNIEMOP_STUB(iemOp_psubd_Pq_Qq__psubd_Vdq_Wdq);
-/** Opcode 0x0f 0xfb. */
-FNIEMOP_STUB(iemOp_psubq_Pq_Qq__psbuq_Vdq_Wdq);
-/** Opcode 0x0f 0xfc. */
-FNIEMOP_STUB(iemOp_paddb_Pq_Qq__paddb_Vdq_Wdq);
-/** Opcode 0x0f 0xfd. */
-FNIEMOP_STUB(iemOp_paddw_Pq_Qq__paddw_Vdq_Wdq);
-/** Opcode 0x0f 0xfe. */
-FNIEMOP_STUB(iemOp_paddd_Pq_Qq__paddd_Vdq_Wdq);
-
-
-IEM_STATIC const PFNIEMOP g_apfnTwoByteMap[256] =
-{
-    /* 0x00 */  iemOp_Grp6,
-    /* 0x01 */  iemOp_Grp7,
-    /* 0x02 */  iemOp_lar_Gv_Ew,
-    /* 0x03 */  iemOp_lsl_Gv_Ew,
-    /* 0x04 */  iemOp_Invalid,
-    /* 0x05 */  iemOp_syscall,
-    /* 0x06 */  iemOp_clts,
-    /* 0x07 */  iemOp_sysret,
-    /* 0x08 */  iemOp_invd,
-    /* 0x09 */  iemOp_wbinvd,
-    /* 0x0a */  iemOp_Invalid,
-    /* 0x0b */  iemOp_ud2,
-    /* 0x0c */  iemOp_Invalid,
-    /* 0x0d */  iemOp_nop_Ev_GrpP,
-    /* 0x0e */  iemOp_femms,
-    /* 0x0f */  iemOp_3Dnow,
-    /* 0x10 */  iemOp_movups_Vps_Wps__movupd_Vpd_Wpd__movss_Vss_Wss__movsd_Vsd_Wsd,
-    /* 0x11 */  iemOp_movups_Wps_Vps__movupd_Wpd_Vpd__movss_Wss_Vss__movsd_Vsd_Wsd,
-    /* 0x12 */  iemOp_movlps_Vq_Mq__movhlps_Vq_Uq__movlpd_Vq_Mq__movsldup_Vq_Wq__movddup_Vq_Wq,
-    /* 0x13 */  iemOp_movlps_Mq_Vq__movlpd_Mq_Vq,
-    /* 0x14 */  iemOp_unpckhlps_Vps_Wq__unpcklpd_Vpd_Wq,
-    /* 0x15 */  iemOp_unpckhps_Vps_Wq__unpckhpd_Vpd_Wq,
-    /* 0x16 */  iemOp_movhps_Vq_Mq__movlhps_Vq_Uq__movhpd_Vq_Mq__movshdup_Vq_Wq,
-    /* 0x17 */  iemOp_movhps_Mq_Vq__movhpd_Mq_Vq,
-    /* 0x18 */  iemOp_prefetch_Grp16,
-    /* 0x19 */  iemOp_nop_Ev,
-    /* 0x1a */  iemOp_nop_Ev,
-    /* 0x1b */  iemOp_nop_Ev,
-    /* 0x1c */  iemOp_nop_Ev,
-    /* 0x1d */  iemOp_nop_Ev,
-    /* 0x1e */  iemOp_nop_Ev,
-    /* 0x1f */  iemOp_nop_Ev,
-    /* 0x20 */  iemOp_mov_Rd_Cd,
-    /* 0x21 */  iemOp_mov_Rd_Dd,
-    /* 0x22 */  iemOp_mov_Cd_Rd,
-    /* 0x23 */  iemOp_mov_Dd_Rd,
-    /* 0x24 */  iemOp_mov_Rd_Td,
-    /* 0x25 */  iemOp_Invalid,
-    /* 0x26 */  iemOp_mov_Td_Rd,
-    /* 0x27 */  iemOp_Invalid,
-    /* 0x28 */  iemOp_movaps_Vps_Wps__movapd_Vpd_Wpd,
-    /* 0x29 */  iemOp_movaps_Wps_Vps__movapd_Wpd_Vpd,
-    /* 0x2a */  iemOp_cvtpi2ps_Vps_Qpi__cvtpi2pd_Vpd_Qpi__cvtsi2ss_Vss_Ey__cvtsi2sd_Vsd_Ey,
-    /* 0x2b */  iemOp_movntps_Mps_Vps__movntpd_Mpd_Vpd,
-    /* 0x2c */  iemOp_cvttps2pi_Ppi_Wps__cvttpd2pi_Ppi_Wpd__cvttss2si_Gy_Wss__cvttsd2si_Yu_Wsd,
-    /* 0x2d */  iemOp_cvtps2pi_Ppi_Wps__cvtpd2pi_QpiWpd__cvtss2si_Gy_Wss__cvtsd2si_Gy_Wsd,
-    /* 0x2e */  iemOp_ucomiss_Vss_Wss__ucomisd_Vsd_Wsd,
-    /* 0x2f */  iemOp_comiss_Vss_Wss__comisd_Vsd_Wsd,
-    /* 0x30 */  iemOp_wrmsr,
-    /* 0x31 */  iemOp_rdtsc,
-    /* 0x32 */  iemOp_rdmsr,
-    /* 0x33 */  iemOp_rdpmc,
-    /* 0x34 */  iemOp_sysenter,
-    /* 0x35 */  iemOp_sysexit,
-    /* 0x36 */  iemOp_Invalid,
-    /* 0x37 */  iemOp_getsec,
-    /* 0x38 */  iemOp_3byte_Esc_A4,
-    /* 0x39 */  iemOp_Invalid,
-    /* 0x3a */  iemOp_3byte_Esc_A5,
-    /* 0x3b */  iemOp_Invalid,
-    /* 0x3c */  iemOp_Invalid,
-    /* 0x3d */  iemOp_Invalid,
-    /* 0x3e */  iemOp_Invalid,
-    /* 0x3f */  iemOp_Invalid,
-    /* 0x40 */  iemOp_cmovo_Gv_Ev,
-    /* 0x41 */  iemOp_cmovno_Gv_Ev,
-    /* 0x42 */  iemOp_cmovc_Gv_Ev,
-    /* 0x43 */  iemOp_cmovnc_Gv_Ev,
-    /* 0x44 */  iemOp_cmove_Gv_Ev,
-    /* 0x45 */  iemOp_cmovne_Gv_Ev,
-    /* 0x46 */  iemOp_cmovbe_Gv_Ev,
-    /* 0x47 */  iemOp_cmovnbe_Gv_Ev,
-    /* 0x48 */  iemOp_cmovs_Gv_Ev,
-    /* 0x49 */  iemOp_cmovns_Gv_Ev,
-    /* 0x4a */  iemOp_cmovp_Gv_Ev,
-    /* 0x4b */  iemOp_cmovnp_Gv_Ev,
-    /* 0x4c */  iemOp_cmovl_Gv_Ev,
-    /* 0x4d */  iemOp_cmovnl_Gv_Ev,
-    /* 0x4e */  iemOp_cmovle_Gv_Ev,
-    /* 0x4f */  iemOp_cmovnle_Gv_Ev,
-    /* 0x50 */  iemOp_movmskps_Gy_Ups__movmskpd_Gy_Upd,
-    /* 0x51 */  iemOp_sqrtps_Wps_Vps__sqrtpd_Wpd_Vpd__sqrtss_Vss_Wss__sqrtsd_Vsd_Wsd,
-    /* 0x52 */  iemOp_rsqrtps_Wps_Vps__rsqrtss_Vss_Wss,
-    /* 0x53 */  iemOp_rcpps_Wps_Vps__rcpss_Vs_Wss,
-    /* 0x54 */  iemOp_andps_Vps_Wps__andpd_Wpd_Vpd,
-    /* 0x55 */  iemOp_andnps_Vps_Wps__andnpd_Wpd_Vpd,
-    /* 0x56 */  iemOp_orps_Wpd_Vpd__orpd_Wpd_Vpd,
-    /* 0x57 */  iemOp_xorps_Vps_Wps__xorpd_Wpd_Vpd,
-    /* 0x58 */  iemOp_addps_Vps_Wps__addpd_Vpd_Wpd__addss_Vss_Wss__addsd_Vsd_Wsd,
-    /* 0x59 */  iemOp_mulps_Vps_Wps__mulpd_Vpd_Wpd__mulss_Vss__Wss__mulsd_Vsd_Wsd,
-    /* 0x5a */  iemOp_cvtps2pd_Vpd_Wps__cvtpd2ps_Vps_Wpd__cvtss2sd_Vsd_Wss__cvtsd2ss_Vss_Wsd,
-    /* 0x5b */  iemOp_cvtdq2ps_Vps_Wdq__cvtps2dq_Vdq_Wps__cvtps2dq_Vdq_Wps,
-    /* 0x5c */  iemOp_subps_Vps_Wps__subpd_Vps_Wdp__subss_Vss_Wss__subsd_Vsd_Wsd,
-    /* 0x5d */  iemOp_minps_Vps_Wps__minpd_Vpd_Wpd__minss_Vss_Wss__minsd_Vsd_Wsd,
-    /* 0x5e */  iemOp_divps_Vps_Wps__divpd_Vpd_Wpd__divss_Vss_Wss__divsd_Vsd_Wsd,
-    /* 0x5f */  iemOp_maxps_Vps_Wps__maxpd_Vpd_Wpd__maxss_Vss_Wss__maxsd_Vsd_Wsd,
-    /* 0x60 */  iemOp_punpcklbw_Pq_Qd__punpcklbw_Vdq_Wdq,
-    /* 0x61 */  iemOp_punpcklwd_Pq_Qd__punpcklwd_Vdq_Wdq,
-    /* 0x62 */  iemOp_punpckldq_Pq_Qd__punpckldq_Vdq_Wdq,
-    /* 0x63 */  iemOp_packsswb_Pq_Qq__packsswb_Vdq_Wdq,
-    /* 0x64 */  iemOp_pcmpgtb_Pq_Qq__pcmpgtb_Vdq_Wdq,
-    /* 0x65 */  iemOp_pcmpgtw_Pq_Qq__pcmpgtw_Vdq_Wdq,
-    /* 0x66 */  iemOp_pcmpgtd_Pq_Qq__pcmpgtd_Vdq_Wdq,
-    /* 0x67 */  iemOp_packuswb_Pq_Qq__packuswb_Vdq_Wdq,
-    /* 0x68 */  iemOp_punpckhbw_Pq_Qq__punpckhbw_Vdq_Wdq,
-    /* 0x69 */  iemOp_punpckhwd_Pq_Qd__punpckhwd_Vdq_Wdq,
-    /* 0x6a */  iemOp_punpckhdq_Pq_Qd__punpckhdq_Vdq_Wdq,
-    /* 0x6b */  iemOp_packssdw_Pq_Qd__packssdq_Vdq_Wdq,
-    /* 0x6c */  iemOp_punpcklqdq_Vdq_Wdq,
-    /* 0x6d */  iemOp_punpckhqdq_Vdq_Wdq,
-    /* 0x6e */  iemOp_movd_q_Pd_Ey__movd_q_Vy_Ey,
-    /* 0x6f */  iemOp_movq_Pq_Qq__movdqa_Vdq_Wdq__movdqu_Vdq_Wdq,
-    /* 0x70 */  iemOp_pshufw_Pq_Qq_Ib__pshufd_Vdq_Wdq_Ib__pshufhw_Vdq_Wdq_Ib__pshuflq_Vdq_Wdq_Ib,
-    /* 0x71 */  iemOp_Grp12,
-    /* 0x72 */  iemOp_Grp13,
-    /* 0x73 */  iemOp_Grp14,
-    /* 0x74 */  iemOp_pcmpeqb_Pq_Qq__pcmpeqb_Vdq_Wdq,
-    /* 0x75 */  iemOp_pcmpeqw_Pq_Qq__pcmpeqw_Vdq_Wdq,
-    /* 0x76 */  iemOp_pcmped_Pq_Qq__pcmpeqd_Vdq_Wdq,
-    /* 0x77 */  iemOp_emms,
-    /* 0x78 */  iemOp_vmread_AmdGrp17,
-    /* 0x79 */  iemOp_vmwrite,
-    /* 0x7a */  iemOp_Invalid,
-    /* 0x7b */  iemOp_Invalid,
-    /* 0x7c */  iemOp_haddpd_Vdp_Wpd__haddps_Vps_Wps,
-    /* 0x7d */  iemOp_hsubpd_Vpd_Wpd__hsubps_Vps_Wps,
-    /* 0x7e */  iemOp_movd_q_Ey_Pd__movd_q_Ey_Vy__movq_Vq_Wq,
-    /* 0x7f */  iemOp_movq_Qq_Pq__movq_movdqa_Wdq_Vdq__movdqu_Wdq_Vdq,
-    /* 0x80 */  iemOp_jo_Jv,
-    /* 0x81 */  iemOp_jno_Jv,
-    /* 0x82 */  iemOp_jc_Jv,
-    /* 0x83 */  iemOp_jnc_Jv,
-    /* 0x84 */  iemOp_je_Jv,
-    /* 0x85 */  iemOp_jne_Jv,
-    /* 0x86 */  iemOp_jbe_Jv,
-    /* 0x87 */  iemOp_jnbe_Jv,
-    /* 0x88 */  iemOp_js_Jv,
-    /* 0x89 */  iemOp_jns_Jv,
-    /* 0x8a */  iemOp_jp_Jv,
-    /* 0x8b */  iemOp_jnp_Jv,
-    /* 0x8c */  iemOp_jl_Jv,
-    /* 0x8d */  iemOp_jnl_Jv,
-    /* 0x8e */  iemOp_jle_Jv,
-    /* 0x8f */  iemOp_jnle_Jv,
-    /* 0x90 */  iemOp_seto_Eb,
-    /* 0x91 */  iemOp_setno_Eb,
-    /* 0x92 */  iemOp_setc_Eb,
-    /* 0x93 */  iemOp_setnc_Eb,
-    /* 0x94 */  iemOp_sete_Eb,
-    /* 0x95 */  iemOp_setne_Eb,
-    /* 0x96 */  iemOp_setbe_Eb,
-    /* 0x97 */  iemOp_setnbe_Eb,
-    /* 0x98 */  iemOp_sets_Eb,
-    /* 0x99 */  iemOp_setns_Eb,
-    /* 0x9a */  iemOp_setp_Eb,
-    /* 0x9b */  iemOp_setnp_Eb,
-    /* 0x9c */  iemOp_setl_Eb,
-    /* 0x9d */  iemOp_setnl_Eb,
-    /* 0x9e */  iemOp_setle_Eb,
-    /* 0x9f */  iemOp_setnle_Eb,
-    /* 0xa0 */  iemOp_push_fs,
-    /* 0xa1 */  iemOp_pop_fs,
-    /* 0xa2 */  iemOp_cpuid,
-    /* 0xa3 */  iemOp_bt_Ev_Gv,
-    /* 0xa4 */  iemOp_shld_Ev_Gv_Ib,
-    /* 0xa5 */  iemOp_shld_Ev_Gv_CL,
-    /* 0xa6 */  iemOp_Invalid,
-    /* 0xa7 */  iemOp_Invalid,
-    /* 0xa8 */  iemOp_push_gs,
-    /* 0xa9 */  iemOp_pop_gs,
-    /* 0xaa */  iemOp_rsm,
-    /* 0xab */  iemOp_bts_Ev_Gv,
-    /* 0xac */  iemOp_shrd_Ev_Gv_Ib,
-    /* 0xad */  iemOp_shrd_Ev_Gv_CL,
-    /* 0xae */  iemOp_Grp15,
-    /* 0xaf */  iemOp_imul_Gv_Ev,
-    /* 0xb0 */  iemOp_cmpxchg_Eb_Gb,
-    /* 0xb1 */  iemOp_cmpxchg_Ev_Gv,
-    /* 0xb2 */  iemOp_lss_Gv_Mp,
-    /* 0xb3 */  iemOp_btr_Ev_Gv,
-    /* 0xb4 */  iemOp_lfs_Gv_Mp,
-    /* 0xb5 */  iemOp_lgs_Gv_Mp,
-    /* 0xb6 */  iemOp_movzx_Gv_Eb,
-    /* 0xb7 */  iemOp_movzx_Gv_Ew,
-    /* 0xb8 */  iemOp_popcnt_Gv_Ev_jmpe,
-    /* 0xb9 */  iemOp_Grp10,
-    /* 0xba */  iemOp_Grp8,
-    /* 0xbb */  iemOp_btc_Ev_Gv,
-    /* 0xbc */  iemOp_bsf_Gv_Ev,
-    /* 0xbd */  iemOp_bsr_Gv_Ev,
-    /* 0xbe */  iemOp_movsx_Gv_Eb,
-    /* 0xbf */  iemOp_movsx_Gv_Ew,
-    /* 0xc0 */  iemOp_xadd_Eb_Gb,
-    /* 0xc1 */  iemOp_xadd_Ev_Gv,
-    /* 0xc2 */  iemOp_cmpps_Vps_Wps_Ib__cmppd_Vpd_Wpd_Ib__cmpss_Vss_Wss_Ib__cmpsd_Vsd_Wsd_Ib,
-    /* 0xc3 */  iemOp_movnti_My_Gy,
-    /* 0xc4 */  iemOp_pinsrw_Pq_Ry_Mw_Ib__pinsrw_Vdq_Ry_Mw_Ib,
-    /* 0xc5 */  iemOp_pextrw_Gd_Nq_Ib__pextrw_Gd_Udq_Ib,
-    /* 0xc6 */  iemOp_shufps_Vps_Wps_Ib__shufdp_Vpd_Wpd_Ib,
-    /* 0xc7 */  iemOp_Grp9,
-    /* 0xc8 */  iemOp_bswap_rAX_r8,
-    /* 0xc9 */  iemOp_bswap_rCX_r9,
-    /* 0xca */  iemOp_bswap_rDX_r10,
-    /* 0xcb */  iemOp_bswap_rBX_r11,
-    /* 0xcc */  iemOp_bswap_rSP_r12,
-    /* 0xcd */  iemOp_bswap_rBP_r13,
-    /* 0xce */  iemOp_bswap_rSI_r14,
-    /* 0xcf */  iemOp_bswap_rDI_r15,
-    /* 0xd0 */  iemOp_addsubpd_Vpd_Wpd__addsubps_Vps_Wps,
-    /* 0xd1 */  iemOp_psrlw_Pp_Qp__psrlw_Vdp_Wdq,
-    /* 0xd2 */  iemOp_psrld_Pq_Qq__psrld_Vdq_Wdq,
-    /* 0xd3 */  iemOp_psrlq_Pq_Qq__psrlq_Vdq_Wdq,
-    /* 0xd4 */  iemOp_paddq_Pq_Qq__paddq_Vdq_Wdq,
-    /* 0xd5 */  iemOp_pmulq_Pq_Qq__pmullw_Vdq_Wdq,
-    /* 0xd6 */  iemOp_movq_Wq_Vq__movq2dq_Vdq_Nq__movdq2q_Pq_Uq,
-    /* 0xd7 */  iemOp_pmovmskb_Gd_Nq__pmovmskb_Gd_Udq,
-    /* 0xd8 */  iemOp_psubusb_Pq_Qq__psubusb_Vdq_Wdq,
-    /* 0xd9 */  iemOp_psubusw_Pq_Qq__psubusw_Vdq_Wdq,
-    /* 0xda */  iemOp_pminub_Pq_Qq__pminub_Vdq_Wdq,
-    /* 0xdb */  iemOp_pand_Pq_Qq__pand_Vdq_Wdq,
-    /* 0xdc */  iemOp_paddusb_Pq_Qq__paddusb_Vdq_Wdq,
-    /* 0xdd */  iemOp_paddusw_Pq_Qq__paddusw_Vdq_Wdq,
-    /* 0xde */  iemOp_pmaxub_Pq_Qq__pamxub_Vdq_Wdq,
-    /* 0xdf */  iemOp_pandn_Pq_Qq__pandn_Vdq_Wdq,
-    /* 0xe0 */  iemOp_pavgb_Pq_Qq__pavgb_Vdq_Wdq,
-    /* 0xe1 */  iemOp_psraw_Pq_Qq__psraw_Vdq_Wdq,
-    /* 0xe2 */  iemOp_psrad_Pq_Qq__psrad_Vdq_Wdq,
-    /* 0xe3 */  iemOp_pavgw_Pq_Qq__pavgw_Vdq_Wdq,
-    /* 0xe4 */  iemOp_pmulhuw_Pq_Qq__pmulhuw_Vdq_Wdq,
-    /* 0xe5 */  iemOp_pmulhw_Pq_Qq__pmulhw_Vdq_Wdq,
-    /* 0xe6 */  iemOp_cvttpd2dq_Vdq_Wdp__cvtdq2pd_Vdq_Wpd__cvtpd2dq_Vdq_Wpd,
-    /* 0xe7 */  iemOp_movntq_Mq_Pq__movntdq_Mdq_Vdq,
-    /* 0xe8 */  iemOp_psubsb_Pq_Qq__psubsb_Vdq_Wdq,
-    /* 0xe9 */  iemOp_psubsw_Pq_Qq__psubsw_Vdq_Wdq,
-    /* 0xea */  iemOp_pminsw_Pq_Qq__pminsw_Vdq_Wdq,
-    /* 0xeb */  iemOp_por_Pq_Qq__por_Vdq_Wdq,
-    /* 0xec */  iemOp_paddsb_Pq_Qq__paddsb_Vdq_Wdq,
-    /* 0xed */  iemOp_paddsw_Pq_Qq__paddsw_Vdq_Wdq,
-    /* 0xee */  iemOp_pmaxsw_Pq_Qq__pmaxsw_Vdq_Wdq,
-    /* 0xef */  iemOp_pxor_Pq_Qq__pxor_Vdq_Wdq,
-    /* 0xf0 */  iemOp_lddqu_Vdq_Mdq,
-    /* 0xf1 */  iemOp_psllw_Pq_Qq__pslw_Vdq_Wdq,
-    /* 0xf2 */  iemOp_psld_Pq_Qq__pslld_Vdq_Wdq,
-    /* 0xf3 */  iemOp_psllq_Pq_Qq__pslq_Vdq_Wdq,
-    /* 0xf4 */  iemOp_pmuludq_Pq_Qq__pmuludq_Vdq_Wdq,
-    /* 0xf5 */  iemOp_pmaddwd_Pq_Qq__pmaddwd_Vdq_Wdq,
-    /* 0xf6 */  iemOp_psadbw_Pq_Qq__psadbw_Vdq_Wdq,
-    /* 0xf7 */  iemOp_maskmovq_Pq_Nq__maskmovdqu_Vdq_Udq,
-    /* 0xf8 */  iemOp_psubb_Pq_Qq_psubb_Vdq_Wdq,
-    /* 0xf9 */  iemOp_psubw_Pq_Qq__psubw_Vdq_Wdq,
-    /* 0xfa */  iemOp_psubd_Pq_Qq__psubd_Vdq_Wdq,
-    /* 0xfb */  iemOp_psubq_Pq_Qq__psbuq_Vdq_Wdq,
-    /* 0xfc */  iemOp_paddb_Pq_Qq__paddb_Vdq_Wdq,
-    /* 0xfd */  iemOp_paddw_Pq_Qq__paddw_Vdq_Wdq,
-    /* 0xfe */  iemOp_paddd_Pq_Qq__paddd_Vdq_Wdq,
-    /* 0xff */  iemOp_Invalid
-};
+/** @defgroup og_prefix     Prefixes */
+/** @defgroup og_escapes    Escape bytes */
 
-/** @}  */
 
 
 /** @name One byte opcodes.
- *
  * @{
  */
 
-/** Opcode 0x00. */
+/* Instruction specification format - work in progress:  */
+
+/**
+ * @opcode      0x00
+ * @opmnemonic  add
+ * @op1         rm:Eb
+ * @op2         reg:Gb
+ * @opmaps      one
+ * @openc       ModR/M
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @ophints     harmless ignores_op_size
+ * @opstats     add_Eb_Gb
+ * @opgroup     og_gen_arith_bin
+ * @optest              op1=1   op2=1   -> op1=2   efl&|=nc,pe,na,nz,pl,nv
+ * @optest      efl|=cf op1=1   op2=2   -> op1=3   efl&|=nc,po,na,nz,pl,nv
+ * @optest              op1=254 op2=1   -> op1=255 efl&|=nc,po,na,nz,ng,nv
+ * @optest              op1=128 op2=128 -> op1=0   efl&|=ov,pl,zf,na,po,cf
+ */
 FNIEMOP_DEF(iemOp_add_Eb_Gb)
 {
-    IEMOP_MNEMONIC("add Eb,Gb");
+    IEMOP_MNEMONIC2(MR, ADD, add, Eb, Gb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE | IEMOPHINT_LOCK_ALLOWED);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_add);
 }
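
+/* A worked reading of the @optest notation above (a sketch of how these
+   lines appear to be meant; the consumer of the tags is not part of this
+   hunk): everything before '->' is test setup, everything after it is the
+   expected state.  For iemOp_add_Eb_Gb the line
+
+       op1=254 op2=1 -> op1=255 efl&|=nc,po,na,nz,ng,nv
+
+   loads 254 into the destination operand and 1 into the source, executes
+   ADD, then checks that the destination holds 255 and that EFLAGS, masked
+   to the arithmetic flags, matches the two-letter condition mnemonics
+   (nc = no carry, na = no adjust/aux carry, nz = not zero, ng = negative,
+   i.e. sign set, nv = no overflow).  An 'efl|=cf' on the input side sets
+   CF before execution, and 'intel /' or 'amd /' selects vendor-specific
+   expectations. */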
 
 
-/** Opcode 0x01. */
+/**
+ * @opcode      0x01
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @optest               op1=1  op2=1  -> op1=2  efl&|=nc,pe,na,nz,pl,nv
+ * @optest      efl|=cf  op1=2  op2=2  -> op1=4  efl&|=nc,pe,na,nz,pl,nv
+ * @optest      efl&~=cf op1=-1 op2=1  -> op1=0  efl&|=cf,po,af,zf,pl,nv
+ * @optest               op1=-1 op2=-1 -> op1=-2 efl&|=cf,pe,af,nz,ng,nv
+ */
 FNIEMOP_DEF(iemOp_add_Ev_Gv)
 {
-    IEMOP_MNEMONIC("add Ev,Gv");
+    IEMOP_MNEMONIC2(MR, ADD, add, Ev, Gv, DISOPTYPE_HARMLESS, IEMOPHINT_LOCK_ALLOWED);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_add);
 }
 
 
-/** Opcode 0x02. */
+/**
+ * @opcode      0x02
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opcopytests iemOp_add_Eb_Gb
+ */
 FNIEMOP_DEF(iemOp_add_Gb_Eb)
 {
-    IEMOP_MNEMONIC("add Gb,Eb");
+    IEMOP_MNEMONIC2(RM, ADD, add, Gb, Eb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_add);
 }
 
 
-/** Opcode 0x03. */
+/**
+ * @opcode      0x03
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opcopytests iemOp_add_Ev_Gv
+ */
 FNIEMOP_DEF(iemOp_add_Gv_Ev)
 {
-    IEMOP_MNEMONIC("add Gv,Ev");
+    IEMOP_MNEMONIC2(RM, ADD, add, Gv, Ev, DISOPTYPE_HARMLESS, 0);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_add);
 }
 
 
-/** Opcode 0x04. */
+/**
+ * @opcode      0x04
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opcopytests iemOp_add_Eb_Gb
+ */
 FNIEMOP_DEF(iemOp_add_Al_Ib)
 {
-    IEMOP_MNEMONIC("add al,Ib");
+    IEMOP_MNEMONIC2(FIXED, ADD, add, AL, Ib, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_add);
 }
 
 
-/** Opcode 0x05. */
+/**
+ * @opcode      0x05
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @optest      op1=1 op2=1 -> op1=2 efl&|=nv,pl,nz,na,pe
+ * @optest      efl|=cf  op1=2  op2=2  -> op1=4  efl&|=nc,pe,na,nz,pl,nv
+ * @optest      efl&~=cf op1=-1 op2=1  -> op1=0  efl&|=cf,po,af,zf,pl,nv
+ * @optest               op1=-1 op2=-1 -> op1=-2 efl&|=cf,pe,af,nz,ng,nv
+ */
 FNIEMOP_DEF(iemOp_add_eAX_Iz)
 {
-    IEMOP_MNEMONIC("add rAX,Iz");
+    IEMOP_MNEMONIC2(FIXED, ADD, add, rAX, Iz, DISOPTYPE_HARMLESS, 0);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_add);
 }
 
 
-/** Opcode 0x06. */
+/**
+ * @opcode      0x06
+ * @opgroup     og_stack_sreg
+ */
 FNIEMOP_DEF(iemOp_push_ES)
 {
-    IEMOP_MNEMONIC("push es");
+    IEMOP_MNEMONIC1(FIXED, PUSH, push, ES, DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64, 0);
+    IEMOP_HLP_NO_64BIT();
     return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_ES);
 }
 
 
-/** Opcode 0x07. */
+/**
+ * @opcode      0x07
+ * @opgroup     og_stack_sreg
+ */
 FNIEMOP_DEF(iemOp_pop_ES)
 {
-    IEMOP_MNEMONIC("pop es");
+    IEMOP_MNEMONIC1(FIXED, POP, pop, ES, DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64, 0);
     IEMOP_HLP_NO_64BIT();
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_ES, pVCpu->iem.s.enmEffOpSize);
 }
 
 
-/** Opcode 0x08. */
+/**
+ * @opcode      0x08
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ * @optest                  op1=7 op2=12 -> op1=15   efl&|=nc,po,na,nz,pl,nv
+ * @optest      efl|=of,cf  op1=0 op2=0  -> op1=0    efl&|=nc,po,na,zf,pl,nv
+ * @optest            op1=0xee op2=0x11  -> op1=0xff efl&|=nc,po,na,nz,ng,nv
+ * @optest            op1=0xff op2=0xff  -> op1=0xff efl&|=nc,po,na,nz,ng,nv
+ */
 FNIEMOP_DEF(iemOp_or_Eb_Gb)
 {
-    IEMOP_MNEMONIC("or  Eb,Gb");
+    IEMOP_MNEMONIC2(MR, OR, or, Eb, Gb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE | IEMOPHINT_LOCK_ALLOWED);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_or);
 }
 
 
-/** Opcode 0x09. */
+/**
+ * @opcode      0x09
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ * @optest      efl|=of,cf  op1=12 op2=7 -> op1=15   efl&|=nc,po,na,nz,pl,nv
+ * @optest      efl|=of,cf  op1=0 op2=0  -> op1=0    efl&|=nc,po,na,zf,pl,nv
+ * @optest      op1=-2 op2=1  -> op1=-1 efl&|=nc,po,na,nz,ng,nv
+ * @optest      o16 / op1=0x5a5a             op2=0xa5a5             -> op1=-1 efl&|=nc,po,na,nz,ng,nv
+ * @optest      o32 / op1=0x5a5a5a5a         op2=0xa5a5a5a5         -> op1=-1 efl&|=nc,po,na,nz,ng,nv
+ * @optest      o64 / op1=0x5a5a5a5a5a5a5a5a op2=0xa5a5a5a5a5a5a5a5 -> op1=-1 efl&|=nc,po,na,nz,ng,nv
+ */
 FNIEMOP_DEF(iemOp_or_Ev_Gv)
 {
-    IEMOP_MNEMONIC("or  Ev,Gv ");
+    IEMOP_MNEMONIC2(MR, OR, or, Ev, Gv, DISOPTYPE_HARMLESS, IEMOPHINT_LOCK_ALLOWED);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_or);
 }
 
 
-/** Opcode 0x0a. */
+/**
+ * @opcode      0x0a
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ * @opcopytests iemOp_or_Eb_Gb
+ */
 FNIEMOP_DEF(iemOp_or_Gb_Eb)
 {
-    IEMOP_MNEMONIC("or  Gb,Eb");
+    IEMOP_MNEMONIC2(RM, OR, or, Gb, Eb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE | IEMOPHINT_LOCK_ALLOWED);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_or);
 }
 
 
-/** Opcode 0x0b. */
+/**
+ * @opcode      0x0b
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ * @opcopytests iemOp_or_Ev_Gv
+ */
 FNIEMOP_DEF(iemOp_or_Gv_Ev)
 {
-    IEMOP_MNEMONIC("or  Gv,Ev");
+    IEMOP_MNEMONIC2(RM, OR, or, Gv, Ev, DISOPTYPE_HARMLESS, 0);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_or);
 }
 
 
-/** Opcode 0x0c. */
+/**
+ * @opcode      0x0c
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ * @opcopytests iemOp_or_Eb_Gb
+ */
 FNIEMOP_DEF(iemOp_or_Al_Ib)
 {
-    IEMOP_MNEMONIC("or  al,Ib");
+    IEMOP_MNEMONIC2(FIXED, OR, or, AL, Ib, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_or);
 }
 
 
-/** Opcode 0x0d. */
+/**
+ * @opcode      0x0d
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ * @optest      efl|=of,cf  op1=12 op2=7 -> op1=15   efl&|=nc,po,na,nz,pl,nv
+ * @optest      efl|=of,cf  op1=0 op2=0  -> op1=0    efl&|=nc,po,na,zf,pl,nv
+ * @optest      op1=-2 op2=1  -> op1=-1 efl&|=nc,po,na,nz,ng,nv
+ * @optest      o16 / op1=0x5a5a             op2=0xa5a5     -> op1=-1 efl&|=nc,po,na,nz,ng,nv
+ * @optest      o32 / op1=0x5a5a5a5a         op2=0xa5a5a5a5 -> op1=-1 efl&|=nc,po,na,nz,ng,nv
+ * @optest      o64 / op1=0x5a5a5a5a5a5a5a5a op2=0xa5a5a5a5 -> op1=-1 efl&|=nc,po,na,nz,ng,nv
+ * @optest      o64 / op1=0x5a5a5a5aa5a5a5a5 op2=0x5a5a5a5a -> op1=0x5a5a5a5affffffff efl&|=nc,po,na,nz,pl,nv
+ */
 FNIEMOP_DEF(iemOp_or_eAX_Iz)
 {
-    IEMOP_MNEMONIC("or  rAX,Iz");
+    IEMOP_MNEMONIC2(FIXED, OR, or, rAX, Iz, DISOPTYPE_HARMLESS, 0);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_or);
 }
 
 
-/** Opcode 0x0e. */
+/**
+ * @opcode      0x0e
+ * @opgroup     og_stack_sreg
+ */
 FNIEMOP_DEF(iemOp_push_CS)
 {
-    IEMOP_MNEMONIC("push cs");
+    IEMOP_MNEMONIC1(FIXED, PUSH, push, CS, DISOPTYPE_HARMLESS | DISOPTYPE_POTENTIALLY_DANGEROUS | DISOPTYPE_INVALID_64, 0);
+    IEMOP_HLP_NO_64BIT();
     return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_CS);
 }
 
 
-/** Opcode 0x0f. */
+/**
+ * @opcode      0x0f
+ * @opmnemonic  EscTwo0f
+ * @openc       two0f
+ * @opdisenum   OP_2B_ESC
+ * @ophints     harmless
+ * @opgroup     og_escapes
+ */
 FNIEMOP_DEF(iemOp_2byteEscape)
 {
-    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
-    /** @todo PUSH CS on 8086, undefined on 80186. */
-    IEMOP_HLP_MIN_286();
-    return FNIEMOP_CALL(g_apfnTwoByteMap[b]);
+#ifdef VBOX_STRICT
+    /* Sanity check the table the first time around. */
+    static bool s_fTested = false;
+    if (RT_LIKELY(s_fTested)) { /* likely */  }
+    else
+    {
+        s_fTested = true;
+        Assert(g_apfnTwoByteMap[0xbc * 4 + 0] == iemOp_bsf_Gv_Ev);
+        Assert(g_apfnTwoByteMap[0xbc * 4 + 1] == iemOp_bsf_Gv_Ev);
+        Assert(g_apfnTwoByteMap[0xbc * 4 + 2] == iemOp_tzcnt_Gv_Ev);
+        Assert(g_apfnTwoByteMap[0xbc * 4 + 3] == iemOp_bsf_Gv_Ev);
+    }
+#endif
+
+    if (RT_LIKELY(IEM_GET_TARGET_CPU(pVCpu) >= IEMTARGETCPU_286))
+    {
+        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
+        IEMOP_HLP_MIN_286();
+        return FNIEMOP_CALL(g_apfnTwoByteMap[(uintptr_t)b * 4 + pVCpu->iem.s.idxPrefix]);
+    }
+    /* @opdone */
+
+    /*
+     * On the 8086 this is a POP CS instruction.
+     * For the time being we don't specify it.
+     */
+    IEMOP_MNEMONIC1(FIXED, POP, pop, CS, DISOPTYPE_HARMLESS | DISOPTYPE_POTENTIALLY_DANGEROUS | DISOPTYPE_INVALID_64, IEMOPHINT_SKIP_PYTHON);
+    IEMOP_HLP_NO_64BIT();
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_CS, pVCpu->iem.s.enmEffOpSize);
 }
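
+/* Sketch of the table layout the lookup above assumes: g_apfnTwoByteMap now
+   holds four handlers per opcode byte, one per mandatory-prefix column, so
+   the 0x0f escape indexes it as opcode*4 plus the decoded prefix index.
+   The 0xbc assertions pin column 2 to the 0xf3 variant (tzcnt); mapping the
+   remaining columns as 0 = no prefix, 1 = 0x66 and 3 = 0xf2 is an inference
+   from the usual SSE prefix ordering, not something this hunk shows:
+
+       PFNIEMOP const pfn = g_apfnTwoByteMap[(uintptr_t)b * 4 + pVCpu->iem.s.idxPrefix];
+       return FNIEMOP_CALL(pfn);
+*/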
 
-/** Opcode 0x10. */
+/**
+ * @opcode      0x10
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @optest      op1=1 op2=1 efl&~=cf -> op1=2 efl&|=nc,pe,na,nz,pl,nv
+ * @optest      op1=1 op2=1 efl|=cf  -> op1=3 efl&|=nc,po,na,nz,pl,nv
+ * @optest      op1=0xff op2=0 efl|=cf -> op1=0 efl&|=cf,po,af,zf,pl,nv
+ * @optest      op1=0  op2=0 efl|=cf -> op1=1 efl&|=nc,pe,na,nz,pl,nv
+ * @optest      op1=0  op2=0 efl&~=cf -> op1=0 efl&|=nc,po,na,zf,pl,nv
+ */
 FNIEMOP_DEF(iemOp_adc_Eb_Gb)
 {
-    IEMOP_MNEMONIC("adc Eb,Gb");
+    IEMOP_MNEMONIC2(MR, ADC, adc, Eb, Gb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE | IEMOPHINT_LOCK_ALLOWED);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_adc);
 }
 
 
-/** Opcode 0x11. */
+/**
+ * @opcode      0x11
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @optest      op1=1 op2=1 efl&~=cf -> op1=2 efl&|=nc,pe,na,nz,pl,nv
+ * @optest      op1=1 op2=1 efl|=cf  -> op1=3 efl&|=nc,po,na,nz,pl,nv
+ * @optest      op1=-1 op2=0 efl|=cf -> op1=0 efl&|=cf,po,af,zf,pl,nv
+ * @optest      op1=0  op2=0 efl|=cf -> op1=1 efl&|=nc,pe,na,nz,pl,nv
+ * @optest      op1=0  op2=0 efl&~=cf -> op1=0 efl&|=nc,po,na,zf,pl,nv
+ */
 FNIEMOP_DEF(iemOp_adc_Ev_Gv)
 {
-    IEMOP_MNEMONIC("adc Ev,Gv");
+    IEMOP_MNEMONIC2(MR, ADC, adc, Ev, Gv, DISOPTYPE_HARMLESS, IEMOPHINT_LOCK_ALLOWED);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_adc);
 }
 
 
-/** Opcode 0x12. */
+/**
+ * @opcode      0x12
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opcopytests iemOp_adc_Eb_Gb
+ */
 FNIEMOP_DEF(iemOp_adc_Gb_Eb)
 {
-    IEMOP_MNEMONIC("adc Gb,Eb");
+    IEMOP_MNEMONIC2(RM, ADC, adc, Gb, Eb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_adc);
 }
 
 
-/** Opcode 0x13. */
+/**
+ * @opcode      0x13
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opcopytests iemOp_adc_Ev_Gv
+ */
 FNIEMOP_DEF(iemOp_adc_Gv_Ev)
 {
-    IEMOP_MNEMONIC("adc Gv,Ev");
+    IEMOP_MNEMONIC2(RM, ADC, adc, Gv, Ev, DISOPTYPE_HARMLESS, 0);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_adc);
 }
 
 
-/** Opcode 0x14. */
+/**
+ * @opcode      0x14
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opcopytests iemOp_adc_Eb_Gb
+ */
 FNIEMOP_DEF(iemOp_adc_Al_Ib)
 {
-    IEMOP_MNEMONIC("adc al,Ib");
+    IEMOP_MNEMONIC2(FIXED, ADC, adc, AL, Ib, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_adc);
 }
 
 
-/** Opcode 0x15. */
+/**
+ * @opcode      0x15
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opcopytests iemOp_adc_Ev_Gv
+ */
 FNIEMOP_DEF(iemOp_adc_eAX_Iz)
 {
-    IEMOP_MNEMONIC("adc rAX,Iz");
+    IEMOP_MNEMONIC2(FIXED, ADC, adc, rAX, Iz, DISOPTYPE_HARMLESS, 0);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_adc);
 }
 
 
-/** Opcode 0x16. */
+/**
+ * @opcode      0x16
+ * @opgroup     og_stack_sreg
+ */
 FNIEMOP_DEF(iemOp_push_SS)
 {
-    IEMOP_MNEMONIC("push ss");
+    IEMOP_MNEMONIC1(FIXED, PUSH, push, SS, DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64 | DISOPTYPE_RRM_DANGEROUS, 0);
+    IEMOP_HLP_NO_64BIT();
     return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_SS);
 }
 
 
-/** Opcode 0x17. */
+/**
+ * @opcode      0x17
+ * @opgroup     og_stack_sreg
+ */
 FNIEMOP_DEF(iemOp_pop_SS)
 {
-    IEMOP_MNEMONIC("pop ss"); /** @todo implies instruction fusing? */
+    IEMOP_MNEMONIC1(FIXED, POP, pop, SS, DISOPTYPE_HARMLESS | DISOPTYPE_INHIBIT_IRQS | DISOPTYPE_INVALID_64 | DISOPTYPE_RRM_DANGEROUS, 0);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_NO_64BIT();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_SS, pVCpu->iem.s.enmEffOpSize);
 }
 
 
-/** Opcode 0x18. */
+/**
+ * @opcode      0x18
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sbb_Eb_Gb)
 {
-    IEMOP_MNEMONIC("sbb Eb,Gb");
+    IEMOP_MNEMONIC2(MR, SBB, sbb, Eb, Gb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE | IEMOPHINT_LOCK_ALLOWED);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_sbb);
 }
 
 
-/** Opcode 0x19. */
+/**
+ * @opcode      0x19
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sbb_Ev_Gv)
 {
-    IEMOP_MNEMONIC("sbb Ev,Gv");
+    IEMOP_MNEMONIC2(MR, SBB, sbb, Ev, Gv, DISOPTYPE_HARMLESS, IEMOPHINT_LOCK_ALLOWED);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_sbb);
 }
 
 
-/** Opcode 0x1a. */
+/**
+ * @opcode      0x1a
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sbb_Gb_Eb)
 {
-    IEMOP_MNEMONIC("sbb Gb,Eb");
+    IEMOP_MNEMONIC2(RM, SBB, sbb, Gb, Eb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_sbb);
 }
 
 
-/** Opcode 0x1b. */
+/**
+ * @opcode      0x1b
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sbb_Gv_Ev)
 {
-    IEMOP_MNEMONIC("sbb Gv,Ev");
+    IEMOP_MNEMONIC2(RM, SBB, sbb, Gv, Ev, DISOPTYPE_HARMLESS, 0);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_sbb);
 }
 
 
-/** Opcode 0x1c. */
+/**
+ * @opcode      0x1c
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sbb_Al_Ib)
 {
-    IEMOP_MNEMONIC("sbb al,Ib");
+    IEMOP_MNEMONIC2(FIXED, SBB, sbb, AL, Ib, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_sbb);
 }
 
 
-/** Opcode 0x1d. */
+/**
+ * @opcode      0x1d
+ * @opgroup     og_gen_arith_bin
+ * @opfltest    cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sbb_eAX_Iz)
 {
-    IEMOP_MNEMONIC("sbb rAX,Iz");
+    IEMOP_MNEMONIC2(FIXED, SBB, sbb, rAX, Iz, DISOPTYPE_HARMLESS, 0);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_sbb);
 }
 
 
-/** Opcode 0x1e. */
+/**
+ * @opcode      0x1e
+ * @opgroup     og_stack_sreg
+ */
 FNIEMOP_DEF(iemOp_push_DS)
 {
-    IEMOP_MNEMONIC("push ds");
+    IEMOP_MNEMONIC1(FIXED, PUSH, push, DS, DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64, 0);
+    IEMOP_HLP_NO_64BIT();
     return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_DS);
 }
 
 
-/** Opcode 0x1f. */
+/**
+ * @opcode      0x1f
+ * @opgroup     og_stack_sreg
+ */
 FNIEMOP_DEF(iemOp_pop_DS)
 {
-    IEMOP_MNEMONIC("pop ds");
+    IEMOP_MNEMONIC1(FIXED, POP, pop, DS, DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64 | DISOPTYPE_RRM_DANGEROUS, 0);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_NO_64BIT();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_DS, pVCpu->iem.s.enmEffOpSize);
 }
 
 
-/** Opcode 0x20. */
+/**
+ * @opcode      0x20
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_and_Eb_Gb)
 {
-    IEMOP_MNEMONIC("and Eb,Gb");
+    IEMOP_MNEMONIC2(MR, AND, and, Eb, Gb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE | IEMOPHINT_LOCK_ALLOWED);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_and);
 }
 
 
-/** Opcode 0x21. */
+/**
+ * @opcode      0x21
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_and_Ev_Gv)
 {
-    IEMOP_MNEMONIC("and Ev,Gv");
+    IEMOP_MNEMONIC2(MR, AND, and, Ev, Gv, DISOPTYPE_HARMLESS, IEMOPHINT_LOCK_ALLOWED);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_and);
 }
 
 
-/** Opcode 0x22. */
+/**
+ * @opcode      0x22
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_and_Gb_Eb)
 {
-    IEMOP_MNEMONIC("and Gb,Eb");
+    IEMOP_MNEMONIC2(RM, AND, and, Gb, Eb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_and);
 }
 
 
-/** Opcode 0x23. */
+/**
+ * @opcode      0x23
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_and_Gv_Ev)
 {
-    IEMOP_MNEMONIC("and Gv,Ev");
+    IEMOP_MNEMONIC2(RM, AND, and, Gv, Ev, DISOPTYPE_HARMLESS, 0);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_and);
 }
 
 
-/** Opcode 0x24. */
+/**
+ * @opcode      0x24
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_and_Al_Ib)
 {
-    IEMOP_MNEMONIC("and al,Ib");
+    IEMOP_MNEMONIC2(FIXED, AND, and, AL, Ib, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_and);
 }
 
 
-/** Opcode 0x25. */
+/**
+ * @opcode      0x25
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_and_eAX_Iz)
 {
-    IEMOP_MNEMONIC("and rAX,Iz");
+    IEMOP_MNEMONIC2(FIXED, AND, and, rAX, Iz, DISOPTYPE_HARMLESS, 0);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_and);
 }
 
 
-/** Opcode 0x26. */
+/**
+ * @opcode      0x26
+ * @opmnemonic  SEG
+ * @op1         ES
+ * @opgroup     og_prefix
+ * @openc       prefix
+ * @opdisenum   OP_SEG
+ * @ophints     harmless
+ */
 FNIEMOP_DEF(iemOp_seg_ES)
 {
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg es");
@@ -7856,10 +662,15 @@ FNIEMOP_DEF(iemOp_seg_ES)
 }
 
 
-/** Opcode 0x27. */
+/**
+ * @opcode      0x27
+ * @opfltest    af,cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   of
+ */
 FNIEMOP_DEF(iemOp_daa)
 {
-    IEMOP_MNEMONIC("daa AL");
+    IEMOP_MNEMONIC0(FIXED, DAA, daa, DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64, 0); /* express implicit AL register use */
     IEMOP_HLP_NO_64BIT();
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF);
@@ -7867,55 +678,87 @@ FNIEMOP_DEF(iemOp_daa)
 }
 
 
-/** Opcode 0x28. */
+/**
+ * @opcode      0x28
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sub_Eb_Gb)
 {
-    IEMOP_MNEMONIC("sub Eb,Gb");
+    IEMOP_MNEMONIC2(MR, SUB, sub, Eb, Gb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE | IEMOPHINT_LOCK_ALLOWED);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_sub);
 }
 
 
-/** Opcode 0x29. */
+/**
+ * @opcode      0x29
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sub_Ev_Gv)
 {
-    IEMOP_MNEMONIC("sub Ev,Gv");
+    IEMOP_MNEMONIC2(MR, SUB, sub, Ev, Gv, DISOPTYPE_HARMLESS, IEMOPHINT_LOCK_ALLOWED);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_sub);
 }
 
 
-/** Opcode 0x2a. */
+/**
+ * @opcode      0x2a
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sub_Gb_Eb)
 {
-    IEMOP_MNEMONIC("sub Gb,Eb");
+    IEMOP_MNEMONIC2(RM, SUB, sub, Gb, Eb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_sub);
 }
 
 
-/** Opcode 0x2b. */
+/**
+ * @opcode      0x2b
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sub_Gv_Ev)
 {
-    IEMOP_MNEMONIC("sub Gv,Ev");
+    IEMOP_MNEMONIC2(RM, SUB, sub, Gv, Ev, DISOPTYPE_HARMLESS, 0);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_sub);
 }
 
 
-/** Opcode 0x2c. */
+/**
+ * @opcode      0x2c
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sub_Al_Ib)
 {
-    IEMOP_MNEMONIC("sub al,Ib");
+    IEMOP_MNEMONIC2(FIXED, SUB, sub, AL, Ib, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_sub);
 }
 
 
-/** Opcode 0x2d. */
+/**
+ * @opcode      0x2d
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ */
 FNIEMOP_DEF(iemOp_sub_eAX_Iz)
 {
-    IEMOP_MNEMONIC("sub rAX,Iz");
+    IEMOP_MNEMONIC2(FIXED, SUB, sub, rAX, Iz, DISOPTYPE_HARMLESS, 0);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_sub);
 }
 
 
-/** Opcode 0x2e. */
+/**
+ * @opcode      0x2e
+ * @opmnemonic  SEG
+ * @op1         CS
+ * @opgroup     og_prefix
+ * @openc       prefix
+ * @opdisenum   OP_SEG
+ * @ophints     harmless
+ */
 FNIEMOP_DEF(iemOp_seg_CS)
 {
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg cs");
@@ -7927,10 +770,15 @@ FNIEMOP_DEF(iemOp_seg_CS)
 }
 
 
-/** Opcode 0x2f. */
+/**
+ * @opcode      0x2f
+ * @opfltest    af,cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   of
+ */
 FNIEMOP_DEF(iemOp_das)
 {
-    IEMOP_MNEMONIC("das AL");
+    IEMOP_MNEMONIC0(FIXED, DAS, das, DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64, 0); /* express implicit AL register use */
     IEMOP_HLP_NO_64BIT();
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF);
@@ -7938,61 +786,105 @@ FNIEMOP_DEF(iemOp_das)
 }
 
 
-/** Opcode 0x30. */
+/**
+ * @opcode      0x30
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_xor_Eb_Gb)
 {
-    IEMOP_MNEMONIC("xor Eb,Gb");
+    IEMOP_MNEMONIC2(MR, XOR, xor, Eb, Gb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE | IEMOPHINT_LOCK_ALLOWED);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_xor);
 }
 
 
-/** Opcode 0x31. */
+/**
+ * @opcode      0x31
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_xor_Ev_Gv)
 {
-    IEMOP_MNEMONIC("xor Ev,Gv");
+    IEMOP_MNEMONIC2(MR, XOR, xor, Ev, Gv, DISOPTYPE_HARMLESS, IEMOPHINT_LOCK_ALLOWED);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_xor);
 }
 
 
-/** Opcode 0x32. */
+/**
+ * @opcode      0x32
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_xor_Gb_Eb)
 {
-    IEMOP_MNEMONIC("xor Gb,Eb");
+    IEMOP_MNEMONIC2(RM, XOR, xor, Gb, Eb, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_xor);
 }
 
 
-/** Opcode 0x33. */
+/**
+ * @opcode      0x33
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_xor_Gv_Ev)
 {
-    IEMOP_MNEMONIC("xor Gv,Ev");
+    IEMOP_MNEMONIC2(RM, XOR, xor, Gv, Ev, DISOPTYPE_HARMLESS, 0);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_xor);
 }
 
 
-/** Opcode 0x34. */
+/**
+ * @opcode      0x34
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_xor_Al_Ib)
 {
-    IEMOP_MNEMONIC("xor al,Ib");
+    IEMOP_MNEMONIC2(FIXED, XOR, xor, AL, Ib, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_xor);
 }
 
 
-/** Opcode 0x35. */
+/**
+ * @opcode      0x35
+ * @opgroup     og_gen_arith_bin
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   af
+ * @opflclear   of,cf
+ */
 FNIEMOP_DEF(iemOp_xor_eAX_Iz)
 {
-    IEMOP_MNEMONIC("xor rAX,Iz");
+    IEMOP_MNEMONIC2(FIXED, XOR, xor, rAX, Iz, DISOPTYPE_HARMLESS, 0);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_xor);
 }
 
 
-/** Opcode 0x36. */
+/**
+ * @opcode      0x36
+ * @opmnemonic  SEG
+ * @op1         SS
+ * @opgroup     og_prefix
+ * @openc       prefix
+ * @opdisenum   OP_SEG
+ * @ophints     harmless
+ */
 FNIEMOP_DEF(iemOp_seg_SS)
 {
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg ss");
@@ -8004,59 +896,118 @@ FNIEMOP_DEF(iemOp_seg_SS)
 }
 
 
-/** Opcode 0x37. */
-FNIEMOP_STUB(iemOp_aaa);
+/**
+ * @opcode      0x37
+ * @opfltest    af,cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   pf,zf,sf,of
+ * @opgroup     og_gen_arith_dec
+ * @optest              efl&~=af ax=9      -> efl&|=nc,po,na,nz,pl,nv
+ * @optest              efl&~=af ax=0      -> efl&|=nc,po,na,zf,pl,nv
+ * @optest      intel / efl&~=af ax=0x00f0 -> ax=0x0000 efl&|=nc,po,na,zf,pl,nv
+ * @optest      amd   / efl&~=af ax=0x00f0 -> ax=0x0000 efl&|=nc,po,na,nz,pl,nv
+ * @optest              efl&~=af ax=0x00f9 -> ax=0x0009 efl&|=nc,po,na,nz,pl,nv
+ * @optest              efl|=af  ax=0      -> ax=0x0106 efl&|=cf,po,af,nz,pl,nv
+ * @optest              efl|=af  ax=0x0100 -> ax=0x0206 efl&|=cf,po,af,nz,pl,nv
+ * @optest      intel / efl|=af  ax=0x000a -> ax=0x0100 efl&|=cf,po,af,zf,pl,nv
+ * @optest      amd   / efl|=af  ax=0x000a -> ax=0x0100 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      intel / efl|=af  ax=0x010a -> ax=0x0200 efl&|=cf,po,af,zf,pl,nv
+ * @optest      amd   / efl|=af  ax=0x010a -> ax=0x0200 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      intel / efl|=af  ax=0x0f0a -> ax=0x1000 efl&|=cf,po,af,zf,pl,nv
+ * @optest      amd   / efl|=af  ax=0x0f0a -> ax=0x1000 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      intel / efl|=af  ax=0x7f0a -> ax=0x8000 efl&|=cf,po,af,zf,pl,nv
+ * @optest      amd   / efl|=af  ax=0x7f0a -> ax=0x8000 efl&|=cf,pe,af,nz,ng,ov
+ * @optest      intel / efl|=af  ax=0xff0a -> ax=0x0000 efl&|=cf,po,af,zf,pl,nv
+ * @optest      amd   / efl|=af  ax=0xff0a -> ax=0x0000 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      intel / efl&~=af ax=0xff0a -> ax=0x0000 efl&|=cf,po,af,zf,pl,nv
+ * @optest      amd   / efl&~=af ax=0xff0a -> ax=0x0000 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      intel / efl&~=af ax=0x000b -> ax=0x0101 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      amd   / efl&~=af ax=0x000b -> ax=0x0101 efl&|=cf,po,af,nz,pl,nv
+ * @optest      intel / efl&~=af ax=0x000c -> ax=0x0102 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      amd   / efl&~=af ax=0x000c -> ax=0x0102 efl&|=cf,po,af,nz,pl,nv
+ * @optest      intel / efl&~=af ax=0x000d -> ax=0x0103 efl&|=cf,po,af,nz,pl,nv
+ * @optest      amd   / efl&~=af ax=0x000d -> ax=0x0103 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      intel / efl&~=af ax=0x000e -> ax=0x0104 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      amd   / efl&~=af ax=0x000e -> ax=0x0104 efl&|=cf,po,af,nz,pl,nv
+ * @optest      intel / efl&~=af ax=0x000f -> ax=0x0105 efl&|=cf,po,af,nz,pl,nv
+ * @optest      amd   / efl&~=af ax=0x000f -> ax=0x0105 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      intel / efl&~=af ax=0x020f -> ax=0x0305 efl&|=cf,po,af,nz,pl,nv
+ * @optest      amd   / efl&~=af ax=0x020f -> ax=0x0305 efl&|=cf,pe,af,nz,pl,nv
+ */
+FNIEMOP_DEF(iemOp_aaa)
+{
+    IEMOP_MNEMONIC0(FIXED, AAA, aaa, DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64, 0); /* express implicit AL/AX register use */
+    IEMOP_HLP_NO_64BIT();
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF);
+
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_aaa);
+}
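
+/* The adjustment the AAA testcases above encode, per the architecture
+   manuals (the intel/amd splits only differ in the architecturally
+   undefined PF/ZF/SF/OF results).  A minimal sketch, not the actual
+   iemCImpl_aaa body:
+
+       if ((uAx & 0xf) > 9 || fAF)
+       {
+           uAx += 0x106;       // AL += 6, carrying into AH.
+           fAF = fCF = 1;
+       }
+       else
+           fAF = fCF = 0;
+       uAx &= 0xff0f;          // AL &= 0xf, AH untouched.
+*/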
 
 
-/** Opcode 0x38. */
+/**
+ * @opcode      0x38
+ */
 FNIEMOP_DEF(iemOp_cmp_Eb_Gb)
 {
-    IEMOP_MNEMONIC("cmp Eb,Gb");
+    IEMOP_MNEMONIC(cmp_Eb_Gb, "cmp Eb,Gb");
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_cmp);
 }
 
 
-/** Opcode 0x39. */
+/**
+ * @opcode      0x39
+ */
 FNIEMOP_DEF(iemOp_cmp_Ev_Gv)
 {
-    IEMOP_MNEMONIC("cmp Ev,Gv");
+    IEMOP_MNEMONIC(cmp_Ev_Gv, "cmp Ev,Gv");
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_cmp);
 }
 
 
-/** Opcode 0x3a. */
+/**
+ * @opcode      0x3a
+ */
 FNIEMOP_DEF(iemOp_cmp_Gb_Eb)
 {
-    IEMOP_MNEMONIC("cmp Gb,Eb");
+    IEMOP_MNEMONIC(cmp_Gb_Eb, "cmp Gb,Eb");
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_cmp);
 }
 
 
-/** Opcode 0x3b. */
+/**
+ * @opcode      0x3b
+ */
 FNIEMOP_DEF(iemOp_cmp_Gv_Ev)
 {
-    IEMOP_MNEMONIC("cmp Gv,Ev");
+    IEMOP_MNEMONIC(cmp_Gv_Ev, "cmp Gv,Ev");
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_cmp);
 }
 
 
-/** Opcode 0x3c. */
+/**
+ * @opcode      0x3c
+ */
 FNIEMOP_DEF(iemOp_cmp_Al_Ib)
 {
-    IEMOP_MNEMONIC("cmp al,Ib");
+    IEMOP_MNEMONIC(cmp_al_Ib, "cmp al,Ib");
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_cmp);
 }
 
 
-/** Opcode 0x3d. */
+/**
+ * @opcode      0x3d
+ */
 FNIEMOP_DEF(iemOp_cmp_eAX_Iz)
 {
-    IEMOP_MNEMONIC("cmp rAX,Iz");
+    IEMOP_MNEMONIC(cmp_rAX_Iz, "cmp rAX,Iz");
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_cmp);
 }
 
 
-/** Opcode 0x3e. */
+/**
+ * @opcode      0x3e
+ */
 FNIEMOP_DEF(iemOp_seg_DS)
 {
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg ds");
@@ -8068,8 +1019,58 @@ FNIEMOP_DEF(iemOp_seg_DS)
 }
 
 
-/** Opcode 0x3f. */
-FNIEMOP_STUB(iemOp_aas);
+/**
+ * @opcode      0x3f
+ * @opfltest    af,cf
+ * @opflmodify  cf,pf,af,zf,sf,of
+ * @opflundef   pf,zf,sf,of
+ * @opgroup     og_gen_arith_dec
+ * @optest            / efl&~=af ax=0x0009 -> efl&|=nc,po,na,nz,pl,nv
+ * @optest            / efl&~=af ax=0x0000 -> efl&|=nc,po,na,zf,pl,nv
+ * @optest      intel / efl&~=af ax=0x00f0 -> ax=0x0000 efl&|=nc,po,na,zf,pl,nv
+ * @optest      amd   / efl&~=af ax=0x00f0 -> ax=0x0000 efl&|=nc,po,na,nz,pl,nv
+ * @optest            / efl&~=af ax=0x00f9 -> ax=0x0009 efl&|=nc,po,na,nz,pl,nv
+ * @optest      intel / efl|=af  ax=0x0000 -> ax=0xfe0a efl&|=cf,po,af,nz,pl,nv
+ * @optest      amd   / efl|=af  ax=0x0000 -> ax=0xfe0a efl&|=cf,po,af,nz,ng,nv
+ * @optest      intel / efl|=af  ax=0x0100 -> ax=0xff0a efl&|=cf,po,af,nz,pl,nv
+ * @optest8     amd   / efl|=af  ax=0x0100 -> ax=0xff0a efl&|=cf,po,af,nz,ng,nv
+ * @optest      intel / efl|=af  ax=0x000a -> ax=0xff04 efl&|=cf,pe,af,nz,pl,nv
+ * @optest10    amd   / efl|=af  ax=0x000a -> ax=0xff04 efl&|=cf,pe,af,nz,ng,nv
+ * @optest            / efl|=af  ax=0x010a -> ax=0x0004 efl&|=cf,pe,af,nz,pl,nv
+ * @optest            / efl|=af  ax=0x020a -> ax=0x0104 efl&|=cf,pe,af,nz,pl,nv
+ * @optest            / efl|=af  ax=0x0f0a -> ax=0x0e04 efl&|=cf,pe,af,nz,pl,nv
+ * @optest            / efl|=af  ax=0x7f0a -> ax=0x7e04 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      intel / efl|=af  ax=0xff0a -> ax=0xfe04 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      amd   / efl|=af  ax=0xff0a -> ax=0xfe04 efl&|=cf,pe,af,nz,ng,nv
+ * @optest      intel / efl&~=af ax=0xff0a -> ax=0xfe04 efl&|=cf,pe,af,nz,pl,nv
+ * @optest      amd   / efl&~=af ax=0xff0a -> ax=0xfe04 efl&|=cf,pe,af,nz,ng,nv
+ * @optest      intel / efl&~=af ax=0xff09 -> ax=0xff09 efl&|=nc,po,na,nz,pl,nv
+ * @optest      amd   / efl&~=af ax=0xff09 -> ax=0xff09 efl&|=nc,po,na,nz,ng,nv
+ * @optest      intel / efl&~=af ax=0x000b -> ax=0xff05 efl&|=cf,po,af,nz,pl,nv
+ * @optest22    amd   / efl&~=af ax=0x000b -> ax=0xff05 efl&|=cf,po,af,nz,ng,nv
+ * @optest      intel / efl&~=af ax=0x000c -> ax=0xff06 efl&|=cf,po,af,nz,pl,nv
+ * @optest24    amd   / efl&~=af ax=0x000c -> ax=0xff06 efl&|=cf,po,af,nz,ng,nv
+ * @optest      intel / efl&~=af ax=0x000d -> ax=0xff07 efl&|=cf,pe,af,nz,pl,nv
+ * @optest26    amd   / efl&~=af ax=0x000d -> ax=0xff07 efl&|=cf,pe,af,nz,ng,nv
+ * @optest      intel / efl&~=af ax=0x000e -> ax=0xff08 efl&|=cf,pe,af,nz,pl,nv
+ * @optest28    amd   / efl&~=af ax=0x000e -> ax=0xff08 efl&|=cf,pe,af,nz,ng,nv
+ * @optest      intel / efl&~=af ax=0x000f -> ax=0xff09 efl&|=cf,po,af,nz,pl,nv
+ * @optest30    amd   / efl&~=af ax=0x000f -> ax=0xff09 efl&|=cf,po,af,nz,ng,nv
+ * @optest31    intel / efl&~=af ax=0x00fa -> ax=0xff04 efl&|=cf,pe,af,nz,pl,nv
+ * @optest32    amd   / efl&~=af ax=0x00fa -> ax=0xff04 efl&|=cf,pe,af,nz,ng,nv
+ * @optest33    intel / efl&~=af ax=0xfffa -> ax=0xfe04 efl&|=cf,pe,af,nz,pl,nv
+ * @optest34    amd   / efl&~=af ax=0xfffa -> ax=0xfe04 efl&|=cf,pe,af,nz,ng,nv
+ */
+FNIEMOP_DEF(iemOp_aas)
+{
+    IEMOP_MNEMONIC0(FIXED, AAS, aas, DISOPTYPE_HARMLESS | DISOPTYPE_INVALID_64, 0); /* express implicit AL/AX register use */
+    IEMOP_HLP_NO_64BIT();
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF);
+
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_aas);
+}
+
 
 /**
  * Common 'inc/dec/not/neg register' helper.
@@ -8117,7 +1118,9 @@ FNIEMOP_DEF_2(iemOpCommonUnaryGReg, PCIEMOPUNARYSIZES, pImpl, uint8_t, iReg)
 }
 
 
-/** Opcode 0x40. */
+/**
+ * @opcode      0x40
+ */
 FNIEMOP_DEF(iemOp_inc_eAX)
 {
     /*
@@ -8132,12 +1135,14 @@ FNIEMOP_DEF(iemOp_inc_eAX)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("inc eAX");
+    IEMOP_MNEMONIC(inc_eAX, "inc eAX");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xAX);
 }
 
 
-/** Opcode 0x41. */
+/**
+ * @opcode      0x41
+ */
 FNIEMOP_DEF(iemOp_inc_eCX)
 {
     /*
@@ -8153,12 +1158,14 @@ FNIEMOP_DEF(iemOp_inc_eCX)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("inc eCX");
+    IEMOP_MNEMONIC(inc_eCX, "inc eCX");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xCX);
 }
 
 
-/** Opcode 0x42. */
+/**
+ * @opcode      0x42
+ */
 FNIEMOP_DEF(iemOp_inc_eDX)
 {
     /*
@@ -8174,13 +1181,15 @@ FNIEMOP_DEF(iemOp_inc_eDX)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("inc eDX");
+    IEMOP_MNEMONIC(inc_eDX, "inc eDX");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xDX);
 }
 
 
 
-/** Opcode 0x43. */
+/**
+ * @opcode      0x43
+ */
 FNIEMOP_DEF(iemOp_inc_eBX)
 {
     /*
@@ -8197,12 +1206,14 @@ FNIEMOP_DEF(iemOp_inc_eBX)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("inc eBX");
+    IEMOP_MNEMONIC(inc_eBX, "inc eBX");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xBX);
 }
 
 
-/** Opcode 0x44. */
+/**
+ * @opcode      0x44
+ */
 FNIEMOP_DEF(iemOp_inc_eSP)
 {
     /*
@@ -8218,12 +1229,14 @@ FNIEMOP_DEF(iemOp_inc_eSP)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("inc eSP");
+    IEMOP_MNEMONIC(inc_eSP, "inc eSP");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xSP);
 }
 
 
-/** Opcode 0x45. */
+/**
+ * @opcode      0x45
+ */
 FNIEMOP_DEF(iemOp_inc_eBP)
 {
     /*
@@ -8240,12 +1253,14 @@ FNIEMOP_DEF(iemOp_inc_eBP)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("inc eBP");
+    IEMOP_MNEMONIC(inc_eBP, "inc eBP");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xBP);
 }
 
 
-/** Opcode 0x46. */
+/**
+ * @opcode      0x46
+ */
 FNIEMOP_DEF(iemOp_inc_eSI)
 {
     /*
@@ -8262,12 +1277,14 @@ FNIEMOP_DEF(iemOp_inc_eSI)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("inc eSI");
+    IEMOP_MNEMONIC(inc_eSI, "inc eSI");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xSI);
 }
 
 
-/** Opcode 0x47. */
+/**
+ * @opcode      0x47
+ */
 FNIEMOP_DEF(iemOp_inc_eDI)
 {
     /*
@@ -8285,12 +1302,14 @@ FNIEMOP_DEF(iemOp_inc_eDI)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("inc eDI");
+    IEMOP_MNEMONIC(inc_eDI, "inc eDI");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xDI);
 }
 
 
-/** Opcode 0x48. */
+/**
+ * @opcode      0x48
+ */
 FNIEMOP_DEF(iemOp_dec_eAX)
 {
     /*
@@ -8306,12 +1325,14 @@ FNIEMOP_DEF(iemOp_dec_eAX)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("dec eAX");
+    IEMOP_MNEMONIC(dec_eAX, "dec eAX");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xAX);
 }
 
 
-/** Opcode 0x49. */
+/**
+ * @opcode      0x49
+ */
 FNIEMOP_DEF(iemOp_dec_eCX)
 {
     /*
@@ -8328,12 +1349,14 @@ FNIEMOP_DEF(iemOp_dec_eCX)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("dec eCX");
+    IEMOP_MNEMONIC(dec_eCX, "dec eCX");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xCX);
 }
 
 
-/** Opcode 0x4a. */
+/**
+ * @opcode      0x4a
+ */
 FNIEMOP_DEF(iemOp_dec_eDX)
 {
     /*
@@ -8350,12 +1373,14 @@ FNIEMOP_DEF(iemOp_dec_eDX)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("dec eDX");
+    IEMOP_MNEMONIC(dec_eDX, "dec eDX");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xDX);
 }
 
 
-/** Opcode 0x4b. */
+/**
+ * @opcode      0x4b
+ */
 FNIEMOP_DEF(iemOp_dec_eBX)
 {
     /*
@@ -8373,12 +1398,14 @@ FNIEMOP_DEF(iemOp_dec_eBX)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("dec eBX");
+    IEMOP_MNEMONIC(dec_eBX, "dec eBX");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xBX);
 }
 
 
-/** Opcode 0x4c. */
+/**
+ * @opcode      0x4c
+ */
 FNIEMOP_DEF(iemOp_dec_eSP)
 {
     /*
@@ -8395,12 +1422,14 @@ FNIEMOP_DEF(iemOp_dec_eSP)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("dec eSP");
+    IEMOP_MNEMONIC(dec_eSP, "dec eSP");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xSP);
 }
 
 
-/** Opcode 0x4d. */
+/**
+ * @opcode      0x4d
+ */
 FNIEMOP_DEF(iemOp_dec_eBP)
 {
     /*
@@ -8418,12 +1447,14 @@ FNIEMOP_DEF(iemOp_dec_eBP)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("dec eBP");
+    IEMOP_MNEMONIC(dec_eBP, "dec eBP");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xBP);
 }
 
 
-/** Opcode 0x4e. */
+/**
+ * @opcode      0x4e
+ */
 FNIEMOP_DEF(iemOp_dec_eSI)
 {
     /*
@@ -8441,12 +1472,14 @@ FNIEMOP_DEF(iemOp_dec_eSI)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("dec eSI");
+    IEMOP_MNEMONIC(dec_eSI, "dec eSI");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xSI);
 }
 
 
-/** Opcode 0x4f. */
+/**
+ * @opcode      0x4f
+ */
 FNIEMOP_DEF(iemOp_dec_eDI)
 {
     /*
@@ -8465,7 +1498,7 @@ FNIEMOP_DEF(iemOp_dec_eDI)
         return FNIEMOP_CALL(g_apfnOneByteMap[b]);
     }
 
-    IEMOP_MNEMONIC("dec eDI");
+    IEMOP_MNEMONIC(dec_eDI, "dec eDI");
     return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xDI);
 }
 
@@ -8517,42 +1550,52 @@ FNIEMOP_DEF_1(iemOpCommonPushGReg, uint8_t, iReg)
 }
 
 
-/** Opcode 0x50. */
+/**
+ * @opcode      0x50
+ */
 FNIEMOP_DEF(iemOp_push_eAX)
 {
-    IEMOP_MNEMONIC("push rAX");
+    IEMOP_MNEMONIC(push_rAX, "push rAX");
     return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xAX);
 }
 
 
-/** Opcode 0x51. */
+/**
+ * @opcode      0x51
+ */
 FNIEMOP_DEF(iemOp_push_eCX)
 {
-    IEMOP_MNEMONIC("push rCX");
+    IEMOP_MNEMONIC(push_rCX, "push rCX");
     return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xCX);
 }
 
 
-/** Opcode 0x52. */
+/**
+ * @opcode      0x52
+ */
 FNIEMOP_DEF(iemOp_push_eDX)
 {
-    IEMOP_MNEMONIC("push rDX");
+    IEMOP_MNEMONIC(push_rDX, "push rDX");
     return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xDX);
 }
 
 
-/** Opcode 0x53. */
+/**
+ * @opcode      0x53
+ */
 FNIEMOP_DEF(iemOp_push_eBX)
 {
-    IEMOP_MNEMONIC("push rBX");
+    IEMOP_MNEMONIC(push_rBX, "push rBX");
     return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xBX);
 }
 
 
-/** Opcode 0x54. */
+/**
+ * @opcode      0x54
+ */
 FNIEMOP_DEF(iemOp_push_eSP)
 {
-    IEMOP_MNEMONIC("push rSP");
+    IEMOP_MNEMONIC(push_rSP, "push rSP");
     if (IEM_GET_TARGET_CPU(pVCpu) == IEMTARGETCPU_8086)
     {
         IEM_MC_BEGIN(0, 1);
@@ -8567,26 +1610,32 @@ FNIEMOP_DEF(iemOp_push_eSP)
 }
 
 
-/** Opcode 0x55. */
+/**
+ * @opcode      0x55
+ */
 FNIEMOP_DEF(iemOp_push_eBP)
 {
-    IEMOP_MNEMONIC("push rBP");
+    IEMOP_MNEMONIC(push_rBP, "push rBP");
     return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xBP);
 }
 
 
-/** Opcode 0x56. */
+/**
+ * @opcode      0x56
+ */
 FNIEMOP_DEF(iemOp_push_eSI)
 {
-    IEMOP_MNEMONIC("push rSI");
+    IEMOP_MNEMONIC(push_rSI, "push rSI");
     return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xSI);
 }
 
 
-/** Opcode 0x57. */
+/**
+ * @opcode      0x57
+ */
 FNIEMOP_DEF(iemOp_push_eDI)
 {
-    IEMOP_MNEMONIC("push rDI");
+    IEMOP_MNEMONIC(push_rDI, "push rDI");
     return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xDI);
 }
 
@@ -8639,42 +1688,52 @@ FNIEMOP_DEF_1(iemOpCommonPopGReg, uint8_t, iReg)
 }
 
 
-/** Opcode 0x58. */
+/**
+ * @opcode      0x58
+ */
 FNIEMOP_DEF(iemOp_pop_eAX)
 {
-    IEMOP_MNEMONIC("pop rAX");
+    IEMOP_MNEMONIC(pop_rAX, "pop rAX");
     return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xAX);
 }
 
 
-/** Opcode 0x59. */
+/**
+ * @opcode      0x59
+ */
 FNIEMOP_DEF(iemOp_pop_eCX)
 {
-    IEMOP_MNEMONIC("pop rCX");
+    IEMOP_MNEMONIC(pop_rCX, "pop rCX");
     return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xCX);
 }
 
 
-/** Opcode 0x5a. */
+/**
+ * @opcode      0x5a
+ */
 FNIEMOP_DEF(iemOp_pop_eDX)
 {
-    IEMOP_MNEMONIC("pop rDX");
+    IEMOP_MNEMONIC(pop_rDX, "pop rDX");
     return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xDX);
 }
 
 
-/** Opcode 0x5b. */
+/**
+ * @opcode      0x5b
+ */
 FNIEMOP_DEF(iemOp_pop_eBX)
 {
-    IEMOP_MNEMONIC("pop rBX");
+    IEMOP_MNEMONIC(pop_rBX, "pop rBX");
     return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xBX);
 }
 
 
-/** Opcode 0x5c. */
+/**
+ * @opcode      0x5c
+ */
 FNIEMOP_DEF(iemOp_pop_eSP)
 {
-    IEMOP_MNEMONIC("pop rSP");
+    IEMOP_MNEMONIC(pop_rSP, "pop rSP");
     if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
     {
         if (pVCpu->iem.s.uRexB)
@@ -8720,34 +1779,42 @@ FNIEMOP_DEF(iemOp_pop_eSP)
 }
 
 
-/** Opcode 0x5d. */
+/**
+ * @opcode      0x5d
+ */
 FNIEMOP_DEF(iemOp_pop_eBP)
 {
-    IEMOP_MNEMONIC("pop rBP");
+    IEMOP_MNEMONIC(pop_rBP, "pop rBP");
     return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xBP);
 }
 
 
-/** Opcode 0x5e. */
+/**
+ * @opcode      0x5e
+ */
 FNIEMOP_DEF(iemOp_pop_eSI)
 {
-    IEMOP_MNEMONIC("pop rSI");
+    IEMOP_MNEMONIC(pop_rSI, "pop rSI");
     return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xSI);
 }
 
 
-/** Opcode 0x5f. */
+/**
+ * @opcode      0x5f
+ */
 FNIEMOP_DEF(iemOp_pop_eDI)
 {
-    IEMOP_MNEMONIC("pop rDI");
+    IEMOP_MNEMONIC(pop_rDI, "pop rDI");
     return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xDI);
 }
 
 
-/** Opcode 0x60. */
+/**
+ * @opcode      0x60
+ */
 FNIEMOP_DEF(iemOp_pusha)
 {
-    IEMOP_MNEMONIC("pusha");
+    IEMOP_MNEMONIC(pusha, "pusha");
     IEMOP_HLP_MIN_186();
     IEMOP_HLP_NO_64BIT();
     if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
@@ -8757,28 +1824,172 @@ FNIEMOP_DEF(iemOp_pusha)
 }
 
 
-/** Opcode 0x61. */
-FNIEMOP_DEF(iemOp_popa)
+/**
+ * @opcode      0x61
+ */
+FNIEMOP_DEF(iemOp_popa__mvex)
 {
-    IEMOP_MNEMONIC("popa");
-    IEMOP_HLP_MIN_186();
-    IEMOP_HLP_NO_64BIT();
-    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
-        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_popa_16);
-    Assert(pVCpu->iem.s.enmEffOpSize == IEMMODE_32BIT);
-    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_popa_32);
+    if (pVCpu->iem.s.enmCpuMode != IEMMODE_64BIT)
+    {
+        IEMOP_MNEMONIC(popa, "popa");
+        IEMOP_HLP_MIN_186();
+        IEMOP_HLP_NO_64BIT();
+        if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+            return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_popa_16);
+        Assert(pVCpu->iem.s.enmEffOpSize == IEMMODE_32BIT);
+        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_popa_32);
+    }
+    IEMOP_MNEMONIC(mvex, "mvex");
+    Log(("mvex prefix is not supported!\n"));
+    return IEMOP_RAISE_INVALID_OPCODE();
 }
 
 
-/** Opcode 0x62. */
-FNIEMOP_STUB(iemOp_bound_Gv_Ma_evex);
-//    IEMOP_HLP_MIN_186();
+/**
+ * @opcode      0x62
+ * @opmnemonic  bound
+ * @op1         Gv
+ * @op2         Ma
+ * @opmincpu    80186
+ * @ophints     harmless invalid_64
+ * @optest      op1=0 op2=0 ->
+ * @optest      op1=1 op2=0 -> value.xcpt=5
+ * @optest      o16 / op1=0xffff op2=0x0000fffe ->
+ * @optest      o16 / op1=0xfffe op2=0x0000fffe ->
+ * @optest      o16 / op1=0x7fff op2=0x0000fffe -> value.xcpt=5
+ * @optest      o16 / op1=0x7fff op2=0x7ffffffe ->
+ * @optest      o16 / op1=0x7fff op2=0xfffe8000 -> value.xcpt=5
+ * @optest      o16 / op1=0x8000 op2=0xfffe8000 ->
+ * @optest      o16 / op1=0xffff op2=0xfffe8000 -> value.xcpt=5
+ * @optest      o16 / op1=0xfffe op2=0xfffe8000 ->
+ * @optest      o16 / op1=0xfffe op2=0x8000fffe -> value.xcpt=5
+ * @optest      o16 / op1=0x8000 op2=0x8000fffe -> value.xcpt=5
+ * @optest      o16 / op1=0x0000 op2=0x8000fffe -> value.xcpt=5
+ * @optest      o16 / op1=0x0001 op2=0x8000fffe -> value.xcpt=5
+ * @optest      o16 / op1=0xffff op2=0x0001000f -> value.xcpt=5
+ * @optest      o16 / op1=0x0000 op2=0x0001000f -> value.xcpt=5
+ * @optest      o16 / op1=0x0001 op2=0x0001000f -> value.xcpt=5
+ * @optest      o16 / op1=0x0002 op2=0x0001000f -> value.xcpt=5
+ * @optest      o16 / op1=0x0003 op2=0x0001000f -> value.xcpt=5
+ * @optest      o16 / op1=0x0004 op2=0x0001000f -> value.xcpt=5
+ * @optest      o16 / op1=0x000e op2=0x0001000f -> value.xcpt=5
+ * @optest      o16 / op1=0x000f op2=0x0001000f -> value.xcpt=5
+ * @optest      o16 / op1=0x0010 op2=0x0001000f -> value.xcpt=5
+ * @optest      o16 / op1=0x0011 op2=0x0001000f -> value.xcpt=5
+ * @optest      o32 / op1=0xffffffff op2=0x00000000fffffffe ->
+ * @optest      o32 / op1=0xfffffffe op2=0x00000000fffffffe ->
+ * @optest      o32 / op1=0x7fffffff op2=0x00000000fffffffe -> value.xcpt=5
+ * @optest      o32 / op1=0x7fffffff op2=0x7ffffffffffffffe ->
+ * @optest      o32 / op1=0x7fffffff op2=0xfffffffe80000000 -> value.xcpt=5
+ * @optest      o32 / op1=0x80000000 op2=0xfffffffe80000000 ->
+ * @optest      o32 / op1=0xffffffff op2=0xfffffffe80000000 -> value.xcpt=5
+ * @optest      o32 / op1=0xfffffffe op2=0xfffffffe80000000 ->
+ * @optest      o32 / op1=0xfffffffe op2=0x80000000fffffffe -> value.xcpt=5
+ * @optest      o32 / op1=0x80000000 op2=0x80000000fffffffe -> value.xcpt=5
+ * @optest      o32 / op1=0x00000000 op2=0x80000000fffffffe -> value.xcpt=5
+ * @optest      o32 / op1=0x00000002 op2=0x80000000fffffffe -> value.xcpt=5
+ * @optest      o32 / op1=0x00000001 op2=0x0000000100000003 -> value.xcpt=5
+ * @optest      o32 / op1=0x00000002 op2=0x0000000100000003 -> value.xcpt=5
+ * @optest      o32 / op1=0x00000003 op2=0x0000000100000003 -> value.xcpt=5
+ * @optest      o32 / op1=0x00000004 op2=0x0000000100000003 -> value.xcpt=5
+ * @optest      o32 / op1=0x00000005 op2=0x0000000100000003 -> value.xcpt=5
+ * @optest      o32 / op1=0x0000000e op2=0x0000000100000003 -> value.xcpt=5
+ * @optest      o32 / op1=0x0000000f op2=0x0000000100000003 -> value.xcpt=5
+ * @optest      o32 / op1=0x00000010 op2=0x0000000100000003 -> value.xcpt=5
+ */
+FNIEMOP_DEF(iemOp_bound_Gv_Ma__evex)
+{
+    /* The BOUND instruction is invalid in 64-bit mode. In legacy and
+       compatibility mode it is invalid with MOD=3.
+
+       In 32-bit mode, the EVEX prefix works by having the top two bits (MOD)
+       both be set.  In the Intel EVEX documentation (SDM vol 2) these are simply
+       given as R and X without an exact description, so we assume it builds on
+       the VEX one and means they are inverted wrt REX.R and REX.X.  Thus, just
+       like with the 3-byte VEX, 32-bit code is restricted wrt addressable registers. */
+    uint8_t bRm;
+    if (pVCpu->iem.s.enmCpuMode != IEMMODE_64BIT)
+    {
+        IEMOP_MNEMONIC2(RM_MEM, BOUND, bound, Gv, Ma, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+        IEMOP_HLP_MIN_186();
+        IEM_OPCODE_GET_NEXT_U8(&bRm);
+        if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+        {
+            /** @todo testcase: check that there are two memory accesses involved.  Check
+             *        whether they're both read before the \#BR triggers. */
+            if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+            {
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint16_t,    u16Index,       0); /* Note! All operands are actually signed. Lazy unsigned bird. */
+                IEM_MC_ARG(uint16_t,    u16LowerBounds, 1);
+                IEM_MC_ARG(uint16_t,    u16UpperBounds, 2);
+                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+                IEM_MC_FETCH_GREG_U16(u16Index, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+                IEM_MC_FETCH_MEM_U16(u16LowerBounds, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+                IEM_MC_FETCH_MEM_U16_DISP(u16UpperBounds, pVCpu->iem.s.iEffSeg, GCPtrEffSrc, 2);
+
+                IEM_MC_CALL_CIMPL_3(iemCImpl_bound_16, u16Index, u16LowerBounds, u16UpperBounds); /* returns */
+                IEM_MC_END();
+            }
+            else /* 32-bit operands */
+            {
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint32_t,    u32Index,       0); /* Note! All operands are actually signed. Lazy unsigned bird. */
+                IEM_MC_ARG(uint32_t,    u32LowerBounds, 1);
+                IEM_MC_ARG(uint32_t,    u32UpperBounds, 2);
+                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+                IEM_MC_FETCH_GREG_U32(u32Index, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+                IEM_MC_FETCH_MEM_U32(u32LowerBounds, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+                IEM_MC_FETCH_MEM_U32_DISP(u32UpperBounds, pVCpu->iem.s.iEffSeg, GCPtrEffSrc, 4);
+
+                IEM_MC_CALL_CIMPL_3(iemCImpl_bound_32, u32Index, u32LowerBounds, u32UpperBounds); /* returns */
+                IEM_MC_END();
+            }
+        }
+
+        /*
+         * @opdone
+         */
+        if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fAvx512Foundation)
+        {
+            /* Note that there is no need for the CPU to fetch further bytes
+               here because MODRM.MOD == 3. */
+            Log(("evex not supported by the guest CPU!\n"));
+            return IEMOP_RAISE_INVALID_OPCODE();
+        }
+    }
+    else
+    {
+        /** @todo check how this is decoded in 64-bit mode w/o EVEX. Intel probably
+         *        does modr/m read, whereas AMD probably doesn't... */
+        if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fAvx512Foundation)
+        {
+            Log(("evex not supported by the guest CPU!\n"));
+            return FNIEMOP_CALL(iemOp_InvalidAllNeedRM);
+        }
+        IEM_OPCODE_GET_NEXT_U8(&bRm);
+    }
+
+    IEMOP_MNEMONIC(evex, "evex");
+    uint8_t bP2; IEM_OPCODE_GET_NEXT_U8(&bP2);
+    uint8_t bP3; IEM_OPCODE_GET_NEXT_U8(&bP3);
+    Log(("evex prefix is not implemented!\n"));
+    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+}
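
The value.xcpt=5 rows in the @optest block above all follow from the
signed range check that iemCImpl_bound_16/32 perform; a minimal
stand-alone model (illustrative names; lower bound in the first memory
word, upper bound in the second) looks like this:

    #include <stdbool.h>
    #include <stdint.h>

    /* Returns true when BOUND would raise #BR (vector 5). */
    static bool RefBound16RaisesBr(uint16_t uIdx, uint16_t uLo, uint16_t uHi)
    {
        int16_t idx = (int16_t)uIdx;
        return idx < (int16_t)uLo || idx > (int16_t)uHi;
    }

For instance, op1=0x7fff op2=0x0000fffe decodes to index 32767 checked
against [-2..0] and trips the check, matching its value.xcpt=5 row.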
 
 
 /** Opcode 0x63 - non-64-bit modes. */
 FNIEMOP_DEF(iemOp_arpl_Ew_Gw)
 {
-    IEMOP_MNEMONIC("arpl Ew,Gw");
+    IEMOP_MNEMONIC(arpl_Ew_Gw, "arpl Ew,Gw");
     IEMOP_HLP_MIN_286();
     IEMOP_HLP_NO_REAL_OR_V86_MODE();
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -8826,7 +2037,9 @@ FNIEMOP_DEF(iemOp_arpl_Ew_Gw)
 }
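
As a quick reference for the ARPL semantics behind the hunk above: the
instruction raises the destination selector's RPL field (bits 1:0) to at
least the source's RPL and reports the adjustment in ZF.  A hedged
stand-alone sketch (illustrative names, not IEM symbols):

    #include <stdbool.h>
    #include <stdint.h>

    /* Returns the resulting ZF: true when the RPL was raised. */
    static bool RefArpl(uint16_t *puSelDst, uint16_t uSelSrc)
    {
        if ((*puSelDst & 3) < (uSelSrc & 3))
        {
            *puSelDst = (uint16_t)((*puSelDst & ~UINT16_C(3)) | (uSelSrc & 3));
            return true;
        }
        return false;
    }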
 
 
-/** Opcode 0x63.
+/**
+ * @opcode 0x63
+ *
  * @note This is a weird one. It works like a regular move instruction if
  *       REX.W isn't set, at least according to AMD docs (rev 3.15, 2009-11).
  * @todo This definitely needs a testcase to verify the odd cases.  */
@@ -8834,7 +2047,7 @@ FNIEMOP_DEF(iemOp_movsxd_Gv_Ev)
 {
     Assert(pVCpu->iem.s.enmEffOpSize == IEMMODE_64BIT); /* Caller branched already. */
 
-    IEMOP_MNEMONIC("movsxd Gv,Ev");
+    IEMOP_MNEMONIC(movsxd_Gv_Ev, "movsxd Gv,Ev");
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
 
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
@@ -8869,7 +2082,12 @@ FNIEMOP_DEF(iemOp_movsxd_Gv_Ev)
 }
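
The @note above boils down to two cases; here is a hedged reference model
of what the REX.W form does (the non-REX.W form behaves like a plain
32-bit MOV with the usual zero-extension of the upper half):

    #include <stdint.h>

    /* MOVSXD r64, r/m32 with REX.W: sign-extend the 32-bit source. */
    static uint64_t RefMovsxd(uint32_t uSrc)
    {
        return (uint64_t)(int64_t)(int32_t)uSrc;
    }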
 
 
-/** Opcode 0x64. */
+/**
+ * @opcode      0x64
+ * @opmnemonic  segfs
+ * @opmincpu    80386
+ * @opgroup     og_prefixes
+ */
 FNIEMOP_DEF(iemOp_seg_FS)
 {
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg fs");
@@ -8883,7 +2101,12 @@ FNIEMOP_DEF(iemOp_seg_FS)
 }
 
 
-/** Opcode 0x65. */
+/**
+ * @opcode      0x65
+ * @opmnemonic  seggs
+ * @opmincpu    80386
+ * @opgroup     og_prefixes
+ */
 FNIEMOP_DEF(iemOp_seg_GS)
 {
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("seg gs");
@@ -8897,7 +2120,14 @@ FNIEMOP_DEF(iemOp_seg_GS)
 }
 
 
-/** Opcode 0x66. */
+/**
+ * @opcode      0x66
+ * @opmnemonic  opsize
+ * @openc       prefix
+ * @opmincpu    80386
+ * @ophints     harmless
+ * @opgroup     og_prefixes
+ */
 FNIEMOP_DEF(iemOp_op_size)
 {
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("op size");
@@ -8906,12 +2136,24 @@ FNIEMOP_DEF(iemOp_op_size)
     pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SIZE_OP;
     iemRecalEffOpSize(pVCpu);
 
+    /* For the 4-entry opcode tables, the operand-size prefix doesn't count
+       when REPZ or REPNZ are present. */
+    if (pVCpu->iem.s.idxPrefix == 0)
+        pVCpu->iem.s.idxPrefix = 1;
+
     uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
     return FNIEMOP_CALL(g_apfnOneByteMap[b]);
 }
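
The idxPrefix rule added above matches the usual encoding of mandatory
prefixes as a 4-entry table index (0 = none, 1 = 0x66, 2 = 0xf3,
3 = 0xf2).  A hedged sketch of that convention, with 0x66 yielding to an
already-seen REPZ/REPNZ as the comment describes; the 0xf3/0xf2 rows are
an assumption, since their handling is not shown in this hunk:

    #include <stdint.h>

    static uint8_t UpdatePrefixIndex(uint8_t idxPrefix, uint8_t bPrefix)
    {
        switch (bPrefix)
        {
            case 0x66: return idxPrefix == 0 ? 1 : idxPrefix; /* yields to F3/F2 */
            case 0xf3: return 2;  /* assumed: REPZ claims the slot */
            case 0xf2: return 3;  /* assumed: REPNZ claims the slot */
            default:   return idxPrefix;
        }
    }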
 
 
-/** Opcode 0x67. */
+/**
+ * @opcode      0x67
+ * @opmnemonic  addrsize
+ * @openc       prefix
+ * @opmincpu    80386
+ * @ophints     harmless
+ * @opgroup     og_prefixes
+ */
 FNIEMOP_DEF(iemOp_addr_size)
 {
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("addr size");
@@ -8931,10 +2173,12 @@ FNIEMOP_DEF(iemOp_addr_size)
 }
 
 
-/** Opcode 0x68. */
+/**
+ * @opcode      0x68
+ */
 FNIEMOP_DEF(iemOp_push_Iz)
 {
-    IEMOP_MNEMONIC("push Iz");
+    IEMOP_MNEMONIC(push_Iz, "push Iz");
     IEMOP_HLP_MIN_186();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
     switch (pVCpu->iem.s.enmEffOpSize)
@@ -8977,10 +2221,12 @@ FNIEMOP_DEF(iemOp_push_Iz)
 }
 
 
-/** Opcode 0x69. */
+/**
+ * @opcode      0x69
+ */
 FNIEMOP_DEF(iemOp_imul_Gv_Ev_Iz)
 {
-    IEMOP_MNEMONIC("imul Gv,Ev,Iz"); /* Gv = Ev * Iz; */
+    IEMOP_MNEMONIC(imul_Gv_Ev_Iz, "imul Gv,Ev,Iz"); /* Gv = Ev * Iz; */
     IEMOP_HLP_MIN_186();
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
@@ -9138,10 +2384,12 @@ FNIEMOP_DEF(iemOp_imul_Gv_Ev_Iz)
 }
 
 
-/** Opcode 0x6a. */
+/**
+ * @opcode      0x6a
+ */
 FNIEMOP_DEF(iemOp_push_Ib)
 {
-    IEMOP_MNEMONIC("push Ib");
+    IEMOP_MNEMONIC(push_Ib, "push Ib");
     IEMOP_HLP_MIN_186();
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -9166,10 +2414,12 @@ FNIEMOP_DEF(iemOp_push_Ib)
 }
 
 
-/** Opcode 0x6b. */
+/**
+ * @opcode      0x6b
+ */
 FNIEMOP_DEF(iemOp_imul_Gv_Ev_Ib)
 {
-    IEMOP_MNEMONIC("imul Gv,Ev,Ib"); /* Gv = Ev * Iz; */
+    IEMOP_MNEMONIC(imul_Gv_Ev_Ib, "imul Gv,Ev,Ib"); /* Gv = Ev * Iz; */
     IEMOP_HLP_MIN_186();
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
@@ -9321,14 +2571,16 @@ FNIEMOP_DEF(iemOp_imul_Gv_Ev_Ib)
 }
 
 
-/** Opcode 0x6c. */
+/**
+ * @opcode      0x6c
+ */
 FNIEMOP_DEF(iemOp_insb_Yb_DX)
 {
     IEMOP_HLP_MIN_186();
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
     {
-        IEMOP_MNEMONIC("rep ins Yb,DX");
+        IEMOP_MNEMONIC(rep_insb_Yb_DX, "rep ins Yb,DX");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_ins_op8_addr16, false);
@@ -9339,7 +2591,7 @@ FNIEMOP_DEF(iemOp_insb_Yb_DX)
     }
     else
     {
-        IEMOP_MNEMONIC("ins Yb,DX");
+        IEMOP_MNEMONIC(ins_Yb_DX, "ins Yb,DX");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_ins_op8_addr16, false);
@@ -9351,14 +2603,16 @@ FNIEMOP_DEF(iemOp_insb_Yb_DX)
 }
 
 
-/** Opcode 0x6d. */
+/**
+ * @opcode      0x6d
+ */
 FNIEMOP_DEF(iemOp_inswd_Yv_DX)
 {
     IEMOP_HLP_MIN_186();
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
     {
-        IEMOP_MNEMONIC("rep ins Yv,DX");
+        IEMOP_MNEMONIC(rep_ins_Yv_DX, "rep ins Yv,DX");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -9385,7 +2639,7 @@ FNIEMOP_DEF(iemOp_inswd_Yv_DX)
     }
     else
     {
-        IEMOP_MNEMONIC("ins Yv,DX");
+        IEMOP_MNEMONIC(ins_Yv_DX, "ins Yv,DX");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -9413,14 +2667,16 @@ FNIEMOP_DEF(iemOp_inswd_Yv_DX)
 }
 
 
-/** Opcode 0x6e. */
+/**
+ * @opcode      0x6e
+ */
 FNIEMOP_DEF(iemOp_outsb_Yb_DX)
 {
     IEMOP_HLP_MIN_186();
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
     {
-        IEMOP_MNEMONIC("rep outs DX,Yb");
+        IEMOP_MNEMONIC(rep_outsb_DX_Yb, "rep outs DX,Yb");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_rep_outs_op8_addr16, pVCpu->iem.s.iEffSeg, false);
@@ -9431,7 +2687,7 @@ FNIEMOP_DEF(iemOp_outsb_Yb_DX)
     }
     else
     {
-        IEMOP_MNEMONIC("outs DX,Yb");
+        IEMOP_MNEMONIC(outs_DX_Yb, "outs DX,Yb");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_outs_op8_addr16, pVCpu->iem.s.iEffSeg, false);
@@ -9443,14 +2699,16 @@ FNIEMOP_DEF(iemOp_outsb_Yb_DX)
 }
 
 
-/** Opcode 0x6f. */
+/**
+ * @opcode      0x6f
+ */
 FNIEMOP_DEF(iemOp_outswd_Yv_DX)
 {
     IEMOP_HLP_MIN_186();
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
     {
-        IEMOP_MNEMONIC("rep outs DX,Yv");
+        IEMOP_MNEMONIC(rep_outs_DX_Yv, "rep outs DX,Yv");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -9477,7 +2735,7 @@ FNIEMOP_DEF(iemOp_outswd_Yv_DX)
     }
     else
     {
-        IEMOP_MNEMONIC("outs DX,Yv");
+        IEMOP_MNEMONIC(outs_DX_Yv, "outs DX,Yv");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -9505,10 +2763,12 @@ FNIEMOP_DEF(iemOp_outswd_Yv_DX)
 }
 
 
-/** Opcode 0x70. */
+/**
+ * @opcode      0x70
+ */
 FNIEMOP_DEF(iemOp_jo_Jb)
 {
-    IEMOP_MNEMONIC("jo  Jb");
+    IEMOP_MNEMONIC(jo_Jb, "jo  Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9524,10 +2784,12 @@ FNIEMOP_DEF(iemOp_jo_Jb)
 }
 
 
-/** Opcode 0x71. */
+/**
+ * @opcode      0x71
+ */
 FNIEMOP_DEF(iemOp_jno_Jb)
 {
-    IEMOP_MNEMONIC("jno Jb");
+    IEMOP_MNEMONIC(jno_Jb, "jno Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9542,10 +2804,12 @@ FNIEMOP_DEF(iemOp_jno_Jb)
     return VINF_SUCCESS;
 }
 
-/** Opcode 0x72. */
+/**
+ * @opcode      0x72
+ */
 FNIEMOP_DEF(iemOp_jc_Jb)
 {
-    IEMOP_MNEMONIC("jc/jnae Jb");
+    IEMOP_MNEMONIC(jc_Jb, "jc/jnae Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9561,10 +2825,12 @@ FNIEMOP_DEF(iemOp_jc_Jb)
 }
 
 
-/** Opcode 0x73. */
+/**
+ * @opcode      0x73
+ */
 FNIEMOP_DEF(iemOp_jnc_Jb)
 {
-    IEMOP_MNEMONIC("jnc/jnb Jb");
+    IEMOP_MNEMONIC(jnc_Jb, "jnc/jnb Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9580,10 +2846,12 @@ FNIEMOP_DEF(iemOp_jnc_Jb)
 }
 
 
-/** Opcode 0x74. */
+/**
+ * @opcode      0x74
+ */
 FNIEMOP_DEF(iemOp_je_Jb)
 {
-    IEMOP_MNEMONIC("je/jz   Jb");
+    IEMOP_MNEMONIC(je_Jb, "je/jz   Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9599,10 +2867,12 @@ FNIEMOP_DEF(iemOp_je_Jb)
 }
 
 
-/** Opcode 0x75. */
+/**
+ * @opcode      0x75
+ */
 FNIEMOP_DEF(iemOp_jne_Jb)
 {
-    IEMOP_MNEMONIC("jne/jnz Jb");
+    IEMOP_MNEMONIC(jne_Jb, "jne/jnz Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9618,10 +2888,12 @@ FNIEMOP_DEF(iemOp_jne_Jb)
 }
 
 
-/** Opcode 0x76. */
+/**
+ * @opcode      0x76
+ */
 FNIEMOP_DEF(iemOp_jbe_Jb)
 {
-    IEMOP_MNEMONIC("jbe/jna Jb");
+    IEMOP_MNEMONIC(jbe_Jb, "jbe/jna Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9637,10 +2909,12 @@ FNIEMOP_DEF(iemOp_jbe_Jb)
 }
 
 
-/** Opcode 0x77. */
+/**
+ * @opcode      0x77
+ */
 FNIEMOP_DEF(iemOp_jnbe_Jb)
 {
-    IEMOP_MNEMONIC("jnbe/ja Jb");
+    IEMOP_MNEMONIC(ja_Jb, "ja/jnbe Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9656,10 +2930,12 @@ FNIEMOP_DEF(iemOp_jnbe_Jb)
 }
 
 
-/** Opcode 0x78. */
+/**
+ * @opcode      0x78
+ */
 FNIEMOP_DEF(iemOp_js_Jb)
 {
-    IEMOP_MNEMONIC("js  Jb");
+    IEMOP_MNEMONIC(js_Jb, "js  Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9675,10 +2951,12 @@ FNIEMOP_DEF(iemOp_js_Jb)
 }
 
 
-/** Opcode 0x79. */
+/**
+ * @opcode      0x79
+ */
 FNIEMOP_DEF(iemOp_jns_Jb)
 {
-    IEMOP_MNEMONIC("jns Jb");
+    IEMOP_MNEMONIC(jns_Jb, "jns Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9694,10 +2972,12 @@ FNIEMOP_DEF(iemOp_jns_Jb)
 }
 
 
-/** Opcode 0x7a. */
+/**
+ * @opcode      0x7a
+ */
 FNIEMOP_DEF(iemOp_jp_Jb)
 {
-    IEMOP_MNEMONIC("jp  Jb");
+    IEMOP_MNEMONIC(jp_Jb, "jp  Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9713,10 +2993,12 @@ FNIEMOP_DEF(iemOp_jp_Jb)
 }
 
 
-/** Opcode 0x7b. */
+/**
+ * @opcode      0x7b
+ */
 FNIEMOP_DEF(iemOp_jnp_Jb)
 {
-    IEMOP_MNEMONIC("jnp Jb");
+    IEMOP_MNEMONIC(jnp_Jb, "jnp Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9732,10 +3014,12 @@ FNIEMOP_DEF(iemOp_jnp_Jb)
 }
 
 
-/** Opcode 0x7c. */
+/**
+ * @opcode      0x7c
+ */
 FNIEMOP_DEF(iemOp_jl_Jb)
 {
-    IEMOP_MNEMONIC("jl/jnge Jb");
+    IEMOP_MNEMONIC(jl_Jb, "jl/jnge Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9751,10 +3035,12 @@ FNIEMOP_DEF(iemOp_jl_Jb)
 }
 
 
-/** Opcode 0x7d. */
+/**
+ * @opcode      0x7d
+ */
 FNIEMOP_DEF(iemOp_jnl_Jb)
 {
-    IEMOP_MNEMONIC("jnl/jge Jb");
+    IEMOP_MNEMONIC(jge_Jb, "jnl/jge Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9770,10 +3056,12 @@ FNIEMOP_DEF(iemOp_jnl_Jb)
 }
 
 
-/** Opcode 0x7e. */
+/**
+ * @opcode      0x7e
+ */
 FNIEMOP_DEF(iemOp_jle_Jb)
 {
-    IEMOP_MNEMONIC("jle/jng Jb");
+    IEMOP_MNEMONIC(jle_Jb, "jle/jng Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9789,10 +3077,12 @@ FNIEMOP_DEF(iemOp_jle_Jb)
 }
 
 
-/** Opcode 0x7f. */
+/**
+ * @opcode      0x7f
+ */
 FNIEMOP_DEF(iemOp_jnle_Jb)
 {
-    IEMOP_MNEMONIC("jnle/jg Jb");
+    IEMOP_MNEMONIC(jg_Jb, "jnle/jg Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -9808,11 +3098,23 @@ FNIEMOP_DEF(iemOp_jnle_Jb)
 }
 
 
-/** Opcode 0x80. */
+/**
+ * @opcode      0x80
+ */
 FNIEMOP_DEF(iemOp_Grp1_Eb_Ib_80)
 {
-    uint8_t bRm;   IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_MNEMONIC2("add\0or\0\0adc\0sbb\0and\0sub\0xor\0cmp" + ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)*4, "Eb,Ib");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: IEMOP_MNEMONIC(add_Eb_Ib, "add Eb,Ib"); break;
+        case 1: IEMOP_MNEMONIC(or_Eb_Ib,  "or  Eb,Ib"); break;
+        case 2: IEMOP_MNEMONIC(adc_Eb_Ib, "adc Eb,Ib"); break;
+        case 3: IEMOP_MNEMONIC(sbb_Eb_Ib, "sbb Eb,Ib"); break;
+        case 4: IEMOP_MNEMONIC(and_Eb_Ib, "and Eb,Ib"); break;
+        case 5: IEMOP_MNEMONIC(sub_Eb_Ib, "sub Eb,Ib"); break;
+        case 6: IEMOP_MNEMONIC(xor_Eb_Ib, "xor Eb,Ib"); break;
+        case 7: IEMOP_MNEMONIC(cmp_Eb_Ib, "cmp Eb,Ib"); break;
+    }
     PCIEMOPBINSIZES pImpl = g_apIemImplGrp1[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK];
 
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
@@ -9869,11 +3171,23 @@ FNIEMOP_DEF(iemOp_Grp1_Eb_Ib_80)
 }
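
The reg-field dispatch above is the standard ModR/M decomposition; for
readers following along, a hedged helper showing which bits select the
add/or/adc/sbb/and/sub/xor/cmp row (illustrative only, IEM uses the
X86_MODRM_* macros instead):

    #include <stdint.h>

    static void SplitModRm(uint8_t bRm, uint8_t *pMod, uint8_t *pReg, uint8_t *pRm)
    {
        *pMod = bRm >> 6;        /* 3 = register operand, else memory form */
        *pReg = (bRm >> 3) & 7;  /* group-1 opcode extension, 0..7 */
        *pRm  = bRm & 7;         /* base register / addressing form */
    }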
 
 
-/** Opcode 0x81. */
+/**
+ * @opcode      0x81
+ */
 FNIEMOP_DEF(iemOp_Grp1_Ev_Iz)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_MNEMONIC2("add\0or\0\0adc\0sbb\0and\0sub\0xor\0cmp" + ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)*4, "Ev,Iz");
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: IEMOP_MNEMONIC(add_Ev_Iz, "add Ev,Iz"); break;
+        case 1: IEMOP_MNEMONIC(or_Ev_Iz,  "or  Ev,Iz"); break;
+        case 2: IEMOP_MNEMONIC(adc_Ev_Iz, "adc Ev,Iz"); break;
+        case 3: IEMOP_MNEMONIC(sbb_Ev_Iz, "sbb Ev,Iz"); break;
+        case 4: IEMOP_MNEMONIC(and_Ev_Iz, "and Ev,Iz"); break;
+        case 5: IEMOP_MNEMONIC(sub_Ev_Iz, "sub Ev,Iz"); break;
+        case 6: IEMOP_MNEMONIC(xor_Ev_Iz, "xor Ev,Iz"); break;
+        case 7: IEMOP_MNEMONIC(cmp_Ev_Iz, "cmp Ev,Iz"); break;
+    }
     PCIEMOPBINSIZES pImpl = g_apIemImplGrp1[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK];
 
     switch (pVCpu->iem.s.enmEffOpSize)
@@ -10048,7 +3362,11 @@ FNIEMOP_DEF(iemOp_Grp1_Ev_Iz)
 }
 
 
-/** Opcode 0x82. */
+/**
+ * @opcode      0x82
+ * @opmnemonic  grp1_82
+ * @opgroup     og_groups
+ */
 FNIEMOP_DEF(iemOp_Grp1_Eb_Ib_82)
 {
     IEMOP_HLP_NO_64BIT(); /** @todo do we need to decode the whole instruction or is this ok? */
@@ -10056,11 +3374,23 @@ FNIEMOP_DEF(iemOp_Grp1_Eb_Ib_82)
 }
 
 
-/** Opcode 0x83. */
+/**
+ * @opcode      0x83
+ */
 FNIEMOP_DEF(iemOp_Grp1_Ev_Ib)
 {
     uint8_t bRm;   IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_MNEMONIC2("add\0or\0\0adc\0sbb\0and\0sub\0xor\0cmp" + ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)*4, "Ev,Ib");
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: IEMOP_MNEMONIC(add_Ev_Ib, "add Ev,Ib"); break;
+        case 1: IEMOP_MNEMONIC(or_Ev_Ib,  "or  Ev,Ib"); break;
+        case 2: IEMOP_MNEMONIC(adc_Ev_Ib, "adc Ev,Ib"); break;
+        case 3: IEMOP_MNEMONIC(sbb_Ev_Ib, "sbb Ev,Ib"); break;
+        case 4: IEMOP_MNEMONIC(and_Ev_Ib, "and Ev,Ib"); break;
+        case 5: IEMOP_MNEMONIC(sub_Ev_Ib, "sub Ev,Ib"); break;
+        case 6: IEMOP_MNEMONIC(xor_Ev_Ib, "xor Ev,Ib"); break;
+        case 7: IEMOP_MNEMONIC(cmp_Ev_Ib, "cmp Ev,Ib"); break;
+    }
     /* Note! Seems the OR, AND, and XOR instructions are present on CPUs prior
              to the 386 even if absent in the Intel reference manuals and some
              3rd party opcode listings. */
@@ -10230,29 +3560,35 @@ FNIEMOP_DEF(iemOp_Grp1_Ev_Ib)
 }
 
 
-/** Opcode 0x84. */
+/**
+ * @opcode      0x84
+ */
 FNIEMOP_DEF(iemOp_test_Eb_Gb)
 {
-    IEMOP_MNEMONIC("test Eb,Gb");
+    IEMOP_MNEMONIC(test_Eb_Gb, "test Eb,Gb");
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_test);
 }
 
 
-/** Opcode 0x85. */
+/**
+ * @opcode      0x85
+ */
 FNIEMOP_DEF(iemOp_test_Ev_Gv)
 {
-    IEMOP_MNEMONIC("test Ev,Gv");
+    IEMOP_MNEMONIC(test_Ev_Gv, "test Ev,Gv");
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_test);
 }
 
 
-/** Opcode 0x86. */
+/**
+ * @opcode      0x86
+ */
 FNIEMOP_DEF(iemOp_xchg_Eb_Gb)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    IEMOP_MNEMONIC("xchg Eb,Gb");
+    IEMOP_MNEMONIC(xchg_Eb_Gb, "xchg Eb,Gb");
 
     /*
      * If rm is denoting a register, no more instruction bytes.
@@ -10297,10 +3633,12 @@ FNIEMOP_DEF(iemOp_xchg_Eb_Gb)
 }
 
 
-/** Opcode 0x87. */
+/**
+ * @opcode      0x87
+ */
 FNIEMOP_DEF(iemOp_xchg_Ev_Gv)
 {
-    IEMOP_MNEMONIC("xchg Ev,Gv");
+    IEMOP_MNEMONIC(xchg_Ev_Gv, "xchg Ev,Gv");
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
 
     /*
@@ -10420,10 +3758,12 @@ FNIEMOP_DEF(iemOp_xchg_Ev_Gv)
 }
 
 
-/** Opcode 0x88. */
+/**
+ * @opcode      0x88
+ */
 FNIEMOP_DEF(iemOp_mov_Eb_Gb)
 {
-    IEMOP_MNEMONIC("mov Eb,Gb");
+    IEMOP_MNEMONIC(mov_Eb_Gb, "mov Eb,Gb");
 
     uint8_t bRm;
     IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -10461,10 +3801,12 @@ FNIEMOP_DEF(iemOp_mov_Eb_Gb)
 }
 
 
-/** Opcode 0x89. */
+/**
+ * @opcode      0x89
+ */
 FNIEMOP_DEF(iemOp_mov_Ev_Gv)
 {
-    IEMOP_MNEMONIC("mov Ev,Gv");
+    IEMOP_MNEMONIC(mov_Ev_Gv, "mov Ev,Gv");
 
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
 
@@ -10552,10 +3894,12 @@ FNIEMOP_DEF(iemOp_mov_Ev_Gv)
 }
 
 
-/** Opcode 0x8a. */
+/**
+ * @opcode      0x8a
+ */
 FNIEMOP_DEF(iemOp_mov_Gb_Eb)
 {
-    IEMOP_MNEMONIC("mov Gb,Eb");
+    IEMOP_MNEMONIC(mov_Gb_Eb, "mov Gb,Eb");
 
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
 
@@ -10591,10 +3935,12 @@ FNIEMOP_DEF(iemOp_mov_Gb_Eb)
 }
 
 
-/** Opcode 0x8b. */
+/**
+ * @opcode      0x8b
+ */
 FNIEMOP_DEF(iemOp_mov_Gv_Ev)
 {
-    IEMOP_MNEMONIC("mov Gv,Ev");
+    IEMOP_MNEMONIC(mov_Gv_Ev, "mov Gv,Ev");
 
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
 
@@ -10682,7 +4028,10 @@ FNIEMOP_DEF(iemOp_mov_Gv_Ev)
 }
 
 
-/** Opcode 0x63. */
+/**
+ * opcode      0x63
+ * @todo Table fixme
+ */
 FNIEMOP_DEF(iemOp_arpl_Ew_Gw_movsx_Gv_Ev)
 {
     if (pVCpu->iem.s.enmCpuMode != IEMMODE_64BIT)
@@ -10693,10 +4042,12 @@ FNIEMOP_DEF(iemOp_arpl_Ew_Gw_movsx_Gv_Ev)
 }
 
 
-/** Opcode 0x8c. */
+/**
+ * @opcode      0x8c
+ */
 FNIEMOP_DEF(iemOp_mov_Ev_Sw)
 {
-    IEMOP_MNEMONIC("mov Ev,Sw");
+    IEMOP_MNEMONIC(mov_Ev_Sw, "mov Ev,Sw");
 
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
 
@@ -10770,10 +4121,12 @@ FNIEMOP_DEF(iemOp_mov_Ev_Sw)
 
 
 
-/** Opcode 0x8d. */
+/**
+ * @opcode      0x8d
+ */
 FNIEMOP_DEF(iemOp_lea_Gv_M)
 {
-    IEMOP_MNEMONIC("lea Gv,M");
+    IEMOP_MNEMONIC(lea_Gv_M, "lea Gv,M");
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
         return IEMOP_RAISE_INVALID_OPCODE(); /* no register form */
@@ -10818,10 +4171,12 @@ FNIEMOP_DEF(iemOp_lea_Gv_M)
 }
 
 
-/** Opcode 0x8e. */
+/**
+ * @opcode      0x8e
+ */
 FNIEMOP_DEF(iemOp_mov_Sw_Ev)
 {
-    IEMOP_MNEMONIC("mov Sw,Ev");
+    IEMOP_MNEMONIC(mov_Sw_Ev, "mov Sw,Ev");
 
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
 
@@ -10887,7 +4242,7 @@ FNIEMOP_DEF_1(iemOp_pop_Ev, uint8_t, bRm)
        For now, we'll do a mostly safe interpreter-only implementation here. */
     /** @todo What's the deal with the 'reg' field and pop Ev?  Ignoring it for
      *        now until tests show it's checked. */
-    IEMOP_MNEMONIC("pop Ev");
+    IEMOP_MNEMONIC(pop_Ev, "pop Ev");
 
     /* Register access is relatively easy and can share code. */
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
@@ -10970,16 +4325,66 @@ FNIEMOP_DEF_1(iemOp_pop_Ev, uint8_t, bRm)
 }
 
 
-/** Opcode 0x8f. */
-FNIEMOP_DEF(iemOp_Grp1A)
+/**
+ * @opcode      0x8f
+ */
+FNIEMOP_DEF(iemOp_Grp1A__xop)
 {
+    /*
+     * AMD has defined /1 thru /7 as XOP prefix.  The prefix is similar to the
+     * three byte VEX prefix, except that the mmmmm field cannot have the values
+     * 0 thru 7, because it would then be confused with pop Ev (modrm.reg == 0).
+     */
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     if ((bRm & X86_MODRM_REG_MASK) == (0 << X86_MODRM_REG_SHIFT)) /* /0 */
         return FNIEMOP_CALL_1(iemOp_pop_Ev, bRm);
 
-    /* AMD has defined /1 thru /7 as XOP prefix (similar to three byte VEX). */
-    /** @todo XOP decoding. */
-    IEMOP_MNEMONIC("3-byte-xop");
+    IEMOP_MNEMONIC(xop, "xop");
+    if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fXop)
+    {
+        /** @todo Test when exactly the XOP conformance checks kick in during
+         * instruction decoding and fetching (using \#PF). */
+        uint8_t bXop2;   IEM_OPCODE_GET_NEXT_U8(&bXop2);
+        uint8_t bOpcode; IEM_OPCODE_GET_NEXT_U8(&bOpcode);
+        if (   (  pVCpu->iem.s.fPrefixes
+                & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ | IEM_OP_PRF_LOCK | IEM_OP_PRF_REX))
+            == 0)
+        {
+            pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_XOP;
+            if (bXop2 & 0x80 /* XOP.W */)
+                pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SIZE_REX_W;
+            pVCpu->iem.s.uRexReg    = (~bRm >> (7 - 3)) & 0x8; /* stored inverted; isolate as bit 3 */
+            pVCpu->iem.s.uRexIndex  = (~bRm >> (6 - 3)) & 0x8;
+            pVCpu->iem.s.uRexB      = (~bRm >> (5 - 3)) & 0x8;
+            pVCpu->iem.s.uVex3rdReg = (~bXop2 >> 3) & 0xf;
+            pVCpu->iem.s.uVexLength = (bXop2 >> 2) & 1;
+            pVCpu->iem.s.idxPrefix  = bXop2 & 0x3;
+
+            /** @todo XOP: Just use new tables and decoders. */
+            switch (bRm & 0x1f)
+            {
+                case 8: /* xop opcode map 8. */
+                    IEMOP_BITCH_ABOUT_STUB();
+                    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+
+                case 9: /* xop opcode map 9. */
+                    IEMOP_BITCH_ABOUT_STUB();
+                    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+
+                case 10: /* xop opcode map 10. */
+                    IEMOP_BITCH_ABOUT_STUB();
+                    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+
+                default:
+                    Log(("XOP: Invalid vvvv value: %#x!\n", bRm & 0x1f));
+                    return IEMOP_RAISE_INVALID_OPCODE();
+            }
+        }
+        else
+            Log(("XOP: Invalid prefix mix!\n"));
+    }
+    else
+        Log(("XOP: XOP support disabled!\n"));
     return IEMOP_RAISE_INVALID_OPCODE();
 }
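
A hedged sketch of the XOP byte-1 layout the code above unpacks from bRm:
bits 7:5 carry R/X/B stored inverted (as with VEX) and bits 4:0 carry the
opcode map, which must be 8, 9 or 10 (illustrative struct, not an IEM
type):

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct { bool fRexR, fRexX, fRexB; uint8_t idMap; } XOPBYTE1;

    static XOPBYTE1 DecodeXopByte1(uint8_t b1)
    {
        XOPBYTE1 Ret;
        Ret.fRexR = !(b1 & 0x80);  /* inverted in the encoding */
        Ret.fRexX = !(b1 & 0x40);
        Ret.fRexB = !(b1 & 0x20);
        Ret.idMap = (uint8_t)(b1 & 0x1f);
        return Ret;
    }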
 
@@ -11035,20 +4440,30 @@ FNIEMOP_DEF_1(iemOpCommonXchgGRegRax, uint8_t, iReg)
 }
 
 
-/** Opcode 0x90. */
+/**
+ * @opcode      0x90
+ */
 FNIEMOP_DEF(iemOp_nop)
 {
     /* R8/R8D and RAX/EAX can be exchanged. */
     if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REX_B)
     {
-        IEMOP_MNEMONIC("xchg r8,rAX");
+        IEMOP_MNEMONIC(xchg_r8_rAX, "xchg r8,rAX");
         return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xAX);
     }
 
     if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK)
-        IEMOP_MNEMONIC("pause");
+    {
+        IEMOP_MNEMONIC(pause, "pause");
+#ifdef VBOX_WITH_NESTED_HWVIRT
+        /** @todo Pause filter count and threshold with SVM nested hardware virt. */
+        Assert(!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSvmPauseFilter);
+        Assert(!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSvmPauseFilterThreshold);
+#endif
+        IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_PAUSE, SVM_EXIT_PAUSE, 0, 0);
+    }
     else
-        IEMOP_MNEMONIC("nop");
+        IEMOP_MNEMONIC(nop, "nop");
     IEM_MC_BEGIN(0, 0);
     IEM_MC_ADVANCE_RIP();
     IEM_MC_END();
@@ -11056,70 +4471,86 @@ FNIEMOP_DEF(iemOp_nop)
 }
 
 
-/** Opcode 0x91. */
+/**
+ * @opcode      0x91
+ */
 FNIEMOP_DEF(iemOp_xchg_eCX_eAX)
 {
-    IEMOP_MNEMONIC("xchg rCX,rAX");
+    IEMOP_MNEMONIC(xchg_rCX_rAX, "xchg rCX,rAX");
     return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xCX);
 }
 
 
-/** Opcode 0x92. */
+/**
+ * @opcode      0x92
+ */
 FNIEMOP_DEF(iemOp_xchg_eDX_eAX)
 {
-    IEMOP_MNEMONIC("xchg rDX,rAX");
+    IEMOP_MNEMONIC(xchg_rDX_rAX, "xchg rDX,rAX");
     return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xDX);
 }
 
 
-/** Opcode 0x93. */
+/**
+ * @opcode      0x93
+ */
 FNIEMOP_DEF(iemOp_xchg_eBX_eAX)
 {
-    IEMOP_MNEMONIC("xchg rBX,rAX");
+    IEMOP_MNEMONIC(xchg_rBX_rAX, "xchg rBX,rAX");
     return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xBX);
 }
 
 
-/** Opcode 0x94. */
+/**
+ * @opcode      0x94
+ */
 FNIEMOP_DEF(iemOp_xchg_eSP_eAX)
 {
-    IEMOP_MNEMONIC("xchg rSX,rAX");
+    IEMOP_MNEMONIC(xchg_rSP_rAX, "xchg rSP,rAX");
     return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xSP);
 }
 
 
-/** Opcode 0x95. */
+/**
+ * @opcode      0x95
+ */
 FNIEMOP_DEF(iemOp_xchg_eBP_eAX)
 {
-    IEMOP_MNEMONIC("xchg rBP,rAX");
+    IEMOP_MNEMONIC(xchg_rBP_rAX, "xchg rBP,rAX");
     return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xBP);
 }
 
 
-/** Opcode 0x96. */
+/**
+ * @opcode      0x96
+ */
 FNIEMOP_DEF(iemOp_xchg_eSI_eAX)
 {
-    IEMOP_MNEMONIC("xchg rSI,rAX");
+    IEMOP_MNEMONIC(xchg_rSI_rAX, "xchg rSI,rAX");
     return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xSI);
 }
 
 
-/** Opcode 0x97. */
+/**
+ * @opcode      0x97
+ */
 FNIEMOP_DEF(iemOp_xchg_eDI_eAX)
 {
-    IEMOP_MNEMONIC("xchg rDI,rAX");
+    IEMOP_MNEMONIC(xchg_rDI_rAX, "xchg rDI,rAX");
     return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xDI);
 }
 
 
-/** Opcode 0x98. */
+/**
+ * @opcode      0x98
+ */
 FNIEMOP_DEF(iemOp_cbw)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     switch (pVCpu->iem.s.enmEffOpSize)
     {
         case IEMMODE_16BIT:
-            IEMOP_MNEMONIC("cbw");
+            IEMOP_MNEMONIC(cbw, "cbw");
             IEM_MC_BEGIN(0, 1);
             IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 7) {
                 IEM_MC_OR_GREG_U16(X86_GREG_xAX, UINT16_C(0xff00));
@@ -11131,7 +4562,7 @@ FNIEMOP_DEF(iemOp_cbw)
             return VINF_SUCCESS;
 
         case IEMMODE_32BIT:
-            IEMOP_MNEMONIC("cwde");
+            IEMOP_MNEMONIC(cwde, "cwde");
             IEM_MC_BEGIN(0, 1);
             IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 15) {
                 IEM_MC_OR_GREG_U32(X86_GREG_xAX, UINT32_C(0xffff0000));
@@ -11143,7 +4574,7 @@ FNIEMOP_DEF(iemOp_cbw)
             return VINF_SUCCESS;
 
         case IEMMODE_64BIT:
-            IEMOP_MNEMONIC("cdqe");
+            IEMOP_MNEMONIC(cdqe, "cdqe");
             IEM_MC_BEGIN(0, 1);
             IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 31) {
                 IEM_MC_OR_GREG_U64(X86_GREG_xAX, UINT64_C(0xffffffff00000000));
@@ -11159,14 +4590,16 @@ FNIEMOP_DEF(iemOp_cbw)
 }
 
 
-/** Opcode 0x99. */
+/**
+ * @opcode      0x99
+ */
 FNIEMOP_DEF(iemOp_cwd)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     switch (pVCpu->iem.s.enmEffOpSize)
     {
         case IEMMODE_16BIT:
-            IEMOP_MNEMONIC("cwd");
+            IEMOP_MNEMONIC(cwd, "cwd");
             IEM_MC_BEGIN(0, 1);
             IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 15) {
                 IEM_MC_STORE_GREG_U16_CONST(X86_GREG_xDX, UINT16_C(0xffff));
@@ -11178,7 +4611,7 @@ FNIEMOP_DEF(iemOp_cwd)
             return VINF_SUCCESS;
 
         case IEMMODE_32BIT:
-            IEMOP_MNEMONIC("cdq");
+            IEMOP_MNEMONIC(cdq, "cdq");
             IEM_MC_BEGIN(0, 1);
             IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 31) {
                 IEM_MC_STORE_GREG_U32_CONST(X86_GREG_xDX, UINT32_C(0xffffffff));
@@ -11190,7 +4623,7 @@ FNIEMOP_DEF(iemOp_cwd)
             return VINF_SUCCESS;
 
         case IEMMODE_64BIT:
-            IEMOP_MNEMONIC("cqo");
+            IEMOP_MNEMONIC(cqo, "cqo");
             IEM_MC_BEGIN(0, 1);
             IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 63) {
                 IEM_MC_STORE_GREG_U64_CONST(X86_GREG_xDX, UINT64_C(0xffffffffffffffff));
@@ -11206,10 +4639,12 @@ FNIEMOP_DEF(iemOp_cwd)
 }
 
 
-/** Opcode 0x9a. */
+/**
+ * @opcode      0x9a
+ */
 FNIEMOP_DEF(iemOp_call_Ap)
 {
-    IEMOP_MNEMONIC("call Ap");
+    IEMOP_MNEMONIC(call_Ap, "call Ap");
     IEMOP_HLP_NO_64BIT();
 
     /* Decode the far pointer address and pass it on to the far call C implementation. */
@@ -11227,7 +4662,7 @@ FNIEMOP_DEF(iemOp_call_Ap)
 /** Opcode 0x9b. (aka fwait) */
 FNIEMOP_DEF(iemOp_wait)
 {
-    IEMOP_MNEMONIC("wait");
+    IEMOP_MNEMONIC(wait, "wait");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 0);
@@ -11239,7 +4674,9 @@ FNIEMOP_DEF(iemOp_wait)
 }
 
 
-/** Opcode 0x9c. */
+/**
+ * @opcode      0x9c
+ */
 FNIEMOP_DEF(iemOp_pushf_Fv)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -11248,7 +4685,9 @@ FNIEMOP_DEF(iemOp_pushf_Fv)
 }
 
 
-/** Opcode 0x9d. */
+/**
+ * @opcode      0x9d
+ */
 FNIEMOP_DEF(iemOp_popf_Fv)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -11257,10 +4696,12 @@ FNIEMOP_DEF(iemOp_popf_Fv)
 }
 
 
-/** Opcode 0x9e. */
+/**
+ * @opcode      0x9e
+ */
 FNIEMOP_DEF(iemOp_sahf)
 {
-    IEMOP_MNEMONIC("sahf");
+    IEMOP_MNEMONIC(sahf, "sahf");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     if (   pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT
         && !IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fLahfSahf)
@@ -11281,10 +4722,12 @@ FNIEMOP_DEF(iemOp_sahf)
 }
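
For the flag transfer itself: SAHF only lets AH write the five arithmetic
status flags in the low EFLAGS byte, and bit 1 always reads back as one.
A hedged stand-alone model (mask value per the x86 EFLAGS layout):

    #include <stdint.h>

    static uint32_t RefSahf(uint32_t fEfl, uint8_t bAh)
    {
        const uint32_t fMask = 0xd5;  /* SF|ZF|AF|PF|CF */
        return (fEfl & ~fMask) | (bAh & fMask) | UINT32_C(0x02);
    }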
 
 
-/** Opcode 0x9f. */
+/**
+ * @opcode      0x9f
+ */
 FNIEMOP_DEF(iemOp_lahf)
 {
-    IEMOP_MNEMONIC("lahf");
+    IEMOP_MNEMONIC(lahf, "lahf");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     if (   pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT
         && !IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fLahfSahf)
@@ -11300,7 +4743,7 @@ FNIEMOP_DEF(iemOp_lahf)
 
 
 /**
- * Macro used by iemOp_mov_Al_Ob, iemOp_mov_rAX_Ov, iemOp_mov_Ob_AL and
+ * Macro used by iemOp_mov_AL_Ob, iemOp_mov_rAX_Ov, iemOp_mov_Ob_AL and
  * iemOp_mov_Ov_rAX to fetch the moffsXX bit of the opcode and fend off lock
  * prefixes.  Will return on failures.
  * @param   a_GCPtrMemOff   The variable to store the offset in.
@@ -11324,8 +4767,10 @@ FNIEMOP_DEF(iemOp_lahf)
         IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX(); \
     } while (0)
 
-/** Opcode 0xa0. */
-FNIEMOP_DEF(iemOp_mov_Al_Ob)
+/**
+ * @opcode      0xa0
+ */
+FNIEMOP_DEF(iemOp_mov_AL_Ob)
 {
     /*
      * Get the offset and fend off lock prefixes.
@@ -11346,13 +4791,15 @@ FNIEMOP_DEF(iemOp_mov_Al_Ob)
 }
 
 
-/** Opcode 0xa1. */
+/**
+ * @opcode      0xa1
+ */
 FNIEMOP_DEF(iemOp_mov_rAX_Ov)
 {
     /*
      * Get the offset and fend off lock prefixes.
      */
-    IEMOP_MNEMONIC("mov rAX,Ov");
+    IEMOP_MNEMONIC(mov_rAX_Ov, "mov rAX,Ov");
     RTGCPTR GCPtrMemOff;
     IEMOP_FETCH_MOFFS_XX(GCPtrMemOff);
 
@@ -11393,7 +4840,9 @@ FNIEMOP_DEF(iemOp_mov_rAX_Ov)
 }
 
 
-/** Opcode 0xa2. */
+/**
+ * @opcode      0xa2
+ */
 FNIEMOP_DEF(iemOp_mov_Ob_AL)
 {
     /*
@@ -11415,7 +4864,9 @@ FNIEMOP_DEF(iemOp_mov_Ob_AL)
 }
 
 
-/** Opcode 0xa3. */
+/**
+ * @opcode      0xa3
+ */
 FNIEMOP_DEF(iemOp_mov_Ov_rAX)
 {
     /*
@@ -11479,7 +4930,9 @@ FNIEMOP_DEF(iemOp_mov_Ov_rAX)
         IEM_MC_ADVANCE_RIP(); \
         IEM_MC_END();
 
-/** Opcode 0xa4. */
+/**
+ * @opcode      0xa4
+ */
 FNIEMOP_DEF(iemOp_movsb_Xb_Yb)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -11489,7 +4942,7 @@ FNIEMOP_DEF(iemOp_movsb_Xb_Yb)
      */
     if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
     {
-        IEMOP_MNEMONIC("rep movsb Xb,Yb");
+        IEMOP_MNEMONIC(rep_movsb_Xb_Yb, "rep movsb Xb,Yb");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op8_addr16, pVCpu->iem.s.iEffSeg);
@@ -11498,7 +4951,7 @@ FNIEMOP_DEF(iemOp_movsb_Xb_Yb)
             IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    IEMOP_MNEMONIC("movsb Xb,Yb");
+    IEMOP_MNEMONIC(movsb_Xb_Yb, "movsb Xb,Yb");
 
     /*
      * Sharing case implementation with movs[wdq] below.
@@ -11514,7 +4967,9 @@ FNIEMOP_DEF(iemOp_movsb_Xb_Yb)
 }
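
The rep-prefixed path above defers to the iemCImpl_rep_movs_* workers;
the core loop they implement is, for the DF=0 case, just a counted
forward byte copy.  A hedged flat-memory model (illustrative; the real
workers must also honour segmentation, DF and fault boundaries):

    #include <stdint.h>

    static void RefRepMovsb(uint8_t *pbDst, const uint8_t *pbSrc, uint16_t *pCx)
    {
        while (*pCx)
        {
            *pbDst++ = *pbSrc++;  /* [ES:DI] <- [DS:SI], both advance */
            --*pCx;
        }
    }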
 
 
-/** Opcode 0xa5. */
+/**
+ * @opcode      0xa5
+ */
 FNIEMOP_DEF(iemOp_movswd_Xv_Yv)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -11524,7 +4979,7 @@ FNIEMOP_DEF(iemOp_movswd_Xv_Yv)
      */
     if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
     {
-        IEMOP_MNEMONIC("rep movs Xv,Yv");
+        IEMOP_MNEMONIC(rep_movs_Xv_Yv, "rep movs Xv,Yv");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -11555,7 +5010,7 @@ FNIEMOP_DEF(iemOp_movswd_Xv_Yv)
             IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    IEMOP_MNEMONIC("movs Xv,Yv");
+    IEMOP_MNEMONIC(movs_Xv_Yv, "movs Xv,Yv");
 
     /*
      * Annoying double switch here.
@@ -11626,7 +5081,9 @@ FNIEMOP_DEF(iemOp_movswd_Xv_Yv)
         IEM_MC_ADVANCE_RIP(); \
         IEM_MC_END(); \
 
-/** Opcode 0xa6. */
+/**
+ * @opcode      0xa6
+ */
 FNIEMOP_DEF(iemOp_cmpsb_Xb_Yb)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -11636,7 +5093,7 @@ FNIEMOP_DEF(iemOp_cmpsb_Xb_Yb)
      */
     if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPZ)
     {
-        IEMOP_MNEMONIC("repe cmps Xb,Yb");
+        IEMOP_MNEMONIC(repz_cmps_Xb_Yb, "repz cmps Xb,Yb");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repe_cmps_op8_addr16, pVCpu->iem.s.iEffSeg);
@@ -11647,7 +5104,7 @@ FNIEMOP_DEF(iemOp_cmpsb_Xb_Yb)
     }
     if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPNZ)
     {
-        IEMOP_MNEMONIC("repe cmps Xb,Yb");
+        IEMOP_MNEMONIC(repnz_cmps_Xb_Yb, "repnz cmps Xb,Yb");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_repne_cmps_op8_addr16, pVCpu->iem.s.iEffSeg);
@@ -11656,7 +5113,7 @@ FNIEMOP_DEF(iemOp_cmpsb_Xb_Yb)
             IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    IEMOP_MNEMONIC("cmps Xb,Yb");
+    IEMOP_MNEMONIC(cmps_Xb_Yb, "cmps Xb,Yb");
 
     /*
      * Sharing case implementation with cmps[wdq] below.
@@ -11673,7 +5130,9 @@ FNIEMOP_DEF(iemOp_cmpsb_Xb_Yb)
 }
 
 
-/** Opcode 0xa7. */
+/**
+ * @opcode      0xa7
+ */
 FNIEMOP_DEF(iemOp_cmpswd_Xv_Yv)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -11683,7 +5142,7 @@ FNIEMOP_DEF(iemOp_cmpswd_Xv_Yv)
      */
     if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPZ)
     {
-        IEMOP_MNEMONIC("repe cmps Xv,Yv");
+        IEMOP_MNEMONIC(repe_cmps_Xv_Yv, "repe cmps Xv,Yv");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -11717,7 +5176,7 @@ FNIEMOP_DEF(iemOp_cmpswd_Xv_Yv)
 
     if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPNZ)
     {
-        IEMOP_MNEMONIC("repne cmps Xv,Yv");
+        IEMOP_MNEMONIC(repne_cmps_Xv_Yv, "repne cmps Xv,Yv");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -11749,7 +5208,7 @@ FNIEMOP_DEF(iemOp_cmpswd_Xv_Yv)
         }
     }
 
-    IEMOP_MNEMONIC("cmps Xv,Yv");
+    IEMOP_MNEMONIC(cmps_Xv_Yv, "cmps Xv,Yv");
 
     /*
      * Annoying double switch here.
@@ -11794,19 +5253,23 @@ FNIEMOP_DEF(iemOp_cmpswd_Xv_Yv)
 
 #undef IEM_CMPS_CASE
 
-/** Opcode 0xa8. */
+/**
+ * @opcode      0xa8
+ */
 FNIEMOP_DEF(iemOp_test_AL_Ib)
 {
-    IEMOP_MNEMONIC("test al,Ib");
+    IEMOP_MNEMONIC(test_al_Ib, "test al,Ib");
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_test);
 }
 
 
-/** Opcode 0xa9. */
+/**
+ * @opcode      0xa9
+ */
 FNIEMOP_DEF(iemOp_test_eAX_Iz)
 {
-    IEMOP_MNEMONIC("test rAX,Iz");
+    IEMOP_MNEMONIC(test_rAX_Iz, "test rAX,Iz");
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
     return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_test);
 }
@@ -11828,7 +5291,9 @@ FNIEMOP_DEF(iemOp_test_eAX_Iz)
         IEM_MC_ADVANCE_RIP(); \
         IEM_MC_END(); \
 
-/** Opcode 0xaa. */
+/**
+ * @opcode      0xaa
+ */
 FNIEMOP_DEF(iemOp_stosb_Yb_AL)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -11838,7 +5303,7 @@ FNIEMOP_DEF(iemOp_stosb_Yb_AL)
      */
     if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
     {
-        IEMOP_MNEMONIC("rep stos Yb,al");
+        IEMOP_MNEMONIC(rep_stos_Yb_al, "rep stos Yb,al");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_al_m16);
@@ -11847,7 +5312,7 @@ FNIEMOP_DEF(iemOp_stosb_Yb_AL)
             IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    IEMOP_MNEMONIC("stos Yb,al");
+    IEMOP_MNEMONIC(stos_Yb_al, "stos Yb,al");
 
     /*
      * Sharing case implementation with stos[wdq] below.
@@ -11863,7 +5328,9 @@ FNIEMOP_DEF(iemOp_stosb_Yb_AL)
 }
 
 
-/** Opcode 0xab. */
+/**
+ * @opcode      0xab
+ */
 FNIEMOP_DEF(iemOp_stoswd_Yv_eAX)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -11873,7 +5340,7 @@ FNIEMOP_DEF(iemOp_stoswd_Yv_eAX)
      */
     if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
     {
-        IEMOP_MNEMONIC("rep stos Yv,rAX");
+        IEMOP_MNEMONIC(rep_stos_Yv_rAX, "rep stos Yv,rAX");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -11904,7 +5371,7 @@ FNIEMOP_DEF(iemOp_stoswd_Yv_eAX)
             IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    IEMOP_MNEMONIC("stos Yv,rAX");
+    IEMOP_MNEMONIC(stos_Yv_rAX, "stos Yv,rAX");
 
     /*
      * Annoying double switch here.
@@ -11964,7 +5431,9 @@ FNIEMOP_DEF(iemOp_stoswd_Yv_eAX)
         IEM_MC_ADVANCE_RIP(); \
         IEM_MC_END();
 
-/** Opcode 0xac. */
+/**
+ * @opcode      0xac
+ */
 FNIEMOP_DEF(iemOp_lodsb_AL_Xb)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -11974,7 +5443,7 @@ FNIEMOP_DEF(iemOp_lodsb_AL_Xb)
      */
     if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
     {
-        IEMOP_MNEMONIC("rep lodsb al,Xb");
+        IEMOP_MNEMONIC(rep_lodsb_AL_Xb, "rep lodsb AL,Xb");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_lods_al_m16, pVCpu->iem.s.iEffSeg);
@@ -11983,7 +5452,7 @@ FNIEMOP_DEF(iemOp_lodsb_AL_Xb)
             IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    IEMOP_MNEMONIC("lodsb al,Xb");
+    IEMOP_MNEMONIC(lodsb_AL_Xb, "lodsb AL,Xb");
 
     /*
      * Sharing case implementation with lods[wdq] below.
@@ -11999,7 +5468,9 @@ FNIEMOP_DEF(iemOp_lodsb_AL_Xb)
 }
 
 
-/** Opcode 0xad. */
+/**
+ * @opcode      0xad
+ */
 FNIEMOP_DEF(iemOp_lodswd_eAX_Xv)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -12009,7 +5480,7 @@ FNIEMOP_DEF(iemOp_lodswd_eAX_Xv)
      */
     if (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
     {
-        IEMOP_MNEMONIC("rep lods rAX,Xv");
+        IEMOP_MNEMONIC(rep_lods_rAX_Xv, "rep lods rAX,Xv");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -12040,7 +5511,7 @@ FNIEMOP_DEF(iemOp_lodswd_eAX_Xv)
             IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    IEMOP_MNEMONIC("lods rAX,Xv");
+    IEMOP_MNEMONIC(lods_rAX_Xv, "lods rAX,Xv");
 
     /*
      * Annoying double switch here.
@@ -12106,7 +5577,9 @@ FNIEMOP_DEF(iemOp_lodswd_eAX_Xv)
         IEM_MC_ADVANCE_RIP(); \
         IEM_MC_END();
 
-/** Opcode 0xae. */
+/**
+ * @opcode      0xae
+ */
 FNIEMOP_DEF(iemOp_scasb_AL_Xb)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -12116,7 +5589,7 @@ FNIEMOP_DEF(iemOp_scasb_AL_Xb)
      */
     if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPZ)
     {
-        IEMOP_MNEMONIC("repe scasb al,Xb");
+        IEMOP_MNEMONIC(repe_scasb_AL_Xb, "repe scasb AL,Xb");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repe_scas_al_m16);
@@ -12127,7 +5600,7 @@ FNIEMOP_DEF(iemOp_scasb_AL_Xb)
     }
     if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPNZ)
     {
-        IEMOP_MNEMONIC("repne scasb al,Xb");
+        IEMOP_MNEMONIC(repne_scasb_AL_Xb, "repne scasb AL,Xb");
         switch (pVCpu->iem.s.enmEffAddrMode)
         {
             case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_repne_scas_al_m16);
@@ -12136,7 +5609,7 @@ FNIEMOP_DEF(iemOp_scasb_AL_Xb)
             IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    IEMOP_MNEMONIC("scasb al,Xb");
+    IEMOP_MNEMONIC(scasb_AL_Xb, "scasb AL,Xb");
 
     /*
      * Sharing case implementation with scas[wdq] below.
@@ -12152,7 +5625,9 @@ FNIEMOP_DEF(iemOp_scasb_AL_Xb)
 }
 
 
-/** Opcode 0xaf. */
+/**
+ * @opcode      0xaf
+ */
 FNIEMOP_DEF(iemOp_scaswd_eAX_Xv)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -12162,7 +5637,7 @@ FNIEMOP_DEF(iemOp_scaswd_eAX_Xv)
      */
     if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPZ)
     {
-        IEMOP_MNEMONIC("repe scas rAX,Xv");
+        IEMOP_MNEMONIC(repe_scas_rAX_Xv, "repe scas rAX,Xv");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -12195,7 +5670,7 @@ FNIEMOP_DEF(iemOp_scaswd_eAX_Xv)
     }
     if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REPNZ)
     {
-        IEMOP_MNEMONIC("repne scas rAX,Xv");
+        IEMOP_MNEMONIC(repne_scas_rAX_Xv, "repne scas rAX,Xv");
         switch (pVCpu->iem.s.enmEffOpSize)
         {
             case IEMMODE_16BIT:
@@ -12226,7 +5701,7 @@ FNIEMOP_DEF(iemOp_scaswd_eAX_Xv)
             IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    IEMOP_MNEMONIC("scas rAX,Xv");
+    IEMOP_MNEMONIC(scas_rAX_Xv, "scas rAX,Xv");
 
     /*
      * Annoying double switch here.
@@ -12288,66 +5763,82 @@ FNIEMOP_DEF_1(iemOpCommonMov_r8_Ib, uint8_t, iReg)
 }
 
 
-/** Opcode 0xb0. */
+/**
+ * @opcode      0xb0
+ */
 FNIEMOP_DEF(iemOp_mov_AL_Ib)
 {
-    IEMOP_MNEMONIC("mov AL,Ib");
+    IEMOP_MNEMONIC(mov_AL_Ib, "mov AL,Ib");
     return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xAX | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xb1. */
+/**
+ * @opcode      0xb1
+ */
 FNIEMOP_DEF(iemOp_CL_Ib)
 {
-    IEMOP_MNEMONIC("mov CL,Ib");
+    IEMOP_MNEMONIC(mov_CL_Ib, "mov CL,Ib");
     return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xCX | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xb2. */
+/**
+ * @opcode      0xb2
+ */
 FNIEMOP_DEF(iemOp_DL_Ib)
 {
-    IEMOP_MNEMONIC("mov DL,Ib");
+    IEMOP_MNEMONIC(mov_DL_Ib, "mov DL,Ib");
     return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xDX | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xb3. */
+/**
+ * @opcode      0xb3
+ */
 FNIEMOP_DEF(iemOp_BL_Ib)
 {
-    IEMOP_MNEMONIC("mov BL,Ib");
+    IEMOP_MNEMONIC(mov_BL_Ib, "mov BL,Ib");
     return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xBX | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xb4. */
+/**
+ * @opcode      0xb4
+ */
 FNIEMOP_DEF(iemOp_mov_AH_Ib)
 {
-    IEMOP_MNEMONIC("mov AH,Ib");
+    IEMOP_MNEMONIC(mov_AH_Ib, "mov AH,Ib");
     return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xSP | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xb5. */
+/**
+ * @opcode      0xb5
+ */
 FNIEMOP_DEF(iemOp_CH_Ib)
 {
-    IEMOP_MNEMONIC("mov CH,Ib");
+    IEMOP_MNEMONIC(mov_CH_Ib, "mov CH,Ib");
     return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xBP | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xb6. */
+/**
+ * @opcode      0xb6
+ */
 FNIEMOP_DEF(iemOp_DH_Ib)
 {
-    IEMOP_MNEMONIC("mov DH,Ib");
+    IEMOP_MNEMONIC(mov_DH_Ib, "mov DH,Ib");
     return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xSI | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xb7. */
+/**
+ * @opcode      0xb7
+ */
 FNIEMOP_DEF(iemOp_BH_Ib)
 {
-    IEMOP_MNEMONIC("mov BH,Ib");
+    IEMOP_MNEMONIC(mov_BH_Ib, "mov BH,Ib");
     return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xDI | pVCpu->iem.s.uRexB);
 }
 
@@ -12402,71 +5893,89 @@ FNIEMOP_DEF_1(iemOpCommonMov_Rv_Iv, uint8_t, iReg)
 }
 
 
-/** Opcode 0xb8. */
+/**
+ * @opcode      0xb8
+ */
 FNIEMOP_DEF(iemOp_eAX_Iv)
 {
-    IEMOP_MNEMONIC("mov rAX,IV");
+    IEMOP_MNEMONIC(mov_rAX_Iv, "mov rAX,Iv");
     return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xAX | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xb9. */
+/**
+ * @opcode      0xb9
+ */
 FNIEMOP_DEF(iemOp_eCX_Iv)
 {
-    IEMOP_MNEMONIC("mov rCX,IV");
+    IEMOP_MNEMONIC(mov_rCX_Iv, "mov rCX,Iv");
     return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xCX | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xba. */
+/**
+ * @opcode      0xba
+ */
 FNIEMOP_DEF(iemOp_eDX_Iv)
 {
-    IEMOP_MNEMONIC("mov rDX,IV");
+    IEMOP_MNEMONIC(mov_rDX_Iv, "mov rDX,Iv");
     return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xDX | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xbb. */
+/**
+ * @opcode      0xbb
+ */
 FNIEMOP_DEF(iemOp_eBX_Iv)
 {
-    IEMOP_MNEMONIC("mov rBX,IV");
+    IEMOP_MNEMONIC(mov_rBX_Iv, "mov rBX,Iv");
     return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xBX | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xbc. */
+/**
+ * @opcode      0xbc
+ */
 FNIEMOP_DEF(iemOp_eSP_Iv)
 {
-    IEMOP_MNEMONIC("mov rSP,IV");
+    IEMOP_MNEMONIC(mov_rSP_Iv, "mov rSP,Iv");
     return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xSP | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xbd. */
+/**
+ * @opcode      0xbd
+ */
 FNIEMOP_DEF(iemOp_eBP_Iv)
 {
-    IEMOP_MNEMONIC("mov rBP,IV");
+    IEMOP_MNEMONIC(mov_rBP_Iv, "mov rBP,Iv");
     return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xBP | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xbe. */
+/**
+ * @opcode      0xbe
+ */
 FNIEMOP_DEF(iemOp_eSI_Iv)
 {
-    IEMOP_MNEMONIC("mov rSI,IV");
+    IEMOP_MNEMONIC(mov_rSI_Iv, "mov rSI,Iv");
     return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xSI | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xbf. */
+/**
+ * @opcode      0xbf
+ */
 FNIEMOP_DEF(iemOp_eDI_Iv)
 {
-    IEMOP_MNEMONIC("mov rDI,IV");
+    IEMOP_MNEMONIC(mov_rDI_Iv, "mov rDI,Iv");
     return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xDI | pVCpu->iem.s.uRexB);
 }
 
 
-/** Opcode 0xc0. */
+/**
+ * @opcode      0xc0
+ */
 FNIEMOP_DEF(iemOp_Grp2_Eb_Ib)
 {
     IEMOP_HLP_MIN_186();
@@ -12474,13 +5983,13 @@ FNIEMOP_DEF(iemOp_Grp2_Eb_Ib)
     PCIEMOPSHIFTSIZES pImpl;
     switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
     {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Eb,Ib"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Eb,Ib"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Eb,Ib"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Eb,Ib"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Eb,Ib"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Eb,Ib"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Eb,Ib"); break;
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC(rol_Eb_Ib, "rol Eb,Ib"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC(ror_Eb_Ib, "ror Eb,Ib"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC(rcl_Eb_Ib, "rcl Eb,Ib"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC(rcr_Eb_Ib, "rcr Eb,Ib"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC(shl_Eb_Ib, "shl Eb,Ib"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC(shr_Eb_Ib, "shr Eb,Ib"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC(sar_Eb_Ib, "sar Eb,Ib"); break;
         case 6: return IEMOP_RAISE_INVALID_OPCODE();
         IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc maybe stupid */
     }
@@ -12527,7 +6036,9 @@ FNIEMOP_DEF(iemOp_Grp2_Eb_Ib)
 }
 
 
-/** Opcode 0xc1. */
+/**
+ * @opcode      0xc1
+ */
 FNIEMOP_DEF(iemOp_Grp2_Ev_Ib)
 {
     IEMOP_HLP_MIN_186();
@@ -12535,13 +6046,13 @@ FNIEMOP_DEF(iemOp_Grp2_Ev_Ib)
     PCIEMOPSHIFTSIZES pImpl;
     switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
     {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Ev,Ib"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Ev,Ib"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,Ib"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,Ib"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,Ib"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,Ib"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,Ib"); break;
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC(rol_Ev_Ib, "rol Ev,Ib"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC(ror_Ev_Ib, "ror Ev,Ib"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC(rcl_Ev_Ib, "rcl Ev,Ib"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC(rcr_Ev_Ib, "rcr Ev,Ib"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC(shl_Ev_Ib, "shl Ev,Ib"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC(shr_Ev_Ib, "shr Ev,Ib"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC(sar_Ev_Ib, "sar Ev,Ib"); break;
         case 6: return IEMOP_RAISE_INVALID_OPCODE();
         IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc maybe stupid */
     }
@@ -12668,10 +6179,12 @@ FNIEMOP_DEF(iemOp_Grp2_Ev_Ib)
 }
 
 
-/** Opcode 0xc2. */
+/**
+ * @opcode      0xc2
+ */
 FNIEMOP_DEF(iemOp_retn_Iw)
 {
-    IEMOP_MNEMONIC("retn Iw");
+    IEMOP_MNEMONIC(retn_Iw, "retn Iw");
     uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -12679,84 +6192,143 @@ FNIEMOP_DEF(iemOp_retn_Iw)
 }
 
 
-/** Opcode 0xc3. */
+/**
+ * @opcode      0xc3
+ */
 FNIEMOP_DEF(iemOp_retn)
 {
-    IEMOP_MNEMONIC("retn");
+    IEMOP_MNEMONIC(retn, "retn");
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retn, pVCpu->iem.s.enmEffOpSize, 0);
 }
 
 
-/** Opcode 0xc4. */
-FNIEMOP_DEF(iemOp_les_Gv_Mp_vex2)
+/**
+ * @opcode      0xc4
+ */
+FNIEMOP_DEF(iemOp_les_Gv_Mp__vex3)
 {
+    /* The LES instruction is invalid in 64-bit mode. In legacy and
+       compatibility mode it is invalid with MOD=3.
+       The use as a VEX prefix is made possible by assigning the inverted
+       REX.R and REX.X to the two MOD bits, since the REX bits are ignored
+       outside of 64-bit mode.  VEX is not available in real or v86 mode. */
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     if (   pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT
-        || (bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
-    {
-        IEMOP_MNEMONIC("2-byte-vex");
-        /* The LES instruction is invalid 64-bit mode. In legacy and
-           compatability mode it is invalid with MOD=3.
-           The use as a VEX prefix is made possible by assigning the inverted
-           REX.R to the top MOD bit, and the top bit in the inverted register
-           specifier to the bottom MOD bit, thereby effectively limiting 32-bit
-           to accessing registers 0..7 in this VEX form. */
-        /** @todo VEX: Just use new tables for it. */
+        || (bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT) )
+    {
+        IEMOP_MNEMONIC(vex3_prefix, "vex3");
+        if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fAvx)
+        {
+            /* Note! The real mode, v8086 mode and invalid prefix checks are
+                     done once the instruction is fully decoded. */
+            uint8_t bVex2;   IEM_OPCODE_GET_NEXT_U8(&bVex2);
+            uint8_t bOpcode; IEM_OPCODE_GET_NEXT_U8(&bOpcode);
+            pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_VEX;
+            if (bVex2 & 0x80 /* VEX.W */)
+                pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_SIZE_REX_W;
+            pVCpu->iem.s.uRexReg    = (~bRm >> (7 - 3)) & 0x8;
+            pVCpu->iem.s.uRexIndex  = (~bRm >> (6 - 3)) & 0x8;
+            pVCpu->iem.s.uRexB      = (~bRm >> (5 - 3)) & 0x8;
+            pVCpu->iem.s.uVex3rdReg = (~bVex2 >> 3) & 0xf;
+            pVCpu->iem.s.uVexLength = (bVex2 >> 2) & 1;
+            pVCpu->iem.s.idxPrefix  = bVex2 & 0x3;
+
+            switch (bRm & 0x1f)
+            {
+                case 1: /* 0x0f lead opcode byte. */
+#ifdef IEM_WITH_VEX
+                    return FNIEMOP_CALL(g_apfnVexMap1[(uintptr_t)bOpcode * 4 + pVCpu->iem.s.idxPrefix]);
+#else
+                    IEMOP_BITCH_ABOUT_STUB();
+                    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+#endif
+
+                case 2: /* 0x0f 0x38 lead opcode bytes. */
+#ifdef IEM_WITH_VEX
+                    return FNIEMOP_CALL(g_apfnVexMap2[(uintptr_t)bOpcode * 4 + pVCpu->iem.s.idxPrefix]);
+#else
+                    IEMOP_BITCH_ABOUT_STUB();
+                    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+#endif
+
+                case 3: /* 0x0f 0x3a lead opcode bytes. */
+#ifdef IEM_WITH_VEX
+                    return FNIEMOP_CALL(g_apfnVexMap3[(uintptr_t)bOpcode * 4 + pVCpu->iem.s.idxPrefix]);
+#else
+                    IEMOP_BITCH_ABOUT_STUB();
+                    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+#endif
+
+                default:
+                    Log(("VEX3: Invalid vvvv value: %#x!\n", bRm & 0x1f));
+                    return IEMOP_RAISE_INVALID_OPCODE();
+            }
+        }
+        Log(("VEX3: AVX support disabled!\n"));
         return IEMOP_RAISE_INVALID_OPCODE();
     }
-    IEMOP_MNEMONIC("les Gv,Mp");
+
+    IEMOP_MNEMONIC(les_Gv_Mp, "les Gv,Mp");
     return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_ES, bRm);
 }
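
The rewritten 0xc4 path above decodes the two payload bytes of a 3-byte VEX prefix in place, which the inverted-bit shifts can obscure. Below is a self-contained sketch of the same field extraction, assuming the standard VEX3 layout (byte 1: ~R ~X ~B m-mmmm; byte 2: W ~vvvv L pp); the variable names are illustrative, not VirtualBox's:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t b1 = 0xe1;     /* ~R=1 ~X=1 ~B=1 (REX bits all 0), map 1 = 0x0f  */
        uint8_t b2 = 0x7d;     /* W=0, ~vvvv=1111 (vvvv=0), L=1 (256-bit), pp=01 */
        uint8_t inv1 = (uint8_t)~b1;             /* R/X/B are stored inverted    */
        unsigned fRexR  = (inv1 >> 7) & 1;
        unsigned fRexX  = (inv1 >> 6) & 1;
        unsigned fRexB  = (inv1 >> 5) & 1;
        unsigned iMap   =  b1        & 0x1f;     /* 1=0x0f, 2=0x0f38, 3=0x0f3a   */
        unsigned fW     = (b2 >> 7)  & 1;
        unsigned iVvvv  = ((uint8_t)~b2 >> 3) & 0xf; /* 2nd source, inverted     */
        unsigned fL     = (b2 >> 2)  & 1;
        unsigned idxPfx =  b2        & 0x3;      /* 0=none, 1=0x66, 2=0xf3, 3=0xf2 */
        printf("map=%u W=%u vvvv=%u L=%u pp=%u R=%u X=%u B=%u\n",
               iMap, fW, iVvvv, fL, idxPfx, fRexR, fRexX, fRexB);
        return 0;
    }

The map field then selects one of the g_apfnVexMap tables; each table holds four function pointers per opcode, indexed by the mandatory-prefix value, which is why the dispatch above computes bOpcode * 4 + idxPrefix.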
 
 
-/** Opcode 0xc5. */
-FNIEMOP_DEF(iemOp_lds_Gv_Mp_vex3)
+/**
+ * @opcode      0xc5
+ */
+FNIEMOP_DEF(iemOp_lds_Gv_Mp__vex2)
 {
-    /* The LDS instruction is invalid 64-bit mode. In legacy and
+    /* The LDS instruction is invalid in 64-bit mode. In legacy and
        compatibility mode it is invalid with MOD=3.
        The use as a VEX prefix is made possible by assigning the inverted
-       REX.R and REX.X to the two MOD bits, since the REX bits are ignored
-       outside of 64-bit mode.  VEX is not available in real or v86 mode. */
+       REX.R to the top MOD bit, and the top bit in the inverted register
+       specifier to the bottom MOD bit, thereby effectively limiting 32-bit
+       code to registers 0..7 in this VEX form. */
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
-    if (pVCpu->iem.s.enmCpuMode != IEMMODE_64BIT)
+    if (   pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT
+        || (bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
     {
-        if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+        IEMOP_MNEMONIC(vex2_prefix, "vex2");
+        if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fAvx)
         {
-            IEMOP_MNEMONIC("lds Gv,Mp");
-            return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_DS, bRm);
-        }
-        IEMOP_HLP_NO_REAL_OR_V86_MODE();
-    }
+            /* Note! The real mode, v8086 mode and invalid prefix checks are
+                     done once the instruction is fully decoded. */
+            uint8_t bOpcode; IEM_OPCODE_GET_NEXT_U8(&bOpcode);
+            pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_VEX;
+            pVCpu->iem.s.uRexReg    = (~bRm >> (7 - 3)) & 0x8;
+            pVCpu->iem.s.uVex3rdReg = (~bRm >> 3) & 0xf;
+            pVCpu->iem.s.uVexLength = (bRm >> 2) & 1;
+            pVCpu->iem.s.idxPrefix  = bRm & 0x3;
 
-    IEMOP_MNEMONIC("3-byte-vex");
-    /** @todo Test when exctly the VEX conformance checks kick in during
-     * instruction decoding and fetching (using \#PF). */
-    uint8_t bVex1;   IEM_OPCODE_GET_NEXT_U8(&bVex1);
-    uint8_t bVex2;   IEM_OPCODE_GET_NEXT_U8(&bVex2);
-    uint8_t bOpcode; IEM_OPCODE_GET_NEXT_U8(&bOpcode);
-#if 0 /* will make sense of this next week... */
-    if (   !(pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ | IEM_OP_PRF_REPZ | IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REX))
-        &&
-        )
-    {
+#ifdef IEM_WITH_VEX
+            return FNIEMOP_CALL(g_apfnVexMap1[(uintptr_t)bOpcode * 4 + pVCpu->iem.s.idxPrefix]);
+#else
+            IEMOP_BITCH_ABOUT_STUB();
+            return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+#endif
+        }
 
+        /** @todo does intel completely decode the sequence with SIB/disp before \#UD? */
+        Log(("VEX2: AVX support disabled!\n"));
+        return IEMOP_RAISE_INVALID_OPCODE();
     }
-#endif
 
-    /** @todo VEX: Just use new tables for it. */
-    return IEMOP_RAISE_INVALID_OPCODE();
+    IEMOP_MNEMONIC(lds_Gv_Mp, "lds Gv,Mp");
+    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_DS, bRm);
 }
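
The 2-byte form handled above is tighter still: the byte that would have been the LDS ModR/M byte is the entire VEX payload, which is why the code reuses bRm directly. A sketch under the same assumptions (layout ~R ~vvvv L pp), including why legacy-mode LDS and the prefix cannot collide:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t bRm = 0xf9;                  /* ~R=1, ~vvvv=1111 (vvvv=0), L=0, pp=01 */
        uint8_t inv = (uint8_t)~bRm;
        unsigned fRexR  = (inv >> 7) & 1;    /* only REX.R fits in this form */
        unsigned iVvvv  = (inv >> 3) & 0xf;
        unsigned fL     = (bRm >> 2) & 1;
        unsigned idxPfx =  bRm       & 0x3;
        /* Outside 64-bit mode REX.R must be 0 and vvvv can only name regs 0..7,
           so the stored (inverted) bits 7 and 6 are both 1: read as a ModR/M
           byte, MOD=3, which is exactly the encoding that is invalid for LDS. */
        printf("R=%u vvvv=%u L=%u pp=%u MOD=%u\n",
               fRexR, iVvvv, fL, idxPfx, (bRm >> 6) & 3);
        return 0;
    }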
 
 
-/** Opcode 0xc6. */
+/**
+ * @opcode      0xc6
+ */
 FNIEMOP_DEF(iemOp_Grp11_Eb_Ib)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     if ((bRm & X86_MODRM_REG_MASK) != (0 << X86_MODRM_REG_SHIFT)) /* only mov Eb,Ib in this group. */
         return IEMOP_RAISE_INVALID_OPCODE();
-    IEMOP_MNEMONIC("mov Eb,Ib");
+    IEMOP_MNEMONIC(mov_Eb_Ib, "mov Eb,Ib");
 
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
     {
@@ -12784,13 +6356,15 @@ FNIEMOP_DEF(iemOp_Grp11_Eb_Ib)
 }
 
 
-/** Opcode 0xc7. */
+/**
+ * @opcode      0xc7
+ */
 FNIEMOP_DEF(iemOp_Grp11_Ev_Iz)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     if ((bRm & X86_MODRM_REG_MASK) != (0 << X86_MODRM_REG_SHIFT)) /* only mov Ev,Iz in this group. */
         return IEMOP_RAISE_INVALID_OPCODE();
-    IEMOP_MNEMONIC("mov Ev,Iz");
+    IEMOP_MNEMONIC(mov_Ev_Iz, "mov Ev,Iz");
 
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
     {
@@ -12873,10 +6447,12 @@ FNIEMOP_DEF(iemOp_Grp11_Ev_Iz)
 
 
 
-/** Opcode 0xc8. */
+/**
+ * @opcode      0xc8
+ */
 FNIEMOP_DEF(iemOp_enter_Iw_Ib)
 {
-    IEMOP_MNEMONIC("enter Iw,Ib");
+    IEMOP_MNEMONIC(enter_Iw_Ib, "enter Iw,Ib");
     IEMOP_HLP_MIN_186();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
     uint16_t cbFrame;        IEM_OPCODE_GET_NEXT_U16(&cbFrame);
@@ -12886,10 +6462,12 @@ FNIEMOP_DEF(iemOp_enter_Iw_Ib)
 }
 
 
-/** Opcode 0xc9. */
+/**
+ * @opcode      0xc9
+ */
 FNIEMOP_DEF(iemOp_leave)
 {
-    IEMOP_MNEMONIC("retn");
+    IEMOP_MNEMONIC(leave, "leave");
     IEMOP_HLP_MIN_186();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -12897,10 +6475,12 @@ FNIEMOP_DEF(iemOp_leave)
 }
 
 
-/** Opcode 0xca. */
+/**
+ * @opcode      0xca
+ */
 FNIEMOP_DEF(iemOp_retf_Iw)
 {
-    IEMOP_MNEMONIC("retf Iw");
+    IEMOP_MNEMONIC(retf_Iw, "retf Iw");
     uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(&u16Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -12908,25 +6488,31 @@ FNIEMOP_DEF(iemOp_retf_Iw)
 }
 
 
-/** Opcode 0xcb. */
+/**
+ * @opcode      0xcb
+ */
 FNIEMOP_DEF(iemOp_retf)
 {
-    IEMOP_MNEMONIC("retf");
+    IEMOP_MNEMONIC(retf, "retf");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retf, pVCpu->iem.s.enmEffOpSize, 0);
 }
 
 
-/** Opcode 0xcc. */
-FNIEMOP_DEF(iemOp_int_3)
+/**
+ * @opcode      0xcc
+ */
+FNIEMOP_DEF(iemOp_int3)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_int, X86_XCPT_BP, true /*fIsBpInstr*/);
 }
 
 
-/** Opcode 0xcd. */
+/**
+ * @opcode      0xcd
+ */
 FNIEMOP_DEF(iemOp_int_Ib)
 {
     uint8_t u8Int; IEM_OPCODE_GET_NEXT_U8(&u8Int);
@@ -12935,10 +6521,12 @@ FNIEMOP_DEF(iemOp_int_Ib)
 }
 
 
-/** Opcode 0xce. */
+/**
+ * @opcode      0xce
+ */
 FNIEMOP_DEF(iemOp_into)
 {
-    IEMOP_MNEMONIC("into");
+    IEMOP_MNEMONIC(into, "into");
     IEMOP_HLP_NO_64BIT();
 
     IEM_MC_BEGIN(2, 0);
@@ -12950,29 +6538,33 @@ FNIEMOP_DEF(iemOp_into)
 }
 
 
-/** Opcode 0xcf. */
+/**
+ * @opcode      0xcf
+ */
 FNIEMOP_DEF(iemOp_iret)
 {
-    IEMOP_MNEMONIC("iret");
+    IEMOP_MNEMONIC(iret, "iret");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_iret, pVCpu->iem.s.enmEffOpSize);
 }
 
 
-/** Opcode 0xd0. */
+/**
+ * @opcode      0xd0
+ */
 FNIEMOP_DEF(iemOp_Grp2_Eb_1)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     PCIEMOPSHIFTSIZES pImpl;
     switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
     {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Eb,1"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Eb,1"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Eb,1"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Eb,1"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Eb,1"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Eb,1"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Eb,1"); break;
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC(rol_Eb_1, "rol Eb,1"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC(ror_Eb_1, "ror Eb,1"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC(rcl_Eb_1, "rcl Eb,1"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC(rcr_Eb_1, "rcr Eb,1"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC(shl_Eb_1, "shl Eb,1"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC(shr_Eb_1, "shr Eb,1"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC(sar_Eb_1, "sar Eb,1"); break;
         case 6: return IEMOP_RAISE_INVALID_OPCODE();
         IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc maybe, well... */
     }
@@ -13017,20 +6609,22 @@ FNIEMOP_DEF(iemOp_Grp2_Eb_1)
 
 
 
-/** Opcode 0xd1. */
+/**
+ * @opcode      0xd1
+ */
 FNIEMOP_DEF(iemOp_Grp2_Ev_1)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     PCIEMOPSHIFTSIZES pImpl;
     switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
     {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Ev,1"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Ev,1"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,1"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,1"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,1"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,1"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,1"); break;
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC(rol_Ev_1, "rol Ev,1"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC(ror_Ev_1, "ror Ev,1"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC(rcl_Ev_1, "rcl Ev,1"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC(rcr_Ev_1, "rcr Ev,1"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC(shl_Ev_1, "shl Ev,1"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC(shr_Ev_1, "shr Ev,1"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC(sar_Ev_1, "sar Ev,1"); break;
         case 6: return IEMOP_RAISE_INVALID_OPCODE();
         IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc maybe, well... */
     }
@@ -13150,20 +6744,22 @@ FNIEMOP_DEF(iemOp_Grp2_Ev_1)
 }
 
 
-/** Opcode 0xd2. */
+/**
+ * @opcode      0xd2
+ */
 FNIEMOP_DEF(iemOp_Grp2_Eb_CL)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     PCIEMOPSHIFTSIZES pImpl;
     switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
     {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Eb,CL"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Eb,CL"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Eb,CL"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Eb,CL"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Eb,CL"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Eb,CL"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Eb,CL"); break;
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC(rol_Eb_CL, "rol Eb,CL"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC(ror_Eb_CL, "ror Eb,CL"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC(rcl_Eb_CL, "rcl Eb,CL"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC(rcr_Eb_CL, "rcr Eb,CL"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC(shl_Eb_CL, "shl Eb,CL"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC(shr_Eb_CL, "shr Eb,CL"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC(sar_Eb_CL, "sar Eb,CL"); break;
         case 6: return IEMOP_RAISE_INVALID_OPCODE();
         IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc, grr. */
     }
@@ -13209,20 +6805,22 @@ FNIEMOP_DEF(iemOp_Grp2_Eb_CL)
 }
 
 
-/** Opcode 0xd3. */
+/**
+ * @opcode      0xd3
+ */
 FNIEMOP_DEF(iemOp_Grp2_Ev_CL)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     PCIEMOPSHIFTSIZES pImpl;
     switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
     {
-        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Ev,CL"); break;
-        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Ev,CL"); break;
-        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,CL"); break;
-        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,CL"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,CL"); break;
-        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,CL"); break;
-        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,CL"); break;
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC(rol_Ev_CL, "rol Ev,CL"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC(ror_Ev_CL, "ror Ev,CL"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC(rcl_Ev_CL, "rcl Ev,CL"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC(rcr_Ev_CL, "rcr Ev,CL"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC(shl_Ev_CL, "shl Ev,CL"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC(shr_Ev_CL, "shr Ev,CL"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC(sar_Ev_CL, "sar Ev,CL"); break;
         case 6: return IEMOP_RAISE_INVALID_OPCODE();
         IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* gcc maybe stupid */
     }
@@ -13347,10 +6945,12 @@ FNIEMOP_DEF(iemOp_Grp2_Ev_CL)
     }
 }
 
-/** Opcode 0xd4. */
+/**
+ * @opcode      0xd4
+ */
 FNIEMOP_DEF(iemOp_aam_Ib)
 {
-    IEMOP_MNEMONIC("aam Ib");
+    IEMOP_MNEMONIC(aam_Ib, "aam Ib");
     uint8_t bImm; IEM_OPCODE_GET_NEXT_U8(&bImm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_NO_64BIT();
@@ -13360,10 +6960,12 @@ FNIEMOP_DEF(iemOp_aam_Ib)
 }
 
 
-/** Opcode 0xd5. */
+/**
+ * @opcode      0xd5
+ */
 FNIEMOP_DEF(iemOp_aad_Ib)
 {
-    IEMOP_MNEMONIC("aad Ib");
+    IEMOP_MNEMONIC(aad_Ib, "aad Ib");
     uint8_t bImm; IEM_OPCODE_GET_NEXT_U8(&bImm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_NO_64BIT();
@@ -13371,10 +6973,12 @@ FNIEMOP_DEF(iemOp_aad_Ib)
 }
 
 
-/** Opcode 0xd6. */
+/**
+ * @opcode      0xd6
+ */
 FNIEMOP_DEF(iemOp_salc)
 {
-    IEMOP_MNEMONIC("salc");
+    IEMOP_MNEMONIC(salc, "salc");
     IEMOP_HLP_MIN_286(); /* (undocumented at the time) */
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -13392,10 +6996,12 @@ FNIEMOP_DEF(iemOp_salc)
 }
 
 
-/** Opcode 0xd7. */
+/**
+ * @opcode      0xd7
+ */
 FNIEMOP_DEF(iemOp_xlat)
 {
-    IEMOP_MNEMONIC("xlat");
+    IEMOP_MNEMONIC(xlat, "xlat");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     switch (pVCpu->iem.s.enmEffAddrMode)
     {
@@ -13539,7 +7145,7 @@ FNIEMOP_DEF_2(iemOpHlpFpuNoStore_st0_stN_pop, uint8_t, bRm, PFNIEMAIMPLFPUR80FSW
 /** Opcode 0xd8 11/0. */
 FNIEMOP_DEF_1(iemOp_fadd_stN,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fadd st0,stN");
+    IEMOP_MNEMONIC(fadd_st0_stN, "fadd st0,stN");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fadd_r80_by_r80);
 }
 
@@ -13547,7 +7153,7 @@ FNIEMOP_DEF_1(iemOp_fadd_stN,   uint8_t, bRm)
 /** Opcode 0xd8 11/1. */
 FNIEMOP_DEF_1(iemOp_fmul_stN,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fmul st0,stN");
+    IEMOP_MNEMONIC(fmul_st0_stN, "fmul st0,stN");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fmul_r80_by_r80);
 }
 
@@ -13555,7 +7161,7 @@ FNIEMOP_DEF_1(iemOp_fmul_stN,   uint8_t, bRm)
 /** Opcode 0xd8 11/2. */
 FNIEMOP_DEF_1(iemOp_fcom_stN,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcom st0,stN");
+    IEMOP_MNEMONIC(fcom_st0_stN, "fcom st0,stN");
     return FNIEMOP_CALL_2(iemOpHlpFpuNoStore_st0_stN, bRm, iemAImpl_fcom_r80_by_r80);
 }
 
@@ -13563,7 +7169,7 @@ FNIEMOP_DEF_1(iemOp_fcom_stN,   uint8_t, bRm)
 /** Opcode 0xd8 11/3. */
 FNIEMOP_DEF_1(iemOp_fcomp_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcomp st0,stN");
+    IEMOP_MNEMONIC(fcomp_st0_stN, "fcomp st0,stN");
     return FNIEMOP_CALL_2(iemOpHlpFpuNoStore_st0_stN_pop, bRm, iemAImpl_fcom_r80_by_r80);
 }
 
@@ -13571,7 +7177,7 @@ FNIEMOP_DEF_1(iemOp_fcomp_stN,  uint8_t, bRm)
 /** Opcode 0xd8 11/4. */
 FNIEMOP_DEF_1(iemOp_fsub_stN,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fsub st0,stN");
+    IEMOP_MNEMONIC(fsub_st0_stN, "fsub st0,stN");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fsub_r80_by_r80);
 }
 
@@ -13579,7 +7185,7 @@ FNIEMOP_DEF_1(iemOp_fsub_stN,   uint8_t, bRm)
 /** Opcode 0xd8 11/5. */
 FNIEMOP_DEF_1(iemOp_fsubr_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fsubr st0,stN");
+    IEMOP_MNEMONIC(fsubr_st0_stN, "fsubr st0,stN");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fsubr_r80_by_r80);
 }
 
@@ -13587,7 +7193,7 @@ FNIEMOP_DEF_1(iemOp_fsubr_stN,  uint8_t, bRm)
 /** Opcode 0xd8 11/6. */
 FNIEMOP_DEF_1(iemOp_fdiv_stN,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fdiv st0,stN");
+    IEMOP_MNEMONIC(fdiv_st0_stN, "fdiv st0,stN");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fdiv_r80_by_r80);
 }
 
@@ -13595,7 +7201,7 @@ FNIEMOP_DEF_1(iemOp_fdiv_stN,   uint8_t, bRm)
 /** Opcode 0xd8 11/7. */
 FNIEMOP_DEF_1(iemOp_fdivr_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fdivr st0,stN");
+    IEMOP_MNEMONIC(fdivr_st0_stN, "fdivr st0,stN");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, bRm, iemAImpl_fdivr_r80_by_r80);
 }
 
@@ -13640,7 +7246,7 @@ FNIEMOP_DEF_2(iemOpHlpFpu_st0_m32r, uint8_t, bRm, PFNIEMAIMPLFPUR32, pfnAImpl)
 /** Opcode 0xd8 !11/0. */
 FNIEMOP_DEF_1(iemOp_fadd_m32r,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fadd st0,m32r");
+    IEMOP_MNEMONIC(fadd_st0_m32r, "fadd st0,m32r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fadd_r80_by_r32);
 }
 
@@ -13648,7 +7254,7 @@ FNIEMOP_DEF_1(iemOp_fadd_m32r,  uint8_t, bRm)
 /** Opcode 0xd8 !11/1. */
 FNIEMOP_DEF_1(iemOp_fmul_m32r,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fmul st0,m32r");
+    IEMOP_MNEMONIC(fmul_st0_m32r, "fmul st0,m32r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fmul_r80_by_r32);
 }
 
@@ -13656,7 +7262,7 @@ FNIEMOP_DEF_1(iemOp_fmul_m32r,  uint8_t, bRm)
 /** Opcode 0xd8 !11/2. */
 FNIEMOP_DEF_1(iemOp_fcom_m32r,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcom st0,m32r");
+    IEMOP_MNEMONIC(fcom_st0_m32r, "fcom st0,m32r");
 
     IEM_MC_BEGIN(3, 3);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
@@ -13690,7 +7296,7 @@ FNIEMOP_DEF_1(iemOp_fcom_m32r,  uint8_t, bRm)
 /** Opcode 0xd8 !11/3. */
 FNIEMOP_DEF_1(iemOp_fcomp_m32r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcomp st0,m32r");
+    IEMOP_MNEMONIC(fcomp_st0_m32r, "fcomp st0,m32r");
 
     IEM_MC_BEGIN(3, 3);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
@@ -13724,7 +7330,7 @@ FNIEMOP_DEF_1(iemOp_fcomp_m32r, uint8_t, bRm)
 /** Opcode 0xd8 !11/4. */
 FNIEMOP_DEF_1(iemOp_fsub_m32r,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fsub st0,m32r");
+    IEMOP_MNEMONIC(fsub_st0_m32r, "fsub st0,m32r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fsub_r80_by_r32);
 }
 
@@ -13732,7 +7338,7 @@ FNIEMOP_DEF_1(iemOp_fsub_m32r,  uint8_t, bRm)
 /** Opcode 0xd8 !11/5. */
 FNIEMOP_DEF_1(iemOp_fsubr_m32r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fsubr st0,m32r");
+    IEMOP_MNEMONIC(fsubr_st0_m32r, "fsubr st0,m32r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fsubr_r80_by_r32);
 }
 
@@ -13740,7 +7346,7 @@ FNIEMOP_DEF_1(iemOp_fsubr_m32r, uint8_t, bRm)
 /** Opcode 0xd8 !11/6. */
 FNIEMOP_DEF_1(iemOp_fdiv_m32r,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fdiv st0,m32r");
+    IEMOP_MNEMONIC(fdiv_st0_m32r, "fdiv st0,m32r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fdiv_r80_by_r32);
 }
 
@@ -13748,12 +7354,14 @@ FNIEMOP_DEF_1(iemOp_fdiv_m32r,  uint8_t, bRm)
 /** Opcode 0xd8 !11/7. */
 FNIEMOP_DEF_1(iemOp_fdivr_m32r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fdivr st0,m32r");
+    IEMOP_MNEMONIC(fdivr_st0_m32r, "fdivr st0,m32r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32r, bRm, iemAImpl_fdivr_r80_by_r32);
 }
 
 
-/** Opcode 0xd8. */
+/**
+ * @opcode      0xd8
+ */
 FNIEMOP_DEF(iemOp_EscF0)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -13796,7 +7404,7 @@ FNIEMOP_DEF(iemOp_EscF0)
  * @sa  iemOp_fld_m64r */
 FNIEMOP_DEF_1(iemOp_fld_m32r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fld m32r");
+    IEMOP_MNEMONIC(fld_m32r, "fld m32r");
 
     IEM_MC_BEGIN(2, 3);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
@@ -13829,7 +7437,7 @@ FNIEMOP_DEF_1(iemOp_fld_m32r, uint8_t, bRm)
 /** Opcode 0xd9 !11/2 mem32real */
 FNIEMOP_DEF_1(iemOp_fst_m32r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fst m32r");
+    IEMOP_MNEMONIC(fst_m32r, "fst m32r");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -13865,7 +7473,7 @@ FNIEMOP_DEF_1(iemOp_fst_m32r, uint8_t, bRm)
 /** Opcode 0xd9 !11/3 */
 FNIEMOP_DEF_1(iemOp_fstp_m32r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fstp m32r");
+    IEMOP_MNEMONIC(fstp_m32r, "fstp m32r");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -13901,7 +7509,7 @@ FNIEMOP_DEF_1(iemOp_fstp_m32r, uint8_t, bRm)
 /** Opcode 0xd9 !11/4 */
 FNIEMOP_DEF_1(iemOp_fldenv, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fldenv m14/28byte");
+    IEMOP_MNEMONIC(fldenv, "fldenv m14/m28byte");
     IEM_MC_BEGIN(3, 0);
     IEM_MC_ARG_CONST(IEMMODE,           enmEffOpSize, /*=*/ pVCpu->iem.s.enmEffOpSize,  0);
     IEM_MC_ARG(uint8_t,                 iEffSeg,                                    1);
@@ -13920,7 +7528,7 @@ FNIEMOP_DEF_1(iemOp_fldenv, uint8_t, bRm)
 /** Opcode 0xd9 !11/5 */
 FNIEMOP_DEF_1(iemOp_fldcw, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fldcw m2byte");
+    IEMOP_MNEMONIC(fldcw_m2byte, "fldcw m2byte");
     IEM_MC_BEGIN(1, 1);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
     IEM_MC_ARG(uint16_t,                u16Fsw,                                     0);
@@ -13938,7 +7546,7 @@ FNIEMOP_DEF_1(iemOp_fldcw, uint8_t, bRm)
 /** Opcode 0xd9 !11/6 */
 FNIEMOP_DEF_1(iemOp_fnstenv, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fstenv m14/m28byte");
+    IEMOP_MNEMONIC(fnstenv, "fnstenv m14/m28byte");
     IEM_MC_BEGIN(3, 0);
     IEM_MC_ARG_CONST(IEMMODE,           enmEffOpSize, /*=*/ pVCpu->iem.s.enmEffOpSize,  0);
     IEM_MC_ARG(uint8_t,                 iEffSeg,                                    1);
@@ -13957,7 +7565,7 @@ FNIEMOP_DEF_1(iemOp_fnstenv, uint8_t, bRm)
 /** Opcode 0xd9 !11/7 */
 FNIEMOP_DEF_1(iemOp_fnstcw, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fnstcw m2byte");
+    IEMOP_MNEMONIC(fnstcw_m2byte, "fnstcw m2byte");
     IEM_MC_BEGIN(2, 0);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fcw);
@@ -13976,7 +7584,7 @@ FNIEMOP_DEF_1(iemOp_fnstcw, uint8_t, bRm)
 /** Opcode 0xd9 0xd0, 0xd9 0xd8-0xdf, ++?.  */
 FNIEMOP_DEF(iemOp_fnop)
 {
-    IEMOP_MNEMONIC("fnop");
+    IEMOP_MNEMONIC(fnop, "fnop");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 0);
@@ -13995,7 +7603,7 @@ FNIEMOP_DEF(iemOp_fnop)
 /** Opcode 0xd9 11/0 stN */
 FNIEMOP_DEF_1(iemOp_fld_stN, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fld stN");
+    IEMOP_MNEMONIC(fld_stN, "fld stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     /** @todo Testcase: Check if this raises \#MF?  Intel mentioned it not. AMD
@@ -14024,7 +7632,7 @@ FNIEMOP_DEF_1(iemOp_fld_stN, uint8_t, bRm)
 /** Opcode 0xd9 11/3 stN */
 FNIEMOP_DEF_1(iemOp_fxch_stN, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fxch stN");
+    IEMOP_MNEMONIC(fxch_stN, "fxch stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     /** @todo Testcase: Check if this raises \#MF?  Intel mentioned it not. AMD
@@ -14056,10 +7664,10 @@ FNIEMOP_DEF_1(iemOp_fxch_stN, uint8_t, bRm)
 /** Opcode 0xd9 11/4, 0xdd 11/2. */
 FNIEMOP_DEF_1(iemOp_fstp_stN, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fstp st0,stN");
+    IEMOP_MNEMONIC(fstp_st0_stN, "fstp st0,stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
-    /* fstp st0, st0 is frequendly used as an official 'ffreep st0' sequence. */
+    /* fstp st0, st0 is frequently used as an official 'ffreep st0' sequence. */
     uint8_t const iDstReg = bRm & X86_MODRM_RM_MASK;
     if (!iDstReg)
     {
@@ -14135,7 +7743,7 @@ FNIEMOP_DEF_1(iemOpHlpFpu_st0, PFNIEMAIMPLFPUR80UNARY, pfnAImpl)
 /** Opcode 0xd9 0xe0. */
 FNIEMOP_DEF(iemOp_fchs)
 {
-    IEMOP_MNEMONIC("fchs st0");
+    IEMOP_MNEMONIC(fchs_st0, "fchs st0");
     return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fchs_r80);
 }
 
@@ -14143,7 +7751,7 @@ FNIEMOP_DEF(iemOp_fchs)
 /** Opcode 0xd9 0xe1. */
 FNIEMOP_DEF(iemOp_fabs)
 {
-    IEMOP_MNEMONIC("fabs st0");
+    IEMOP_MNEMONIC(fabs_st0, "fabs st0");
     return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fabs_r80);
 }
 
@@ -14181,7 +7789,7 @@ FNIEMOP_DEF_1(iemOpHlpFpuNoStore_st0, PFNIEMAIMPLFPUR80UNARYFSW, pfnAImpl)
 /** Opcode 0xd9 0xe4. */
 FNIEMOP_DEF(iemOp_ftst)
 {
-    IEMOP_MNEMONIC("ftst st0");
+    IEMOP_MNEMONIC(ftst_st0, "ftst st0");
     return FNIEMOP_CALL_1(iemOpHlpFpuNoStore_st0, iemAImpl_ftst_r80);
 }
 
@@ -14189,7 +7797,7 @@ FNIEMOP_DEF(iemOp_ftst)
 /** Opcode 0xd9 0xe5. */
 FNIEMOP_DEF(iemOp_fxam)
 {
-    IEMOP_MNEMONIC("fxam st0");
+    IEMOP_MNEMONIC(fxam_st0, "fxam st0");
     return FNIEMOP_CALL_1(iemOpHlpFpuNoStore_st0, iemAImpl_fxam_r80);
 }
 
@@ -14226,7 +7834,7 @@ FNIEMOP_DEF_1(iemOpHlpFpuPushConstant, PFNIEMAIMPLFPUR80LDCONST, pfnAImpl)
 /** Opcode 0xd9 0xe8. */
 FNIEMOP_DEF(iemOp_fld1)
 {
-    IEMOP_MNEMONIC("fld1");
+    IEMOP_MNEMONIC(fld1, "fld1");
     return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fld1);
 }
 
@@ -14234,7 +7842,7 @@ FNIEMOP_DEF(iemOp_fld1)
 /** Opcode 0xd9 0xe9. */
 FNIEMOP_DEF(iemOp_fldl2t)
 {
-    IEMOP_MNEMONIC("fldl2t");
+    IEMOP_MNEMONIC(fldl2t, "fldl2t");
     return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldl2t);
 }
 
@@ -14242,14 +7850,14 @@ FNIEMOP_DEF(iemOp_fldl2t)
 /** Opcode 0xd9 0xea. */
 FNIEMOP_DEF(iemOp_fldl2e)
 {
-    IEMOP_MNEMONIC("fldl2e");
+    IEMOP_MNEMONIC(fldl2e, "fldl2e");
     return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldl2e);
 }
 
 /** Opcode 0xd9 0xeb. */
 FNIEMOP_DEF(iemOp_fldpi)
 {
-    IEMOP_MNEMONIC("fldpi");
+    IEMOP_MNEMONIC(fldpi, "fldpi");
     return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldpi);
 }
 
@@ -14257,14 +7865,14 @@ FNIEMOP_DEF(iemOp_fldpi)
 /** Opcode 0xd9 0xec. */
 FNIEMOP_DEF(iemOp_fldlg2)
 {
-    IEMOP_MNEMONIC("fldlg2");
+    IEMOP_MNEMONIC(fldlg2, "fldlg2");
     return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldlg2);
 }
 
 /** Opcode 0xd9 0xed. */
 FNIEMOP_DEF(iemOp_fldln2)
 {
-    IEMOP_MNEMONIC("fldln2");
+    IEMOP_MNEMONIC(fldln2, "fldln2");
     return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldln2);
 }
 
@@ -14272,7 +7880,7 @@ FNIEMOP_DEF(iemOp_fldln2)
 /** Opcode 0xd9 0xee. */
 FNIEMOP_DEF(iemOp_fldz)
 {
-    IEMOP_MNEMONIC("fldz");
+    IEMOP_MNEMONIC(fldz, "fldz");
     return FNIEMOP_CALL_1(iemOpHlpFpuPushConstant, iemAImpl_fldz);
 }
 
@@ -14280,42 +7888,36 @@ FNIEMOP_DEF(iemOp_fldz)
 /** Opcode 0xd9 0xf0. */
 FNIEMOP_DEF(iemOp_f2xm1)
 {
-    IEMOP_MNEMONIC("f2xm1 st0");
+    IEMOP_MNEMONIC(f2xm1_st0, "f2xm1 st0");
     return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_f2xm1_r80);
 }
 
 
-/** Opcode 0xd9 0xf1. */
-FNIEMOP_DEF(iemOp_fylx2)
-{
-    IEMOP_MNEMONIC("fylx2 st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fyl2x_r80);
-}
-
-
 /**
- * Common worker for FPU instructions working on ST0 and having two outputs, one
- * replacing ST0 and one pushed onto the stack.
+ * Common worker for FPU instructions working on STn and ST0, storing the result
+ * in STn, and popping the stack unless IE, DE or ZE was raised.
  *
  * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
  */
-FNIEMOP_DEF_1(iemOpHlpFpuReplace_st0_push, PFNIEMAIMPLFPUR80UNARYTWO, pfnAImpl)
+FNIEMOP_DEF_2(iemOpHlpFpu_stN_st0_pop, uint8_t, bRm, PFNIEMAIMPLFPUR80, pfnAImpl)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
-    IEM_MC_BEGIN(2, 1);
-    IEM_MC_LOCAL(IEMFPURESULTTWO,           FpuResTwo);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULTTWO,  pFpuResTwo, FpuResTwo,  0);
-    IEM_MC_ARG(PCRTFLOAT80U,                pr80Value,              1);
+    IEM_MC_BEGIN(3, 1);
+    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
+    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,        FpuRes,     0);
+    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
+    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value2,                 2);
 
     IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
     IEM_MC_MAYBE_RAISE_FPU_XCPT();
+
     IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
-        IEM_MC_CALL_FPU_AIMPL_2(pfnAImpl, pFpuResTwo, pr80Value);
-        IEM_MC_PUSH_FPU_RESULT_TWO(FpuResTwo);
+    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80(pr80Value1, bRm & X86_MODRM_RM_MASK, pr80Value2, 0)
+        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pFpuRes, pr80Value1, pr80Value2);
+        IEM_MC_STORE_FPU_RESULT_THEN_POP(FpuRes, bRm & X86_MODRM_RM_MASK);
     IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO();
+        IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP(bRm & X86_MODRM_RM_MASK);
     IEM_MC_ENDIF();
     IEM_MC_ADVANCE_RIP();
 
@@ -14324,39 +7926,37 @@ FNIEMOP_DEF_1(iemOpHlpFpuReplace_st0_push, PFNIEMAIMPLFPUR80UNARYTWO, pfnAImpl)
 }
 
 
-/** Opcode 0xd9 0xf2. */
-FNIEMOP_DEF(iemOp_fptan)
+/** Opcode 0xd9 0xf1. */
+FNIEMOP_DEF(iemOp_fyl2x)
 {
-    IEMOP_MNEMONIC("fptan st0");
-    return FNIEMOP_CALL_1(iemOpHlpFpuReplace_st0_push, iemAImpl_fptan_r80_r80);
+    IEMOP_MNEMONIC(fyl2x_st1_st0, "fyl2x st1,st0");
+    return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, 1, iemAImpl_fyl2x_r80_by_r80);
 }
 
 
 /**
- * Common worker for FPU instructions working on STn and ST0, storing the result
- * in STn, and popping the stack unless IE, DE or ZE was raised.
+ * Common worker for FPU instructions working on ST0 and having two outputs, one
+ * replacing ST0 and one pushed onto the stack.
  *
  * @param   pfnAImpl    Pointer to the instruction implementation (assembly).
  */
-FNIEMOP_DEF_2(iemOpHlpFpu_stN_st0_pop, uint8_t, bRm, PFNIEMAIMPLFPUR80, pfnAImpl)
+FNIEMOP_DEF_1(iemOpHlpFpuReplace_st0_push, PFNIEMAIMPLFPUR80UNARYTWO, pfnAImpl)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
-    IEM_MC_BEGIN(3, 1);
-    IEM_MC_LOCAL(IEMFPURESULT,          FpuRes);
-    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULT, pFpuRes,        FpuRes,     0);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value1,                 1);
-    IEM_MC_ARG(PCRTFLOAT80U,            pr80Value2,                 2);
+    IEM_MC_BEGIN(2, 1);
+    IEM_MC_LOCAL(IEMFPURESULTTWO,           FpuResTwo);
+    IEM_MC_ARG_LOCAL_REF(PIEMFPURESULTTWO,  pFpuResTwo, FpuResTwo,  0);
+    IEM_MC_ARG(PCRTFLOAT80U,                pr80Value,              1);
 
     IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
     IEM_MC_MAYBE_RAISE_FPU_XCPT();
-
     IEM_MC_PREPARE_FPU_USAGE();
-    IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80(pr80Value1, bRm & X86_MODRM_RM_MASK, pr80Value2, 0)
-        IEM_MC_CALL_FPU_AIMPL_3(pfnAImpl, pFpuRes, pr80Value1, pr80Value2);
-        IEM_MC_STORE_FPU_RESULT_THEN_POP(FpuRes, bRm & X86_MODRM_RM_MASK);
+    IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80(pr80Value, 0)
+        IEM_MC_CALL_FPU_AIMPL_2(pfnAImpl, pFpuResTwo, pr80Value);
+        IEM_MC_PUSH_FPU_RESULT_TWO(FpuResTwo);
     IEM_MC_ELSE()
-        IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP(bRm & X86_MODRM_RM_MASK);
+        IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO();
     IEM_MC_ENDIF();
     IEM_MC_ADVANCE_RIP();
 
@@ -14365,10 +7965,18 @@ FNIEMOP_DEF_2(iemOpHlpFpu_stN_st0_pop, uint8_t, bRm, PFNIEMAIMPLFPUR80, pfnAImpl
 }
 
 
+/** Opcode 0xd9 0xf2. */
+FNIEMOP_DEF(iemOp_fptan)
+{
+    IEMOP_MNEMONIC(fptan_st0, "fptan st0");
+    return FNIEMOP_CALL_1(iemOpHlpFpuReplace_st0_push, iemAImpl_fptan_r80_r80);
+}
+
+
 /** Opcode 0xd9 0xf3. */
 FNIEMOP_DEF(iemOp_fpatan)
 {
-    IEMOP_MNEMONIC("fpatan st1,st0");
+    IEMOP_MNEMONIC(fpatan_st1_st0, "fpatan st1,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, 1, iemAImpl_fpatan_r80_by_r80);
 }
 
@@ -14376,7 +7984,7 @@ FNIEMOP_DEF(iemOp_fpatan)
 /** Opcode 0xd9 0xf4. */
 FNIEMOP_DEF(iemOp_fxtract)
 {
-    IEMOP_MNEMONIC("fxtract st0");
+    IEMOP_MNEMONIC(fxtract_st0, "fxtract st0");
     return FNIEMOP_CALL_1(iemOpHlpFpuReplace_st0_push, iemAImpl_fxtract_r80_r80);
 }
 
@@ -14384,7 +7992,7 @@ FNIEMOP_DEF(iemOp_fxtract)
 /** Opcode 0xd9 0xf5. */
 FNIEMOP_DEF(iemOp_fprem1)
 {
-    IEMOP_MNEMONIC("fprem1 st0, st1");
+    IEMOP_MNEMONIC(fprem1_st0_st1, "fprem1 st0,st1");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, 1, iemAImpl_fprem1_r80_by_r80);
 }
 
@@ -14392,7 +8000,7 @@ FNIEMOP_DEF(iemOp_fprem1)
 /** Opcode 0xd9 0xf6. */
 FNIEMOP_DEF(iemOp_fdecstp)
 {
-    IEMOP_MNEMONIC("fdecstp");
+    IEMOP_MNEMONIC(fdecstp, "fdecstp");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     /* Note! C0, C2 and C3 are documented as undefined, we clear them. */
     /** @todo Testcase: Check whether FOP, FPUIP and FPUCS are affected by
@@ -14416,7 +8024,7 @@ FNIEMOP_DEF(iemOp_fdecstp)
 /** Opcode 0xd9 0xf7. */
 FNIEMOP_DEF(iemOp_fincstp)
 {
-    IEMOP_MNEMONIC("fincstp");
+    IEMOP_MNEMONIC(fincstp, "fincstp");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     /* Note! C0, C2 and C3 are documented as undefined, we clear them. */
     /** @todo Testcase: Check whether FOP, FPUIP and FPUCS are affected by
@@ -14440,7 +8048,7 @@ FNIEMOP_DEF(iemOp_fincstp)
 /** Opcode 0xd9 0xf8. */
 FNIEMOP_DEF(iemOp_fprem)
 {
-    IEMOP_MNEMONIC("fprem st0, st1");
+    IEMOP_MNEMONIC(fprem_st0_st1, "fprem st0,st1");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, 1, iemAImpl_fprem_r80_by_r80);
 }
 
@@ -14448,7 +8056,7 @@ FNIEMOP_DEF(iemOp_fprem)
 /** Opcode 0xd9 0xf9. */
 FNIEMOP_DEF(iemOp_fyl2xp1)
 {
-    IEMOP_MNEMONIC("fyl2xp1 st1,st0");
+    IEMOP_MNEMONIC(fyl2xp1_st1_st0, "fyl2xp1 st1,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, 1, iemAImpl_fyl2xp1_r80_by_r80);
 }
 
@@ -14456,7 +8064,7 @@ FNIEMOP_DEF(iemOp_fyl2xp1)
 /** Opcode 0xd9 0xfa. */
 FNIEMOP_DEF(iemOp_fsqrt)
 {
-    IEMOP_MNEMONIC("fsqrt st0");
+    IEMOP_MNEMONIC(fsqrt_st0, "fsqrt st0");
     return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fsqrt_r80);
 }
 
@@ -14464,7 +8072,7 @@ FNIEMOP_DEF(iemOp_fsqrt)
 /** Opcode 0xd9 0xfb. */
 FNIEMOP_DEF(iemOp_fsincos)
 {
-    IEMOP_MNEMONIC("fsincos st0");
+    IEMOP_MNEMONIC(fsincos_st0, "fsincos st0");
     return FNIEMOP_CALL_1(iemOpHlpFpuReplace_st0_push, iemAImpl_fsincos_r80_r80);
 }
 
@@ -14472,7 +8080,7 @@ FNIEMOP_DEF(iemOp_fsincos)
 /** Opcode 0xd9 0xfc. */
 FNIEMOP_DEF(iemOp_frndint)
 {
-    IEMOP_MNEMONIC("frndint st0");
+    IEMOP_MNEMONIC(frndint_st0, "frndint st0");
     return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_frndint_r80);
 }
 
@@ -14480,7 +8088,7 @@ FNIEMOP_DEF(iemOp_frndint)
 /** Opcode 0xd9 0xfd. */
 FNIEMOP_DEF(iemOp_fscale)
 {
-    IEMOP_MNEMONIC("fscale st0, st1");
+    IEMOP_MNEMONIC(fscale_st0_st1, "fscale st0,st1");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_stN, 1, iemAImpl_fscale_r80_by_r80);
 }
 
@@ -14488,7 +8096,7 @@ FNIEMOP_DEF(iemOp_fscale)
 /** Opcode 0xd9 0xfe. */
 FNIEMOP_DEF(iemOp_fsin)
 {
-    IEMOP_MNEMONIC("fsin st0");
+    IEMOP_MNEMONIC(fsin_st0, "fsin st0");
     return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fsin_r80);
 }
 
@@ -14496,7 +8104,7 @@ FNIEMOP_DEF(iemOp_fsin)
 /** Opcode 0xd9 0xff. */
 FNIEMOP_DEF(iemOp_fcos)
 {
-    IEMOP_MNEMONIC("fcos st0");
+    IEMOP_MNEMONIC(fcos_st0, "fcos st0");
     return FNIEMOP_CALL_1(iemOpHlpFpu_st0, iemAImpl_fcos_r80);
 }
 
@@ -14521,7 +8129,7 @@ IEM_STATIC const PFNIEMOP g_apfnEscF1_E0toFF[32] =
     /* 0xee */  iemOp_fldz,
     /* 0xef */  iemOp_Invalid,
     /* 0xf0 */  iemOp_f2xm1,
-    /* 0xf1 */  iemOp_fylx2,
+    /* 0xf1 */  iemOp_fyl2x,
     /* 0xf2 */  iemOp_fptan,
     /* 0xf3 */  iemOp_fpatan,
     /* 0xf4 */  iemOp_fxtract,
@@ -14539,7 +8147,9 @@ IEM_STATIC const PFNIEMOP g_apfnEscF1_E0toFF[32] =
 };
 
 
-/** Opcode 0xd9. */
+/**
+ * @opcode      0xd9
+ */
 FNIEMOP_DEF(iemOp_EscF1)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -14586,7 +8196,7 @@ FNIEMOP_DEF(iemOp_EscF1)
 /** Opcode 0xda 11/0. */
 FNIEMOP_DEF_1(iemOp_fcmovb_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcmovb st0,stN");
+    IEMOP_MNEMONIC(fcmovb_st0_stN, "fcmovb st0,stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 1);
@@ -14614,7 +8224,7 @@ FNIEMOP_DEF_1(iemOp_fcmovb_stN,  uint8_t, bRm)
 /** Opcode 0xda 11/1. */
 FNIEMOP_DEF_1(iemOp_fcmove_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcmove st0,stN");
+    IEMOP_MNEMONIC(fcmove_st0_stN, "fcmove st0,stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 1);
@@ -14642,7 +8252,7 @@ FNIEMOP_DEF_1(iemOp_fcmove_stN,  uint8_t, bRm)
 /** Opcode 0xda 11/2. */
 FNIEMOP_DEF_1(iemOp_fcmovbe_stN, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcmovbe st0,stN");
+    IEMOP_MNEMONIC(fcmovbe_st0_stN, "fcmovbe st0,stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 1);
@@ -14670,7 +8280,7 @@ FNIEMOP_DEF_1(iemOp_fcmovbe_stN, uint8_t, bRm)
 /** Opcode 0xda 11/3. */
 FNIEMOP_DEF_1(iemOp_fcmovu_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcmovu st0,stN");
+    IEMOP_MNEMONIC(fcmovu_st0_stN, "fcmovu st0,stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 1);
@@ -14731,7 +8341,7 @@ FNIEMOP_DEF_1(iemOpHlpFpuNoStore_st0_stN_pop_pop, PFNIEMAIMPLFPUR80FSW, pfnAImpl
 /** Opcode 0xda 0xe9. */
 FNIEMOP_DEF(iemOp_fucompp)
 {
-    IEMOP_MNEMONIC("fucompp st0,stN");
+    IEMOP_MNEMONIC(fucompp_st0_stN, "fucompp st0,stN");
     return FNIEMOP_CALL_1(iemOpHlpFpuNoStore_st0_stN_pop_pop, iemAImpl_fucom_r80_by_r80);
 }
 
@@ -14776,7 +8386,7 @@ FNIEMOP_DEF_2(iemOpHlpFpu_st0_m32i, uint8_t, bRm, PFNIEMAIMPLFPUI32, pfnAImpl)
 /** Opcode 0xda !11/0. */
 FNIEMOP_DEF_1(iemOp_fiadd_m32i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fiadd m32i");
+    IEMOP_MNEMONIC(fiadd_m32i, "fiadd m32i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fiadd_r80_by_i32);
 }
 
@@ -14784,7 +8394,7 @@ FNIEMOP_DEF_1(iemOp_fiadd_m32i,  uint8_t, bRm)
 /** Opcode 0xda !11/1. */
 FNIEMOP_DEF_1(iemOp_fimul_m32i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fimul m32i");
+    IEMOP_MNEMONIC(fimul_m32i, "fimul m32i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fimul_r80_by_i32);
 }
 
@@ -14792,7 +8402,7 @@ FNIEMOP_DEF_1(iemOp_fimul_m32i,  uint8_t, bRm)
 /** Opcode 0xda !11/2. */
 FNIEMOP_DEF_1(iemOp_ficom_m32i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("ficom st0,m32i");
+    IEMOP_MNEMONIC(ficom_st0_m32i, "ficom st0,m32i");
 
     IEM_MC_BEGIN(3, 3);
     IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
@@ -14826,7 +8436,7 @@ FNIEMOP_DEF_1(iemOp_ficom_m32i,  uint8_t, bRm)
 /** Opcode 0xda !11/3. */
 FNIEMOP_DEF_1(iemOp_ficomp_m32i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("ficomp st0,m32i");
+    IEMOP_MNEMONIC(ficomp_st0_m32i, "ficomp st0,m32i");
 
     IEM_MC_BEGIN(3, 3);
     IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
@@ -14860,7 +8470,7 @@ FNIEMOP_DEF_1(iemOp_ficomp_m32i, uint8_t, bRm)
 /** Opcode 0xda !11/4. */
 FNIEMOP_DEF_1(iemOp_fisub_m32i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fisub m32i");
+    IEMOP_MNEMONIC(fisub_m32i, "fisub m32i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fisub_r80_by_i32);
 }
 
@@ -14868,7 +8478,7 @@ FNIEMOP_DEF_1(iemOp_fisub_m32i,  uint8_t, bRm)
 /** Opcode 0xda !11/5. */
 FNIEMOP_DEF_1(iemOp_fisubr_m32i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fisubr m32i");
+    IEMOP_MNEMONIC(fisubr_m32i, "fisubr m32i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fisubr_r80_by_i32);
 }
 
@@ -14876,7 +8486,7 @@ FNIEMOP_DEF_1(iemOp_fisubr_m32i, uint8_t, bRm)
 /** Opcode 0xda !11/6. */
 FNIEMOP_DEF_1(iemOp_fidiv_m32i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fidiv m32i");
+    IEMOP_MNEMONIC(fidiv_m32i, "fidiv m32i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fidiv_r80_by_i32);
 }
 
@@ -14884,12 +8494,14 @@ FNIEMOP_DEF_1(iemOp_fidiv_m32i,  uint8_t, bRm)
 /** Opcode 0xda !11/7. */
 FNIEMOP_DEF_1(iemOp_fidivr_m32i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fidivr m32i");
+    IEMOP_MNEMONIC(fidivr_m32i, "fidivr m32i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m32i, bRm, iemAImpl_fidivr_r80_by_i32);
 }
 
 
-/** Opcode 0xda. */
+/**
+ * @opcode      0xda
+ */
 FNIEMOP_DEF(iemOp_EscF2)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -14933,7 +8545,7 @@ FNIEMOP_DEF(iemOp_EscF2)
 /** Opcode 0xdb !11/0. */
 FNIEMOP_DEF_1(iemOp_fild_m32i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fild m32i");
+    IEMOP_MNEMONIC(fild_m32i, "fild m32i");
 
     IEM_MC_BEGIN(2, 3);
     IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
@@ -14966,7 +8578,7 @@ FNIEMOP_DEF_1(iemOp_fild_m32i, uint8_t, bRm)
 /** Opcode 0xdb !11/1. */
 FNIEMOP_DEF_1(iemOp_fisttp_m32i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fisttp m32i");
+    IEMOP_MNEMONIC(fisttp_m32i, "fisttp m32i");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -15002,7 +8614,7 @@ FNIEMOP_DEF_1(iemOp_fisttp_m32i, uint8_t, bRm)
 /** Opcode 0xdb !11/2. */
 FNIEMOP_DEF_1(iemOp_fist_m32i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fist m32i");
+    IEMOP_MNEMONIC(fist_m32i, "fist m32i");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -15038,7 +8650,7 @@ FNIEMOP_DEF_1(iemOp_fist_m32i, uint8_t, bRm)
 /** Opcode 0xdb !11/3. */
 FNIEMOP_DEF_1(iemOp_fistp_m32i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fisttp m32i");
+    IEMOP_MNEMONIC(fistp_m32i, "fistp m32i");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -15074,7 +8686,7 @@ FNIEMOP_DEF_1(iemOp_fistp_m32i, uint8_t, bRm)
 /** Opcode 0xdb !11/5. */
 FNIEMOP_DEF_1(iemOp_fld_m80r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fld m80r");
+    IEMOP_MNEMONIC(fld_m80r, "fld m80r");
 
     IEM_MC_BEGIN(2, 3);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
@@ -15107,7 +8719,7 @@ FNIEMOP_DEF_1(iemOp_fld_m80r, uint8_t, bRm)
 /** Opcode 0xdb !11/7. */
 FNIEMOP_DEF_1(iemOp_fstp_m80r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fstp m80r");
+    IEMOP_MNEMONIC(fstp_m80r, "fstp m80r");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -15143,7 +8755,7 @@ FNIEMOP_DEF_1(iemOp_fstp_m80r, uint8_t, bRm)
 /** Opcode 0xdb 11/0. */
 FNIEMOP_DEF_1(iemOp_fcmovnb_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcmovnb st0,stN");
+    IEMOP_MNEMONIC(fcmovnb_st0_stN, "fcmovnb st0,stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 1);
@@ -15171,7 +8783,7 @@ FNIEMOP_DEF_1(iemOp_fcmovnb_stN,  uint8_t, bRm)
 /** Opcode 0xdb 11/1. */
 FNIEMOP_DEF_1(iemOp_fcmovne_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcmovne st0,stN");
+    IEMOP_MNEMONIC(fcmovne_st0_stN, "fcmovne st0,stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 1);
@@ -15199,7 +8811,7 @@ FNIEMOP_DEF_1(iemOp_fcmovne_stN,  uint8_t, bRm)
 /** Opcode 0xdb 11/2. */
 FNIEMOP_DEF_1(iemOp_fcmovnbe_stN, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcmovnbe st0,stN");
+    IEMOP_MNEMONIC(fcmovnbe_st0_stN, "fcmovnbe st0,stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 1);
@@ -15227,7 +8839,7 @@ FNIEMOP_DEF_1(iemOp_fcmovnbe_stN, uint8_t, bRm)
 /** Opcode 0xdb 11/3. */
 FNIEMOP_DEF_1(iemOp_fcmovnnu_stN, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcmovnnu st0,stN");
+    IEMOP_MNEMONIC(fcmovnnu_st0_stN, "fcmovnnu st0,stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 1);
@@ -15255,7 +8867,7 @@ FNIEMOP_DEF_1(iemOp_fcmovnnu_stN, uint8_t, bRm)
 /** Opcode 0xdb 0xe0. */
 FNIEMOP_DEF(iemOp_fneni)
 {
-    IEMOP_MNEMONIC("fneni (8087/ign)");
+    IEMOP_MNEMONIC(fneni, "fneni (8087/ign)");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEM_MC_BEGIN(0,0);
     IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
@@ -15268,7 +8880,7 @@ FNIEMOP_DEF(iemOp_fneni)
 /** Opcode 0xdb 0xe1. */
 FNIEMOP_DEF(iemOp_fndisi)
 {
-    IEMOP_MNEMONIC("fndisi (8087/ign)");
+    IEMOP_MNEMONIC(fndisi, "fndisi (8087/ign)");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEM_MC_BEGIN(0,0);
     IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
@@ -15281,7 +8893,7 @@ FNIEMOP_DEF(iemOp_fndisi)
 /** Opcode 0xdb 0xe2. */
 FNIEMOP_DEF(iemOp_fnclex)
 {
-    IEMOP_MNEMONIC("fnclex");
+    IEMOP_MNEMONIC(fnclex, "fnclex");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0,0);
@@ -15297,7 +8909,7 @@ FNIEMOP_DEF(iemOp_fnclex)
 /** Opcode 0xdb 0xe3. */
 FNIEMOP_DEF(iemOp_fninit)
 {
-    IEMOP_MNEMONIC("fninit");
+    IEMOP_MNEMONIC(fninit, "fninit");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_finit, false /*fCheckXcpts*/);
 }
@@ -15306,7 +8918,7 @@ FNIEMOP_DEF(iemOp_fninit)
 /** Opcode 0xdb 0xe4. */
 FNIEMOP_DEF(iemOp_fnsetpm)
 {
-    IEMOP_MNEMONIC("fnsetpm (80287/ign)");   /* set protected mode on fpu. */
+    IEMOP_MNEMONIC(fnsetpm, "fnsetpm (80287/ign)");   /* set protected mode on fpu. */
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEM_MC_BEGIN(0,0);
     IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
@@ -15319,7 +8931,7 @@ FNIEMOP_DEF(iemOp_fnsetpm)
 /** Opcode 0xdb 0xe5. */
 FNIEMOP_DEF(iemOp_frstpm)
 {
-    IEMOP_MNEMONIC("frstpm (80287XL/ign)"); /* reset pm, back to real mode. */
+    IEMOP_MNEMONIC(frstpm, "frstpm (80287XL/ign)"); /* reset pm, back to real mode. */
 #if 0 /* #UDs on newer CPUs */
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEM_MC_BEGIN(0,0);
@@ -15336,7 +8948,7 @@ FNIEMOP_DEF(iemOp_frstpm)
 /** Opcode 0xdb 11/5. */
 FNIEMOP_DEF_1(iemOp_fucomi_stN, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fucomi st0,stN");
+    IEMOP_MNEMONIC(fucomi_st0_stN, "fucomi st0,stN");
     return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_fcomi_fucomi, bRm & X86_MODRM_RM_MASK, iemAImpl_fucomi_r80_by_r80, false /*fPop*/);
 }
 
@@ -15344,12 +8956,14 @@ FNIEMOP_DEF_1(iemOp_fucomi_stN, uint8_t, bRm)
 /** Opcode 0xdb 11/6. */
 FNIEMOP_DEF_1(iemOp_fcomi_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcomi st0,stN");
+    IEMOP_MNEMONIC(fcomi_st0_stN, "fcomi st0,stN");
     return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_fcomi_fucomi, bRm & X86_MODRM_RM_MASK, iemAImpl_fcomi_r80_by_r80, false /*fPop*/);
 }
 
 
-/** Opcode 0xdb. */
+/**
+ * @opcode      0xdb
+ */
 FNIEMOP_DEF(iemOp_EscF3)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -15436,7 +9050,7 @@ FNIEMOP_DEF_2(iemOpHlpFpu_stN_st0, uint8_t, bRm, PFNIEMAIMPLFPUR80, pfnAImpl)
 /** Opcode 0xdc 11/0. */
 FNIEMOP_DEF_1(iemOp_fadd_stN_st0,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fadd stN,st0");
+    IEMOP_MNEMONIC(fadd_stN_st0, "fadd stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fadd_r80_by_r80);
 }
 
@@ -15444,7 +9058,7 @@ FNIEMOP_DEF_1(iemOp_fadd_stN_st0,   uint8_t, bRm)
 /** Opcode 0xdc 11/1. */
 FNIEMOP_DEF_1(iemOp_fmul_stN_st0,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fmul stN,st0");
+    IEMOP_MNEMONIC(fmul_stN_st0, "fmul stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fmul_r80_by_r80);
 }
 
@@ -15452,7 +9066,7 @@ FNIEMOP_DEF_1(iemOp_fmul_stN_st0,   uint8_t, bRm)
 /** Opcode 0xdc 11/4. */
 FNIEMOP_DEF_1(iemOp_fsubr_stN_st0,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fsubr stN,st0");
+    IEMOP_MNEMONIC(fsubr_stN_st0, "fsubr stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fsubr_r80_by_r80);
 }
 
@@ -15460,7 +9074,7 @@ FNIEMOP_DEF_1(iemOp_fsubr_stN_st0,  uint8_t, bRm)
 /** Opcode 0xdc 11/5. */
 FNIEMOP_DEF_1(iemOp_fsub_stN_st0,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fsub stN,st0");
+    IEMOP_MNEMONIC(fsub_stN_st0, "fsub stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fsub_r80_by_r80);
 }
 
@@ -15468,7 +9082,7 @@ FNIEMOP_DEF_1(iemOp_fsub_stN_st0,   uint8_t, bRm)
 /** Opcode 0xdc 11/6. */
 FNIEMOP_DEF_1(iemOp_fdivr_stN_st0,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fdivr stN,st0");
+    IEMOP_MNEMONIC(fdivr_stN_st0, "fdivr stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fdivr_r80_by_r80);
 }
 
@@ -15476,7 +9090,7 @@ FNIEMOP_DEF_1(iemOp_fdivr_stN_st0,  uint8_t, bRm)
 /** Opcode 0xdc 11/7. */
 FNIEMOP_DEF_1(iemOp_fdiv_stN_st0,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fdiv stN,st0");
+    IEMOP_MNEMONIC(fdiv_stN_st0, "fdiv stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0, bRm, iemAImpl_fdiv_r80_by_r80);
 }
 
@@ -15520,7 +9134,7 @@ FNIEMOP_DEF_2(iemOpHlpFpu_ST0_m64r, uint8_t, bRm, PFNIEMAIMPLFPUR64, pfnImpl)
 /** Opcode 0xdc !11/0. */
 FNIEMOP_DEF_1(iemOp_fadd_m64r,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fadd m64r");
+    IEMOP_MNEMONIC(fadd_m64r, "fadd m64r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fadd_r80_by_r64);
 }
 
@@ -15528,7 +9142,7 @@ FNIEMOP_DEF_1(iemOp_fadd_m64r,  uint8_t, bRm)
 /** Opcode 0xdc !11/1. */
 FNIEMOP_DEF_1(iemOp_fmul_m64r,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fmul m64r");
+    IEMOP_MNEMONIC(fmul_m64r, "fmul m64r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fmul_r80_by_r64);
 }
 
@@ -15536,7 +9150,7 @@ FNIEMOP_DEF_1(iemOp_fmul_m64r,  uint8_t, bRm)
 /** Opcode 0xdc !11/2. */
 FNIEMOP_DEF_1(iemOp_fcom_m64r,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcom st0,m64r");
+    IEMOP_MNEMONIC(fcom_st0_m64r, "fcom st0,m64r");
 
     IEM_MC_BEGIN(3, 3);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
@@ -15570,7 +9184,7 @@ FNIEMOP_DEF_1(iemOp_fcom_m64r,  uint8_t, bRm)
 /** Opcode 0xdc !11/3. */
 FNIEMOP_DEF_1(iemOp_fcomp_m64r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcomp st0,m64r");
+    IEMOP_MNEMONIC(fcomp_st0_m64r, "fcomp st0,m64r");
 
     IEM_MC_BEGIN(3, 3);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
@@ -15604,7 +9218,7 @@ FNIEMOP_DEF_1(iemOp_fcomp_m64r, uint8_t, bRm)
 /** Opcode 0xdc !11/4. */
 FNIEMOP_DEF_1(iemOp_fsub_m64r,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fsub m64r");
+    IEMOP_MNEMONIC(fsub_m64r, "fsub m64r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fsub_r80_by_r64);
 }
 
@@ -15612,7 +9226,7 @@ FNIEMOP_DEF_1(iemOp_fsub_m64r,  uint8_t, bRm)
 /** Opcode 0xdc !11/5. */
 FNIEMOP_DEF_1(iemOp_fsubr_m64r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fsubr m64r");
+    IEMOP_MNEMONIC(fsubr_m64r, "fsubr m64r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fsubr_r80_by_r64);
 }
 
@@ -15620,7 +9234,7 @@ FNIEMOP_DEF_1(iemOp_fsubr_m64r, uint8_t, bRm)
 /** Opcode 0xdc !11/6. */
 FNIEMOP_DEF_1(iemOp_fdiv_m64r,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fdiv m64r");
+    IEMOP_MNEMONIC(fdiv_m64r, "fdiv m64r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fdiv_r80_by_r64);
 }
 
@@ -15628,12 +9242,14 @@ FNIEMOP_DEF_1(iemOp_fdiv_m64r,  uint8_t, bRm)
 /** Opcode 0xdc !11/7. */
 FNIEMOP_DEF_1(iemOp_fdivr_m64r, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fdivr m64r");
+    IEMOP_MNEMONIC(fdivr_m64r, "fdivr m64r");
     return FNIEMOP_CALL_2(iemOpHlpFpu_ST0_m64r, bRm, iemAImpl_fdivr_r80_by_r64);
 }
 
 
-/** Opcode 0xdc. */
+/**
+ * @opcode      0xdc
+ */
 FNIEMOP_DEF(iemOp_EscF4)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -15675,7 +9291,7 @@ FNIEMOP_DEF(iemOp_EscF4)
  * @sa iemOp_fld_m32r */
 FNIEMOP_DEF_1(iemOp_fld_m64r,    uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fld m64r");
+    IEMOP_MNEMONIC(fld_m64r, "fld m64r");
 
     IEM_MC_BEGIN(2, 3);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffSrc);
@@ -15707,7 +9323,7 @@ FNIEMOP_DEF_1(iemOp_fld_m64r,    uint8_t, bRm)
 /** Opcode 0xdd !11/0. */
 FNIEMOP_DEF_1(iemOp_fisttp_m64i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fisttp m64i");
+    IEMOP_MNEMONIC(fisttp_m64i, "fisttp m64i");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -15743,7 +9359,7 @@ FNIEMOP_DEF_1(iemOp_fisttp_m64i, uint8_t, bRm)
 /** Opcode 0xdd !11/0. */
 FNIEMOP_DEF_1(iemOp_fst_m64r,    uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fst m64r");
+    IEMOP_MNEMONIC(fst_m64r, "fst m64r");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -15781,7 +9397,7 @@ FNIEMOP_DEF_1(iemOp_fst_m64r,    uint8_t, bRm)
 /** Opcode 0xdd !11/0. */
 FNIEMOP_DEF_1(iemOp_fstp_m64r,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fstp m64r");
+    IEMOP_MNEMONIC(fstp_m64r, "fstp m64r");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -15817,7 +9433,7 @@ FNIEMOP_DEF_1(iemOp_fstp_m64r,   uint8_t, bRm)
 /** Opcode 0xdd !11/0. */
 FNIEMOP_DEF_1(iemOp_frstor,      uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("frstor m94/108byte");
+    IEMOP_MNEMONIC(frstor, "frstor m94/108byte");
     IEM_MC_BEGIN(3, 0);
     IEM_MC_ARG_CONST(IEMMODE,           enmEffOpSize, /*=*/ pVCpu->iem.s.enmEffOpSize,  0);
     IEM_MC_ARG(uint8_t,                 iEffSeg,                                    1);
@@ -15836,7 +9452,7 @@ FNIEMOP_DEF_1(iemOp_frstor,      uint8_t, bRm)
 /** Opcode 0xdd !11/0. */
 FNIEMOP_DEF_1(iemOp_fnsave,      uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fnsave m94/108byte");
+    IEMOP_MNEMONIC(fnsave, "fnsave m94/108byte");
     IEM_MC_BEGIN(3, 0);
     IEM_MC_ARG_CONST(IEMMODE,           enmEffOpSize, /*=*/ pVCpu->iem.s.enmEffOpSize,  0);
     IEM_MC_ARG(uint8_t,                 iEffSeg,                                    1);
@@ -15855,7 +9471,7 @@ FNIEMOP_DEF_1(iemOp_fnsave,      uint8_t, bRm)
 /** Opcode 0xdd !11/0. */
 FNIEMOP_DEF_1(iemOp_fnstsw,      uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fnstsw m16");
+    IEMOP_MNEMONIC(fnstsw_m16, "fnstsw m16");
 
     IEM_MC_BEGIN(0, 2);
     IEM_MC_LOCAL(uint16_t, u16Tmp);
@@ -15881,7 +9497,7 @@ FNIEMOP_DEF_1(iemOp_fnstsw,      uint8_t, bRm)
 /** Opcode 0xdd 11/0. */
 FNIEMOP_DEF_1(iemOp_ffree_stN,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("ffree stN");
+    IEMOP_MNEMONIC(ffree_stN, "ffree stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     /* Note! C0, C1, C2 and C3 are documented as undefined, we leave them
              unmodified. */
@@ -15904,7 +9520,7 @@ FNIEMOP_DEF_1(iemOp_ffree_stN,   uint8_t, bRm)
 /** Opcode 0xdd 11/1. */
 FNIEMOP_DEF_1(iemOp_fst_stN,     uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fst st0,stN");
+    IEMOP_MNEMONIC(fst_st0_stN, "fst st0,stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 2);
@@ -15930,7 +9546,7 @@ FNIEMOP_DEF_1(iemOp_fst_stN,     uint8_t, bRm)
 /** Opcode 0xdd 11/3. */
 FNIEMOP_DEF_1(iemOp_fucom_stN_st0, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcom st0,stN");
+    IEMOP_MNEMONIC(fucom_st0_stN, "fucom st0,stN");
     return FNIEMOP_CALL_2(iemOpHlpFpuNoStore_st0_stN, bRm, iemAImpl_fucom_r80_by_r80);
 }
 
@@ -15938,12 +9554,14 @@ FNIEMOP_DEF_1(iemOp_fucom_stN_st0, uint8_t, bRm)
 /** Opcode 0xdd 11/4. */
 FNIEMOP_DEF_1(iemOp_fucomp_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcomp st0,stN");
+    IEMOP_MNEMONIC(fucomp_st0_stN, "fucomp st0,stN");
     return FNIEMOP_CALL_2(iemOpHlpFpuNoStore_st0_stN_pop, bRm, iemAImpl_fucom_r80_by_r80);
 }
 
 
-/** Opcode 0xdd. */
+/**
+ * @opcode      0xdd
+ */
 FNIEMOP_DEF(iemOp_EscF5)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -15984,7 +9602,7 @@ FNIEMOP_DEF(iemOp_EscF5)
 /** Opcode 0xde 11/0. */
 FNIEMOP_DEF_1(iemOp_faddp_stN_st0, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("faddp stN,st0");
+    IEMOP_MNEMONIC(faddp_stN_st0, "faddp stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fadd_r80_by_r80);
 }
 
@@ -15992,7 +9610,7 @@ FNIEMOP_DEF_1(iemOp_faddp_stN_st0, uint8_t, bRm)
 /** Opcode 0xde 11/0. */
 FNIEMOP_DEF_1(iemOp_fmulp_stN_st0, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fmulp stN,st0");
+    IEMOP_MNEMONIC(fmulp_stN_st0, "fmulp stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fmul_r80_by_r80);
 }
 
@@ -16000,7 +9618,7 @@ FNIEMOP_DEF_1(iemOp_fmulp_stN_st0, uint8_t, bRm)
 /** Opcode 0xde 0xd9. */
 FNIEMOP_DEF(iemOp_fcompp)
 {
-    IEMOP_MNEMONIC("fucompp st0,stN");
+    IEMOP_MNEMONIC(fcompp_st0_stN, "fcompp st0,stN");
     return FNIEMOP_CALL_1(iemOpHlpFpuNoStore_st0_stN_pop_pop, iemAImpl_fcom_r80_by_r80);
 }
 
@@ -16008,7 +9626,7 @@ FNIEMOP_DEF(iemOp_fcompp)
 /** Opcode 0xde 11/4. */
 FNIEMOP_DEF_1(iemOp_fsubrp_stN_st0, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fsubrp stN,st0");
+    IEMOP_MNEMONIC(fsubrp_stN_st0, "fsubrp stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fsubr_r80_by_r80);
 }
 
@@ -16016,7 +9634,7 @@ FNIEMOP_DEF_1(iemOp_fsubrp_stN_st0, uint8_t, bRm)
 /** Opcode 0xde 11/5. */
 FNIEMOP_DEF_1(iemOp_fsubp_stN_st0, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fsubp stN,st0");
+    IEMOP_MNEMONIC(fsubp_stN_st0, "fsubp stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fsub_r80_by_r80);
 }
 
@@ -16024,7 +9642,7 @@ FNIEMOP_DEF_1(iemOp_fsubp_stN_st0, uint8_t, bRm)
 /** Opcode 0xde 11/6. */
 FNIEMOP_DEF_1(iemOp_fdivrp_stN_st0, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fdivrp stN,st0");
+    IEMOP_MNEMONIC(fdivrp_stN_st0, "fdivrp stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fdivr_r80_by_r80);
 }
 
@@ -16032,7 +9650,7 @@ FNIEMOP_DEF_1(iemOp_fdivrp_stN_st0, uint8_t, bRm)
 /** Opcode 0xde 11/7. */
 FNIEMOP_DEF_1(iemOp_fdivp_stN_st0, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fdivp stN,st0");
+    IEMOP_MNEMONIC(fdivp_stN_st0, "fdivp stN,st0");
     return FNIEMOP_CALL_2(iemOpHlpFpu_stN_st0_pop, bRm, iemAImpl_fdiv_r80_by_r80);
 }
 
@@ -16077,7 +9695,7 @@ FNIEMOP_DEF_2(iemOpHlpFpu_st0_m16i, uint8_t, bRm, PFNIEMAIMPLFPUI16, pfnAImpl)
 /** Opcode 0xde !11/0. */
 FNIEMOP_DEF_1(iemOp_fiadd_m16i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fiadd m16i");
+    IEMOP_MNEMONIC(fiadd_m16i, "fiadd m16i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fiadd_r80_by_i16);
 }
 
@@ -16085,7 +9703,7 @@ FNIEMOP_DEF_1(iemOp_fiadd_m16i,  uint8_t, bRm)
 /** Opcode 0xde !11/1. */
 FNIEMOP_DEF_1(iemOp_fimul_m16i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fimul m16i");
+    IEMOP_MNEMONIC(fimul_m16i, "fimul m16i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fimul_r80_by_i16);
 }
 
@@ -16093,7 +9711,7 @@ FNIEMOP_DEF_1(iemOp_fimul_m16i,  uint8_t, bRm)
 /** Opcode 0xde !11/2. */
 FNIEMOP_DEF_1(iemOp_ficom_m16i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("ficom st0,m16i");
+    IEMOP_MNEMONIC(ficom_st0_m16i, "ficom st0,m16i");
 
     IEM_MC_BEGIN(3, 3);
     IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
@@ -16127,7 +9745,7 @@ FNIEMOP_DEF_1(iemOp_ficom_m16i,  uint8_t, bRm)
 /** Opcode 0xde !11/3. */
 FNIEMOP_DEF_1(iemOp_ficomp_m16i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("ficomp st0,m16i");
+    IEMOP_MNEMONIC(ficomp_st0_m16i, "ficomp st0,m16i");
 
     IEM_MC_BEGIN(3, 3);
     IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
@@ -16161,7 +9779,7 @@ FNIEMOP_DEF_1(iemOp_ficomp_m16i, uint8_t, bRm)
 /** Opcode 0xde !11/4. */
 FNIEMOP_DEF_1(iemOp_fisub_m16i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fisub m16i");
+    IEMOP_MNEMONIC(fisub_m16i, "fisub m16i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fisub_r80_by_i16);
 }
 
@@ -16169,7 +9787,7 @@ FNIEMOP_DEF_1(iemOp_fisub_m16i,  uint8_t, bRm)
 /** Opcode 0xde !11/5. */
 FNIEMOP_DEF_1(iemOp_fisubr_m16i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fisubr m16i");
+    IEMOP_MNEMONIC(fisubr_m16i, "fisubr m16i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fisubr_r80_by_i16);
 }
 
@@ -16177,7 +9795,7 @@ FNIEMOP_DEF_1(iemOp_fisubr_m16i, uint8_t, bRm)
 /** Opcode 0xde !11/6. */
 FNIEMOP_DEF_1(iemOp_fidiv_m16i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fiadd m16i");
+    IEMOP_MNEMONIC(fidiv_m16i, "fidiv m16i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fidiv_r80_by_i16);
 }
 
@@ -16185,12 +9803,14 @@ FNIEMOP_DEF_1(iemOp_fidiv_m16i,  uint8_t, bRm)
 /** Opcode 0xde !11/7. */
 FNIEMOP_DEF_1(iemOp_fidivr_m16i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fiadd m16i");
+    IEMOP_MNEMONIC(fidivr_m16i, "fidivr m16i");
     return FNIEMOP_CALL_2(iemOpHlpFpu_st0_m16i, bRm, iemAImpl_fidivr_r80_by_i16);
 }
 
 
-/** Opcode 0xde. */
+/**
+ * @opcode      0xde
+ */
 FNIEMOP_DEF(iemOp_EscF6)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -16234,7 +9854,7 @@ FNIEMOP_DEF(iemOp_EscF6)
  * Undocumented instruction, assumed to work like ffree + fincstp.  */
 FNIEMOP_DEF_1(iemOp_ffreep_stN, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("ffreep stN");
+    IEMOP_MNEMONIC(ffreep_stN, "ffreep stN");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 0);
@@ -16256,7 +9876,7 @@ FNIEMOP_DEF_1(iemOp_ffreep_stN, uint8_t, bRm)
 /** Opcode 0xdf 0xe0. */
 FNIEMOP_DEF(iemOp_fnstsw_ax)
 {
-    IEMOP_MNEMONIC("fnstsw ax");
+    IEMOP_MNEMONIC(fnstsw_ax, "fnstsw ax");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
 
     IEM_MC_BEGIN(0, 1);
@@ -16274,7 +9894,7 @@ FNIEMOP_DEF(iemOp_fnstsw_ax)
 /** Opcode 0xdf 11/5. */
 FNIEMOP_DEF_1(iemOp_fucomip_st0_stN, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcomip st0,stN");
+    IEMOP_MNEMONIC(fucomip_st0_stN, "fucomip st0,stN");
     return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_fcomi_fucomi, bRm & X86_MODRM_RM_MASK, iemAImpl_fcomi_r80_by_r80, true /*fPop*/);
 }
 
@@ -16282,7 +9902,7 @@ FNIEMOP_DEF_1(iemOp_fucomip_st0_stN, uint8_t, bRm)
 /** Opcode 0xdf 11/6. */
 FNIEMOP_DEF_1(iemOp_fcomip_st0_stN,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fcomip st0,stN");
+    IEMOP_MNEMONIC(fcomip_st0_stN, "fcomip st0,stN");
     return IEM_MC_DEFER_TO_CIMPL_3(iemCImpl_fcomi_fucomi, bRm & X86_MODRM_RM_MASK, iemAImpl_fcomi_r80_by_r80, true /*fPop*/);
 }
 
@@ -16290,7 +9910,7 @@ FNIEMOP_DEF_1(iemOp_fcomip_st0_stN,  uint8_t, bRm)
 /** Opcode 0xdf !11/0. */
 FNIEMOP_DEF_1(iemOp_fild_m16i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fild m16i");
+    IEMOP_MNEMONIC(fild_m16i, "fild m16i");
 
     IEM_MC_BEGIN(2, 3);
     IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
@@ -16323,7 +9943,7 @@ FNIEMOP_DEF_1(iemOp_fild_m16i, uint8_t, bRm)
 /** Opcode 0xdf !11/1. */
 FNIEMOP_DEF_1(iemOp_fisttp_m16i, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fisttp m16i");
+    IEMOP_MNEMONIC(fisttp_m16i, "fisttp m16i");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -16359,7 +9979,7 @@ FNIEMOP_DEF_1(iemOp_fisttp_m16i, uint8_t, bRm)
 /** Opcode 0xdf !11/2. */
 FNIEMOP_DEF_1(iemOp_fist_m16i,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fistp m16i");
+    IEMOP_MNEMONIC(fist_m16i, "fist m16i");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -16395,7 +10015,7 @@ FNIEMOP_DEF_1(iemOp_fist_m16i,   uint8_t, bRm)
 /** Opcode 0xdf !11/3. */
 FNIEMOP_DEF_1(iemOp_fistp_m16i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fistp m16i");
+    IEMOP_MNEMONIC(fistp_m16i, "fistp m16i");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -16435,7 +10055,7 @@ FNIEMOP_STUB_1(iemOp_fbld_m80d,   uint8_t, bRm);
 /** Opcode 0xdf !11/5. */
 FNIEMOP_DEF_1(iemOp_fild_m64i,   uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fild m64i");
+    IEMOP_MNEMONIC(fild_m64i, "fild m64i");
 
     IEM_MC_BEGIN(2, 3);
     IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
@@ -16472,7 +10092,7 @@ FNIEMOP_STUB_1(iemOp_fbstp_m80d,  uint8_t, bRm);
 /** Opcode 0xdf !11/7. */
 FNIEMOP_DEF_1(iemOp_fistp_m64i,  uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("fistp m64i");
+    IEMOP_MNEMONIC(fistp_m64i, "fistp m64i");
     IEM_MC_BEGIN(3, 2);
     IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
     IEM_MC_LOCAL(uint16_t,              u16Fsw);
@@ -16505,7 +10125,9 @@ FNIEMOP_DEF_1(iemOp_fistp_m64i,  uint8_t, bRm)
 }
 
 
-/** Opcode 0xdf. */
+/**
+ * @opcode      0xdf
+ */
 FNIEMOP_DEF(iemOp_EscF7)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -16544,10 +10166,12 @@ FNIEMOP_DEF(iemOp_EscF7)
 }
 
 
-/** Opcode 0xe0. */
+/**
+ * @opcode      0xe0
+ */
 FNIEMOP_DEF(iemOp_loopne_Jb)
 {
-    IEMOP_MNEMONIC("loopne Jb");
+    IEMOP_MNEMONIC(loopne_Jb, "loopne Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -16592,10 +10216,12 @@ FNIEMOP_DEF(iemOp_loopne_Jb)
 }
 
 
-/** Opcode 0xe1. */
+/**
+ * @opcode      0xe1
+ */
 FNIEMOP_DEF(iemOp_loope_Jb)
 {
-    IEMOP_MNEMONIC("loope Jb");
+    IEMOP_MNEMONIC(loope_Jb, "loope Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -16640,10 +10266,12 @@ FNIEMOP_DEF(iemOp_loope_Jb)
 }
 
 
-/** Opcode 0xe2. */
+/**
+ * @opcode      0xe2
+ */
 FNIEMOP_DEF(iemOp_loop_Jb)
 {
-    IEMOP_MNEMONIC("loop Jb");
+    IEMOP_MNEMONIC(loop_Jb, "loop Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -16715,10 +10343,12 @@ FNIEMOP_DEF(iemOp_loop_Jb)
 }
 
 
-/** Opcode 0xe3. */
+/**
+ * @opcode      0xe3
+ */
 FNIEMOP_DEF(iemOp_jecxz_Jb)
 {
-    IEMOP_MNEMONIC("jecxz Jb");
+    IEMOP_MNEMONIC(jecxz_Jb, "jecxz Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -16763,7 +10393,7 @@ FNIEMOP_DEF(iemOp_jecxz_Jb)
 /** Opcode 0xe4 */
 FNIEMOP_DEF(iemOp_in_AL_Ib)
 {
-    IEMOP_MNEMONIC("in eAX,Ib");
+    IEMOP_MNEMONIC(in_AL_Ib, "in AL,Ib");
     uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_in, u8Imm, 1);
@@ -16773,7 +10403,7 @@ FNIEMOP_DEF(iemOp_in_AL_Ib)
 /** Opcode 0xe5 */
 FNIEMOP_DEF(iemOp_in_eAX_Ib)
 {
-    IEMOP_MNEMONIC("in eAX,Ib");
+    IEMOP_MNEMONIC(in_eAX_Ib, "in eAX,Ib");
     uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_in, u8Imm, pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
@@ -16783,7 +10413,7 @@ FNIEMOP_DEF(iemOp_in_eAX_Ib)
 /** Opcode 0xe6 */
 FNIEMOP_DEF(iemOp_out_Ib_AL)
 {
-    IEMOP_MNEMONIC("out Ib,AL");
+    IEMOP_MNEMONIC(out_Ib_AL, "out Ib,AL");
     uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_out, u8Imm, 1);
@@ -16793,17 +10423,19 @@ FNIEMOP_DEF(iemOp_out_Ib_AL)
 /** Opcode 0xe7 */
 FNIEMOP_DEF(iemOp_out_Ib_eAX)
 {
-    IEMOP_MNEMONIC("out Ib,eAX");
+    IEMOP_MNEMONIC(out_Ib_eAX, "out Ib,eAX");
     uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_out, u8Imm, pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
 }
 
 
-/** Opcode 0xe8. */
+/**
+ * @opcode      0xe8
+ */
 FNIEMOP_DEF(iemOp_call_Jv)
 {
-    IEMOP_MNEMONIC("call Jv");
+    IEMOP_MNEMONIC(call_Jv, "call Jv");
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
     switch (pVCpu->iem.s.enmEffOpSize)
     {
@@ -16830,10 +10462,12 @@ FNIEMOP_DEF(iemOp_call_Jv)
 }
 
 
-/** Opcode 0xe9. */
+/**
+ * @opcode      0xe9
+ */
 FNIEMOP_DEF(iemOp_jmp_Jv)
 {
-    IEMOP_MNEMONIC("jmp Jv");
+    IEMOP_MNEMONIC(jmp_Jv, "jmp Jv");
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
     switch (pVCpu->iem.s.enmEffOpSize)
     {
@@ -16861,10 +10495,12 @@ FNIEMOP_DEF(iemOp_jmp_Jv)
 }
 
 
-/** Opcode 0xea. */
+/**
+ * @opcode      0xea
+ */
 FNIEMOP_DEF(iemOp_jmp_Ap)
 {
-    IEMOP_MNEMONIC("jmp Ap");
+    IEMOP_MNEMONIC(jmp_Ap, "jmp Ap");
     IEMOP_HLP_NO_64BIT();
 
     /* Decode the far pointer address and pass it on to the far call C implementation. */
@@ -16879,10 +10515,12 @@ FNIEMOP_DEF(iemOp_jmp_Ap)
 }
 
 
-/** Opcode 0xeb. */
+/**
+ * @opcode      0xeb
+ */
 FNIEMOP_DEF(iemOp_jmp_Jb)
 {
-    IEMOP_MNEMONIC("jmp Jb");
+    IEMOP_MNEMONIC(jmp_Jb, "jmp Jb");
     int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(&i8Imm);
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
@@ -16897,7 +10535,7 @@ FNIEMOP_DEF(iemOp_jmp_Jb)
 /** Opcode 0xec */
 FNIEMOP_DEF(iemOp_in_AL_DX)
 {
-    IEMOP_MNEMONIC("in  AL,DX");
+    IEMOP_MNEMONIC(in_AL_DX, "in  AL,DX");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_in_eAX_DX, 1);
 }
@@ -16906,7 +10544,7 @@ FNIEMOP_DEF(iemOp_in_AL_DX)
 /** Opcode 0xed */
 FNIEMOP_DEF(iemOp_eAX_DX)
 {
-    IEMOP_MNEMONIC("in  eAX,DX");
+    IEMOP_MNEMONIC(in_eAX_DX, "in  eAX,DX");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_in_eAX_DX, pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
 }
@@ -16915,7 +10553,7 @@ FNIEMOP_DEF(iemOp_eAX_DX)
 /** Opcode 0xee */
 FNIEMOP_DEF(iemOp_out_DX_AL)
 {
-    IEMOP_MNEMONIC("out DX,AL");
+    IEMOP_MNEMONIC(out_DX_AL, "out DX,AL");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_out_DX_eAX, 1);
 }
@@ -16924,13 +10562,15 @@ FNIEMOP_DEF(iemOp_out_DX_AL)
 /** Opcode 0xef */
 FNIEMOP_DEF(iemOp_out_DX_eAX)
 {
-    IEMOP_MNEMONIC("out DX,eAX");
+    IEMOP_MNEMONIC(out_DX_eAX, "out DX,eAX");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_out_DX_eAX, pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
 }
 
 
-/** Opcode 0xf0. */
+/**
+ * @opcode      0xf0
+ */
 FNIEMOP_DEF(iemOp_lock)
 {
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("lock");
@@ -16941,17 +10581,22 @@ FNIEMOP_DEF(iemOp_lock)
 }
 
 
-/** Opcode 0xf1. */
-FNIEMOP_DEF(iemOp_int_1)
+/**
+ * @opcode      0xf1
+ */
+FNIEMOP_DEF(iemOp_int1)
 {
-    IEMOP_MNEMONIC("int1"); /* icebp */
+    IEMOP_MNEMONIC(int1, "int1"); /* icebp */
     IEMOP_HLP_MIN_386(); /** @todo does not generate #UD on 286, or so they say... */
     /** @todo testcase! */
+    IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_ICEBP, SVM_EXIT_ICEBP, 0, 0);
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_int, X86_XCPT_DB, false /*fIsBpInstr*/);
 }
 
 
-/** Opcode 0xf2. */
+/**
+ * @opcode      0xf2
+ */
 FNIEMOP_DEF(iemOp_repne)
 {
     /* This overrides any previous REPE prefix. */
@@ -16959,12 +10604,18 @@ FNIEMOP_DEF(iemOp_repne)
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("repne");
     pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REPNZ;
 
+    /* For the 4 entry opcode tables, REPNZ overrides any previous
+       REPZ and operand size prefixes. */
+    pVCpu->iem.s.idxPrefix = 3;
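+    /* (Presumed index scheme for the four-entry tables: 0 = no prefix,
+        1 = 0x66, 2 = 0xf3, 3 = 0xf2, matching the ordering of g_kdPrefixes
+        in IEMAllInstructionsPython.py; only the 0xf3/0xf2 values are
+        visible here, so the first two are inferred.) */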
+
     uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
     return FNIEMOP_CALL(g_apfnOneByteMap[b]);
 }
 
 
-/** Opcode 0xf3. */
+/**
+ * @opcode      0xf3
+ */
 FNIEMOP_DEF(iemOp_repe)
 {
     /* This overrides any previous REPNE prefix. */
@@ -16972,12 +10623,18 @@ FNIEMOP_DEF(iemOp_repe)
     IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE("repe");
     pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REPZ;
 
+    /* For the 4 entry opcode tables, REPZ overrides any previous
+       REPNZ and operand size prefixes. */
+    pVCpu->iem.s.idxPrefix = 2;
+
     uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
     return FNIEMOP_CALL(g_apfnOneByteMap[b]);
 }
 
 
-/** Opcode 0xf4. */
+/**
+ * @opcode      0xf4
+ */
 FNIEMOP_DEF(iemOp_hlt)
 {
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
@@ -16985,10 +10642,12 @@ FNIEMOP_DEF(iemOp_hlt)
 }
 
 
-/** Opcode 0xf5. */
+/**
+ * @opcode      0xf5
+ */
 FNIEMOP_DEF(iemOp_cmc)
 {
-    IEMOP_MNEMONIC("cmc");
+    IEMOP_MNEMONIC(cmc, "cmc");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEM_MC_BEGIN(0, 0);
     IEM_MC_FLIP_EFL_BIT(X86_EFL_CF);
@@ -17126,7 +10785,7 @@ FNIEMOP_DEF_2(iemOpCommonUnaryEv, uint8_t, bRm, PCIEMOPUNARYSIZES, pImpl)
 /** Opcode 0xf6 /0. */
 FNIEMOP_DEF_1(iemOp_grp3_test_Eb, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("test Eb,Ib");
+    IEMOP_MNEMONIC(test_Eb_Ib, "test Eb,Ib");
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
 
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
@@ -17174,7 +10833,7 @@ FNIEMOP_DEF_1(iemOp_grp3_test_Eb, uint8_t, bRm)
 /** Opcode 0xf7 /0. */
 FNIEMOP_DEF_1(iemOp_grp3_test_Ev, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("test Ev,Iv");
+    IEMOP_MNEMONIC(test_Ev_Iv, "test Ev,Iv");
     IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF);
 
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
@@ -17549,7 +11208,9 @@ FNIEMOP_DEF_2(iemOpCommonGrp3MulDivEv, uint8_t, bRm, PCIEMOPMULDIVSIZES, pImpl)
     }
 }
 
-/** Opcode 0xf6. */
+/**
+ * @opcode      0xf6
+ */
 FNIEMOP_DEF(iemOp_Grp3_Eb)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -17561,25 +11222,25 @@ FNIEMOP_DEF(iemOp_Grp3_Eb)
 /** @todo testcase: Present on <=386, most 486 (not early), Pentiums, and current CPUs too. CPUUNDOC.EXE */
             return IEMOP_RAISE_INVALID_OPCODE();
         case 2:
-            IEMOP_MNEMONIC("not Eb");
+            IEMOP_MNEMONIC(not_Eb, "not Eb");
             return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_not);
         case 3:
-            IEMOP_MNEMONIC("neg Eb");
+            IEMOP_MNEMONIC(neg_Eb, "neg Eb");
             return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_neg);
         case 4:
-            IEMOP_MNEMONIC("mul Eb");
+            IEMOP_MNEMONIC(mul_Eb, "mul Eb");
             IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
             return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, iemAImpl_mul_u8);
         case 5:
-            IEMOP_MNEMONIC("imul Eb");
+            IEMOP_MNEMONIC(imul_Eb, "imul Eb");
             IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
             return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, iemAImpl_imul_u8);
         case 6:
-            IEMOP_MNEMONIC("div Eb");
+            IEMOP_MNEMONIC(div_Eb, "div Eb");
             IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_OF | X86_EFL_CF);
             return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, iemAImpl_div_u8);
         case 7:
-            IEMOP_MNEMONIC("idiv Eb");
+            IEMOP_MNEMONIC(idiv_Eb, "idiv Eb");
             IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_OF | X86_EFL_CF);
             return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, iemAImpl_idiv_u8);
         IEM_NOT_REACHED_DEFAULT_CASE_RET();
@@ -17587,7 +11248,9 @@ FNIEMOP_DEF(iemOp_Grp3_Eb)
 }
 
 
-/** Opcode 0xf7. */
+/**
+ * @opcode      0xf7
+ */
 FNIEMOP_DEF(iemOp_Grp3_Ev)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
@@ -17599,25 +11262,25 @@ FNIEMOP_DEF(iemOp_Grp3_Ev)
 /** @todo testcase: Present on <=386, most 486 (not early), Pentiums, and current CPUs too. CPUUNDOC.EXE */
             return IEMOP_RAISE_INVALID_OPCODE();
         case 2:
-            IEMOP_MNEMONIC("not Ev");
+            IEMOP_MNEMONIC(not_Ev, "not Ev");
             return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_not);
         case 3:
-            IEMOP_MNEMONIC("neg Ev");
+            IEMOP_MNEMONIC(neg_Ev, "neg Ev");
             return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_neg);
         case 4:
-            IEMOP_MNEMONIC("mul Ev");
+            IEMOP_MNEMONIC(mul_Ev, "mul Ev");
             IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
             return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_mul);
         case 5:
-            IEMOP_MNEMONIC("imul Ev");
+            IEMOP_MNEMONIC(imul_Ev, "imul Ev");
             IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
             return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_imul);
         case 6:
-            IEMOP_MNEMONIC("div Ev");
+            IEMOP_MNEMONIC(div_Ev, "div Ev");
             IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_OF | X86_EFL_CF);
             return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_div);
         case 7:
-            IEMOP_MNEMONIC("idiv Ev");
+            IEMOP_MNEMONIC(idiv_Ev, "idiv Ev");
             IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_OF | X86_EFL_CF);
             return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_idiv);
         IEM_NOT_REACHED_DEFAULT_CASE_RET();
@@ -17625,10 +11288,12 @@ FNIEMOP_DEF(iemOp_Grp3_Ev)
 }
 
 
-/** Opcode 0xf8. */
+/**
+ * @opcode      0xf8
+ */
 FNIEMOP_DEF(iemOp_clc)
 {
-    IEMOP_MNEMONIC("clc");
+    IEMOP_MNEMONIC(clc, "clc");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEM_MC_BEGIN(0, 0);
     IEM_MC_CLEAR_EFL_BIT(X86_EFL_CF);
@@ -17638,10 +11303,12 @@ FNIEMOP_DEF(iemOp_clc)
 }
 
 
-/** Opcode 0xf9. */
+/**
+ * @opcode      0xf9
+ */
 FNIEMOP_DEF(iemOp_stc)
 {
-    IEMOP_MNEMONIC("stc");
+    IEMOP_MNEMONIC(stc, "stc");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEM_MC_BEGIN(0, 0);
     IEM_MC_SET_EFL_BIT(X86_EFL_CF);
@@ -17651,10 +11318,12 @@ FNIEMOP_DEF(iemOp_stc)
 }
 
 
-/** Opcode 0xfa. */
+/**
+ * @opcode      0xfa
+ */
 FNIEMOP_DEF(iemOp_cli)
 {
-    IEMOP_MNEMONIC("cli");
+    IEMOP_MNEMONIC(cli, "cli");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_cli);
 }
@@ -17662,16 +11331,18 @@ FNIEMOP_DEF(iemOp_cli)
 
 FNIEMOP_DEF(iemOp_sti)
 {
-    IEMOP_MNEMONIC("sti");
+    IEMOP_MNEMONIC(sti, "sti");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_sti);
 }
 
 
-/** Opcode 0xfc. */
+/**
+ * @opcode      0xfc
+ */
 FNIEMOP_DEF(iemOp_cld)
 {
-    IEMOP_MNEMONIC("cld");
+    IEMOP_MNEMONIC(cld, "cld");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEM_MC_BEGIN(0, 0);
     IEM_MC_CLEAR_EFL_BIT(X86_EFL_DF);
@@ -17681,10 +11352,12 @@ FNIEMOP_DEF(iemOp_cld)
 }
 
 
-/** Opcode 0xfd. */
+/**
+ * @opcode      0xfd
+ */
 FNIEMOP_DEF(iemOp_std)
 {
-    IEMOP_MNEMONIC("std");
+    IEMOP_MNEMONIC(std, "std");
     IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
     IEM_MC_BEGIN(0, 0);
     IEM_MC_SET_EFL_BIT(X86_EFL_DF);
@@ -17694,20 +11367,22 @@ FNIEMOP_DEF(iemOp_std)
 }
 
 
-/** Opcode 0xfe. */
+/**
+ * @opcode      0xfe
+ */
 FNIEMOP_DEF(iemOp_Grp4)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
     {
         case 0:
-            IEMOP_MNEMONIC("inc Ev");
+            IEMOP_MNEMONIC(inc_Eb, "inc Eb");
             return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_inc);
         case 1:
-            IEMOP_MNEMONIC("dec Ev");
+            IEMOP_MNEMONIC(dec_Eb, "dec Eb");
             return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_dec);
         default:
-            IEMOP_MNEMONIC("grp4-ud");
+            IEMOP_MNEMONIC(grp4_ud, "grp4-ud");
             return IEMOP_RAISE_INVALID_OPCODE();
     }
 }
@@ -17719,7 +11394,7 @@ FNIEMOP_DEF(iemOp_Grp4)
  */
 FNIEMOP_DEF_1(iemOp_Grp5_calln_Ev, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("calln Ev");
+    IEMOP_MNEMONIC(calln_Ev, "calln Ev");
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
 
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
@@ -17845,6 +11520,7 @@ FNIEMOP_DEF_2(iemOpHlp_Grp5_far_Ep, uint8_t, bRm, FNIEMCIMPLFARBRANCH *, pfnCImp
                 return VINF_SUCCESS;
             }
             /* AMD falls thru. */
+            /* fall thru */
 
         case IEMMODE_32BIT:
             IEM_MC_BEGIN(3, 1);
@@ -17871,7 +11547,7 @@ FNIEMOP_DEF_2(iemOpHlp_Grp5_far_Ep, uint8_t, bRm, FNIEMCIMPLFARBRANCH *, pfnCImp
  */
 FNIEMOP_DEF_1(iemOp_Grp5_callf_Ep, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("callf Ep");
+    IEMOP_MNEMONIC(callf_Ep, "callf Ep");
     return FNIEMOP_CALL_2(iemOpHlp_Grp5_far_Ep, bRm, iemCImpl_callf);
 }
 
@@ -17882,7 +11558,7 @@ FNIEMOP_DEF_1(iemOp_Grp5_callf_Ep, uint8_t, bRm)
  */
 FNIEMOP_DEF_1(iemOp_Grp5_jmpn_Ev, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("jmpn Ev");
+    IEMOP_MNEMONIC(jmpn_Ev, "jmpn Ev");
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
 
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
@@ -17968,7 +11644,7 @@ FNIEMOP_DEF_1(iemOp_Grp5_jmpn_Ev, uint8_t, bRm)
  */
 FNIEMOP_DEF_1(iemOp_Grp5_jmpf_Ep, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("jmpf Ep");
+    IEMOP_MNEMONIC(jmpf_Ep, "jmpf Ep");
     return FNIEMOP_CALL_2(iemOpHlp_Grp5_far_Ep, bRm, iemCImpl_FarJmp);
 }
 
@@ -17979,7 +11655,7 @@ FNIEMOP_DEF_1(iemOp_Grp5_jmpf_Ep, uint8_t, bRm)
  */
 FNIEMOP_DEF_1(iemOp_Grp5_push_Ev, uint8_t, bRm)
 {
-    IEMOP_MNEMONIC("push Ev");
+    IEMOP_MNEMONIC(push_Ev, "push Ev");
 
     /* Registers are handled by a common worker. */
     if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
@@ -18030,17 +11706,19 @@ FNIEMOP_DEF_1(iemOp_Grp5_push_Ev, uint8_t, bRm)
 }
 
 
-/** Opcode 0xff. */
+/**
+ * @opcode      0xff
+ */
 FNIEMOP_DEF(iemOp_Grp5)
 {
     uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
     switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
     {
         case 0:
-            IEMOP_MNEMONIC("inc Ev");
+            IEMOP_MNEMONIC(inc_Ev, "inc Ev");
             return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_inc);
         case 1:
-            IEMOP_MNEMONIC("dec Ev");
+            IEMOP_MNEMONIC(dec_Ev, "dec Ev");
             return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_dec);
         case 2:
             return FNIEMOP_CALL_1(iemOp_Grp5_calln_Ev, bRm);
@@ -18053,7 +11731,7 @@ FNIEMOP_DEF(iemOp_Grp5)
         case 6:
             return FNIEMOP_CALL_1(iemOp_Grp5_push_Ev, bRm);
         case 7:
-            IEMOP_MNEMONIC("grp5-ud");
+            IEMOP_MNEMONIC(grp5_ud, "grp5-ud");
             return IEMOP_RAISE_INVALID_OPCODE();
     }
     AssertFailedReturn(VERR_IEM_IPE_3);
@@ -18087,7 +11765,7 @@ const PFNIEMOP g_apfnOneByteMap[256] =
     /* 0x54 */  iemOp_push_eSP,         iemOp_push_eBP,         iemOp_push_eSI,         iemOp_push_eDI,
     /* 0x58 */  iemOp_pop_eAX,          iemOp_pop_eCX,          iemOp_pop_eDX,          iemOp_pop_eBX,
     /* 0x5c */  iemOp_pop_eSP,          iemOp_pop_eBP,          iemOp_pop_eSI,          iemOp_pop_eDI,
-    /* 0x60 */  iemOp_pusha,            iemOp_popa,             iemOp_bound_Gv_Ma_evex, iemOp_arpl_Ew_Gw_movsx_Gv_Ev,
+    /* 0x60 */  iemOp_pusha,            iemOp_popa__mvex,       iemOp_bound_Gv_Ma__evex, iemOp_arpl_Ew_Gw_movsx_Gv_Ev,
     /* 0x64 */  iemOp_seg_FS,           iemOp_seg_GS,           iemOp_op_size,          iemOp_addr_size,
     /* 0x68 */  iemOp_push_Iz,          iemOp_imul_Gv_Ev_Iz,    iemOp_push_Ib,          iemOp_imul_Gv_Ev_Ib,
     /* 0x6c */  iemOp_insb_Yb_DX,       iemOp_inswd_Yv_DX,      iemOp_outsb_Yb_DX,      iemOp_outswd_Yv_DX,
@@ -18098,12 +11776,12 @@ const PFNIEMOP g_apfnOneByteMap[256] =
     /* 0x80 */  iemOp_Grp1_Eb_Ib_80,    iemOp_Grp1_Ev_Iz,       iemOp_Grp1_Eb_Ib_82,    iemOp_Grp1_Ev_Ib,
     /* 0x84 */  iemOp_test_Eb_Gb,       iemOp_test_Ev_Gv,       iemOp_xchg_Eb_Gb,       iemOp_xchg_Ev_Gv,
     /* 0x88 */  iemOp_mov_Eb_Gb,        iemOp_mov_Ev_Gv,        iemOp_mov_Gb_Eb,        iemOp_mov_Gv_Ev,
-    /* 0x8c */  iemOp_mov_Ev_Sw,        iemOp_lea_Gv_M,         iemOp_mov_Sw_Ev,        iemOp_Grp1A,
+    /* 0x8c */  iemOp_mov_Ev_Sw,        iemOp_lea_Gv_M,         iemOp_mov_Sw_Ev,        iemOp_Grp1A__xop,
     /* 0x90 */  iemOp_nop,              iemOp_xchg_eCX_eAX,     iemOp_xchg_eDX_eAX,     iemOp_xchg_eBX_eAX,
     /* 0x94 */  iemOp_xchg_eSP_eAX,     iemOp_xchg_eBP_eAX,     iemOp_xchg_eSI_eAX,     iemOp_xchg_eDI_eAX,
     /* 0x98 */  iemOp_cbw,              iemOp_cwd,              iemOp_call_Ap,          iemOp_wait,
     /* 0x9c */  iemOp_pushf_Fv,         iemOp_popf_Fv,          iemOp_sahf,             iemOp_lahf,
-    /* 0xa0 */  iemOp_mov_Al_Ob,        iemOp_mov_rAX_Ov,       iemOp_mov_Ob_AL,        iemOp_mov_Ov_rAX,
+    /* 0xa0 */  iemOp_mov_AL_Ob,        iemOp_mov_rAX_Ov,       iemOp_mov_Ob_AL,        iemOp_mov_Ov_rAX,
     /* 0xa4 */  iemOp_movsb_Xb_Yb,      iemOp_movswd_Xv_Yv,     iemOp_cmpsb_Xb_Yb,      iemOp_cmpswd_Xv_Yv,
     /* 0xa8 */  iemOp_test_AL_Ib,       iemOp_test_eAX_Iz,      iemOp_stosb_Yb_AL,      iemOp_stoswd_Yv_eAX,
     /* 0xac */  iemOp_lodsb_AL_Xb,      iemOp_lodswd_eAX_Xv,    iemOp_scasb_AL_Xb,      iemOp_scaswd_eAX_Xv,
@@ -18112,9 +11790,9 @@ const PFNIEMOP g_apfnOneByteMap[256] =
     /* 0xb8 */  iemOp_eAX_Iv,           iemOp_eCX_Iv,           iemOp_eDX_Iv,           iemOp_eBX_Iv,
     /* 0xbc */  iemOp_eSP_Iv,           iemOp_eBP_Iv,           iemOp_eSI_Iv,           iemOp_eDI_Iv,
     /* 0xc0 */  iemOp_Grp2_Eb_Ib,       iemOp_Grp2_Ev_Ib,       iemOp_retn_Iw,          iemOp_retn,
-    /* 0xc4 */  iemOp_les_Gv_Mp_vex2,   iemOp_lds_Gv_Mp_vex3,   iemOp_Grp11_Eb_Ib,      iemOp_Grp11_Ev_Iz,
+    /* 0xc4 */  iemOp_les_Gv_Mp__vex3,  iemOp_lds_Gv_Mp__vex2,  iemOp_Grp11_Eb_Ib,      iemOp_Grp11_Ev_Iz,
     /* 0xc8 */  iemOp_enter_Iw_Ib,      iemOp_leave,            iemOp_retf_Iw,          iemOp_retf,
-    /* 0xcc */  iemOp_int_3,            iemOp_int_Ib,           iemOp_into,             iemOp_iret,
+    /* 0xcc */  iemOp_int3,             iemOp_int_Ib,           iemOp_into,             iemOp_iret,
     /* 0xd0 */  iemOp_Grp2_Eb_1,        iemOp_Grp2_Ev_1,        iemOp_Grp2_Eb_CL,       iemOp_Grp2_Ev_CL,
     /* 0xd4 */  iemOp_aam_Ib,           iemOp_aad_Ib,           iemOp_salc,             iemOp_xlat,
     /* 0xd8 */  iemOp_EscF0,            iemOp_EscF1,            iemOp_EscF2,            iemOp_EscF3,
@@ -18123,7 +11801,7 @@ const PFNIEMOP g_apfnOneByteMap[256] =
     /* 0xe4 */  iemOp_in_AL_Ib,         iemOp_in_eAX_Ib,        iemOp_out_Ib_AL,        iemOp_out_Ib_eAX,
     /* 0xe8 */  iemOp_call_Jv,          iemOp_jmp_Jv,           iemOp_jmp_Ap,           iemOp_jmp_Jb,
     /* 0xec */  iemOp_in_AL_DX,         iemOp_eAX_DX,           iemOp_out_DX_AL,        iemOp_out_DX_eAX,
-    /* 0xf0 */  iemOp_lock,             iemOp_int_1,            iemOp_repne,            iemOp_repe,
+    /* 0xf0 */  iemOp_lock,             iemOp_int1,             iemOp_repne,            iemOp_repe,
     /* 0xf4 */  iemOp_hlt,              iemOp_cmc,              iemOp_Grp3_Eb,          iemOp_Grp3_Ev,
     /* 0xf8 */  iemOp_clc,              iemOp_stc,              iemOp_cli,              iemOp_sti,
     /* 0xfc */  iemOp_cld,              iemOp_std,              iemOp_Grp4,             iemOp_Grp5,
@@ -18132,6 +11810,3 @@ const PFNIEMOP g_apfnOneByteMap[256] =
 
 /** @} */
 
-#ifdef _MSC_VER
-# pragma warning(pop)
-#endif
diff --git a/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py b/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py
new file mode 100755
index 0000000..6908587
--- /dev/null
+++ b/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py
@@ -0,0 +1,3410 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# $Id: IEMAllInstructionsPython.py $
+
+"""
+IEM instruction extractor.
+
+This script/module parses the IEMAllInstruction*.cpp.h files next to it and
+collects information about the instructions.  It can then be used to generate
+disassembler tables and tests.
+"""
+
+__copyright__ = \
+"""
+Copyright (C) 2017 Oracle Corporation
+
+This file is part of VirtualBox Open Source Edition (OSE), as
+available from http://www.virtualbox.org. This file is free software;
+you can redistribute it and/or modify it under the terms of the GNU
+General Public License (GPL) as published by the Free Software
+Foundation, in version 2 as it comes in the "COPYING" file of the
+VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+
+The contents of this file may alternatively be used under the terms
+of the Common Development and Distribution License Version 1.0
+(CDDL) only, as it comes in the "COPYING.CDDL" file of the
+VirtualBox OSE distribution, in which case the provisions of the
+CDDL are applicable instead of those of the GPL.
+
+You may elect to license modified versions of this file under the
+terms and conditions of either the GPL or the CDDL or both.
+"""
+__version__ = "$Revision: 115000 $"
+
+# pylint: disable=anomalous-backslash-in-string
+
+# Standard python imports.
+import os
+import re
+import sys
+
+## Only the main script needs to modify the path.
+#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
+#                                    'ValidationKit');
+#sys.path.append(g_ksValidationKitDir);
+#
+#from common import utils; - Windows build boxes don't have pywin32.
+
+# Python 3 hacks:
+if sys.version_info[0] >= 3:
+    long = int;     # pylint: disable=redefined-builtin,invalid-name
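+    # (Python 3 merged the 2.x long type into int; this alias presumably
+    #  lets the rest of the module keep using long under either version.)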
+
+
+g_kdX86EFlagsConstants = {
+    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
+    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
+    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
+    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
+    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
+    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
+    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
+    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
+    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
+    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
+    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
+    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
+    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
+    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
+    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
+    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
+    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
+    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
+    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
+    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
+};
+
+## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
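+## E.g. a hypothetical '\@opfltest zf,cf' tag would resolve through this
+## table to X86_EFL_ZF and X86_EFL_CF (tag syntax assumed for illustration).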
+g_kdEFlagsMnemonics = {
+    # Debugger flag notation (sorted by value):
+    'cf':   'X86_EFL_CF',   ##< Carry Flag.
+    'nc':  '!X86_EFL_CF',   ##< No Carry.
+
+    'po':   'X86_EFL_PF',   ##< Parity Odd.
+    'pe':  '!X86_EFL_PF',   ##< Parity Even.
+
+    'af':   'X86_EFL_AF',   ##< Aux Flag.
+    'na':  '!X86_EFL_AF',   ##< No Aux.
+
+    'zr':   'X86_EFL_ZF',   ##< ZeRo.
+    'nz':  '!X86_EFL_ZF',   ##< No Zero.
+
+    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
+    'pl':  '!X86_EFL_SF',   ##< PLus (sign).
+
+    'tf':   'X86_EFL_TF',   ##< Trap flag.
+
+    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
+    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.
+
+    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
+    'up':  '!X86_EFL_DF',   ##< UP (string op direction).
+
+    'ov':   'X86_EFL_OF',   ##< OVerflow.
+    'nv':  '!X86_EFL_OF',   ##< No Overflow.
+
+    'nt':   'X86_EFL_NT',   ##< Nested Task.
+    'rf':   'X86_EFL_RF',   ##< Resume Flag.
+    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
+    'ac':   'X86_EFL_AC',   ##< Alignment Check.
+    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
+    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.
+
+    # Reference manual notation not covered above (sorted by value):
+    'pf':   'X86_EFL_PF',
+    'zf':   'X86_EFL_ZF',
+    'sf':   'X86_EFL_SF',
+    'if':   'X86_EFL_IF',
+    'df':   'X86_EFL_DF',
+    'of':   'X86_EFL_OF',
+    'iopl': 'X86_EFL_IOPL',
+    'id':   'X86_EFL_ID',
+};
+
+## Constants and values for CR0.
+g_kdX86Cr0Constants = {
+    'X86_CR0_PE':           0x00000001, # RT_BIT_32(0)
+    'X86_CR0_MP':           0x00000002, # RT_BIT_32(1)
+    'X86_CR0_EM':           0x00000004, # RT_BIT_32(2)
+    'X86_CR0_TS':           0x00000008, # RT_BIT_32(3)
+    'X86_CR0_ET':           0x00000010, # RT_BIT_32(4)
+    'X86_CR0_NE':           0x00000020, # RT_BIT_32(5)
+    'X86_CR0_WP':           0x00010000, # RT_BIT_32(16)
+    'X86_CR0_AM':           0x00040000, # RT_BIT_32(18)
+    'X86_CR0_NW':           0x20000000, # RT_BIT_32(29)
+    'X86_CR0_CD':           0x40000000, # RT_BIT_32(30)
+    'X86_CR0_PG':           0x80000000, # RT_BIT_32(31)
+};
+
+## Constants and values for CR4.
+g_kdX86Cr4Constants = {
+    'X86_CR4_VME':          0x00000001, # RT_BIT_32(0)
+    'X86_CR4_PVI':          0x00000002, # RT_BIT_32(1)
+    'X86_CR4_TSD':          0x00000004, # RT_BIT_32(2)
+    'X86_CR4_DE':           0x00000008, # RT_BIT_32(3)
+    'X86_CR4_PSE':          0x00000010, # RT_BIT_32(4)
+    'X86_CR4_PAE':          0x00000020, # RT_BIT_32(5)
+    'X86_CR4_MCE':          0x00000040, # RT_BIT_32(6)
+    'X86_CR4_PGE':          0x00000080, # RT_BIT_32(7)
+    'X86_CR4_PCE':          0x00000100, # RT_BIT_32(8)
+    'X86_CR4_OSFXSR':       0x00000200, # RT_BIT_32(9)
+    'X86_CR4_OSXMMEEXCPT':  0x00000400, # RT_BIT_32(10)
+    'X86_CR4_VMXE':         0x00002000, # RT_BIT_32(13)
+    'X86_CR4_SMXE':         0x00004000, # RT_BIT_32(14)
+    'X86_CR4_PCIDE':        0x00020000, # RT_BIT_32(17)
+    'X86_CR4_OSXSAVE':      0x00040000, # RT_BIT_32(18)
+    'X86_CR4_SMEP':         0x00100000, # RT_BIT_32(20)
+    'X86_CR4_SMAP':         0x00200000, # RT_BIT_32(21)
+    'X86_CR4_PKE':          0x00400000, # RT_BIT_32(22)
+};
+
+## XSAVE components (XCR0).
+g_kdX86XSaveCConstants = {
+    'XSAVE_C_X87':          0x00000001,
+    'XSAVE_C_SSE':          0x00000002,
+    'XSAVE_C_YMM':          0x00000004,
+    'XSAVE_C_BNDREGS':      0x00000008,
+    'XSAVE_C_BNDCSR':       0x00000010,
+    'XSAVE_C_OPMASK':       0x00000020,
+    'XSAVE_C_ZMM_HI256':    0x00000040,
+    'XSAVE_C_ZMM_16HI':     0x00000080,
+    'XSAVE_C_PKRU':         0x00000200,
+    'XSAVE_C_LWP':          0x4000000000000000,
+    'XSAVE_C_X':            0x8000000000000000,
+    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits.
+    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits.
+};
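+# (Illustrative check: 0x000000c4 == XSAVE_C_YMM | XSAVE_C_ZMM_HI256
+#  | XSAVE_C_ZMM_16HI, and 0x000000c6 additionally sets XSAVE_C_SSE, per
+#  the "clearing all AVX ..." comments above.)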
+
+
+## \@op[1-4] locations
+g_kdOpLocations = {
+    'reg':      [], ## modrm.reg
+    'rm':       [], ## modrm.rm
+    'imm':      [], ## immediate instruction data
+    'vvvv':     [], ## VEX.vvvv
+
+    # fixed registers.
+    'AL':       [],
+    'rAX':      [],
+    'rSI':      [],
+    'rDI':      [],
+    'rFLAGS':   [],
+    'CS':       [],
+    'DS':       [],
+    'ES':       [],
+    'FS':       [],
+    'GS':       [],
+    'SS':       [],
+};
+
+## \@op[1-4] types
+##
+## Value fields:
+##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
+##    - 1: the location (g_kdOpLocations).
+##    - 2: disassembler format string version of the type.
+##    - 3: disassembler OP_PARAM_XXX (XXX only).
+##
+## Note! See section A.2.1 in SDM vol 2 for the type names.
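+## E.g. the 'Eb' entry below reads: parsed by the normal ModR/M handler
+## (IDX_UseModRM), operand located in ModRM.rm, disassembler format string
+## '%Eb', disassembler parameter OP_PARAM_Eb.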
+g_kdOpTypes = {
+    # Fixed addresses
+    'Ap':   ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),
+
+    # ModR/M.rm
+    'Eb':   ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
+    'Ew':   ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
+    'Ev':   ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
+    'Wss':  ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
+    'Wsd':  ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
+    'Wps':  ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
+    'Wpd':  ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
+    'Wdq':  ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
+    'WqZxReg': ( 'IDX_UseModRM',    'rm',     '%Wq',  'Wq',      ),
+
+    # ModR/M.rm - register only.
+    'Uq':   ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
+    'UqHi': ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),
+
+    # ModR/M.rm - memory only.
+    'Ma':   ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
+    'MbRO': ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
+    'MdRO': ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
+    'MdWO': ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
+    'Mq':   ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
+    'MRO':  ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
+    'MRW':  ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
+
+    # ModR/M.reg
+    'Gb':   ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
+    'Gw':   ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
+    'Gv':   ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
+    'Vss':  ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
+    'VssZxReg': ( 'IDX_UseModRM',   'reg',    '%Vss', 'Vss',     ),
+    'Vsd':  ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
+    'Vps':  ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
+    'Vpd':  ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
+    'Vq':   ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
+    'Vdq':  ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),
+
+    # Immediate values.
+    'Ib':   ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
+    'Iw':   ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
+    'Id':   ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
+    'Iq':   ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
+    'Iv':   ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
+    'Iz':   ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64: dword
+
+    # Address operands (no ModR/M).
+    'Ob':   ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
+    'Ov':   ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),
+
+    # Relative jump targets
+    'Jb':   ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
+    'Jv':   ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),
+
+    # DS:rSI
+    'Xb':   ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
+    'Xv':   ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
+    # ES:rDI
+    'Yb':   ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
+    'Yv':   ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),
+
+    'Fv':   ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),
+
+    # Fixed registers.
+    'AL':   ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
+    'rAX':  ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
+    'CS':   ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
+    'DS':   ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
+    'ES':   ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
+    'FS':   ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
+    'GS':   ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
+    'SS':   ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
+};
+
+# IDX_ParseFixedReg
+# IDX_ParseVexDest
+
+
+## IEMFORM_XXX mappings.
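+## Each entry pairs the encoding name with the ordered operand locations,
+## e.g. 'MR' encodes via ModR/M with operand 1 in ModRM.rm and operand 2 in
+## ModRM.reg (reading derived from the table itself).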
+g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
+    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ], ),
+    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
+    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
+    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ], ),
+    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
+    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
+    'M':            ( 'ModR/M',     [ 'rm', ], ),
+    'M_REG':        ( 'ModR/M',     [ 'rm', ], ),
+    'M_MEM':        ( 'ModR/M',     [ 'rm', ], ),
+    'R':            ( 'ModR/M',     [ 'reg', ], ),
+
+    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
+    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
+    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
+    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
+    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
+    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
+    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ], ),
+    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ], ),
+    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ], ),
+    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ], ),
+    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
+    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
+
+    'FIXED':        ( 'fixed',      None, )
+};
+
+## \@oppfx values.
+g_kdPrefixes = {
+    'none': [],
+    '0x66': [],
+    '0xf3': [],
+    '0xf2': [],
+};
+
+## Special \@opcode tag values.
+g_kdSpecialOpcodes = {
+    '/reg':         [],
+    'mr/reg':       [],
+    '11 /reg':      [],
+    '!11 /reg':     [],
+    '11 mr/reg':    [],
+    '!11 mr/reg':   [],
+};
+
+## Special \@opcodesub tag values.
+g_kdSubOpcodes = {
+    'none':         [ None,         ],
+    '11 mr/reg':    [ '11 mr/reg',  ],
+    '11':           [ '11 mr/reg',  ],      ##< alias
+    '!11 mr/reg':   [ '!11 mr/reg', ],
+    '!11':          [ '!11 mr/reg', ],      ##< alias
+};
+
+## Valid values for \@openc
+g_kdEncodings = {
+    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
+    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
+    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
+    'prefix':       [ None, ],                  ##< Prefix
+};
+
+## \@opunused, \@opinvalid, \@opinvlstyle
+g_kdInvalidStyles = {
+    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
+    'intel-modrm':              [], ##< Intel decodes ModR/M.
+    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
+    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
+    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
+};
+
+g_kdCpuNames = {
+    '8086':     (),
+    '80186':    (),
+    '80286':    (),
+    '80386':    (),
+    '80486':    (),
+};
+
+## \@opcpuid
+g_kdCpuIdFlags = {
+    'vme':          'X86_CPUID_FEATURE_EDX_VME',
+    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
+    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
+    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
+    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
+    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
+    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
+    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
+    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
+    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
+    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
+    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
+    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
+    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
+    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
+    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
+    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
+    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
+    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
+    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
+    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
+    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
+    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
+    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
+    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
+    'aes':          'X86_CPUID_FEATURE_ECX_AES',
+    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
+    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
+    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
+    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',
+
+    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
+    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
+    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
+    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
+    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
+    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
+    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
+    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
+    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
+    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
+};
+
+## \@ophints values.
+g_kdHints = {
+    'invalid':               'DISOPTYPE_INVALID',               ##<
+    'harmless':              'DISOPTYPE_HARMLESS',              ##<
+    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
+    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
+    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
+    'portio':                'DISOPTYPE_PORTIO',                ##<
+    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
+    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
+    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
+    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
+    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
+    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
+    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
+    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
+    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
+    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit IRQs (sti, pop ss, mov ss).
+    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
+    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
+    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64-bit mode
+    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64-bit mode
+    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64-bit operand size
+    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64-bit operand size; regardless of prefix bytes
+    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
+    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
+    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32-bit operand size; regardless of prefix bytes
+                                                                ##  (only in 16-bit and 32-bit modes!)
+    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
+    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
+    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
+    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
+    'lock_allowed':          '',                                ##< Lock prefix allowed.
+};
+
+## \@opxcpttype values (see SDMv2 2.4, 2.7).
+g_kdXcptTypes = {
+    'none':     [],
+    '1':        [],
+    '2':        [],
+    '3':        [],
+    '4':        [],
+    '4UA':      [],
+    '5':        [],
+    '6':        [],
+    '7':        [],
+    '8':        [],
+    '11':       [],
+    '12':       [],
+    'E1':       [],
+    'E1NF':     [],
+    'E2':       [],
+    'E3':       [],
+    'E3NF':     [],
+    'E4':       [],
+    'E4NF':     [],
+    'E5':       [],
+    'E5NF':     [],
+    'E6':       [],
+    'E6NF':     [],
+    'E7NF':     [],
+    'E9':       [],
+    'E9NF':     [],
+    'E10':      [],
+    'E11':      [],
+    'E12':      [],
+    'E12NF':    [],
+};
+
+
+def _isValidOpcodeByte(sOpcode):
+    """
+    Checks if sOpcode is a valid lower-case opcode byte string like '0x0f'.
+    Returns True/False.
+    """
+    if len(sOpcode) == 4:
+        if sOpcode[:2] == '0x':
+            if sOpcode[2] in '0123456789abcdef':
+                if sOpcode[3] in '0123456789abcdef':
+                    return True;
+    return False;
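+
+# Example: _isValidOpcodeByte('0x0f') returns True, while '0x0F' (upper-case
+# hex digits), '0f' (missing prefix) and '0x0f0f' (two bytes) are all rejected.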
+
+
+class InstructionMap(object):
+    """
+    Instruction map.
+
+    The opcode map provides the lead opcode bytes (empty for the one byte
+    opcode map).  An instruction can be a member of multiple opcode maps as long
+    as it uses the same opcode value within the map (because of VEX).
+    """
+
+    kdEncodings = {
+        'legacy':   [],
+        'vex1':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 1
+        'vex2':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 2
+        'vex3':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 3
+        'xop8':     [], ##< XOP prefix with map select (mmmmm) = 8
+        'xop9':     [], ##< XOP prefix with map select (mmmmm) = 9
+        'xop10':    [], ##< XOP prefix with map select (mmmmm) = 10
+    };
+    ## Selectors.
+    ## The first value is the number of table entries required by a
+    ## decoder or disassembler for this type of selector.
+    kdSelectors = {
+        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
+        '/r':       [   8, ], ##< modrm.reg selects the instruction.
+        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) select the instruction.
+        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod select the instruction.
+        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
+        '11 /r':    [   8, ], ##< modrm.reg selects the instruction with modrm.mod == 0y11.
+        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
+    };
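+
+    # For instance, 'mod /r' needs 4 * 8 = 32 entries (all modrm.mod values
+    # times all modrm.reg values), while '11' needs 8 * 8 = 64 entries
+    # (modrm.reg times modrm.rm, with modrm.mod fixed to 0y11).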
+
+    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
+        assert sSelector in self.kdSelectors;
+        assert sEncoding in self.kdEncodings;
+        if asLeadOpcodes is None:
+            asLeadOpcodes = [];
+        else:
+            for sOpcode in asLeadOpcodes:
+                assert _isValidOpcodeByte(sOpcode);
+        assert sDisParse is None or sDisParse.startswith('IDX_Parse');
+
+        self.sName          = sName;
+        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
+        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
+        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
+        self.aoInstructions = [];               # type: list(Instruction)
+        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.
+
+    def getTableSize(self):
+        """
+        Number of table entries.   This corresponds directly to the selector.
+        """
+        return self.kdSelectors[self.sSelector][0];
+
+    def getInstructionIndex(self, oInstr):
+        """
+        Returns the table index for the instruction.
+        """
+        bOpcode = oInstr.getOpcodeByte();
+
+        # The byte selector is simple.  We have a full opcode byte and just return it.
+        if self.sSelector == 'byte':
+            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
+            return bOpcode;
+
+        # The other selectors need masking and shifting.
+        if self.sSelector == '/r':
+            return (bOpcode >> 3) & 0x7;
+
+        if self.sSelector == 'mod /r':
+            return (bOpcode >> 3) & 0x1f;
+
+        if self.sSelector == 'memreg /r':
+            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
+
+        if self.sSelector == '!11 /r':
+            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
+            return (bOpcode >> 3) & 0x7;
+
+        if self.sSelector == '11 /r':
+            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
+            return (bOpcode >> 3) & 0x7;
+
+        if self.sSelector == '11':
+            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
+            return bOpcode & 0x3f;
+
+        assert False, self.sSelector;
+        return -1;
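+
+    # Worked example: with the '/r' selector and bOpcode == 0xd8 (0y11011000),
+    # the table index is (0xd8 >> 3) & 0x7 == 3, i.e. the modrm.reg field.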
+
+    def getInstructionsInTableOrder(self):
+        """
+        Get instructions in table order.
+
+        Returns an array of instructions.  Normally there is exactly one
+        instruction per entry.  However, an entry can also be None if no
+        instruction was specified for that opcode value, or it can be a list
+        of instructions to deal with special encodings where for instance a
+        prefix (e.g. REX.W) encodes a different instruction, or where
+        different CPUs have different instructions or prefixes in the same
+        place.
+        """
+        # Start with empty table.
+        cTable  = self.getTableSize();
+        aoTable = [None] * cTable;
+
+        # Insert the instructions.
+        for oInstr in self.aoInstructions:
+            if oInstr.sOpcode:
+                idxOpcode = self.getInstructionIndex(oInstr);
+                assert idxOpcode < cTable, str(idxOpcode);
+
+                oExisting = aoTable[idxOpcode];
+                if oExisting is None:
+                    aoTable[idxOpcode] = oInstr;
+                elif not isinstance(oExisting, list):
+                    aoTable[idxOpcode] = [oExisting, oInstr];
+                else:
+                    oExisting.append(oInstr);
+
+        return aoTable;
+
+
+    def getDisasTableName(self):
+        """
+        Returns the disassembler table name for this map.
+        """
+        sName = 'g_aDisas';
+        for sWord in self.sName.split('_'):
+            if sWord == 'm':            # suffix indicating modrm.mod==mem
+                sName += '_m';
+            elif sWord == 'r':          # suffix indicating modrm.mod==reg
+                sName += '_r';
+            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
+                sName += '_' + sWord;
+            else:
+                sWord  = sWord.replace('grp', 'Grp');
+                sWord  = sWord.replace('map', 'Map');
+                sName += sWord[0].upper() + sWord[1:];
+        return sName;
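+
+    # Example: the map name 'grp11_c6_m' yields 'g_aDisasGrp11_c6_m' -- 'grp11'
+    # is capitalized to 'Grp11', 'c6' is kept as a hex byte suffix, and the
+    # trailing 'm' marks the modrm.mod == mem variant.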
+
+
+class TestType(object):
+    """
+    Test value type.
+
+    This base class deals with integer like values.  The fUnsigned constructor
+    parameter indicates the default stance on zero vs sign extending.  It is
+    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
+    """
+    def __init__(self, sName, acbSizes = None, fUnsigned = True):
+        self.sName = sName;
+        self.acbSizes = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
+        self.fUnsigned = fUnsigned;
+
+    class BadValue(Exception):
+        """ Bad value exception. """
+        def __init__(self, sMessage):
+            Exception.__init__(self, sMessage);
+            self.sMessage = sMessage;
+
+    ## For ascii ~ operator.
+    kdHexInv = {
+        '0': 'f',
+        '1': 'e',
+        '2': 'd',
+        '3': 'c',
+        '4': 'b',
+        '5': 'a',
+        '6': '9',
+        '7': '8',
+        '8': '7',
+        '9': '6',
+        'a': '5',
+        'b': '4',
+        'c': '3',
+        'd': '2',
+        'e': '1',
+        'f': '0',
+    };
+
+    def get(self, sValue):
+        """
+        Get the shortest normal sized byte representation of sValue.
+
+        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
+        The latter form is for AND+OR pairs where the first entry is what to
+        AND with the field and the second what to OR with.
+
+        Raises BadValue if invalid value.
+        """
+        if not sValue:
+            raise TestType.BadValue('empty value');
+
+        # Deal with sign and detect hexadecimal or decimal.
+        fSignExtend = not self.fUnsigned;
+        if sValue[0] == '-' or sValue[0] == '+':
+            fSignExtend = True;
+            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
+        else:
+            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';
+
+        # Try to convert it to a (long) integer.
+        try:
+            if sys.version_info[0] >= 3:
+                iValue = int(sValue, 16 if fHex else 10);
+            else:
+                iValue = long(sValue, 16 if fHex else 10);  # pylint: disable=undefined-variable
+        except Exception as oXcpt:
+            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));
+
+        # Convert the hex string and pad it to a decent value.  Negative values
+        # need converting to something non-negative first: inverting the hex
+        # digits of (-iValue - 1) yields the two's complement representation.
+        if iValue >= 0:
+            sHex = hex(iValue);
+            if sys.version_info[0] < 3:
+                assert sHex[-1] == 'L';
+                sHex = sHex[:-1];
+            assert sHex[:2] == '0x';
+            sHex = sHex[2:];
+        else:
+            sHex = hex(-iValue - 1);
+            if sys.version_info[0] < 3:
+                assert sHex[-1] == 'L';
+                sHex = sHex[:-1];
+            assert sHex[:2] == '0x';
+            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex[2:]]);
+            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
+                sHex = 'f' + sHex;
+
+        cDigits = len(sHex);
+        if cDigits <= self.acbSizes[-1] * 2:
+            for cb in self.acbSizes:
+                cNaturalDigits = cb * 2;
+                if cDigits <= cNaturalDigits:
+                    break;
+        else:
+            cNaturalDigits = self.acbSizes[-1] * 2;
+            cNaturalDigits = (cDigits + cNaturalDigits - 1) // cNaturalDigits * cNaturalDigits;
+            assert isinstance(cNaturalDigits, int)
+
+        if cNaturalDigits != cDigits:
+            cNeeded = cNaturalDigits - cDigits;
+            if iValue >= 0:
+                sHex = ('0' * cNeeded) + sHex;
+            else:
+                sHex = ('f' * cNeeded) + sHex;
+
+        # Invert and convert to bytearray and return it.
+        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);
+
+        return ((fSignExtend, abValue),);
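+
+    # Worked example: get('-1') sets fSignExtend, computes hex(-iValue - 1) ==
+    # '0x0', inverts the digit to 'f', pads to the smallest natural size and
+    # returns ((True, bytearray([0xff])),).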
+
+    def validate(self, sValue):
+        """
+        Returns True if value is okay, error message on failure.
+        """
+        try:
+            self.get(sValue);
+        except TestType.BadValue as oXcpt:
+            return oXcpt.sMessage;
+        return True;
+
+    def isAndOrPair(self, sValue):
+        """
+        Checks if sValue is a pair.
+        """
+        _ = sValue;
+        return False;
+
+
+class TestTypeEflags(TestType):
+    """
+    Special value parsing for EFLAGS/RFLAGS/FLAGS.
+    """
+
+    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };
+
+    def __init__(self, sName):
+        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
+
+    def get(self, sValue):
+        fClear = 0;
+        fSet   = 0;
+        for sFlag in sValue.split(','):
+            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
+            if sConstant is None:
+                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
+            if sConstant[0] == '!':
+                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
+            else:
+                fSet   |= g_kdX86EFlagsConstants[sConstant];
+
+        aoSet = TestType.get(self, '0x%x' % (fSet,));
+        if fClear != 0:
+            aoClear = TestType.get(self, '%#x' % (fClear,))
+            assert self.isAndOrPair(sValue) is True;
+            return (aoClear[0], aoSet[0]);
+        assert self.isAndOrPair(sValue) is False;
+        return aoSet;
+
+    def isAndOrPair(self, sValue):
+        for sZeroFlag in self.kdZeroValueFlags:
+            if sValue.find(sZeroFlag) >= 0:
+                return True;
+        return False;
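+
+    # Example (assuming g_kdEFlagsMnemonics maps 'cf' to 'X86_EFL_CF' and the
+    # zero-value mnemonic 'nc' to '!X86_EFL_CF'): get('cf') returns a single
+    # OR value, while get('nc') returns an AND+OR pair that clears the carry
+    # flag, which is why isAndOrPair() checks for the zero-value mnemonics.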
+
+class TestTypeFromDict(TestType):
+    """
+    Special value parsing for register values built from named constants,
+    e.g. CR0, CR4 and XCR0.
+    """
+
+    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
+        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
+        self.kdConstantsAndValues = kdConstantsAndValues;
+        self.sConstantPrefix      = sConstantPrefix;
+
+    def get(self, sValue):
+        fValue = 0;
+        for sFlag in sValue.split(','):
+            fFlagValue = self.kdConstantsAndValues.get(self.sConstantPrefix + sFlag.upper(), None);
+            if fFlagValue is None:
+                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
+            fValue |= fFlagValue;
+        return TestType.get(self, '0x%x' % (fValue,));
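+
+    # Example (assuming g_kdX86Cr0Constants maps 'X86_CR0_PE' to 0x00000001
+    # and 'X86_CR0_PG' to 0x80000000 as in x86.h): the 'cr0' type parses
+    # 'pe,pg' into 0x80000001 before handing it to TestType.get().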
+
+
+class TestInOut(object):
+    """
+    One input or output state modifier.
+
+    This should be thought of as values to modify BS3REGCTX and extended
+    (needs to be structured) state.
+    """
+    ## Assignment operators.
+    kasOperators = [
+        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
+        '&~=',
+        '&=',
+        '|=',
+        '='
+    ];
+    ## Types
+    kdTypes = {
+        'uint':  TestType('uint', fUnsigned = True),
+        'int':   TestType('int'),
+        'efl':   TestTypeEflags('efl'),
+        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
+        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
+        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
+    };
+    ## CPU context fields.
+    kdFields = {
+        # name:         ( default type, [both|input|output], )
+        # Operands.
+        'op1':          ( 'uint', 'both',   ), ## \@op1
+        'op2':          ( 'uint', 'both',   ), ## \@op2
+        'op3':          ( 'uint', 'both',   ), ## \@op3
+        'op4':          ( 'uint', 'both',   ), ## \@op4
+        # Flags.
+        'efl':          ( 'efl',  'both',   ),
+        'efl_undef':    ( 'uint', 'output', ),
+        # 8-bit GPRs.
+        'al':           ( 'uint', 'both',   ),
+        'cl':           ( 'uint', 'both',   ),
+        'dl':           ( 'uint', 'both',   ),
+        'bl':           ( 'uint', 'both',   ),
+        'ah':           ( 'uint', 'both',   ),
+        'ch':           ( 'uint', 'both',   ),
+        'dh':           ( 'uint', 'both',   ),
+        'bh':           ( 'uint', 'both',   ),
+        'r8l':          ( 'uint', 'both',   ),
+        'r9l':          ( 'uint', 'both',   ),
+        'r10l':         ( 'uint', 'both',   ),
+        'r11l':         ( 'uint', 'both',   ),
+        'r12l':         ( 'uint', 'both',   ),
+        'r13l':         ( 'uint', 'both',   ),
+        'r14l':         ( 'uint', 'both',   ),
+        'r15l':         ( 'uint', 'both',   ),
+        # 16-bit GPRs.
+        'ax':           ( 'uint', 'both',   ),
+        'dx':           ( 'uint', 'both',   ),
+        'cx':           ( 'uint', 'both',   ),
+        'bx':           ( 'uint', 'both',   ),
+        'sp':           ( 'uint', 'both',   ),
+        'bp':           ( 'uint', 'both',   ),
+        'si':           ( 'uint', 'both',   ),
+        'di':           ( 'uint', 'both',   ),
+        'r8w':          ( 'uint', 'both',   ),
+        'r9w':          ( 'uint', 'both',   ),
+        'r10w':         ( 'uint', 'both',   ),
+        'r11w':         ( 'uint', 'both',   ),
+        'r12w':         ( 'uint', 'both',   ),
+        'r13w':         ( 'uint', 'both',   ),
+        'r14w':         ( 'uint', 'both',   ),
+        'r15w':         ( 'uint', 'both',   ),
+        # 32-bit GPRs.
+        'eax':          ( 'uint', 'both',   ),
+        'edx':          ( 'uint', 'both',   ),
+        'ecx':          ( 'uint', 'both',   ),
+        'ebx':          ( 'uint', 'both',   ),
+        'esp':          ( 'uint', 'both',   ),
+        'ebp':          ( 'uint', 'both',   ),
+        'esi':          ( 'uint', 'both',   ),
+        'edi':          ( 'uint', 'both',   ),
+        'r8d':          ( 'uint', 'both',   ),
+        'r9d':          ( 'uint', 'both',   ),
+        'r10d':         ( 'uint', 'both',   ),
+        'r11d':         ( 'uint', 'both',   ),
+        'r12d':         ( 'uint', 'both',   ),
+        'r13d':         ( 'uint', 'both',   ),
+        'r14d':         ( 'uint', 'both',   ),
+        'r15d':         ( 'uint', 'both',   ),
+        # 64-bit GPRs.
+        'rax':          ( 'uint', 'both',   ),
+        'rdx':          ( 'uint', 'both',   ),
+        'rcx':          ( 'uint', 'both',   ),
+        'rbx':          ( 'uint', 'both',   ),
+        'rsp':          ( 'uint', 'both',   ),
+        'rbp':          ( 'uint', 'both',   ),
+        'rsi':          ( 'uint', 'both',   ),
+        'rdi':          ( 'uint', 'both',   ),
+        'r8':           ( 'uint', 'both',   ),
+        'r9':           ( 'uint', 'both',   ),
+        'r10':          ( 'uint', 'both',   ),
+        'r11':          ( 'uint', 'both',   ),
+        'r12':          ( 'uint', 'both',   ),
+        'r13':          ( 'uint', 'both',   ),
+        'r14':          ( 'uint', 'both',   ),
+        'r15':          ( 'uint', 'both',   ),
+        # 16-bit, 32-bit or 64-bit registers according to operand size.
+        'oz.rax':       ( 'uint', 'both',   ),
+        'oz.rdx':       ( 'uint', 'both',   ),
+        'oz.rcx':       ( 'uint', 'both',   ),
+        'oz.rbx':       ( 'uint', 'both',   ),
+        'oz.rsp':       ( 'uint', 'both',   ),
+        'oz.rbp':       ( 'uint', 'both',   ),
+        'oz.rsi':       ( 'uint', 'both',   ),
+        'oz.rdi':       ( 'uint', 'both',   ),
+        'oz.r8':        ( 'uint', 'both',   ),
+        'oz.r9':        ( 'uint', 'both',   ),
+        'oz.r10':       ( 'uint', 'both',   ),
+        'oz.r11':       ( 'uint', 'both',   ),
+        'oz.r12':       ( 'uint', 'both',   ),
+        'oz.r13':       ( 'uint', 'both',   ),
+        'oz.r14':       ( 'uint', 'both',   ),
+        'oz.r15':       ( 'uint', 'both',   ),
+        # Control registers.
+        'cr0':          ( 'cr0',  'both',   ),
+        'cr4':          ( 'cr4',  'both',   ),
+        'xcr0':         ( 'xcr0', 'both',   ),
+        # FPU Registers
+        'fcw':          ( 'uint', 'both',   ),
+        'fsw':          ( 'uint', 'both',   ),
+        'ftw':          ( 'uint', 'both',   ),
+        'fop':          ( 'uint', 'both',   ),
+        'fpuip':        ( 'uint', 'both',   ),
+        'fpucs':        ( 'uint', 'both',   ),
+        'fpudp':        ( 'uint', 'both',   ),
+        'fpuds':        ( 'uint', 'both',   ),
+        'mxcsr':        ( 'uint', 'both',   ),
+        'st0':          ( 'uint', 'both',   ),
+        'st1':          ( 'uint', 'both',   ),
+        'st2':          ( 'uint', 'both',   ),
+        'st3':          ( 'uint', 'both',   ),
+        'st4':          ( 'uint', 'both',   ),
+        'st5':          ( 'uint', 'both',   ),
+        'st6':          ( 'uint', 'both',   ),
+        'st7':          ( 'uint', 'both',   ),
+        # MMX registers.
+        'mm0':          ( 'uint', 'both',   ),
+        'mm1':          ( 'uint', 'both',   ),
+        'mm2':          ( 'uint', 'both',   ),
+        'mm3':          ( 'uint', 'both',   ),
+        'mm4':          ( 'uint', 'both',   ),
+        'mm5':          ( 'uint', 'both',   ),
+        'mm6':          ( 'uint', 'both',   ),
+        'mm7':          ( 'uint', 'both',   ),
+        # SSE registers.
+        'xmm0':         ( 'uint', 'both',   ),
+        'xmm1':         ( 'uint', 'both',   ),
+        'xmm2':         ( 'uint', 'both',   ),
+        'xmm3':         ( 'uint', 'both',   ),
+        'xmm4':         ( 'uint', 'both',   ),
+        'xmm5':         ( 'uint', 'both',   ),
+        'xmm6':         ( 'uint', 'both',   ),
+        'xmm7':         ( 'uint', 'both',   ),
+        'xmm8':         ( 'uint', 'both',   ),
+        'xmm9':         ( 'uint', 'both',   ),
+        'xmm10':        ( 'uint', 'both',   ),
+        'xmm11':        ( 'uint', 'both',   ),
+        'xmm12':        ( 'uint', 'both',   ),
+        'xmm13':        ( 'uint', 'both',   ),
+        'xmm14':        ( 'uint', 'both',   ),
+        'xmm15':        ( 'uint', 'both',   ),
+        'xmm0.lo':      ( 'uint', 'both',   ),
+        'xmm1.lo':      ( 'uint', 'both',   ),
+        'xmm2.lo':      ( 'uint', 'both',   ),
+        'xmm3.lo':      ( 'uint', 'both',   ),
+        'xmm4.lo':      ( 'uint', 'both',   ),
+        'xmm5.lo':      ( 'uint', 'both',   ),
+        'xmm6.lo':      ( 'uint', 'both',   ),
+        'xmm7.lo':      ( 'uint', 'both',   ),
+        'xmm8.lo':      ( 'uint', 'both',   ),
+        'xmm9.lo':      ( 'uint', 'both',   ),
+        'xmm10.lo':     ( 'uint', 'both',   ),
+        'xmm11.lo':     ( 'uint', 'both',   ),
+        'xmm12.lo':     ( 'uint', 'both',   ),
+        'xmm13.lo':     ( 'uint', 'both',   ),
+        'xmm14.lo':     ( 'uint', 'both',   ),
+        'xmm15.lo':     ( 'uint', 'both',   ),
+        'xmm0.hi':      ( 'uint', 'both',   ),
+        'xmm1.hi':      ( 'uint', 'both',   ),
+        'xmm2.hi':      ( 'uint', 'both',   ),
+        'xmm3.hi':      ( 'uint', 'both',   ),
+        'xmm4.hi':      ( 'uint', 'both',   ),
+        'xmm5.hi':      ( 'uint', 'both',   ),
+        'xmm6.hi':      ( 'uint', 'both',   ),
+        'xmm7.hi':      ( 'uint', 'both',   ),
+        'xmm8.hi':      ( 'uint', 'both',   ),
+        'xmm9.hi':      ( 'uint', 'both',   ),
+        'xmm10.hi':     ( 'uint', 'both',   ),
+        'xmm11.hi':     ( 'uint', 'both',   ),
+        'xmm12.hi':     ( 'uint', 'both',   ),
+        'xmm13.hi':     ( 'uint', 'both',   ),
+        'xmm14.hi':     ( 'uint', 'both',   ),
+        'xmm15.hi':     ( 'uint', 'both',   ),
+        'xmm0.lo.zx':   ( 'uint', 'both',   ),
+        'xmm1.lo.zx':   ( 'uint', 'both',   ),
+        'xmm2.lo.zx':   ( 'uint', 'both',   ),
+        'xmm3.lo.zx':   ( 'uint', 'both',   ),
+        'xmm4.lo.zx':   ( 'uint', 'both',   ),
+        'xmm5.lo.zx':   ( 'uint', 'both',   ),
+        'xmm6.lo.zx':   ( 'uint', 'both',   ),
+        'xmm7.lo.zx':   ( 'uint', 'both',   ),
+        'xmm8.lo.zx':   ( 'uint', 'both',   ),
+        'xmm9.lo.zx':   ( 'uint', 'both',   ),
+        'xmm10.lo.zx':  ( 'uint', 'both',   ),
+        'xmm11.lo.zx':  ( 'uint', 'both',   ),
+        'xmm12.lo.zx':  ( 'uint', 'both',   ),
+        'xmm13.lo.zx':  ( 'uint', 'both',   ),
+        'xmm14.lo.zx':  ( 'uint', 'both',   ),
+        'xmm15.lo.zx':  ( 'uint', 'both',   ),
+        'xmm0.dw0':     ( 'uint', 'both',   ),
+        'xmm1.dw0':     ( 'uint', 'both',   ),
+        'xmm2.dw0':     ( 'uint', 'both',   ),
+        'xmm3.dw0':     ( 'uint', 'both',   ),
+        'xmm4.dw0':     ( 'uint', 'both',   ),
+        'xmm5.dw0':     ( 'uint', 'both',   ),
+        'xmm6.dw0':     ( 'uint', 'both',   ),
+        'xmm7.dw0':     ( 'uint', 'both',   ),
+        'xmm8.dw0':     ( 'uint', 'both',   ),
+        'xmm9.dw0':     ( 'uint', 'both',   ),
+        'xmm10.dw0':    ( 'uint', 'both',   ),
+        'xmm11.dw0':    ( 'uint', 'both',   ),
+        'xmm12.dw0':    ( 'uint', 'both',   ),
+        'xmm13.dw0':    ( 'uint', 'both',   ),
+        'xmm14.dw0':    ( 'uint', 'both',   ),
+        'xmm15.dw0':    ( 'uint', 'both',   ),
+        # AVX registers.
+        'ymm0':         ( 'uint', 'both',   ),
+        'ymm1':         ( 'uint', 'both',   ),
+        'ymm2':         ( 'uint', 'both',   ),
+        'ymm3':         ( 'uint', 'both',   ),
+        'ymm4':         ( 'uint', 'both',   ),
+        'ymm5':         ( 'uint', 'both',   ),
+        'ymm6':         ( 'uint', 'both',   ),
+        'ymm7':         ( 'uint', 'both',   ),
+        'ymm8':         ( 'uint', 'both',   ),
+        'ymm9':         ( 'uint', 'both',   ),
+        'ymm10':        ( 'uint', 'both',   ),
+        'ymm11':        ( 'uint', 'both',   ),
+        'ymm12':        ( 'uint', 'both',   ),
+        'ymm13':        ( 'uint', 'both',   ),
+        'ymm14':        ( 'uint', 'both',   ),
+        'ymm15':        ( 'uint', 'both',   ),
+
+        # Special ones.
+        'value.xcpt':   ( 'uint', 'output', ),
+    };
+
+    def __init__(self, sField, sOp, sValue, sType):
+        assert sField in self.kdFields;
+        assert sOp in self.kasOperators;
+        self.sField = sField;
+        self.sOp    = sOp;
+        self.sValue = sValue;
+        self.sType  = sType;
+        assert isinstance(sField, str);
+        assert isinstance(sOp, str);
+        assert isinstance(sType, str);
+        assert isinstance(sValue, str);
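+
+    # Example: a test fragment like 'eax=0x1234:uint' corresponds to
+    # TestInOut('eax', '=', '0x1234', 'uint'), i.e. load EAX with 0x1234
+    # using the default unsigned integer value type.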
+
+
+class TestSelector(object):
+    """
+    One selector for an instruction test.
+    """
+    ## Selector compare operators.
+    kasCompareOps = [ '==', '!=' ];
+    ## Selector variables and their valid values.
+    kdVariables = {
+        # Operand size.
+        'size': {
+            'o16':  'size_o16',
+            'o32':  'size_o32',
+            'o64':  'size_o64',
+        },
+        # Execution ring.
+        'ring': {
+            '0':    'ring_0',
+            '1':    'ring_1',
+            '2':    'ring_2',
+            '3':    'ring_3',
+            '0..2': 'ring_0_thru_2',
+            '1..3': 'ring_1_thru_3',
+        },
+        # Basic code mode.
+        'codebits': {
+            '64':   'code_64bit',
+            '32':   'code_32bit',
+            '16':   'code_16bit',
+        },
+        # cpu modes.
+        'mode': {
+            'real': 'mode_real',
+            'prot': 'mode_prot',
+            'long': 'mode_long',
+            'v86':  'mode_v86',
+            'smm':  'mode_smm',
+            'vmx':  'mode_vmx',
+            'svm':  'mode_svm',
+        },
+        # paging on/off
+        'paging': {
+            'on':       'paging_on',
+            'off':      'paging_off',
+        },
+        # CPU vendor
+        'vendor': {
+            'amd':      'vendor_amd',
+            'intel':    'vendor_intel',
+            'via':      'vendor_via',
+        },
+    };
+    ## Selector shorthand predicates.
+    ## These translate into variable expressions.
+    kdPredicates = {
+        'o16':          'size==o16',
+        'o32':          'size==o32',
+        'o64':          'size==o64',
+        'ring0':        'ring==0',
+        '!ring0':       'ring==1..3',
+        'ring1':        'ring==1',
+        'ring2':        'ring==2',
+        'ring3':        'ring==3',
+        'user':         'ring==3',
+        'supervisor':   'ring==0..2',
+        'real':         'mode==real',
+        'prot':         'mode==prot',
+        'long':         'mode==long',
+        'v86':          'mode==v86',
+        'smm':          'mode==smm',
+        'vmx':          'mode==vmx',
+        'svm':          'mode==svm',
+        'paging':       'paging==on',
+        '!paging':      'paging==off',
+        'amd':          'vendor==amd',
+        '!amd':         'vendor!=amd',
+        'intel':        'vendor==intel',
+        '!intel':       'vendor!=intel',
+        'via':          'vendor==via',
+        '!via':         'vendor!=via',
+    };
+
+    def __init__(self, sVariable, sOp, sValue):
+        assert sVariable in self.kdVariables;
+        assert sOp in self.kasCompareOps;
+        assert sValue in self.kdVariables[sVariable];
+        self.sVariable  = sVariable;
+        self.sOp        = sOp;
+        self.sValue     = sValue;
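+
+    # Example: the shorthand predicate 'o16' from kdPredicates expands to the
+    # variable expression 'size==o16', i.e. TestSelector('size', '==', 'o16').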
+
+
+class InstructionTest(object):
+    """
+    Instruction test.
+    """
+
+    def __init__(self, oInstr): # type: (InstructionTest, Instruction)
+        self.oInstr         = oInstr;   # type: Instruction
+        self.aoInputs       = [];       # type: list(TestInOut)
+        self.aoOutputs      = [];       # type: list(TestInOut)
+        self.aoSelectors    = [];       # type: list(TestSelector)
+
+    def toString(self, fRepr = False):
+        """
+        Converts it to string representation.
+        """
+        asWords = [];
+        if self.aoSelectors:
+            for oSelector in self.aoSelectors:
+                asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
+            asWords.append('/');
+
+        for oModifier in self.aoInputs:
+            asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
+
+        asWords.append('->');
+
+        for oModifier in self.aoOutputs:
+            asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
+
+        if fRepr:
+            return '<' + ' '.join(asWords) + '>';
+        return '  '.join(asWords);
+
+    def __str__(self):
+        """ Provide string represenation. """
+        return self.toString(False);
+
+    def __repr__(self):
+        """ Provide unambigious string representation. """
+        return self.toString(True);
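+
+    # Sample toString() output for one selector, one input and one output
+    # modifier: 'size==o16  /  eax=1:uint  ->  efl|=cf:efl' (the words are
+    # joined by double spaces; fRepr wraps the result in angle brackets).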
+
+class Operand(object):
+    """
+    Instruction operand.
+    """
+
+    def __init__(self, sWhere, sType):
+        assert sWhere in g_kdOpLocations, sWhere;
+        assert sType  in g_kdOpTypes, sType;
+        self.sWhere = sWhere;           ##< g_kdOpLocations
+        self.sType  = sType;            ##< g_kdOpTypes
+
+    def usesModRM(self):
+        """ Returns True if using some form of ModR/M encoding. """
+        return self.sType[0] in ['E', 'G', 'M'];
+
+
+
+class Instruction(object): # pylint: disable=too-many-instance-attributes
+    """
+    Instruction.
+    """
+
+    def __init__(self, sSrcFile, iLine):
+        ## @name Core attributes.
+        ## @{
+        self.sMnemonic      = None;
+        self.sBrief         = None;
+        self.asDescSections = [];       # type: list(str)
+        self.aoMaps         = [];       # type: list(InstructionMap)
+        self.aoOperands     = [];       # type: list(Operand)
+        self.sPrefix        = None;     ##< Single prefix: None, 'none', '0x66', '0xf3' or '0xf2'.
+        self.sOpcode        = None;     # type: str
+        self.sSubOpcode     = None;     # type: str
+        self.sEncoding      = None;
+        self.asFlTest       = None;
+        self.asFlModify     = None;
+        self.asFlUndefined  = None;
+        self.asFlSet        = None;
+        self.asFlClear      = None;
+        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
+        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
+        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
+        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
+        self.aoTests        = [];       # type: list(InstructionTest)
+        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
+        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
+        self.sGroup         = None;
+        self.fUnused        = False;    ##< Unused instruction.
+        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
+        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles).
+        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
+        ## @}
+
+        ## @name Implementation attributes.
+        ## @{
+        self.sStats         = None;
+        self.sFunction      = None;
+        self.fStub          = False;
+        self.fUdStub        = False;
+        ## @}
+
+        ## @name Decoding info
+        ## @{
+        self.sSrcFile       = sSrcFile;
+        self.iLineCreated   = iLine;
+        self.iLineCompleted = None;
+        self.cOpTags        = 0;
+        self.iLineFnIemOpMacro  = -1;
+        self.iLineMnemonicMacro = -1;
+        ## @}
+
+        ## @name Intermediate input fields.
+        ## @{
+        self.sRawDisOpNo    = None;
+        self.asRawDisParams = [];
+        self.sRawIemOpFlags = None;
+        self.sRawOldOpcodes = None;
+        self.asCopyTests    = [];
+        ## @}
+
+    def toString(self, fRepr = False):
+        """ Turn object into a string. """
+        aasFields = [];
+
+        aasFields.append(['opcode',    self.sOpcode]);
+        aasFields.append(['mnemonic',  self.sMnemonic]);
+        for iOperand, oOperand in enumerate(self.aoOperands):
+            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
+        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
+        aasFields.append(['encoding',  self.sEncoding]);
+        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
+        aasFields.append(['disenum',   self.sDisEnum]);
+        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
+        aasFields.append(['group',     self.sGroup]);
+        if self.fUnused:        aasFields.append(['unused', 'True']);
+        if self.fInvalid:       aasFields.append(['invalid', 'True']);
+        aasFields.append(['invlstyle', self.sInvalidStyle]);
+        aasFields.append(['fltest',    self.asFlTest]);
+        aasFields.append(['flmodify',  self.asFlModify]);
+        aasFields.append(['flundef',   self.asFlUndefined]);
+        aasFields.append(['flset',     self.asFlSet]);
+        aasFields.append(['flclear',   self.asFlClear]);
+        aasFields.append(['mincpu',    self.sMinCpu]);
+        aasFields.append(['stats',     self.sStats]);
+        aasFields.append(['sFunction', self.sFunction]);
+        if self.fStub:          aasFields.append(['fStub', 'True']);
+        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
+        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
+        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
+        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMONICn', str(self.iLineMnemonicMacro)]);
+
+        sRet = '<' if fRepr else '';
+        for sField, sValue in aasFields:
+            if sValue is not None:
+                if len(sRet) > 1:
+                    sRet += '; ';
+                sRet += '%s=%s' % (sField, sValue,);
+        if fRepr:
+            sRet += '>';
+
+        return sRet;
+
+    def __str__(self):
+        """ Provide string represenation. """
+        return self.toString(False);
+
+    def __repr__(self):
+        """ Provide unambigious string representation. """
+        return self.toString(True);
+
+    def getOpcodeByte(self):
+        """
+        Decodes sOpcode into a byte range integer value.
+        Raises exception if sOpcode is None or invalid.
+        """
+        if self.sOpcode is None:
+            raise Exception('No opcode byte for %s!' % (self,));
+        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.
+
+        # Full hex byte form.
+        if sOpcode[:2] == '0x':
+            return int(sOpcode, 16);
+
+        # The /r form:
+        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
+            return int(sOpcode[-1:]) << 3;
+
+        # The 11/r form:
+        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
+            return (int(sOpcode[-1:]) << 3) | 0xc0;
+
+        # The !11/r form (returns mod=0y10, i.e. a memory form):
+        ## @todo this doesn't really work...
+        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
+            return (int(sOpcode[-1:]) << 3) | 0x80;
+
+        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));
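+
+    # Examples: '0x0f' returns 0x0f, the '/3' form returns 3 << 3 == 0x18, and
+    # '11/2' returns (2 << 3) | 0xc0 == 0xd0 (modrm.mod forced to 0y11).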
+
+    @staticmethod
+    def _flagsToIntegerMask(asFlags):
+        """
+        Returns the integer mask value for asFlags.
+        """
+        uRet = 0;
+        if asFlags:
+            for sFlag in asFlags:
+                sConstant = g_kdEFlagsMnemonics[sFlag];
+                assert sConstant[0] != '!', sConstant
+                uRet |= g_kdX86EFlagsConstants[sConstant];
+        return uRet;
+
+    def getTestedFlagsMask(self):
+        """ Returns asFlTest into a integer mask value """
+        return self._flagsToIntegerMask(self.asFlTest);
+
+    def getModifiedFlagsMask(self):
+        """ Returns asFlModify into a integer mask value """
+        return self._flagsToIntegerMask(self.asFlModify);
+
+    def getUndefinedFlagsMask(self):
+        """ Returns asFlUndefined into a integer mask value """
+        return self._flagsToIntegerMask(self.asFlUndefined);
+
+    def getSetFlagsMask(self):
+        """ Returns asFlSet into a integer mask value """
+        return self._flagsToIntegerMask(self.asFlSet);
+
+    def getClearedFlagsMask(self):
+        """ Returns asFlClear into a integer mask value """
+        return self._flagsToIntegerMask(self.asFlClear);
+
+
+## All the instructions.
+g_aoAllInstructions = []; # type: list(Instruction)
+
+## All the instructions indexed by statistics name (opstat).
+g_dAllInstructionsByStat = {}; # type: dict(Instruction)
+
+## All the instructions indexed by function name (opfunction).
+g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))
+
+## Instructions tagged by oponlytest
+g_aoOnlyTestInstructions = []; # type: list(Instruction)
+
+## Instruction maps.
+g_dInstructionMaps = {
+    'one':          InstructionMap('one'),
+    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',]),
+    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
+    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
+    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
+    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
+    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
+    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
+    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
+    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
+    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
+    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
+    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
+    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
+    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
+    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
+    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
+    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
+    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
+    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin
+
+    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
+    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
+    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
+    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
+    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
+    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
+    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
+    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
+    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
+    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
+    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
+    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
+    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
+    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
+
+    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
+    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
+
+    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
+    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
+    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
+    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
+    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
+    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
+
+    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
+    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),
+
+    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
+    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
+    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
+    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
+    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
+    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
+    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
+    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
+};
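+
+# Example lookups: g_dInstructionMaps['grp5'].getTableSize() is 8 (one entry
+# per modrm.reg value), while g_dInstructionMaps['one'].getTableSize() is 256
+# (the default 'byte' selector covers the whole one-byte opcode map).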
+
+
+
+class ParserException(Exception):
+    """ Parser exception """
+    def __init__(self, sMessage):
+        Exception.__init__(self, sMessage);
+
+
+class SimpleParser(object):
+    """
+    Parser of IEMAllInstruction*.cpp.h instruction specifications.
+    """
+
+    ## @name Parser state.
+    ## @{
+    kiCode              = 0;
+    kiCommentMulti      = 1;
+    ## @}
+
+    def __init__(self, sSrcFile, asLines, sDefaultMap):
+        self.sSrcFile       = sSrcFile;
+        self.asLines        = asLines;
+        self.iLine          = 0;
+        self.iState         = self.kiCode;
+        self.sComment       = '';
+        self.iCommentLine   = 0;
+        self.aoCurInstrs    = [];
+
+        assert sDefaultMap in g_dInstructionMaps;
+        self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];
+
+        self.cTotalInstr    = 0;
+        self.cTotalStubs    = 0;
+        self.cTotalTagged   = 0;
+
+        self.oReMacroName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
+        self.oReMnemonic    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
+        self.oReStatsName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
+        self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
+        self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
+        self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
+        self.fDebug         = True;
+
+        self.dTagHandlers   = {
+            '@opbrief':     self.parseTagOpBrief,
+            '@opdesc':      self.parseTagOpDesc,
+            '@opmnemonic':  self.parseTagOpMnemonic,
+            '@op1':         self.parseTagOpOperandN,
+            '@op2':         self.parseTagOpOperandN,
+            '@op3':         self.parseTagOpOperandN,
+            '@op4':         self.parseTagOpOperandN,
+            '@oppfx':       self.parseTagOpPfx,
+            '@opmaps':      self.parseTagOpMaps,
+            '@opcode':      self.parseTagOpcode,
+            '@opcodesub':   self.parseTagOpcodeSub,
+            '@openc':       self.parseTagOpEnc,
+            '@opfltest':    self.parseTagOpEFlags,
+            '@opflmodify':  self.parseTagOpEFlags,
+            '@opflundef':   self.parseTagOpEFlags,
+            '@opflset':     self.parseTagOpEFlags,
+            '@opflclear':   self.parseTagOpEFlags,
+            '@ophints':     self.parseTagOpHints,
+            '@opdisenum':   self.parseTagOpDisEnum,
+            '@opmincpu':    self.parseTagOpMinCpu,
+            '@opcpuid':     self.parseTagOpCpuId,
+            '@opgroup':     self.parseTagOpGroup,
+            '@opunused':    self.parseTagOpUnusedInvalid,
+            '@opinvalid':   self.parseTagOpUnusedInvalid,
+            '@opinvlstyle': self.parseTagOpUnusedInvalid,
+            '@optest':      self.parseTagOpTest,
+            '@optestign':   self.parseTagOpTestIgnore,
+            '@optestignore': self.parseTagOpTestIgnore,
+            '@opcopytests': self.parseTagOpCopyTests,
+            '@oponly':      self.parseTagOpOnlyTest,
+            '@oponlytest':  self.parseTagOpOnlyTest,
+            '@opxcpttype':  self.parseTagOpXcptType,
+            '@opstats':     self.parseTagOpStats,
+            '@opfunction':  self.parseTagOpFunction,
+            '@opdone':      self.parseTagOpDone,
+        };
+        for i in range(48):
+            self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
+            self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
+
+        self.asErrors = [];
+
+    def raiseError(self, sMessage):
+        """
+        Raise error prefixed with the source and line number.
+        """
+        raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
+
+    def raiseCommentError(self, iLineInComment, sMessage):
+        """
+        Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
+        """
+        raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
+
+    def error(self, sMessage):
+        """
+        Adds an error.
+        Returns False.
+        """
+        self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
+        return False;
+
+    def errorComment(self, iLineInComment, sMessage):
+        """
+        Adds a comment error.
+        Returns False.
+        """
+        self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
+        return False;
+
+    def printErrors(self):
+        """
+        Print the errors to stderr.
+        Returns number of errors.
+        """
+        if self.asErrors:
+            sys.stderr.write(u''.join(self.asErrors));
+        return len(self.asErrors);
+
+    def debug(self, sMessage):
+        """
+        For debugging.
+        """
+        if self.fDebug:
+            print('debug: %s' % (sMessage,));
+
+
+    def addInstruction(self, iLine = None):
+        """
+        Adds an instruction.
+        """
+        oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
+        g_aoAllInstructions.append(oInstr);
+        self.aoCurInstrs.append(oInstr);
+        return oInstr;
+
+    def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
+        """
+        Derives the mnemonic and operands from an IEM stats base-name-like string.
+        """
+        if oInstr.sMnemonic is None:
+            asWords = sStats.split('_');
+            oInstr.sMnemonic = asWords[0].lower();
+            if len(asWords) > 1 and not oInstr.aoOperands:
+                for sType in asWords[1:]:
+                    if sType in g_kdOpTypes:
+                        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
+                    else:
+                        #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
+                        return False;
+        return True;
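+
+    # Example: a (hypothetical) stats name 'add_Gb_Ib' yields the mnemonic
+    # 'add' plus the operands Operand('reg', 'Gb') and Operand('imm', 'Ib'),
+    # using the operand locations from g_kdOpTypes above.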
+
+    def doneInstructionOne(self, oInstr, iLine):
+        """
+        Complete the parsing by processing, validating and expanding raw inputs.
+        """
+        assert oInstr.iLineCompleted is None;
+        oInstr.iLineCompleted = iLine;
+
+        #
+        # Specified instructions.
+        #
+        if oInstr.cOpTags > 0:
+            if oInstr.sStats is None:
+                pass;
+
+        #
+        # Unspecified legacy stuff.  We generally have only a few things to go on here.
+        #   /** Opcode 0x0f 0x00 /0. */
+        #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
+        #
+        else:
+            #if oInstr.sRawOldOpcodes:
+            #
+            #if oInstr.sMnemonic:
+            pass;
+
+        #
+        # Common defaults.
+        #
+
+        # Guess mnemonic and operands from stats if the former is missing.
+        if oInstr.sMnemonic is None:
+            if oInstr.sStats is not None:
+                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
+            elif oInstr.sFunction is not None:
+                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));
+
+        # Derive the disassembler op enum constant from the mnemonic.
+        if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
+            oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();
+
+        # Derive the IEM statistics base name from mnemonic and operand types.
+        if oInstr.sStats is None:
+            if oInstr.sFunction is not None:
+                oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
+            elif oInstr.sMnemonic is not None:
+                oInstr.sStats = oInstr.sMnemonic;
+                for oOperand in oInstr.aoOperands:
+                    if oOperand.sType:
+                        oInstr.sStats += '_' + oOperand.sType;
+
+        # Derive the IEM function name from mnemonic and operand types.
+        if oInstr.sFunction is None:
+            if oInstr.sMnemonic is not None:
+                oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
+                for oOperand in oInstr.aoOperands:
+                    if oOperand.sType:
+                        oInstr.sFunction += '_' + oOperand.sType;
+            elif oInstr.sStats:
+                oInstr.sFunction = 'iemOp_' + oInstr.sStats;
+
+        # Derive encoding from operands.
+        if oInstr.sEncoding is None:
+            if not oInstr.aoOperands:
+                if oInstr.fUnused and oInstr.sSubOpcode:
+                    oInstr.sEncoding = 'ModR/M';
+                else:
+                    oInstr.sEncoding = 'fixed';
+            elif oInstr.aoOperands[0].usesModRM():
+                if len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv':
+                    oInstr.sEncoding = 'ModR/M+VEX';
+                else:
+                    oInstr.sEncoding = 'ModR/M';
+
+        #
+        # Apply the default map and then add the instruction to all its maps.
+        #
+        if not oInstr.aoMaps:
+            oInstr.aoMaps = [ self.oDefaultMap, ];
+        for oMap in oInstr.aoMaps:
+            oMap.aoInstructions.append(oInstr);
+
+        #
+        # Check the opstat value and add it to the opstat indexed dictionary.
+        #
+        if oInstr.sStats:
+            if oInstr.sStats not in g_dAllInstructionsByStat:
+                g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
+            else:
+                self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
+                           % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
+
+        #
+        # Add to function indexed dictionary.  We allow multiple instructions per function.
+        #
+        if oInstr.sFunction:
+            if oInstr.sFunction not in g_dAllInstructionsByFunction:
+                g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
+            else:
+                g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
+
+        #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
+        return True;
+
+    def doneInstructions(self, iLineInComment = None):
+        """
+        Done with current instruction.
+        """
+        for oInstr in self.aoCurInstrs:
+            self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
+            if oInstr.fStub:
+                self.cTotalStubs += 1;
+
+        self.cTotalInstr += len(self.aoCurInstrs);
+
+        self.sComment     = '';
+        self.aoCurInstrs   = [];
+        return True;
+
+    def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
+        """
+        Sets the sAttrib of all current instructions to oValue.  If fOverwrite
+        is False, only None values are replaced.
+        """
+        for oInstr in self.aoCurInstrs:
+            if fOverwrite is not True:
+                oOldValue = getattr(oInstr, sAttrib);
+                if oOldValue is not None:
+                    continue;
+            setattr(oInstr, sAttrib, oValue);
+
+    def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
+        """
+        Sets the iEntry of the array sAttrib of all current instructions to oValue.
+        If fOverwrite is False, only None entries are replaced.
+        """
+        for oInstr in self.aoCurInstrs:
+            aoArray = getattr(oInstr, sAttrib);
+            while len(aoArray) <= iEntry:
+                aoArray.append(None);
+            if fOverwrite is True or aoArray[iEntry] is None:
+                aoArray[iEntry] = oValue;
+
+    def parseCommentOldOpcode(self, asLines):
+        """ Deals with 'Opcode 0xff /4' like comments """
+        asWords = asLines[0].split();
+        if    len(asWords) >= 2  \
+          and asWords[0] == 'Opcode'  \
+          and (   asWords[1].startswith('0x')
+               or asWords[1].startswith('0X')):
+            asWords = asWords[1:];
+            for iWord, sWord in enumerate(asWords):
+                if sWord.startswith('0X'):
+                    sWord = '0x' + sWord[2:];
+                    asWords[iWord] = sWord;
+            self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));
+
+        return False;
+
+    def ensureInstructionForOpTag(self, iTagLine):
+        """ Ensure there is an instruction for the op-tag being parsed. """
+        if not self.aoCurInstrs:
+            self.addInstruction(self.iCommentLine + iTagLine);
+        for oInstr in self.aoCurInstrs:
+            oInstr.cOpTags += 1;
+            if oInstr.cOpTags == 1:
+                self.cTotalTagged += 1;
+        return self.aoCurInstrs[-1];
+
+    @staticmethod
+    def flattenSections(aasSections):
+        """
+        Flattens multiline sections into stripped single strings.
+        Returns list of strings, one section per string.
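+
+        Example (illustrative): [[' foo ', 'bar '], ['baz']] yields
+        ['foo bar', 'baz'].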
+        """
+        asRet = [];
+        for asLines in aasSections:
+            if asLines:
+                asRet.append(' '.join([sLine.strip() for sLine in asLines]));
+        return asRet;
+
+    @staticmethod
+    def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
+        """
+        Flattens sections into a simple stripped string with newlines as
+        section breaks.  The final section does not sport a trailing newline.
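+
+        Example (illustrative): with the default separators,
+        [['a', 'b'], ['c']] yields 'a b\nc'.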
+        """
+        # Typical: One section with a single line.
+        if len(aasSections) == 1 and len(aasSections[0]) == 1:
+            return aasSections[0][0].strip();
+
+        sRet = '';
+        for iSection, asLines in enumerate(aasSections):
+            if asLines:
+                if iSection > 0:
+                    sRet += sSectionSep;
+                sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
+        return sRet;
+
+
+
+    ## @name Tag parsers
+    ## @{
+
+    def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:    \@opbrief
+        Value:  Text description, multiple sections, appended.
+
+        Brief description.  If not given, it's the first sentence from @opdesc.
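+
+        Example (illustrative; must be a single sentence of at most 180 chars):
+            \@opbrief Adds Gb to Eb.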
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten and validate the value.
+        sBrief = self.flattenAllSections(aasSections);
+        if not sBrief:
+            return self.errorComment(iTagLine, '%s: value required' % (sTag,));
+        if sBrief[-1] != '.':
+            sBrief = sBrief + '.';
+        if len(sBrief) > 180:
+            return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
+        offDot = sBrief.find('.');
+        while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
+            offDot = sBrief.find('.', offDot + 1);
+        if offDot >= 0 and offDot != len(sBrief) - 1:
+            return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));
+
+        # Update the instruction.
+        if oInstr.sBrief is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
+                                               % (sTag, oInstr.sBrief, sBrief,));
+        oInstr.sBrief = sBrief;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:    \@opdesc
+        Value:  Text description, multiple sections, appended.
+
+        It is used to describe instructions.
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+        if aasSections:
+            oInstr.asDescSections.extend(self.flattenSections(aasSections));
+            return True;
+
+        _ = sTag; _ = iEndLine;
+        return True;
+
+    def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:    \@opmnemonic
+        Value:  mnemonic
+
+        The 'mnemonic' value must be a valid C identifier string.  Because of
+        prefixes, groups and whatnot, there are times when the mnemonic isn't
+        that of an actual assembler mnemonic.
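+
+        Example (illustrative): \@opmnemonic ud2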
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten and validate the value.
+        sMnemonic = self.flattenAllSections(aasSections);
+        if not self.oReMnemonic.match(sMnemonic):
+            return self.errorComment(iTagLine, '%s: invalid mnemonic name: "%s"' % (sTag, sMnemonic,));
+        if oInstr.sMnemonic is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite mnemonic "%s" with "%s"'
+                                     % (sTag, oInstr.sMnemonic, sMnemonic,));
+        oInstr.sMnemonic = sMnemonic;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tags:  \@op1, \@op2, \@op3, \@op4
+        Value: [where:]type
+
+        The 'where' value indicates where the operand is found, like the 'reg'
+        part of the ModR/M encoding. See Instruction.kdOperandLocations for
+        a list.
+
+        The 'type' value indicates the operand type.  These follow the types
+        given in the opcode tables in the CPU reference manuals.
+        See Instruction.kdOperandTypes for a list.
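+
+        Example (illustrative; assumes 'rm' and 'Eb' are valid entries in the
+        tables above): \@op1 rm:Eb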
+
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+        idxOp = int(sTag[-1]) - 1;
+        assert idxOp >= 0 and idxOp < 4;
+
+        # Flatten, split up, and validate the "where:type" value.
+        sFlattened = self.flattenAllSections(aasSections);
+        asSplit = sFlattened.split(':');
+        if len(asSplit) == 1:
+            sType  = asSplit[0];
+            sWhere = None;
+        elif len(asSplit) == 2:
+            (sWhere, sType) = asSplit;
+        else:
+            return self.errorComment(iTagLine, 'expected %s value in the format "[<where>:]<type>", not "%s"' % (sTag, sFlattened,));
+
+        if sType not in g_kdOpTypes:
+            return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
+                                               % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
+        if sWhere is None:
+            sWhere = g_kdOpTypes[sType][1];
+        elif sWhere not in g_kdOpLocations:
+            return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
+                                               % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));
+
+        # Insert the operand, refusing to overwrite an existing one.
+        while idxOp >= len(oInstr.aoOperands):
+            oInstr.aoOperands.append(None);
+        if oInstr.aoOperands[idxOp] is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
+                                               % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
+                                                   sWhere, sType,));
+        oInstr.aoOperands[idxOp] = Operand(sWhere, sType);
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:    \@opmaps
+        Value:  map[,map2]
+
+        Indicates which maps the instruction is in.  There is a default map
+        associated with each input file.
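+
+        Example (illustrative; assumes the named maps are keys of
+        g_dInstructionMaps): \@opmaps grp1_80,grp1_83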
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten, split up and validate the value.
+        sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
+        asMaps = sFlattened.split(',');
+        if not asMaps:
+            return self.errorComment(iTagLine, '%s: value required' % (sTag,));
+        for sMap in asMaps:
+            if sMap not in g_dInstructionMaps:
+                return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
+                                                   % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));
+
+        # Add the maps to the current list.  Throw errors on duplicates.
+        for oMap in oInstr.aoMaps:
+            if oMap.sName in asMaps:
+                return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));
+
+        for sMap in asMaps:
+            oMap = g_dInstructionMaps[sMap];
+            if oMap not in oInstr.aoMaps:
+                oInstr.aoMaps.append(oMap);
+            else:
+                self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@oppfx
+        Value:      n/a|none|0x66|0xf3|0xf2
+
+        Required prefix for the instruction.  (In a (E)VEX context this is the
+        value of the 'pp' field rather than an actual prefix.)
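+
+        Example (illustrative): \@oppfx 0x66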
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten and validate the value.
+        sFlattened = self.flattenAllSections(aasSections);
+        asPrefixes = sFlattened.split();
+        if not asPrefixes:
+            return self.errorComment(iTagLine, '%s: value required' % (sTag,));
+        if len(asPrefixes) > 1:
+            return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));
+
+        sPrefix = asPrefixes[0].lower();
+        if sPrefix == 'none':
+            sPrefix = 'none';
+        elif sPrefix == 'n/a':
+            sPrefix = None;
+        else:
+            if len(sPrefix) == 2:
+                sPrefix = '0x' + sPrefix;
+            if not _isValidOpcodeByte(sPrefix):
+                return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));
+
+        if sPrefix is not None and sPrefix not in g_kdPrefixes:
+            return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));
+
+        # Set it.
+        if oInstr.sPrefix is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
+        oInstr.sPrefix = sPrefix;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@opcode
+        Value:      0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)
+
+        The opcode byte or sub-byte for the instruction in the context of a map.
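+
+        Examples (illustrative): \@opcode 0x88 and \@opcode /4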
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten and validate the value.
+        sOpcode = self.flattenAllSections(aasSections);
+        if _isValidOpcodeByte(sOpcode):
+            pass;
+        elif len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1] in '01234567':
+            pass;
+        elif len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in '01234567':
+            pass;
+        elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in '01234567':
+            pass;
+        else:
+            return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));
+
+        # Set it.
+        if oInstr.sOpcode is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
+        oInstr.sOpcode = sOpcode;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@opcodesub
+        Value:      none | 11 mr/reg | !11 mr/reg
+
+        This is a simple way of dealing with encodings where mod=3 and mod!=3
+        represent exactly two different instructions.  The more proper way would
+        be to go via maps with two members, but this is faster.
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten and validate the value.
+        sSubOpcode = self.flattenAllSections(aasSections);
+        if sSubOpcode not in g_kdSubOpcodes:
+            return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sSubOpcode,));
+        sSubOpcode = g_kdSubOpcodes[sSubOpcode][0];
+
+        # Set it.
+        if oInstr.sSubOpcode is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
+                                               % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
+        oInstr.sSubOpcode = sSubOpcode;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@openc
+        Value:      ModR/M|fixed|prefix|<map name>
+
+        The instruction operand encoding style.
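+
+        Example (illustrative): \@openc ModR/M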
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten and validate the value.
+        sEncoding = self.flattenAllSections(aasSections);
+        if sEncoding in g_kdEncodings:
+            pass;
+        elif sEncoding in g_dInstructionMaps:
+            pass;
+        elif not _isValidOpcodeByte(sEncoding):
+            return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));
+
+        # Set it.
+        if oInstr.sEncoding is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
+                                               % ( sTag, oInstr.sEncoding, sEncoding,));
+        oInstr.sEncoding = sEncoding;
+
+        _ = iEndLine;
+        return True;
+
+    ## EFlags tag to Instruction attribute name.
+    kdOpFlagToAttr = {
+        '@opfltest':    'asFlTest',
+        '@opflmodify':  'asFlModify',
+        '@opflundef':   'asFlUndefined',
+        '@opflset':     'asFlSet',
+        '@opflclear':   'asFlClear',
+    };
+
+    def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
+        Value:  <eflags specifier>
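+
+        Example (illustrative; assumes these mnemonics are keys of
+        g_kdEFlagsMnemonics): \@opflmodify cf,pf,af,zf,sf,of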
+
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten, split up and validate the values.
+        asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
+        if len(asFlags) == 1 and asFlags[0].lower() == 'none':
+            asFlags = [];
+        else:
+            fRc = True;
+            for iFlag, sFlag in enumerate(asFlags):
+                if sFlag not in g_kdEFlagsMnemonics:
+                    if sFlag.strip() in g_kdEFlagsMnemonics:
+                        asFlags[iFlag] = sFlag.strip();
+                    else:
+                        fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
+            if not fRc:
+                return False;
+
+        # Set them.
+        asOld = getattr(oInstr, self.kdOpFlagToAttr[sTag]);
+        if asOld is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
+        setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@ophints
+        Value:      Comma or space separated list of flags and hints.
+
+        This covers the disassembler flags table and more.
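+
+        Example (illustrative; assumes 'harmless' is a key of g_kdHints):
+            \@ophints harmless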
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten as a space separated list, split it up and validate the values.
+        asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
+        if len(asHints) == 1 and asHints[0].lower() == 'none':
+            asHints = [];
+        else:
+            fRc = True;
+            for iHint, sHint in enumerate(asHints):
+                if sHint not in g_kdHints:
+                    if sHint.strip() in g_kdHints:
+                        asHints[iHint] = sHint.strip();
+                    else:
+                        fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
+            if not fRc:
+                return False;
+
+        # Append them.
+        for sHint in asHints:
+            if sHint not in oInstr.dHints:
+                oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
+            else:
+                self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@opdisenum
+        Value:      OP_XXXX
+
+        This is for selecting a specific (legacy) disassembler enum value for
+        the instruction.
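+
+        Example (illustrative): \@opdisenum OP_UD2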
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten and split.
+        asWords = self.flattenAllSections(aasSections).split();
+        if len(asWords) != 1:
+            self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
+            if not asWords:
+                return False;
+        sDisEnum = asWords[0];
+        if not self.oReDisEnum.match(sDisEnum):
+            return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
+                                               % (sTag, sDisEnum, self.oReDisEnum.pattern));
+
+        # Set it.
+        if oInstr.sDisEnum is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
+        oInstr.sDisEnum = sDisEnum;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@opmincpu
+        Value:      <simple CPU name>
+
+        Indicates when this instruction was introduced.
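+
+        Example (illustrative; assumes '80386' is a key of g_kdCpuNames):
+            \@opmincpu 80386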
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten the value, split into words, make sure there's just one, and validate it.
+        asCpus = self.flattenAllSections(aasSections).split();
+        if not asCpus:
+            return self.errorComment(iTagLine, '%s: value required' % (sTag,));
+        if len(asCpus) > 1:
+            self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));
+
+        sMinCpu = asCpus[0];
+        if sMinCpu not in g_kdCpuNames:
+            return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
+                                               % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));
+
+        # Set it, refusing to silently overwrite an earlier, different value.
+        if oInstr.sMinCpu is None:
+            oInstr.sMinCpu = sMinCpu;
+        elif oInstr.sMinCpu != sMinCpu:
+            self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@opcpuid
+        Value:      none | <CPUID flag specifier>
+
+        CPUID feature bit which is required for the instruction to be present.
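+
+        Example (illustrative; assumes 'sse2' is a key of g_kdCpuIdFlags):
+            \@opcpuid sse2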
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten as a space separated list, split it up and validate the values.
+        asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
+        if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
+            asCpuIds = [];
+        else:
+            fRc = True;
+            for iCpuId, sCpuId in enumerate(asCpuIds):
+                if sCpuId not in g_kdCpuIdFlags:
+                    if sCpuId.strip() in g_kdCpuIdFlags:
+                        asCpuIds[iCpuId] = sCpuId.strip();
+                    else:
+                        fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
+            if not fRc:
+                return False;
+
+        # Append them.
+        for sCpuId in asCpuIds:
+            if sCpuId not in oInstr.asCpuIds:
+                oInstr.asCpuIds.append(sCpuId);
+            else:
+                self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@opgroup
+        Value:      op_grp1[_subgrp2[_subsubgrp3]]
+
+        Instruction grouping.
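+
+        Example (illustrative; assumes the name matches self.oReGroupName):
+            \@opgroup og_gen_arith_bin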
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten as a space separated list, split it up and validate the values.
+        asGroups = self.flattenAllSections(aasSections).split();
+        if len(asGroups) != 1:
+            return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
+        sGroup = asGroups[0];
+        if not self.oReGroupName.match(sGroup):
+            return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
+                                               % (sTag, sGroup, self.oReGroupName.pattern));
+
+        # Set it.
+        if oInstr.sGroup is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
+        oInstr.sGroup = sGroup;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:    \@opunused, \@opinvalid, \@opinvlstyle
+        Value:  <invalid opcode behaviour style>
+
+        The \@opunused indicates the specification is for a currently unused
+        instruction encoding.
+
+        The \@opinvalid indicates the specification is for a currently invalid
+        instruction encoding (like UD2).
+
+        The \@opinvlstyle just indicates how CPUs decode the instruction when
+        not supported (\@opcpuid, \@opmincpu) or disabled.
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten as a space separated list, split it up and validate the values.
+        asStyles = self.flattenAllSections(aasSections).split();
+        if len(asStyles) != 1:
+            return self.errorComment(iTagLine, '%s: exactly one invalid-behaviour style, please: %s' % (sTag, asStyles,));
+        sStyle = asStyles[0];
+        if sStyle not in g_kdInvalidStyles:
+            return self.errorComment(iTagLine, '%s: invalid invalid-behaviour style: %s (valid: %s)'
+                                               % (sTag, sStyle, g_kdInvalidStyles.keys(),));
+        # Set it.
+        if oInstr.sInvalidStyle is not None:
+            return self.errorComment(iTagLine,
+                                     '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
+                                     % ( sTag, oInstr.sInvalidStyle, sStyle,));
+        oInstr.sInvalidStyle = sStyle;
+        if sTag == '@opunused':
+            oInstr.fUnused = True;
+        elif sTag == '@opinvalid':
+            oInstr.fInvalid = True;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
+        """
+        Tag:        \@optest
+        Value:      [<selectors>[ ]?] <inputs> -> <outputs>
+        Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?
+
+        The main idea here is to generate basic instruction tests.
+
+        Probably the simplest way of handling the diverse input would be to use
+        it to produce size-optimized byte code for a simple interpreter that
+        modifies the register input and output states.
+
+        An alternative to the interpreter would be creating multiple tables,
+        but that becomes rather complicated with regard to what goes where, and
+        how to use them efficiently.
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        #
+        # Do it section by section.
+        #
+        for asSectionLines in aasSections:
+            #
+            # Sort the input into outputs, inputs and selector conditions.
+            #
+            sFlatSection = self.flattenAllSections([asSectionLines,]);
+            if not sFlatSection:
+                self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
+                continue;
+            oTest = InstructionTest(oInstr);
+
+            asSelectors = [];
+            asInputs    = [];
+            asOutputs   = [];
+            asCur   = asOutputs;
+            fRc     = True;
+            asWords = sFlatSection.split();
+            for iWord in range(len(asWords) - 1, -1, -1):
+                sWord = asWords[iWord];
+                # Check for array switchers.
+                if sWord == '->':
+                    if asCur != asOutputs:
+                        fRc = self.errorComment(iTagLine, '%s: "->" shall only occur once: %s' % (sTag, sFlatSection,));
+                        break;
+                    asCur = asInputs;
+                elif sWord == '/':
+                    if asCur != asInputs:
+                        fRc = self.errorComment(iTagLine, '%s: "/" shall only occur once: %s' % (sTag, sFlatSection,));
+                        break;
+                    asCur = asSelectors;
+                else:
+                    asCur.insert(0, sWord);
+
+            #
+            # Validate and add selectors.
+            #
+            for sCond in asSelectors:
+                sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
+                oSelector = None;
+                for sOp in TestSelector.kasCompareOps:
+                    off = sCondExp.find(sOp);
+                    if off >= 0:
+                        sVariable = sCondExp[:off];
+                        sValue    = sCondExp[off + len(sOp):];
+                        if sVariable in TestSelector.kdVariables:
+                            if sValue in TestSelector.kdVariables[sVariable]:
+                                oSelector = TestSelector(sVariable, sOp, sValue);
+                            else:
+                                self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
+                                                             % ( sTag, sValue, sCond,
+                                                                 TestSelector.kdVariables[sVariable].keys(),));
+                        else:
+                            self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
+                                                         % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
+                        break;
+                if oSelector is not None:
+                    for oExisting in oTest.aoSelectors:
+                        if oExisting.sVariable == oSelector.sVariable:
+                            self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
+                                                         % ( sTag, oSelector.sVariable, oExisting, oSelector,));
+                    oTest.aoSelectors.append(oSelector);
+                else:
+                    fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));
+
+            #
+            # Validate outputs and inputs, adding them to the test as we go along.
+            #
+            for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
+                asValidFieldKinds = [ 'both', sDesc, ];
+                for sItem in asItems:
+                    oItem = None;
+                    for sOp in TestInOut.kasOperators:
+                        off = sItem.find(sOp);
+                        if off < 0:
+                            continue;
+                        sField     = sItem[:off];
+                        sValueType = sItem[off + len(sOp):];
+                        if     sField in TestInOut.kdFields \
+                           and TestInOut.kdFields[sField][1] in asValidFieldKinds:
+                            asSplit = sValueType.split(':', 1);
+                            sValue  = asSplit[0];
+                            sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
+                            if sType in TestInOut.kdTypes:
+                                oValid = TestInOut.kdTypes[sType].validate(sValue);
+                                if oValid is True:
+                                    if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
+                                        oItem = TestInOut(sField, sOp, sValue, sType);
+                                    else:
+                                        self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
+                                                                    % ( sTag, sDesc, sItem, ));
+                                else:
+                                    self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
+                                                                % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
+                            else:
+                                self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
+                                                             % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
+                        else:
+                            self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
+                                                         % ( sTag, sDesc, sField, sItem,
+                                                             ', '.join([sKey for sKey in TestInOut.kdFields.keys()
+                                                                        if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
+                        break;
+                    if oItem is not None:
+                        for oExisting in aoDst:
+                            if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
+                                self.errorComment(iTagLine,
+                                                  '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
+                                                  % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
+                        aoDst.append(oItem);
+                    else:
+                        fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));
+
+            #
+            # Add the test if everything went well, otherwise report the failure.
+            #
+            if fRc:
+                oInstr.aoTests.append(oTest);
+            else:
+                self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
+                self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
+                                            % (sTag, asSelectors, asInputs, asOutputs,));
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Numbered \@optest tag.  Either \@optest42 or \@optest[42].
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        iTest = 0;
+        if sTag[-1] == ']':
+            iTest = int(sTag[8:-1]);
+        else:
+            iTest = int(sTag[7:]);
+
+        if iTest != len(oInstr.aoTests):
+            self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),));
+        return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
+
+    def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@optestign | \@optestignore
+        Value:      <value is ignored>
+
+        This is a simple trick to ignore a test while debugging another.
+
+        See also \@oponlytest.
+        """
+        _ = sTag; _ = aasSections; _ = iTagLine; _ = iEndLine;
+        return True;
+
+    def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@opcopytests
+        Value:      <opstat | function> [..]
+        Example:    \@opcopytests add_Eb_Gb
+
+        Trick to avoid duplicating tests for different encodings of the same
+        operation.
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten, validate and append the copy job to the instruction.  We execute
+        # them after parsing all the input so we can handle forward references.
+        asToCopy = self.flattenAllSections(aasSections).split();
+        if not asToCopy:
+            return self.errorComment(iTagLine, '%s: requires at least one reference value' % (sTag,));
+        for sToCopy in asToCopy:
+            if sToCopy not in oInstr.asCopyTests:
+                if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
+                    oInstr.asCopyTests.append(sToCopy);
+                else:
+                    self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
+                                                % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
+            else:
+                self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@oponlytest | \@oponly
+        Value:      none
+
+        Only test instructions with this tag.  This is a trick that is handy
+        for singling out one or two new instructions or tests.
+
+        See also \@optestignore.
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Validate and add instruction to only test dictionary.
+        sValue = self.flattenAllSections(aasSections).strip();
+        if sValue:
+            return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));
+
+        if oInstr not in g_aoOnlyTestInstructions:
+            g_aoOnlyTestInstructions.append(oInstr);
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@opxcpttype
+        Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]
+
+        Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).
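+
+        Example (illustrative): \@opxcpttype 4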
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten as a space separated list, split it up and validate the values.
+        asTypes = self.flattenAllSections(aasSections).split();
+        if len(asTypes) != 1:
+            return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
+        sType = asTypes[0];
+        if sType not in g_kdXcptTypes:
+            return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
+                                               % (sTag, sType, sorted(g_kdXcptTypes.keys()),));
+        # Set it.
+        if oInstr.sXcptType is not None:
+            return self.errorComment(iTagLine,
+                                     '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
+                                     % ( sTag, oInstr.sXcptType, sType,));
+        oInstr.sXcptType = sType;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@opfunction
+        Value:      <VMM function name>
+
+        This is for explicitly setting the IEM function name.  Normally we pick
+        this up from the FNIEMOP_XXX macro invocation after the description, or
+        generate it from the mnemonic and operands.
+
+        It is thought it may be necessary to set it when specifying instructions
+        whose implementation doesn't follow immediately or isn't implemented yet.
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten and validate the value.
+        sFunction = self.flattenAllSections(aasSections);
+        if not self.oReFunctionName.match(sFunction):
+            return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
+                                               % (sTag, sFunction, self.oReFunctionName.pattern));
+
+        if oInstr.sFunction is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
+                                     % (sTag, oInstr.sFunction, sFunction,));
+        oInstr.sFunction = sFunction;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:        \@opstats
+        Value:      <VMM statistics base name>
+
+        This is for explicitly setting the statistics name.  Normally we pick
+        this up from the IEMOP_MNEMONIC macro invocation, or generate it from
+        the mnemonic and operands.
+
+        It is thought it may be necessary to set it when specifying instructions
+        whose implementation doesn't follow immediately or isn't implemented yet.
+        """
+        oInstr = self.ensureInstructionForOpTag(iTagLine);
+
+        # Flatten and validate the value.
+        sStats = self.flattenAllSections(aasSections);
+        if not self.oReStatsName.match(sStats):
+            return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
+                                               % (sTag, sStats, self.oReStatsName.pattern));
+
+        if oInstr.sStats is not None:
+            return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
+                                     % (sTag, oInstr.sStats, sStats,));
+        oInstr.sStats = sStats;
+
+        _ = iEndLine;
+        return True;
+
+    def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
+        """
+        Tag:    \@opdone
+        Value:  none
+
+        Used to explicitly flush the instructions that have been specified.
+        """
+        sFlattened = self.flattenAllSections(aasSections);
+        if sFlattened != '':
+            return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sFlattened,));
+        _ = sTag; _ = iEndLine;
+        return self.doneInstructions();
+
+    ## @}
+
+
+    def parseComment(self):
+        """
+        Parse the current comment (self.sComment).
+
+        If it's an opcode specifying comment, we reset the macro stuff.
+        """
+        #
+        # Reject if comment doesn't seem to contain anything interesting.
+        #
+        if    self.sComment.find('Opcode') < 0 \
+          and self.sComment.find('@') < 0:
+            return False;
+
+        #
+        # Split the comment into lines, removing leading asterisks and spaces.
+        # Also remove leading and trailing empty lines.
+        #
+        asLines = self.sComment.split('\n');
+        for iLine, sLine in enumerate(asLines):
+            asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();
+
+        while asLines and not asLines[0]:
+            self.iCommentLine += 1;
+            asLines.pop(0);
+
+        while asLines and not asLines[-1]:
+            asLines.pop(len(asLines) - 1);
+
+        #
+        # Check for old style: Opcode 0x0f 0x12
+        #
+        if asLines[0].startswith('Opcode '):
+            self.parseCommentOldOpcode(asLines);
+
+        #
+        # Look for @op* tagged data.
+        #
+        cOpTags      = 0;
+        sFlatDefault = None;
+        sCurTag      = '@default';
+        iCurTagLine  = 0;
+        asCurSection = [];
+        aasSections  = [ asCurSection, ];
+        for iLine, sLine in enumerate(asLines):
+            if not sLine.startswith('@'):
+                if sLine:
+                    asCurSection.append(sLine);
+                elif asCurSection:
+                    asCurSection = [];
+                    aasSections.append(asCurSection);
+            else:
+                #
+                # Process the previous tag.
+                #
+                if not asCurSection and len(aasSections) > 1:
+                    aasSections.pop(-1);
+                if sCurTag in self.dTagHandlers:
+                    self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
+                    cOpTags += 1;
+                elif sCurTag.startswith('@op'):
+                    self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
+                elif sCurTag == '@default':
+                    sFlatDefault = self.flattenAllSections(aasSections);
+                elif '@op' + sCurTag[1:] in self.dTagHandlers:
+                    self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
+                elif sCurTag in ['@encoding', '@opencoding']:
+                    self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));
+
+                #
+                # New tag.
+                #
+                asSplit = sLine.split(None, 1);
+                sCurTag = asSplit[0].lower();
+                if len(asSplit) > 1:
+                    asCurSection = [asSplit[1],];
+                else:
+                    asCurSection = [];
+                aasSections = [asCurSection, ];
+                iCurTagLine = iLine;
+
+        #
+        # Process the final tag.
+        #
+        if not asCurSection and len(aasSections) > 1:
+            aasSections.pop(-1);
+        if sCurTag in self.dTagHandlers:
+            self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
+            cOpTags += 1;
+        elif sCurTag.startswith('@op'):
+            self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
+        elif sCurTag == '@default':
+            sFlatDefault = self.flattenAllSections(aasSections);
+
+        #
+        # Don't allow default text in blocks containing @op*.
+        #
+        if cOpTags > 0 and sFlatDefault:
+            self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));
+
+        return True;
+
+    def parseMacroInvocation(self, sInvocation):
+        """
+        Parses a macro invocation.
+
+        Returns a tuple, first element is the offset following the macro
+        invocation. The second element is a list of macro arguments, where the
+        zero'th is the macro name.
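+
+        Example (illustrative): for "FNIEMOP_DEF(iemOp_ud2)" this returns
+        (offset just past the ')', ['FNIEMOP_DEF', 'iemOp_ud2']).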
+        """
+        # First the name.
+        offOpen = sInvocation.find('(');
+        if offOpen <= 0:
+            self.raiseError("macro invocation open parenthesis not found");
+        sName = sInvocation[:offOpen].strip();
+        if not self.oReMacroName.match(sName):
+            self.raiseError("invalid macro name '%s'" % (sName,));
+        asRet = [sName, ];
+
+        # Arguments.
+        iLine    = self.iLine;
+        cDepth   = 1;
+        off      = offOpen + 1;
+        offStart = off;
+        while cDepth > 0:
+            if off >= len(sInvocation):
+                if iLine >= len(self.asLines):
+                    self.raiseError('macro invocation beyond end of file');
+                sInvocation += self.asLines[iLine];
+                iLine += 1;
+            ch = sInvocation[off];
+
+            if ch == ',' or ch == ')':
+                if cDepth == 1:
+                    asRet.append(sInvocation[offStart:off].strip());
+                    offStart = off + 1;
+                if ch == ')':
+                    cDepth -= 1;
+            elif ch == '(':
+                cDepth += 1;
+            off += 1;
+
+        return (off, asRet);
+
+    def findAndParseMacroInvocationEx(self, sCode, sMacro):
+        """
+        Returns (len(sCode), None) if not found, parseMacroInvocation result if found.
+        """
+        offHit = sCode.find(sMacro);
+        if offHit >= 0 and sCode[offHit + len(sMacro):].strip()[:1] == '(':
+            offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
+            return (offHit + offAfter, asRet);
+        return (len(sCode), None);
+
+    def findAndParseMacroInvocation(self, sCode, sMacro):
+        """
+        Returns None if not found, arguments as per parseMacroInvocation if found.
+        """
+        return self.findAndParseMacroInvocationEx(sCode, sMacro)[1];
+
+    def findAndParseFirstMacroInvocation(self, sCode, asMacro):
+        """
+        Returns same as findAndParseMacroInvocation.
+        """
+        for sMacro in asMacro:
+            asRet = self.findAndParseMacroInvocation(sCode, sMacro);
+            if asRet is not None:
+                return asRet;
+        return None;
+
+    def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
+                              sDisHints, sIemHints, asOperands):
+        """
+        Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
+        IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.
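+
+        Illustrative invocation (hypothetical values; a_Form must be a key of
+        g_kdIemForms and the hint flags valid DISOPTYPE_/IEMOPHINT_ values):
+            IEMOP_MNEMONIC2EX(add_Eb_Gb, "add Eb,Gb", MR, ADD, add, Eb, Gb,
+                              DISOPTYPE_HARMLESS, 0);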
+        """
+        #
+        # Some invocation checks.
+        #
+        if sUpper != sUpper.upper():
+            self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
+        if sLower != sLower.lower():
+            self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
+        if sUpper.lower() != sLower:
+            self.error('%s: a_Upper and a_Lower parameters do not match: %s vs %s' % (sMacro, sUpper, sLower,));
+        if not self.oReMnemonic.match(sLower):
+            self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));
+
+        #
+        # Check if sIemHints tells us to not consider this macro invocation.
+        #
+        if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
+            return True;
+
+        # Apply to the last instruction only for now.
+        if not self.aoCurInstrs:
+            self.addInstruction();
+        oInstr = self.aoCurInstrs[-1];
+        if oInstr.iLineMnemonicMacro == -1:
+            oInstr.iLineMnemonicMacro = self.iLine;
+        else:
+            self.error('%s: already saw an IEMOP_MNEMONIC* macro on line %u for this instruction'
+                       % (sMacro, oInstr.iLineMnemonicMacro,));
+
+        # Mnemonic
+        if oInstr.sMnemonic is None:
+            oInstr.sMnemonic = sLower;
+        elif oInstr.sMnemonic != sLower:
+            self.error('%s: current instruction mnemonic and a_Lower do not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));
+
+        # Process operands.
+        if len(oInstr.aoOperands) not in [0, len(asOperands)]:
+            self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
+                       % (sMacro, len(oInstr.aoOperands), len(asOperands),));
+        for iOperand, sType in enumerate(asOperands):
+            sWhere = g_kdOpTypes.get(sType, [None, None])[1];
+            if sWhere is None:
+                self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
+                if iOperand < len(oInstr.aoOperands): # error recovery.
+                    sWhere = oInstr.aoOperands[iOperand].sWhere;
+                    sType  = oInstr.aoOperands[iOperand].sType;
+                else:
+                    sWhere = 'reg';
+                    sType  = 'Gb';
+            if iOperand == len(oInstr.aoOperands):
+                oInstr.aoOperands.append(Operand(sWhere, sType));
+            elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
+                self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
+                           % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
+                              oInstr.aoOperands[iOperand].sType, sWhere, sType,));
+
+        # Encoding.
+        if sForm not in g_kdIemForms:
+            self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
+        else:
+            if oInstr.sEncoding is None:
+                oInstr.sEncoding = g_kdIemForms[sForm][0];
+            elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
+                self.error('%s: current instruction @openc and a_Form do not match: %s vs %s (%s)'
+                           % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm][0], sForm));
+
+            # Check the parameter locations for the encoding.
+            if g_kdIemForms[sForm][1] is not None:
+                if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
+                    self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
+                               % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
+                else:
+                    for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
+                        if oInstr.aoOperands[iOperand].sWhere != sWhere:
+                            self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
+                                       % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
+
+        # Stats.
+        if not self.oReStatsName.match(sStats):
+            self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
+        elif oInstr.sStats is None:
+            oInstr.sStats = sStats;
+        elif oInstr.sStats != sStats:
+            self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
+                       % (sMacro, oInstr.sStats, sStats,));
+
+        # Process the hints (simply merge with @ophints w/o checking anything).
+        for sHint in sDisHints.split('|'):
+            sHint = sHint.strip();
+            if sHint.startswith('DISOPTYPE_'):
+                sShortHint = sHint[len('DISOPTYPE_'):].lower();
+                if sShortHint in g_kdHints:
+                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
+                else:
+                    self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
+            elif sHint != '0':
+                self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));
+
+        for sHint in sIemHints.split('|'):
+            sHint = sHint.strip();
+            if sHint.startswith('IEMOPHINT_'):
+                sShortHint = sHint[len('IEMOPHINT_'):].lower();
+                if sShortHint in g_kdHints:
+                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
+                else:
+                    self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
+            elif sHint != '0':
+                self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));
+
+
+        _ = sAsm;
+        return True;
+
+    def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
+        """
+        Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
+        IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.
+        """
+        if not asOperands:
+            return self.workerIemOpMnemonicEx(sMacro, sLower, sLower, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
+        return self.workerIemOpMnemonicEx(sMacro, sLower + '_' + '_'.join(asOperands), sLower + ' ' + ','.join(asOperands),
+                                          sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
+
+    def checkCodeForMacro(self, sCode):
+        """
+        Checks code for relevant macro invocation.
+        """
+        #
+        # Scan macro invocations.
+        #
+        if sCode.find('(') > 0:
+            # Look for instruction decoder function definitions. ASSUME single line.
+            asArgs = self.findAndParseFirstMacroInvocation(sCode,
+                                                           [ 'FNIEMOP_DEF',
+                                                             'FNIEMOP_STUB',
+                                                             'FNIEMOP_STUB_1',
+                                                             'FNIEMOP_UD_STUB',
+                                                             'FNIEMOP_UD_STUB_1' ]);
+            if asArgs is not None:
+                sFunction = asArgs[1];
+
+                if not self.aoCurInstrs:
+                    self.addInstruction();
+                for oInstr in self.aoCurInstrs:
+                    if oInstr.iLineFnIemOpMacro == -1:
+                        oInstr.iLineFnIemOpMacro = self.iLine;
+                    else:
+                        self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
+                self.setInstrunctionAttrib('sFunction', sFunction);
+                self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
+                self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
+                if asArgs[0].find('STUB') > 0:
+                    self.doneInstructions();
+                return True;
+
+            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
+            if asArgs is not None:
+                if len(self.aoCurInstrs) == 1:
+                    oInstr = self.aoCurInstrs[0];
+                    if oInstr.sStats is None:
+                        oInstr.sStats = asArgs[1];
+                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);
+
+            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
+            if asArgs is not None:
+                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6], asArgs[7],
+                                           []);
+            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
+            if asArgs is not None:
+                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7], asArgs[8],
+                                           [asArgs[6],]);
+            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
+            if asArgs is not None:
+                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8], asArgs[9],
+                                           [asArgs[6], asArgs[7]]);
+            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
+            if asArgs is not None:
+                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
+                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
+            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
+            #                   a_fIemHints)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
+            if asArgs is not None:
+                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
+                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);
+
+            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
+            if asArgs is not None:
+                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
+            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
+            if asArgs is not None:
+                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
+            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
+            if asArgs is not None:
+                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
+                                         [asArgs[4], asArgs[5],]);
+            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
+            if asArgs is not None:
+                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
+                                         [asArgs[4], asArgs[5], asArgs[6],]);
+            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
+            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
+            if asArgs is not None:
+                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
+                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);
+
+        return False;
+
+
+    def parse(self):
+        """
+        Parses the given file.
+        Returns the number of errors.
+        Raises exception on fatal trouble.
+        """
+        #self.debug('Parsing %s' % (self.sSrcFile,));
+
+        while self.iLine < len(self.asLines):
+            sLine = self.asLines[self.iLine];
+            self.iLine  += 1;
+
+            # Only comments change the parser state, so only lines containing
+            # a slash can possibly influence it.
+            offSlash = sLine.find('/');
+            if offSlash >= 0:
+                if offSlash + 1 >= len(sLine)  or  sLine[offSlash + 1] != '/'  or  self.iState != self.kiCode:
+                    offLine = 0;
+                    while offLine < len(sLine):
+                        if self.iState == self.kiCode:
+                            offHit = sLine.find('/*', offLine); # only multiline comments for now.
+                            if offHit >= 0:
+                                self.checkCodeForMacro(sLine[offLine:offHit]);
+                                self.sComment     = '';
+                                self.iCommentLine = self.iLine;
+                                self.iState       = self.kiCommentMulti;
+                                offLine = offHit + 2;
+                            else:
+                                self.checkCodeForMacro(sLine[offLine:]);
+                                offLine = len(sLine);
+
+                        elif self.iState == self.kiCommentMulti:
+                            offHit = sLine.find('*/', offLine);
+                            if offHit >= 0:
+                                self.sComment += sLine[offLine:offHit];
+                                self.iState    = self.kiCode;
+                                offLine = offHit + 2;
+                                self.parseComment();
+                            else:
+                                self.sComment += sLine[offLine:];
+                                offLine = len(sLine);
+                        else:
+                            assert False;
+                # C++ line comment.
+                elif offSlash > 0:
+                    self.checkCodeForMacro(sLine[:offSlash]);
+
+            # No slash, but append the line if in multi-line comment.
+            elif self.iState == self.kiCommentMulti:
+                #self.debug('line %d: multi' % (self.iLine,));
+                self.sComment += sLine;
+
+            # No slash, but check code line for relevant macro.
+            elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
+                #self.debug('line %d: macro' % (self.iLine,));
+                self.checkCodeForMacro(sLine);
+
+            # If the line is a '}' in the first position, complete the instructions.
+            elif self.iState == self.kiCode and sLine[0] == '}':
+                #self.debug('line %d: }' % (self.iLine,));
+                self.doneInstructions();
+
+        self.doneInstructions();
+        self.debug('%3s stubs out of %3s instructions in %s' % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
+        return self.printErrors();
+
+
+def __parseFileByName(sSrcFile, sDefaultMap):
+    """
+    Parses one source file for instruction specifications.
+    """
+    #
+    # Read sSrcFile into a line array.
+    #
+    try:
+        oFile = open(sSrcFile, "r");
+    except Exception as oXcpt:
+        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
+    try:
+        asLines = oFile.readlines();
+    except Exception as oXcpt:
+        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
+    finally:
+        oFile.close();
+
+    #
+    # Do the parsing.
+    #
+    try:
+        cErrors = SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
+    except ParserException as oXcpt:
+        print(str(oXcpt));
+        raise;
+
+    return cErrors;
+
+
+def __doTestCopying():
+    """
+    Executes the asCopyTests instructions.
+    """
+    asErrors = [];
+    for oDstInstr in g_aoAllInstructions:
+        if oDstInstr.asCopyTests:
+            for sSrcInstr in oDstInstr.asCopyTests:
+                oSrcInstr = g_dAllInstructionsByStat.get(sSrcInstr, None);
+                if oSrcInstr:
+                    aoSrcInstrs = [oSrcInstr,];
+                else:
+                    aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);
+                if aoSrcInstrs:
+                    for oSrcInstr in aoSrcInstrs:
+                        if oSrcInstr != oDstInstr:
+                            oDstInstr.aoTests.extend(oSrcInstr.aoTests);
+                        else:
+                            asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
+                                            % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
+                else:
+                    asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
+                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
+
+    if asErrors:
+        sys.stderr.write(u''.join(asErrors));
+    return len(asErrors);
+
+
+def __applyOnlyTest():
+    """
+    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
+    all other instructions so that only these get tested.
+    """
+    if g_aoOnlyTestInstructions:
+        for oInstr in g_aoAllInstructions:
+            if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
+                oInstr.aoTests = [];
+    return 0;
+
+def __parseAll():
+    """
+    Parses all the IEMAllInstruction*.cpp.h files.
+
+    Raises exception on failure.
+    """
+    sSrcDir = os.path.dirname(os.path.abspath(__file__));
+    cErrors = 0;
+    for sDefaultMap, sName in [
+        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
+        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
+        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
+        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
+        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
+        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
+        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
+        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
+    ]:
+        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap);
+    cErrors += __doTestCopying();
+    cErrors += __applyOnlyTest();
+
+    if cErrors != 0:
+        sys.stderr.write('%d parse errors\n' % (cErrors,));
+        sys.exit(1);
+    return True;
+
+
+
+__parseAll();
+
+
+#
+# Generators (may perhaps move later).
+#
+def generateDisassemblerTables(oDstFile = sys.stdout):
+    """
+    Generates disassembler tables.
+    """
+
+    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
+                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
+        assert oMap.sName == sName;
+        asLines = [];
+
+        asLines.append('/* Generated from: %-11s  Selector: %-7s  Encoding: %-7s  Lead bytes opcodes: %s */'
+                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
+        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
+        asLines.append('{');
+
+        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
+
+        aoTableOrder = oMap.getInstructionsInTableOrder();
+        for iInstr, oInstr in enumerate(aoTableOrder):
+
+            if (iInstr & 0xf) == 0:
+                if iInstr != 0:
+                    asLines.append('');
+                asLines.append('    /* %x */' % (iInstr >> 4,));
+
+            if oInstr is None:
+                pass; #asLines.append('    /* %#04x */ None,' % (iInstr));
+            elif isinstance(oInstr, list):
+                asLines.append('    /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
+            else:
+                sMacro = 'OP';
+                cMaxOperands = 3;
+                if len(oInstr.aoOperands) > 3:
+                    sMacro = 'OPVEX';
+                    cMaxOperands = 4;
+                assert len(oInstr.aoOperands) <= cMaxOperands;
+
+                #
+                # Format string.
+                #
+                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
+                for iOperand, oOperand in enumerate(oInstr.aoOperands):
+                    sTmp += ' ' if iOperand == 0 else ',';
+                    if g_kdOpTypes[oOperand.sType][2][0] != '%':        ## @todo remove upper() later.
+                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
+                    else:
+                        sTmp += g_kdOpTypes[oOperand.sType][2];
+                sTmp += '",';
+                asColumns = [ sTmp, ];
+
+                #
+                # Decoders.
+                #
+                iStart = len(asColumns);
+                if oInstr.sEncoding is None:
+                    pass;
+                elif oInstr.sEncoding == 'ModR/M':
+                    # ASSUME the first operand is using the ModR/M encoding
+                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
+                    asColumns.append('IDX_ParseModRM,');
+                    ## @todo IDX_ParseVexDest
+                    # Is second operand using ModR/M too?
+                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
+                        asColumns.append('IDX_UseModRM,');
+                elif oInstr.sEncoding in [ 'prefix', ]:
+                    for oOperand in oInstr.aoOperands:
+                        asColumns.append('0,');
+                elif oInstr.sEncoding in [ 'fixed' ]:
+                    pass;
+                elif oInstr.sEncoding == 'vex2':
+                    asColumns.append('IDX_ParseVex2b,');
+                elif oInstr.sEncoding == 'vex3':
+                    asColumns.append('IDX_ParseVex3b,');
+                elif oInstr.sEncoding in g_dInstructionMaps:
+                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
+                else:
+                    ## @todo
+                    #IDX_ParseTwoByteEsc,
+                    #IDX_ParseGrp1,
+                    #IDX_ParseShiftGrp2,
+                    #IDX_ParseGrp3,
+                    #IDX_ParseGrp4,
+                    #IDX_ParseGrp5,
+                    #IDX_Parse3DNow,
+                    #IDX_ParseGrp6,
+                    #IDX_ParseGrp7,
+                    #IDX_ParseGrp8,
+                    #IDX_ParseGrp9,
+                    #IDX_ParseGrp10,
+                    #IDX_ParseGrp12,
+                    #IDX_ParseGrp13,
+                    #IDX_ParseGrp14,
+                    #IDX_ParseGrp15,
+                    #IDX_ParseGrp16,
+                    #IDX_ParseThreeByteEsc4,
+                    #IDX_ParseThreeByteEsc5,
+                    #IDX_ParseModFence,
+                    #IDX_ParseEscFP,
+                    #IDX_ParseNopPause,
+                    #IDX_ParseInvOpModRM,
+                    assert False, str(oInstr);
+
+                # Check for immediates and stuff in the remaining operands.
+                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
+                    sIdx = g_kdOpTypes[oOperand.sType][0];
+                    if sIdx != 'IDX_UseModRM':
+                        asColumns.append(sIdx + ',');
+                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));
+
+                #
+                # Opcode and operands.
+                #
+                assert oInstr.sDisEnum, str(oInstr);
+                asColumns.append(oInstr.sDisEnum + ',');
+                iStart = len(asColumns);
+                for oOperand in oInstr.aoOperands:
+                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
+                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));
+
+                #
+                # Flags.
+                #
+                sTmp = '';
+                for sHint in sorted(oInstr.dHints.keys()):
+                    sDefine = g_kdHints[sHint];
+                    if sDefine.startswith('DISOPTYPE_'):
+                        if sTmp:
+                            sTmp += ' | ' + sDefine;
+                        else:
+                            sTmp += sDefine;
+                if sTmp:
+                    sTmp += '),';
+                else:
+                    sTmp += '0),';
+                asColumns.append(sTmp);
+
+                #
+                # Format the columns into a line.
+                #
+                sLine = '';
+                for i, s in enumerate(asColumns):
+                    if len(sLine) < aoffColumns[i]:
+                        sLine += ' ' * (aoffColumns[i] - len(sLine));
+                    else:
+                        sLine += ' ';
+                    sLine += s;
+
+                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
+                # DISOPTYPE_HARMLESS),
+                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
+                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
+
+                asLines.append(sLine);
+
+        asLines.append('};');
+        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));
+
+        #
+        # Write out the lines.
+        #
+        oDstFile.write('\n'.join(asLines));
+        oDstFile.write('\n');
+        break; # Only the first map for now.
+
+if __name__ == '__main__':
+    generateDisassemblerTables();
+
diff --git a/src/VBox/VMM/VMMAll/IEMAllInstructionsThree0f38.cpp.h b/src/VBox/VMM/VMMAll/IEMAllInstructionsThree0f38.cpp.h
new file mode 100644
index 0000000..2d6d3a7
--- /dev/null
+++ b/src/VBox/VMM/VMMAll/IEMAllInstructionsThree0f38.cpp.h
@@ -0,0 +1,779 @@
+/* $Id: IEMAllInstructionsThree0f38.cpp.h $ */
+/** @file
+ * IEM - Instruction Decoding and Emulation.
+ *
+ * @remarks IEMAllInstructionsVexMap2.cpp.h is a VEX mirror of this file.
+ *          Any update here is likely needed in that file too.
+ */
+
+/*
+ * Copyright (C) 2011-2017 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @name Three byte opcodes with first two bytes 0x0f 0x38
+ * @{
+ */
+
+/** Opcode      0x0f 0x38 0x00. */
+FNIEMOP_STUB(iemOp_pshufb_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x00. */
+FNIEMOP_STUB(iemOp_pshufb_Vx_Wx);
+/** Opcode      0x0f 0x38 0x01. */
+FNIEMOP_STUB(iemOp_phaddw_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x01. */
+FNIEMOP_STUB(iemOp_phaddw_Vx_Wx);
+/** Opcode      0x0f 0x38 0x02. */
+FNIEMOP_STUB(iemOp_phaddd_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x02. */
+FNIEMOP_STUB(iemOp_phaddd_Vx_Wx);
+/** Opcode      0x0f 0x38 0x03. */
+FNIEMOP_STUB(iemOp_phaddsw_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x03. */
+FNIEMOP_STUB(iemOp_phaddsw_Vx_Wx);
+/** Opcode      0x0f 0x38 0x04. */
+FNIEMOP_STUB(iemOp_pmaddubsw_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x04. */
+FNIEMOP_STUB(iemOp_pmaddubsw_Vx_Wx);
+/** Opcode      0x0f 0x38 0x05. */
+FNIEMOP_STUB(iemOp_phsubw_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x05. */
+FNIEMOP_STUB(iemOp_phsubw_Vx_Wx);
+/** Opcode      0x0f 0x38 0x06. */
+FNIEMOP_STUB(iemOp_phsubd_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x06. */
+FNIEMOP_STUB(iemOp_phsubd_Vx_Wx);
+/** Opcode      0x0f 0x38 0x07. */
+FNIEMOP_STUB(iemOp_phsubsw_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x07. */
+FNIEMOP_STUB(iemOp_phsubsw_Vx_Wx);
+/** Opcode      0x0f 0x38 0x08. */
+FNIEMOP_STUB(iemOp_psignb_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x08. */
+FNIEMOP_STUB(iemOp_psignb_Vx_Wx);
+/** Opcode      0x0f 0x38 0x09. */
+FNIEMOP_STUB(iemOp_psignw_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x09. */
+FNIEMOP_STUB(iemOp_psignw_Vx_Wx);
+/** Opcode      0x0f 0x38 0x0a. */
+FNIEMOP_STUB(iemOp_psignd_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x0a. */
+FNIEMOP_STUB(iemOp_psignd_Vx_Wx);
+/** Opcode      0x0f 0x38 0x0b. */
+FNIEMOP_STUB(iemOp_pmulhrsw_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x0b. */
+FNIEMOP_STUB(iemOp_pmulhrsw_Vx_Wx);
+/*  Opcode      0x0f 0x38 0x0c - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x0c - invalid (vex only). */
+/*  Opcode      0x0f 0x38 0x0d - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x0d - invalid (vex only). */
+/*  Opcode      0x0f 0x38 0x0e - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x0e - invalid (vex only). */
+/*  Opcode      0x0f 0x38 0x0f - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x0f - invalid (vex only). */
+
+
+/*  Opcode      0x0f 0x38 0x10 - invalid */
+/** Opcode 0x66 0x0f 0x38 0x10 (legacy only). */
+FNIEMOP_STUB(iemOp_pblendvb_Vdq_Wdq);
+/*  Opcode      0x0f 0x38 0x11 - invalid */
+/*  Opcode 0x66 0x0f 0x38 0x11 - invalid */
+/*  Opcode      0x0f 0x38 0x12 - invalid */
+/*  Opcode 0x66 0x0f 0x38 0x12 - invalid */
+/*  Opcode      0x0f 0x38 0x13 - invalid */
+/*  Opcode 0x66 0x0f 0x38 0x13 - invalid (vex only). */
+/*  Opcode      0x0f 0x38 0x14 - invalid */
+/** Opcode 0x66 0x0f 0x38 0x14 (legacy only). */
+FNIEMOP_STUB(iemOp_blendvps_Vdq_Wdq);
+/*  Opcode      0x0f 0x38 0x15 - invalid */
+/** Opcode 0x66 0x0f 0x38 0x15 (legacy only). */
+FNIEMOP_STUB(iemOp_blendvpd_Vdq_Wdq);
+/*  Opcode      0x0f 0x38 0x16 - invalid */
+/*  Opcode 0x66 0x0f 0x38 0x16 - invalid (vex only). */
+/*  Opcode      0x0f 0x38 0x17 - invalid */
+/** Opcode 0x66 0x0f 0x38 0x17. */
+FNIEMOP_STUB(iemOp_ptest_Vx_Wx);
+/*  Opcode      0x0f 0x38 0x18 - invalid */
+/*  Opcode 0x66 0x0f 0x38 0x18 - invalid (vex only). */
+/*  Opcode      0x0f 0x38 0x19 - invalid */
+/*  Opcode 0x66 0x0f 0x38 0x19 - invalid (vex only). */
+/*  Opcode      0x0f 0x38 0x1a - invalid */
+/*  Opcode 0x66 0x0f 0x38 0x1a - invalid (vex only). */
+/*  Opcode      0x0f 0x38 0x1b - invalid */
+/*  Opcode 0x66 0x0f 0x38 0x1b - invalid */
+/** Opcode      0x0f 0x38 0x1c. */
+FNIEMOP_STUB(iemOp_pabsb_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x1c. */
+FNIEMOP_STUB(iemOp_pabsb_Vx_Wx);
+/** Opcode      0x0f 0x38 0x1d. */
+FNIEMOP_STUB(iemOp_pabsw_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x1d. */
+FNIEMOP_STUB(iemOp_pabsw_Vx_Wx);
+/** Opcode      0x0f 0x38 0x1e. */
+FNIEMOP_STUB(iemOp_pabsd_Pq_Qq);
+/** Opcode 0x66 0x0f 0x38 0x1e. */
+FNIEMOP_STUB(iemOp_pabsd_Vx_Wx);
+/*  Opcode      0x0f 0x38 0x1f - invalid */
+/*  Opcode 0x66 0x0f 0x38 0x1f - invalid */
+
+
+/** Opcode 0x66 0x0f 0x38 0x20. */
+FNIEMOP_STUB(iemOp_pmovsxbw_Vx_UxMq);
+/** Opcode 0x66 0x0f 0x38 0x21. */
+FNIEMOP_STUB(iemOp_pmovsxbd_Vx_UxMd);
+/** Opcode 0x66 0x0f 0x38 0x22. */
+FNIEMOP_STUB(iemOp_pmovsxbq_Vx_UxMw);
+/** Opcode 0x66 0x0f 0x38 0x23. */
+FNIEMOP_STUB(iemOp_pmovsxwd_Vx_UxMq);
+/** Opcode 0x66 0x0f 0x38 0x24. */
+FNIEMOP_STUB(iemOp_pmovsxwq_Vx_UxMd);
+/** Opcode 0x66 0x0f 0x38 0x25. */
+FNIEMOP_STUB(iemOp_pmovsxdq_Vx_UxMq);
+/*  Opcode 0x66 0x0f 0x38 0x26 - invalid */
+/*  Opcode 0x66 0x0f 0x38 0x27 - invalid */
+/** Opcode 0x66 0x0f 0x38 0x28. */
+FNIEMOP_STUB(iemOp_pmuldq_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x29. */
+FNIEMOP_STUB(iemOp_pcmpeqq_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x2a. */
+FNIEMOP_STUB(iemOp_movntdqa_Vx_Mx);
+/** Opcode 0x66 0x0f 0x38 0x2b. */
+FNIEMOP_STUB(iemOp_packusdw_Vx_Wx);
+/*  Opcode 0x66 0x0f 0x38 0x2c - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x2d - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x2e - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x2f - invalid (vex only). */
+
+/** Opcode 0x66 0x0f 0x38 0x30. */
+FNIEMOP_STUB(iemOp_pmovzxbw_Vx_UxMq);
+/** Opcode 0x66 0x0f 0x38 0x31. */
+FNIEMOP_STUB(iemOp_pmovzxbd_Vx_UxMd);
+/** Opcode 0x66 0x0f 0x38 0x32. */
+FNIEMOP_STUB(iemOp_pmovzxbq_Vx_UxMw);
+/** Opcode 0x66 0x0f 0x38 0x33. */
+FNIEMOP_STUB(iemOp_pmovzxwd_Vx_UxMq);
+/** Opcode 0x66 0x0f 0x38 0x34. */
+FNIEMOP_STUB(iemOp_pmovzxwq_Vx_UxMd);
+/** Opcode 0x66 0x0f 0x38 0x35. */
+FNIEMOP_STUB(iemOp_pmovzxdq_Vx_UxMq);
+/*  Opcode 0x66 0x0f 0x38 0x36 - invalid (vex only). */
+/** Opcode 0x66 0x0f 0x38 0x37. */
+FNIEMOP_STUB(iemOp_pcmpgtq_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x38. */
+FNIEMOP_STUB(iemOp_pminsb_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x39. */
+FNIEMOP_STUB(iemOp_pminsd_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x3a. */
+FNIEMOP_STUB(iemOp_pminuw_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x3b. */
+FNIEMOP_STUB(iemOp_pminud_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x3c. */
+FNIEMOP_STUB(iemOp_pmaxsb_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x3d. */
+FNIEMOP_STUB(iemOp_pmaxsd_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x3e. */
+FNIEMOP_STUB(iemOp_pmaxuw_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x3f. */
+FNIEMOP_STUB(iemOp_pmaxud_Vx_Wx);
+
+
+/** Opcode 0x66 0x0f 0x38 0x40. */
+FNIEMOP_STUB(iemOp_pmulld_Vx_Wx);
+/** Opcode 0x66 0x0f 0x38 0x41. */
+FNIEMOP_STUB(iemOp_phminposuw_Vdq_Wdq);
+/*  Opcode 0x66 0x0f 0x38 0x42 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x43 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x44 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x45 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x46 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x47 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x48 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x49 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x4a - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x4b - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x4c - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x4d - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x4e - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x4f - invalid. */
+
+/*  Opcode 0x66 0x0f 0x38 0x50 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x51 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x52 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x53 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x54 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x55 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x56 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x57 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x58 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x59 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x5a - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x5b - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x5c - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x5d - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x5e - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x5f - invalid. */
+
+/*  Opcode 0x66 0x0f 0x38 0x60 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x61 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x62 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x63 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x64 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x65 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x66 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x67 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x68 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x69 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x6a - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x6b - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x6c - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x6d - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x6e - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x6f - invalid. */
+
+/*  Opcode 0x66 0x0f 0x38 0x70 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x71 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x72 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x73 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x74 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x75 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x76 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x77 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x78 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x79 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x7a - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x7b - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x7c - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x7d - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x7e - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x7f - invalid. */
+
+/** Opcode 0x66 0x0f 0x38 0x80. */
+FNIEMOP_STUB(iemOp_invept_Gy_Mdq);
+/** Opcode 0x66 0x0f 0x38 0x81. */
+FNIEMOP_STUB(iemOp_invvpid_Gy_Mdq);
+/** Opcode 0x66 0x0f 0x38 0x82. */
+FNIEMOP_STUB(iemOp_invpcid_Gy_Mdq);
+/*  Opcode 0x66 0x0f 0x38 0x83 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x84 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x85 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x86 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x87 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x88 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x89 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x8a - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x8b - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x8c - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x8d - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x8e - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x8f - invalid. */
+
+/*  Opcode 0x66 0x0f 0x38 0x90 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x91 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x92 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x93 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x94 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x95 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0x96 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x97 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x98 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x99 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x9a - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x9b - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x9c - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x9d - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x9e - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0x9f - invalid (vex only). */
+
+/*  Opcode 0x66 0x0f 0x38 0xa0 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xa1 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xa2 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xa3 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xa4 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xa5 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xa6 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xa7 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xa8 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xa9 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xaa - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xab - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xac - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xad - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xae - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xaf - invalid (vex only). */
+
+/*  Opcode 0x66 0x0f 0x38 0xb0 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xb1 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xb2 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xb3 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xb4 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xb5 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xb6 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xb7 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xb8 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xb9 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xba - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xbb - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xbc - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xbd - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xbe - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xbf - invalid (vex only). */
+
+/*  Opcode      0x0f 0x38 0xc0 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xc0 - invalid. */
+/*  Opcode      0x0f 0x38 0xc1 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xc1 - invalid. */
+/*  Opcode      0x0f 0x38 0xc2 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xc2 - invalid. */
+/*  Opcode      0x0f 0x38 0xc3 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xc3 - invalid. */
+/*  Opcode      0x0f 0x38 0xc4 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xc4 - invalid. */
+/*  Opcode      0x0f 0x38 0xc5 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xc5 - invalid. */
+/*  Opcode      0x0f 0x38 0xc6 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xc6 - invalid. */
+/*  Opcode      0x0f 0x38 0xc7 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xc7 - invalid. */
+/** Opcode      0x0f 0x38 0xc8. */
+FNIEMOP_STUB(iemOp_sha1nexte_Vdq_Wdq);
+/*  Opcode 0x66 0x0f 0x38 0xc8 - invalid. */
+/** Opcode      0x0f 0x38 0xc9. */
+FNIEMOP_STUB(iemOp_sha1msg1_Vdq_Wdq);
+/*  Opcode 0x66 0x0f 0x38 0xc9 - invalid. */
+/** Opcode      0x0f 0x38 0xca. */
+FNIEMOP_STUB(iemOp_sha1msg2_Vdq_Wdq);
+/*  Opcode 0x66 0x0f 0x38 0xca - invalid. */
+/** Opcode      0x0f 0x38 0xcb. */
+FNIEMOP_STUB(iemOp_sha256rnds2_Vdq_Wdq);
+/*  Opcode 0x66 0x0f 0x38 0xcb - invalid. */
+/** Opcode      0x0f 0x38 0xcc. */
+FNIEMOP_STUB(iemOp_sha256msg1_Vdq_Wdq);
+/*  Opcode 0x66 0x0f 0x38 0xcc - invalid. */
+/** Opcode      0x0f 0x38 0xcd. */
+FNIEMOP_STUB(iemOp_sha256msg2_Vdq_Wdq);
+/*  Opcode 0x66 0x0f 0x38 0xcd - invalid. */
+/*  Opcode      0x0f 0x38 0xce - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xce - invalid. */
+/*  Opcode      0x0f 0x38 0xcf - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xcf - invalid. */
+
+/*  Opcode 0x66 0x0f 0x38 0xd0 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xd1 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xd2 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xd3 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xd4 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xd5 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xd6 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xd7 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xd8 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xd9 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xda - invalid. */
+/** Opcode 0x66 0x0f 0x38 0xdb. */
+FNIEMOP_STUB(iemOp_aesimc_Vdq_Wdq);
+/** Opcode 0x66 0x0f 0x38 0xdc. */
+FNIEMOP_STUB(iemOp_aesenc_Vdq_Wdq);
+/** Opcode 0x66 0x0f 0x38 0xdd. */
+FNIEMOP_STUB(iemOp_aesenclast_Vdq_Wdq);
+/** Opcode 0x66 0x0f 0x38 0xde. */
+FNIEMOP_STUB(iemOp_aesdec_Vdq_Wdq);
+/** Opcode 0x66 0x0f 0x38 0xdf. */
+FNIEMOP_STUB(iemOp_aesdeclast_Vdq_Wdq);
+
+/*  Opcode 0x66 0x0f 0x38 0xe0 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xe1 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xe2 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xe3 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xe4 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xe5 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xe6 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xe7 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xe8 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xe9 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xea - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xeb - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xec - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xed - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xee - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xef - invalid. */
+
+
+/** Opcode      0x0f 0x38 0xf0. */
+FNIEMOP_STUB(iemOp_movbe_Gy_My);
+/** Opcode 0x66 0x0f 0x38 0xf0. */
+FNIEMOP_STUB(iemOp_movbe_Gw_Mw);
+/*  Opcode 0xf3 0x0f 0x38 0xf0 - invalid. */
+/** Opcode 0xf2 0x0f 0x38 0xf0. */
+FNIEMOP_STUB(iemOp_crc32_Gb_Eb);
+
+/** Opcode      0x0f 0x38 0xf1. */
+FNIEMOP_STUB(iemOp_movbe_My_Gy);
+/** Opcode 0x66 0x0f 0x38 0xf1. */
+FNIEMOP_STUB(iemOp_movbe_Mw_Gw);
+/*  Opcode 0xf3 0x0f 0x38 0xf1 - invalid. */
+/** Opcode 0xf2 0x0f 0x38 0xf1. */
+FNIEMOP_STUB(iemOp_crc32_Gv_Ev);
+
+/*  Opcode      0x0f 0x38 0xf2 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xf2 - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xf2 - invalid. */
+/*  Opcode 0xf2 0x0f 0x38 0xf2 - invalid. */
+
+/*  Opcode      0x0f 0x38 0xf3 - invalid (vex only - group 17). */
+/*  Opcode 0x66 0x0f 0x38 0xf3 - invalid (vex only - group 17). */
+/*  Opcode 0xf3 0x0f 0x38 0xf3 - invalid (vex only - group 17). */
+/*  Opcode 0xf2 0x0f 0x38 0xf3 - invalid (vex only - group 17). */
+
+/*  Opcode      0x0f 0x38 0xf4 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xf4 - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xf4 - invalid. */
+/*  Opcode 0xf2 0x0f 0x38 0xf4 - invalid. */
+
+/*  Opcode      0x0f 0x38 0xf5 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xf5 - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xf5 - invalid (vex only). */
+/*  Opcode 0xf2 0x0f 0x38 0xf5 - invalid (vex only). */
+
+/*  Opcode      0x0f 0x38 0xf6 - invalid. */
+/** Opcode 0x66 0x0f 0x38 0xf6. */
+FNIEMOP_STUB(iemOp_adcx_Gy_Ey);
+/** Opcode 0xf3 0x0f 0x38 0xf6. */
+FNIEMOP_STUB(iemOp_adox_Gy_Ey);
+/*  Opcode 0xf2 0x0f 0x38 0xf6 - invalid (vex only). */
+
+/*  Opcode      0x0f 0x38 0xf7 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x38 0xf7 - invalid (vex only). */
+/*  Opcode 0xf3 0x0f 0x38 0xf7 - invalid (vex only). */
+/*  Opcode 0xf2 0x0f 0x38 0xf7 - invalid (vex only). */
+
+/*  Opcode      0x0f 0x38 0xf8 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xf8 - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xf8 - invalid. */
+/*  Opcode 0xf2 0x0f 0x38 0xf8 - invalid. */
+
+/*  Opcode      0x0f 0x38 0xf9 - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xf9 - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xf9 - invalid. */
+/*  Opcode 0xf2 0x0f 0x38 0xf9 - invalid. */
+
+/*  Opcode      0x0f 0x38 0xfa - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xfa - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xfa - invalid. */
+/*  Opcode 0xf2 0x0f 0x38 0xfa - invalid. */
+
+/*  Opcode      0x0f 0x38 0xfb - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xfb - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xfb - invalid. */
+/*  Opcode 0xf2 0x0f 0x38 0xfb - invalid. */
+
+/*  Opcode      0x0f 0x38 0xfc - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xfc - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xfc - invalid. */
+/*  Opcode 0xf2 0x0f 0x38 0xfc - invalid. */
+
+/*  Opcode      0x0f 0x38 0xfd - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xfd - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xfd - invalid. */
+/*  Opcode 0xf2 0x0f 0x38 0xfd - invalid. */
+
+/*  Opcode      0x0f 0x38 0xfe - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xfe - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xfe - invalid. */
+/*  Opcode 0xf2 0x0f 0x38 0xfe - invalid. */
+
+/*  Opcode      0x0f 0x38 0xff - invalid. */
+/*  Opcode 0x66 0x0f 0x38 0xff - invalid. */
+/*  Opcode 0xf3 0x0f 0x38 0xff - invalid. */
+/*  Opcode 0xf2 0x0f 0x38 0xff - invalid. */
+
+
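+/*
+ * Illustrative lookup, inferred from the four-column layout and the 1024
+ * entry AssertCompile below (not an API defined here):
+ *     pfn = g_apfnThreeByte0f38[bOpcode * 4 + idxPrefix];
+ * where idxPrefix is 0 (none), 1 (0x66), 2 (0xF3) or 3 (0xF2).
+ */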
+/**
+ * Three byte opcode map, first two bytes are 0x0f 0x38.
+ * @sa      g_apfnVexMap2
+ */
+IEM_STATIC const PFNIEMOP g_apfnThreeByte0f38[] =
+{
+    /*          no prefix,                  066h prefix                 f3h prefix,                 f2h prefix */
+    /* 0x00 */  iemOp_pshufb_Pq_Qq,         iemOp_pshufb_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x01 */  iemOp_phaddw_Pq_Qq,         iemOp_phaddw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x02 */  iemOp_phaddd_Pq_Qq,         iemOp_phaddd_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x03 */  iemOp_phaddsw_Pq_Qq,        iemOp_phaddsw_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x04 */  iemOp_pmaddubsw_Pq_Qq,      iemOp_pmaddubsw_Vx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x05 */  iemOp_phsubw_Pq_Qq,         iemOp_phsubw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x06 */  iemOp_phsubd_Pq_Qq,         iemOp_phsubd_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x07 */  iemOp_phsubsw_Pq_Qq,        iemOp_phsubsw_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x08 */  iemOp_psignb_Pq_Qq,         iemOp_psignb_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x09 */  iemOp_psignw_Pq_Qq,         iemOp_psignw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x0a */  iemOp_psignd_Pq_Qq,         iemOp_psignd_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x0b */  iemOp_pmulhrsw_Pq_Qq,       iemOp_pmulhrsw_Vx_Wx,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x0c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x0d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x0e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x0f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x10 */  iemOp_InvalidNeedRM,        iemOp_pblendvb_Vdq_Wdq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x11 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x12 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x13 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x14 */  iemOp_InvalidNeedRM,        iemOp_blendvps_Vdq_Wdq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x15 */  iemOp_InvalidNeedRM,        iemOp_blendvpd_Vdq_Wdq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x16 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x17 */  iemOp_InvalidNeedRM,        iemOp_ptest_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x18 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x19 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x1a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x1b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x1c */  iemOp_pabsb_Pq_Qq,          iemOp_pabsb_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x1d */  iemOp_pabsw_Pq_Qq,          iemOp_pabsw_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x1e */  iemOp_pabsd_Pq_Qq,          iemOp_pabsd_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x1f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x20 */  iemOp_InvalidNeedRM,        iemOp_pmovsxbw_Vx_UxMq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x21 */  iemOp_InvalidNeedRM,        iemOp_pmovsxbd_Vx_UxMd,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x22 */  iemOp_InvalidNeedRM,        iemOp_pmovsxbq_Vx_UxMw,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x23 */  iemOp_InvalidNeedRM,        iemOp_pmovsxwd_Vx_UxMq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x24 */  iemOp_InvalidNeedRM,        iemOp_pmovsxwq_Vx_UxMd,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x25 */  iemOp_InvalidNeedRM,        iemOp_pmovsxdq_Vx_UxMq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x26 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x27 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x28 */  iemOp_InvalidNeedRM,        iemOp_pmuldq_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x29 */  iemOp_InvalidNeedRM,        iemOp_pcmpeqq_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2a */  iemOp_InvalidNeedRM,        iemOp_movntdqa_Vx_Mx,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2b */  iemOp_InvalidNeedRM,        iemOp_packusdw_Vx_Wx,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x2d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x2e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x2f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x30 */  iemOp_InvalidNeedRM,        iemOp_pmovzxbw_Vx_UxMq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x31 */  iemOp_InvalidNeedRM,        iemOp_pmovzxbd_Vx_UxMd,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x32 */  iemOp_InvalidNeedRM,        iemOp_pmovzxbq_Vx_UxMw,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x33 */  iemOp_InvalidNeedRM,        iemOp_pmovzxwd_Vx_UxMq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x34 */  iemOp_InvalidNeedRM,        iemOp_pmovzxwq_Vx_UxMd,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x35 */  iemOp_InvalidNeedRM,        iemOp_pmovzxdq_Vx_UxMq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x36 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x37 */  iemOp_InvalidNeedRM,        iemOp_pcmpgtq_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x38 */  iemOp_InvalidNeedRM,        iemOp_pminsb_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x39 */  iemOp_InvalidNeedRM,        iemOp_pminsd_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3a */  iemOp_InvalidNeedRM,        iemOp_pminuw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3b */  iemOp_InvalidNeedRM,        iemOp_pminud_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3c */  iemOp_InvalidNeedRM,        iemOp_pmaxsb_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3d */  iemOp_InvalidNeedRM,        iemOp_pmaxsd_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3e */  iemOp_InvalidNeedRM,        iemOp_pmaxuw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3f */  iemOp_InvalidNeedRM,        iemOp_pmaxud_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0x40 */  iemOp_InvalidNeedRM,        iemOp_pmulld_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x41 */  iemOp_InvalidNeedRM,        iemOp_phminposuw_Vdq_Wdq,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x42 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x43 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x44 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x45 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x46 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x47 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x48 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x49 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x50 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x51 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x52 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x53 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x54 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x55 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x56 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x57 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x58 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x59 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x5a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x5b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x5c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x5d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x5e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x5f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x60 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x61 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x62 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x63 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x64 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x65 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x66 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x67 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x68 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x69 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x70 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x71 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x72 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x73 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x74 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x75 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x76 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x77 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x78 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x79 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x80 */  iemOp_InvalidNeedRM,        iemOp_invept_Gy_Mdq,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x81 */  iemOp_InvalidNeedRM,        iemOp_invvpid_Gy_Mdq,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x82 */  iemOp_InvalidNeedRM,        iemOp_invpcid_Gy_Mdq,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x83 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x84 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x85 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x86 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x87 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x88 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x89 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x90 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x91 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x92 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x93 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x94 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x95 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x96 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x97 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x98 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x99 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xa0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xaa */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xab */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xac */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xad */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xae */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xaf */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xb0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xba */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xbb */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xbc */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xbd */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xbe */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xbf */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xc0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc8 */  iemOp_sha1nexte_Vdq_Wdq,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xc9 */  iemOp_sha1msg1_Vdq_Wdq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xca */  iemOp_sha1msg2_Vdq_Wdq,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xcb */  iemOp_sha256rnds2_Vdq_Wdq,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xcc */  iemOp_sha256msg1_Vdq_Wdq,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xcd */  iemOp_sha256msg2_Vdq_Wdq,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xce */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xcf */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xd0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xda */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xdb */  iemOp_InvalidNeedRM,        iemOp_aesimc_Vdq_Wdq,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdc */  iemOp_InvalidNeedRM,        iemOp_aesenc_Vdq_Wdq,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdd */  iemOp_InvalidNeedRM,        iemOp_aesenclast_Vdq_Wdq,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xde */  iemOp_InvalidNeedRM,        iemOp_aesdec_Vdq_Wdq,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdf */  iemOp_InvalidNeedRM,        iemOp_aesdeclast_Vdq_Wdq,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0xe0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xea */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xeb */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xec */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xed */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xee */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xef */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xf0 */  iemOp_movbe_Gy_My,          iemOp_movbe_Gw_Mw,          iemOp_InvalidNeedRM,        iemOp_crc32_Gb_Eb,
+    /* 0xf1 */  iemOp_movbe_My_Gy,          iemOp_movbe_Mw_Gw,          iemOp_InvalidNeedRM,        iemOp_crc32_Gv_Ev,
+    /* 0xf2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xf3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xf4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xf5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xf6 */  iemOp_InvalidNeedRM,        iemOp_adcx_Gy_Ey,           iemOp_adox_Gy_Ey,           iemOp_InvalidNeedRM,
+    /* 0xf7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xf8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xf9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xfa */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xfb */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xfc */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xfd */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xfe */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xff */  IEMOP_X4(iemOp_InvalidNeedRM),
+};
+AssertCompile(RT_ELEMENTS(g_apfnThreeByte0f38) == 1024);
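+
+#if 0 /* Editorial sketch, not upstream code: the AssertCompile above pins
+         the table at 1024 == 256 opcode bytes x 4 prefix columns (none,
+         066h, 0f3h, 0f2h -- the column order spelled out in the header row
+         of the 0x0f 0x3a map below).  A flat index into such a table would
+         presumably be computed along these lines; the Sketch* name is
+         hypothetical, not an IEM API: */
+#include <stddef.h>
+#include <stdint.h>
+
+static size_t SketchThreeByteTableIndex(uint8_t bOpcode, unsigned idxPrefix)
+{
+    /* idxPrefix: 0 = no prefix, 1 = 0x66, 2 = 0xf3, 3 = 0xf2 */
+    return (size_t)bOpcode * 4 + idxPrefix;
+}
+#endif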
+
+/** @} */
+
diff --git a/src/VBox/VMM/VMMAll/IEMAllInstructionsThree0f3a.cpp.h b/src/VBox/VMM/VMMAll/IEMAllInstructionsThree0f3a.cpp.h
new file mode 100644
index 0000000..e2b62e4
--- /dev/null
+++ b/src/VBox/VMM/VMMAll/IEMAllInstructionsThree0f3a.cpp.h
@@ -0,0 +1,502 @@
+/* $Id: IEMAllInstructionsThree0f3a.cpp.h $ */
+/** @file
+ * IEM - Instruction Decoding and Emulation, 0x0f 0x3a map.
+ *
+ * @remarks IEMAllInstructionsVexMap3.cpp.h is a VEX mirror of this file.
+ *          Any update here is likely needed in that file too.
+ */
+
+/*
+ * Copyright (C) 2011-2017 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @name Three byte opcodes with first two bytes 0x0f 0x3a
+ * @{
+ */
+
+/*  Opcode 0x66 0x0f 0x00 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x01 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x02 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x03 - invalid */
+/*  Opcode 0x66 0x0f 0x04 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x05 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x06 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x07 - invalid */
+/** Opcode 0x66 0x0f 0x08. */
+FNIEMOP_STUB(iemOp_roundps_Vx_Wx_Ib);
+/** Opcode 0x66 0x0f 0x09. */
+FNIEMOP_STUB(iemOp_roundpd_Vx_Wx_Ib);
+/** Opcode 0x66 0x0f 0x0a. */
+FNIEMOP_STUB(iemOp_roundss_Vss_Wss_Ib);
+/** Opcode 0x66 0x0f 0x0b. */
+FNIEMOP_STUB(iemOp_roundsd_Vsd_Wsd_Ib);
+/** Opcode 0x66 0x0f 0x0c. */
+FNIEMOP_STUB(iemOp_blendps_Vx_Wx_Ib);
+/** Opcode 0x66 0x0f 0x0d. */
+FNIEMOP_STUB(iemOp_blendpd_Vx_Wx_Ib);
+/** Opcode 0x66 0x0f 0x0e. */
+FNIEMOP_STUB(iemOp_blendw_Vx_Wx_Ib);
+/** Opcode      0x0f 0x0f. */
+FNIEMOP_STUB(iemOp_palignr_Pq_Qq_Ib);
+/** Opcode 0x66 0x0f 0x0f. */
+FNIEMOP_STUB(iemOp_palignr_Vx_Wx_Ib);
+
+
+/*  Opcode 0x66 0x0f 0x10 - invalid */
+/*  Opcode 0x66 0x0f 0x11 - invalid */
+/*  Opcode 0x66 0x0f 0x12 - invalid */
+/*  Opcode 0x66 0x0f 0x13 - invalid */
+/** Opcode 0x66 0x0f 0x14. */
+FNIEMOP_STUB(iemOp_pextrb_RdMb_Vdq_Ib);
+/** Opcode 0x66 0x0f 0x15. */
+FNIEMOP_STUB(iemOp_pextrw_RdMw_Vdq_Ib);
+/** Opcode 0x66 0x0f 0x16. */
+FNIEMOP_STUB(iemOp_pextrd_q_RdMw_Vdq_Ib);
+/** Opcode 0x66 0x0f 0x17. */
+FNIEMOP_STUB(iemOp_extractps_Ed_Vdq_Ib);
+/*  Opcode 0x66 0x0f 0x18 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x19 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x1a - invalid */
+/*  Opcode 0x66 0x0f 0x1b - invalid */
+/*  Opcode 0x66 0x0f 0x1c - invalid */
+/*  Opcode 0x66 0x0f 0x1d - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x1e - invalid */
+/*  Opcode 0x66 0x0f 0x1f - invalid */
+
+
+/** Opcode 0x66 0x0f 0x20. */
+FNIEMOP_STUB(iemOp_pinsrb_Vdq_RyMb_Ib);
+/** Opcode 0x66 0x0f 0x21. */
+FNIEMOP_STUB(iemOp_insertps_Vdq_UdqMd_Ib);
+/** Opcode 0x66 0x0f 0x22. */
+FNIEMOP_STUB(iemOp_pinsrd_q_Vdq_Ey_Ib);
+/*  Opcode 0x66 0x0f 0x23 - invalid */
+/*  Opcode 0x66 0x0f 0x24 - invalid */
+/*  Opcode 0x66 0x0f 0x25 - invalid */
+/*  Opcode 0x66 0x0f 0x26 - invalid */
+/*  Opcode 0x66 0x0f 0x27 - invalid */
+/*  Opcode 0x66 0x0f 0x28 - invalid */
+/*  Opcode 0x66 0x0f 0x29 - invalid */
+/*  Opcode 0x66 0x0f 0x2a - invalid */
+/*  Opcode 0x66 0x0f 0x2b - invalid */
+/*  Opcode 0x66 0x0f 0x2c - invalid */
+/*  Opcode 0x66 0x0f 0x2d - invalid */
+/*  Opcode 0x66 0x0f 0x2e - invalid */
+/*  Opcode 0x66 0x0f 0x2f - invalid */
+
+
+/*  Opcode 0x66 0x0f 0x30 - invalid */
+/*  Opcode 0x66 0x0f 0x31 - invalid */
+/*  Opcode 0x66 0x0f 0x32 - invalid */
+/*  Opcode 0x66 0x0f 0x33 - invalid */
+/*  Opcode 0x66 0x0f 0x34 - invalid */
+/*  Opcode 0x66 0x0f 0x35 - invalid */
+/*  Opcode 0x66 0x0f 0x36 - invalid */
+/*  Opcode 0x66 0x0f 0x37 - invalid */
+/*  Opcode 0x66 0x0f 0x38 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x39 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x3a - invalid */
+/*  Opcode 0x66 0x0f 0x3b - invalid */
+/*  Opcode 0x66 0x0f 0x3c - invalid */
+/*  Opcode 0x66 0x0f 0x3d - invalid */
+/*  Opcode 0x66 0x0f 0x3e - invalid */
+/*  Opcode 0x66 0x0f 0x3f - invalid */
+
+
+/** Opcode 0x66 0x0f 0x40. */
+FNIEMOP_STUB(iemOp_dpps_Vx_Wx_Ib);
+/** Opcode 0x66 0x0f 0x41. */
+FNIEMOP_STUB(iemOp_dppd_Vdq_Wdq_Ib);
+/** Opcode 0x66 0x0f 0x42. */
+FNIEMOP_STUB(iemOp_mpsadbw_Vx_Wx_Ib);
+/*  Opcode 0x66 0x0f 0x43 - invalid */
+/** Opcode 0x66 0x0f 0x44. */
+FNIEMOP_STUB(iemOp_pclmulqdq_Vdq_Wdq_Ib);
+/*  Opcode 0x66 0x0f 0x45 - invalid */
+/*  Opcode 0x66 0x0f 0x46 - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x47 - invalid */
+/*  Opcode 0x66 0x0f 0x48 - invalid */
+/*  Opcode 0x66 0x0f 0x49 - invalid */
+/*  Opcode 0x66 0x0f 0x4a - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x4b - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x4c - invalid (vex only). */
+/*  Opcode 0x66 0x0f 0x4d - invalid */
+/*  Opcode 0x66 0x0f 0x4e - invalid */
+/*  Opcode 0x66 0x0f 0x4f - invalid */
+
+
+/*  Opcode 0x66 0x0f 0x50 - invalid */
+/*  Opcode 0x66 0x0f 0x51 - invalid */
+/*  Opcode 0x66 0x0f 0x52 - invalid */
+/*  Opcode 0x66 0x0f 0x53 - invalid */
+/*  Opcode 0x66 0x0f 0x54 - invalid */
+/*  Opcode 0x66 0x0f 0x55 - invalid */
+/*  Opcode 0x66 0x0f 0x56 - invalid */
+/*  Opcode 0x66 0x0f 0x57 - invalid */
+/*  Opcode 0x66 0x0f 0x58 - invalid */
+/*  Opcode 0x66 0x0f 0x59 - invalid */
+/*  Opcode 0x66 0x0f 0x5a - invalid */
+/*  Opcode 0x66 0x0f 0x5b - invalid */
+/*  Opcode 0x66 0x0f 0x5c - invalid */
+/*  Opcode 0x66 0x0f 0x5d - invalid */
+/*  Opcode 0x66 0x0f 0x5e - invalid */
+/*  Opcode 0x66 0x0f 0x5f - invalid */
+
+
+/** Opcode 0x66 0x0f 0x60. */
+FNIEMOP_STUB(iemOp_pcmpestrm_Vdq_Wdq_Ib);
+/** Opcode 0x66 0x0f 0x61. */
+FNIEMOP_STUB(iemOp_pcmpestri_Vdq_Wdq_Ib);
+/** Opcode 0x66 0x0f 0x62. */
+FNIEMOP_STUB(iemOp_pcmpistrm_Vdq_Wdq_Ib);
+/** Opcode 0x66 0x0f 0x63. */
+FNIEMOP_STUB(iemOp_pcmpistri_Vdq_Wdq_Ib);
+/*  Opcode 0x66 0x0f 0x64 - invalid */
+/*  Opcode 0x66 0x0f 0x65 - invalid */
+/*  Opcode 0x66 0x0f 0x66 - invalid */
+/*  Opcode 0x66 0x0f 0x67 - invalid */
+/*  Opcode 0x66 0x0f 0x68 - invalid */
+/*  Opcode 0x66 0x0f 0x69 - invalid */
+/*  Opcode 0x66 0x0f 0x6a - invalid */
+/*  Opcode 0x66 0x0f 0x6b - invalid */
+/*  Opcode 0x66 0x0f 0x6c - invalid */
+/*  Opcode 0x66 0x0f 0x6d - invalid */
+/*  Opcode 0x66 0x0f 0x6e - invalid */
+/*  Opcode 0x66 0x0f 0x6f - invalid */
+
+/*  Opcodes 0x0f 0x70 through 0x0f 0xbf are unused.  */
+
+
+/*  Opcode      0x0f 0xc0 - invalid */
+/*  Opcode      0x0f 0xc1 - invalid */
+/*  Opcode      0x0f 0xc2 - invalid */
+/*  Opcode      0x0f 0xc3 - invalid */
+/*  Opcode      0x0f 0xc4 - invalid */
+/*  Opcode      0x0f 0xc5 - invalid */
+/*  Opcode      0x0f 0xc6 - invalid */
+/*  Opcode      0x0f 0xc7 - invalid */
+/*  Opcode      0x0f 0xc8 - invalid */
+/*  Opcode      0x0f 0xc9 - invalid */
+/*  Opcode      0x0f 0xca - invalid */
+/*  Opcode      0x0f 0xcb - invalid */
+/** Opcode      0x0f 0xcc. */
+FNIEMOP_STUB(iemOp_sha1rnds4_Vdq_Wdq_Ib);
+/*  Opcode      0x0f 0xcd - invalid */
+/*  Opcode      0x0f 0xce - invalid */
+/*  Opcode      0x0f 0xcf - invalid */
+
+
+/*  Opcode 0x66 0x0f 0xd0 - invalid */
+/*  Opcode 0x66 0x0f 0xd1 - invalid */
+/*  Opcode 0x66 0x0f 0xd2 - invalid */
+/*  Opcode 0x66 0x0f 0xd3 - invalid */
+/*  Opcode 0x66 0x0f 0xd4 - invalid */
+/*  Opcode 0x66 0x0f 0xd5 - invalid */
+/*  Opcode 0x66 0x0f 0xd6 - invalid */
+/*  Opcode 0x66 0x0f 0xd7 - invalid */
+/*  Opcode 0x66 0x0f 0xd8 - invalid */
+/*  Opcode 0x66 0x0f 0xd9 - invalid */
+/*  Opcode 0x66 0x0f 0xda - invalid */
+/*  Opcode 0x66 0x0f 0xdb - invalid */
+/*  Opcode 0x66 0x0f 0xdc - invalid */
+/*  Opcode 0x66 0x0f 0xdd - invalid */
+/*  Opcode 0x66 0x0f 0xde - invalid */
+/** Opcode 0x66 0x0f 0xdf (aeskeygenassist). */
+FNIEMOP_STUB(iemOp_aeskeygen_Vdq_Wdq_Ib);
+
+
+/*  Opcode 0xf2 0x0f 0xf0 - invalid (vex only). */
+
+
+/**
+ * Three byte opcode map, first two bytes are 0x0f 0x3a.
+ * @sa      g_apfnVexMap2
+ */
+IEM_STATIC const PFNIEMOP g_apfnThreeByte0f3a[] =
+{
+    /*          no prefix,                  066h prefix,                f3h prefix,                 f2h prefix */
+    /* 0x00 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x01 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x02 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x03 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x04 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x05 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x06 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x07 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x08 */  iemOp_InvalidNeedRMImm8,    iemOp_roundps_Vx_Wx_Ib,     iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x09 */  iemOp_InvalidNeedRMImm8,    iemOp_roundpd_Vx_Wx_Ib,     iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0a */  iemOp_InvalidNeedRMImm8,    iemOp_roundss_Vss_Wss_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0b */  iemOp_InvalidNeedRMImm8,    iemOp_roundsd_Vsd_Wsd_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0c */  iemOp_InvalidNeedRMImm8,    iemOp_blendps_Vx_Wx_Ib,     iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0d */  iemOp_InvalidNeedRMImm8,    iemOp_blendpd_Vx_Wx_Ib,     iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0e */  iemOp_InvalidNeedRMImm8,    iemOp_blendw_Vx_Wx_Ib,      iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0f */  iemOp_palignr_Pq_Qq_Ib,     iemOp_palignr_Vx_Wx_Ib,     iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+
+    /* 0x10 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x11 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x12 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x13 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x14 */  iemOp_InvalidNeedRMImm8,    iemOp_pextrb_RdMb_Vdq_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x15 */  iemOp_InvalidNeedRMImm8,    iemOp_pextrw_RdMw_Vdq_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x16 */  iemOp_InvalidNeedRMImm8,    iemOp_pextrd_q_RdMw_Vdq_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x17 */  iemOp_InvalidNeedRMImm8,    iemOp_extractps_Ed_Vdq_Ib,  iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x18 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x19 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x1a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x1b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x1c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x1d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x1e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x1f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x20 */  iemOp_InvalidNeedRMImm8,    iemOp_pinsrb_Vdq_RyMb_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x21 */  iemOp_InvalidNeedRMImm8,    iemOp_insertps_Vdq_UdqMd_Ib,iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x22 */  iemOp_InvalidNeedRMImm8,    iemOp_pinsrd_q_Vdq_Ey_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x23 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x24 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x25 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x26 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x27 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x28 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x29 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x30 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x31 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x32 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x33 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x34 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x35 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x36 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x37 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x38 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x39 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x40 */  iemOp_InvalidNeedRMImm8,    iemOp_dpps_Vx_Wx_Ib,        iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x41 */  iemOp_InvalidNeedRMImm8,    iemOp_dppd_Vdq_Wdq_Ib,      iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x42 */  iemOp_InvalidNeedRMImm8,    iemOp_mpsadbw_Vx_Wx_Ib,     iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x43 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x44 */  iemOp_InvalidNeedRMImm8,    iemOp_pclmulqdq_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x45 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x46 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x47 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x48 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x49 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x4a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x4b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x4c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x4d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x4e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x4f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x50 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x51 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x52 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x53 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x54 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x55 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x56 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x57 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x58 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x59 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x5a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x5b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x5c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x5d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x5e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x5f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x60 */  iemOp_InvalidNeedRMImm8,    iemOp_pcmpestrm_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x61 */  iemOp_InvalidNeedRMImm8,    iemOp_pcmpestri_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x62 */  iemOp_InvalidNeedRMImm8,    iemOp_pcmpistrm_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x63 */  iemOp_InvalidNeedRMImm8,    iemOp_pcmpistri_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x64 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x65 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x66 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x67 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x68 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x69 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x6a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x6b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x6c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x6d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x6e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x6f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x70 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x71 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x72 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x73 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x74 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x75 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x76 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x77 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x78 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x79 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x7a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x7b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x7c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x7d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x7e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x7f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x80 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x81 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x82 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x83 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x84 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x85 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x86 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x87 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x88 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x89 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x90 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x91 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x92 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x93 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x94 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x95 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x96 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x97 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x98 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x99 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0xa0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xaa */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xab */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xac */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xad */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xae */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xaf */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0xb0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xba */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xbb */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xbc */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xbd */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xbe */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xbf */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0xc0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xca */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xcb */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xcc */  iemOp_sha1rnds4_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0xcd */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xce */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xcf */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0xd0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xda */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xdb */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xdc */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xdd */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xde */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xdf */  iemOp_InvalidNeedRMImm8,    iemOp_aeskeygen_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+
+    /* 0xe0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xea */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xeb */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xec */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xed */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xee */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xef */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0xf0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xfa */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xfb */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xfc */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xfd */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xfe */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xff */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+};
+AssertCompile(RT_ELEMENTS(g_apfnThreeByte0f3a) == 1024);
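+
+#if 0 /* Editorial note, not upstream code: every instruction in this map
+         carries a ModR/M byte plus an 8-bit immediate, which appears to be
+         why even the invalid slots use iemOp_InvalidNeedRMImm8 rather than
+         a bare #UD handler: the operand bytes still have to be consumed so
+         the decoded instruction length comes out right.  A self-contained,
+         hypothetical outline of that idea (the Sketch* names are not IEM
+         APIs): */
+#include <stdint.h>
+
+static uint8_t SketchFetchByte(const uint8_t **ppbInstr)
+{
+    return *(*ppbInstr)++;
+}
+
+static int SketchInvalidNeedRMImm8(const uint8_t *pbOperands)
+{
+    uint8_t bRm   = SketchFetchByte(&pbOperands); /* ModR/M (SIB/disp omitted for brevity) */
+    uint8_t bImm8 = SketchFetchByte(&pbOperands); /* trailing 8-bit immediate */
+    (void)bRm; (void)bImm8;
+    return -1; /* stand-in for raising #UD once the full length is known */
+}
+#endif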
+
+/** @} */
+
diff --git a/src/VBox/VMM/VMMAll/IEMAllInstructionsTwoByte0f.cpp.h b/src/VBox/VMM/VMMAll/IEMAllInstructionsTwoByte0f.cpp.h
new file mode 100644
index 0000000..0bb58a2
--- /dev/null
+++ b/src/VBox/VMM/VMMAll/IEMAllInstructionsTwoByte0f.cpp.h
@@ -0,0 +1,8538 @@
+/* $Id: IEMAllInstructionsTwoByte0f.cpp.h $ */
+/** @file
+ * IEM - Instruction Decoding and Emulation.
+ *
+ * @remarks IEMAllInstructionsVexMap1.cpp.h is a VEX mirror of this file.
+ *          Any update here is likely needed in that file too.
+ */
+
+/*
+ * Copyright (C) 2011-2017 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @name Two byte opcodes (first byte 0x0f).
+ *
+ * @{
+ */
+
+/** Opcode 0x0f 0x00 /0. */
+FNIEMOPRM_DEF(iemOp_Grp6_sldt)
+{
+    IEMOP_MNEMONIC(sldt, "sldt Rv/Mw");
+    IEMOP_HLP_MIN_286();
+    IEMOP_HLP_NO_REAL_OR_V86_MODE();
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DECODED_NL_1(OP_SLDT, IEMOPFORM_M_REG, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
+        IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_LDTR_READS, SVM_EXIT_LDTR_READ, 0, 0);
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint16_t, u16Ldtr);
+                IEM_MC_FETCH_LDTR_U16(u16Ldtr);
+                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Ldtr);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint32_t, u32Ldtr);
+                IEM_MC_FETCH_LDTR_U32(u32Ldtr);
+                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Ldtr);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint64_t, u64Ldtr);
+                IEM_MC_FETCH_LDTR_U64(u64Ldtr);
+                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Ldtr);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint16_t, u16Ldtr);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DECODED_NL_1(OP_SLDT, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
+        IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_LDTR_READS, SVM_EXIT_LDTR_READ, 0, 0);
+        IEM_MC_FETCH_LDTR_U16(u16Ldtr);
+        IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Ldtr);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
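+
+#if 0 /* Editorial sketch, not upstream code: the recurring test
+         (bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT)
+         reads the ModR/M mod field in the top two bits; mod == 3 selects
+         the register operand form, anything else a memory operand.  This
+         assumes the conventional encoding (mask 0xc0, shift 6) for the
+         X86_MODRM_* constants; the Sketch* name is hypothetical: */
+#include <stdint.h>
+
+static int SketchIsRegisterForm(uint8_t bRm)
+{
+    return (bRm >> 6) == 3; /* mod: 0..2 -> memory form, 3 -> register form */
+}
+#endif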
+
+
+/** Opcode 0x0f 0x00 /1. */
+FNIEMOPRM_DEF(iemOp_Grp6_str)
+{
+    IEMOP_MNEMONIC(str, "str Rv/Mw");
+    IEMOP_HLP_MIN_286();
+    IEMOP_HLP_NO_REAL_OR_V86_MODE();
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DECODED_NL_1(OP_STR, IEMOPFORM_M_REG, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
+        IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_TR_READS, SVM_EXIT_TR_READ, 0, 0);
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint16_t, u16Tr);
+                IEM_MC_FETCH_TR_U16(u16Tr);
+                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Tr);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint32_t, u32Tr);
+                IEM_MC_FETCH_TR_U32(u32Tr);
+                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tr);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint64_t, u64Tr);
+                IEM_MC_FETCH_TR_U64(u64Tr);
+                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tr);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint16_t, u16Tr);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DECODED_NL_1(OP_STR, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
+        IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_TR_READS, SVM_EXIT_TR_READ, 0, 0);
+        IEM_MC_FETCH_TR_U16(u16Tr);
+        IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Tr);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x00 /2. */
+FNIEMOPRM_DEF(iemOp_Grp6_lldt)
+{
+    IEMOP_MNEMONIC(lldt, "lldt Ew");
+    IEMOP_HLP_MIN_286();
+    IEMOP_HLP_NO_REAL_OR_V86_MODE();
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DECODED_NL_1(OP_LLDT, IEMOPFORM_M_REG, OP_PARM_Ew, DISOPTYPE_DANGEROUS);
+        IEM_MC_BEGIN(1, 0);
+        IEM_MC_ARG(uint16_t, u16Sel, 0);
+        IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_CIMPL_1(iemCImpl_lldt, u16Sel);
+        IEM_MC_END();
+    }
+    else
+    {
+        IEM_MC_BEGIN(1, 1);
+        IEM_MC_ARG(uint16_t, u16Sel, 0);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DECODED_NL_1(OP_LLDT, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS);
+        IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO(); /** @todo test order */
+        IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_CALL_CIMPL_1(iemCImpl_lldt, u16Sel);
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x00 /3. */
+FNIEMOPRM_DEF(iemOp_Grp6_ltr)
+{
+    IEMOP_MNEMONIC(ltr, "ltr Ew");
+    IEMOP_HLP_MIN_286();
+    IEMOP_HLP_NO_REAL_OR_V86_MODE();
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(1, 0);
+        IEM_MC_ARG(uint16_t, u16Sel, 0);
+        IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_CIMPL_1(iemCImpl_ltr, u16Sel);
+        IEM_MC_END();
+    }
+    else
+    {
+        IEM_MC_BEGIN(1, 1);
+        IEM_MC_ARG(uint16_t, u16Sel, 0);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO(); /** @todo test order */
+        IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_CALL_CIMPL_1(iemCImpl_ltr, u16Sel);
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x00 /4 and /5, common worker for verr and verw. */
+FNIEMOP_DEF_2(iemOpCommonGrp6VerX, uint8_t, bRm, bool, fWrite)
+{
+    IEMOP_HLP_MIN_286();
+    IEMOP_HLP_NO_REAL_OR_V86_MODE();
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DECODED_NL_1(fWrite ? OP_VERW : OP_VERR, IEMOPFORM_M_REG, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(uint16_t,    u16Sel,            0);
+        IEM_MC_ARG_CONST(bool,  fWriteArg, fWrite, 1);
+        IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_CIMPL_2(iemCImpl_VerX, u16Sel, fWriteArg);
+        IEM_MC_END();
+    }
+    else
+    {
+        IEM_MC_BEGIN(2, 1);
+        IEM_MC_ARG(uint16_t,    u16Sel,            0);
+        IEM_MC_ARG_CONST(bool,  fWriteArg, fWrite, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DECODED_NL_1(fWrite ? OP_VERW : OP_VERR, IEMOPFORM_M_MEM, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
+        IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_CALL_CIMPL_2(iemCImpl_VerX, u16Sel, fWriteArg);
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x00 /4. */
+FNIEMOPRM_DEF(iemOp_Grp6_verr)
+{
+    IEMOP_MNEMONIC(verr, "verr Ew");
+    IEMOP_HLP_MIN_286();
+    return FNIEMOP_CALL_2(iemOpCommonGrp6VerX, bRm, false);
+}
+
+
+/** Opcode 0x0f 0x00 /5. */
+FNIEMOPRM_DEF(iemOp_Grp6_verw)
+{
+    IEMOP_MNEMONIC(verw, "verw Ew");
+    IEMOP_HLP_MIN_286();
+    return FNIEMOP_CALL_2(iemOpCommonGrp6VerX, bRm, true);
+}
+
+
+/**
+ * Group 6 jump table.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnGroup6[8] =
+{
+    iemOp_Grp6_sldt,
+    iemOp_Grp6_str,
+    iemOp_Grp6_lldt,
+    iemOp_Grp6_ltr,
+    iemOp_Grp6_verr,
+    iemOp_Grp6_verw,
+    iemOp_InvalidWithRM,
+    iemOp_InvalidWithRM
+};
+
+/** Opcode 0x0f 0x00. */
+FNIEMOP_DEF(iemOp_Grp6)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    return FNIEMOP_CALL_1(g_apfnGroup6[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK], bRm);
+}
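+
+#if 0 /* Editorial sketch, not upstream code: group dispatch keys on the
+         ModR/M reg field (bits 5:3), i.e. the /0../7 digit used in the
+         opcode comments, which is what the expression
+         (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK extracts above.
+         This assumes the conventional shift of 3 and mask of 7; the
+         Sketch* name is hypothetical: */
+#include <stdint.h>
+
+static unsigned SketchGrp6Digit(uint8_t bRm)
+{
+    /* 0=sldt 1=str 2=lldt 3=ltr 4=verr 5=verw 6,7=invalid, per g_apfnGroup6 */
+    return (bRm >> 3) & 7;
+}
+#endif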
+
+
+/** Opcode 0x0f 0x01 /0. */
+FNIEMOP_DEF_1(iemOp_Grp7_sgdt, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC(sgdt, "sgdt Ms");
+    IEMOP_HLP_MIN_286();
+    IEMOP_HLP_64BIT_OP_SIZE();
+    IEM_MC_BEGIN(2, 1);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                    0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                                1);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_2(iemCImpl_sgdt, iEffSeg, GCPtrEffSrc);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x01 /0. */
+FNIEMOP_DEF(iemOp_Grp7_vmcall)
+{
+    IEMOP_BITCH_ABOUT_STUB();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /0. */
+FNIEMOP_DEF(iemOp_Grp7_vmlaunch)
+{
+    IEMOP_BITCH_ABOUT_STUB();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /0. */
+FNIEMOP_DEF(iemOp_Grp7_vmresume)
+{
+    IEMOP_BITCH_ABOUT_STUB();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /0. */
+FNIEMOP_DEF(iemOp_Grp7_vmxoff)
+{
+    IEMOP_BITCH_ABOUT_STUB();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /1. */
+FNIEMOP_DEF_1(iemOp_Grp7_sidt, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC(sidt, "sidt Ms");
+    IEMOP_HLP_MIN_286();
+    IEMOP_HLP_64BIT_OP_SIZE();
+    IEM_MC_BEGIN(2, 1);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                    0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                                1);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_2(iemCImpl_sidt, iEffSeg, GCPtrEffSrc);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x01 /1. */
+FNIEMOP_DEF(iemOp_Grp7_monitor)
+{
+    IEMOP_MNEMONIC(monitor, "monitor");
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX(); /** @todo Verify that monitor is allergic to lock prefixes. */
+    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_monitor, pVCpu->iem.s.iEffSeg);
+}
+
+
+/** Opcode 0x0f 0x01 /1. */
+FNIEMOP_DEF(iemOp_Grp7_mwait)
+{
+    IEMOP_MNEMONIC(mwait, "mwait"); /** @todo Verify that mwait is allergic to lock prefixes. */
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_mwait);
+}
+
+
+/** Opcode 0x0f 0x01 /2. */
+FNIEMOP_DEF_1(iemOp_Grp7_lgdt, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC(lgdt, "lgdt");
+    IEMOP_HLP_64BIT_OP_SIZE();
+    IEM_MC_BEGIN(3, 1);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                    0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                                1);
+    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSizeArg,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_3(iemCImpl_lgdt, iEffSeg, GCPtrEffSrc, enmEffOpSizeArg);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x01 0xd0. */
+FNIEMOP_DEF(iemOp_Grp7_xgetbv)
+{
+    IEMOP_MNEMONIC(xgetbv, "xgetbv");
+    if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fXSaveRstor)
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
+        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_xgetbv);
+    }
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 0xd1. */
+FNIEMOP_DEF(iemOp_Grp7_xsetbv)
+{
+    IEMOP_MNEMONIC(xsetbv, "xsetbv");
+    if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fXSaveRstor)
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES();
+        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_xsetbv);
+    }
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /3. */
+FNIEMOP_DEF_1(iemOp_Grp7_lidt, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC(lidt, "lidt");
+    IEMMODE enmEffOpSize = pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT
+                         ? IEMMODE_64BIT
+                         : pVCpu->iem.s.enmEffOpSize;
+    IEM_MC_BEGIN(3, 1);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                            0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                        1);
+    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSizeArg,/*=*/enmEffOpSize,  2);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_3(iemCImpl_lidt, iEffSeg, GCPtrEffSrc, enmEffOpSizeArg);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/** Opcode 0x0f 0x01 0xd8. */
+FNIEMOP_DEF(iemOp_Grp7_Amd_vmrun)
+{
+    IEMOP_MNEMONIC(vmrun, "vmrun");
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_vmrun);
+}
+
+/** Opcode 0x0f 0x01 0xd9. */
+FNIEMOP_DEF(iemOp_Grp7_Amd_vmmcall)
+{
+    IEMOP_MNEMONIC(vmmcall, "vmmcall");
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_vmmcall);
+}
+
+
+/** Opcode 0x0f 0x01 0xda. */
+FNIEMOP_DEF(iemOp_Grp7_Amd_vmload)
+{
+    IEMOP_MNEMONIC(vmload, "vmload");
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_vmload);
+}
+
+
+/** Opcode 0x0f 0x01 0xdb. */
+FNIEMOP_DEF(iemOp_Grp7_Amd_vmsave)
+{
+    IEMOP_MNEMONIC(vmsave, "vmsave");
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_vmsave);
+}
+
+
+/** Opcode 0x0f 0x01 0xdc. */
+FNIEMOP_DEF(iemOp_Grp7_Amd_stgi)
+{
+    IEMOP_MNEMONIC(stgi, "stgi");
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stgi);
+}
+
+
+/** Opcode 0x0f 0x01 0xdd. */
+FNIEMOP_DEF(iemOp_Grp7_Amd_clgi)
+{
+    IEMOP_MNEMONIC(clgi, "clgi");
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_clgi);
+}
+
+
+/** Opcode 0x0f 0x01 0xdf. */
+FNIEMOP_DEF(iemOp_Grp7_Amd_invlpga)
+{
+    IEMOP_MNEMONIC(invlpga, "invlpga");
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_invlpga);
+}
+
+
+/** Opcode 0x0f 0x01 0xde. */
+FNIEMOP_DEF(iemOp_Grp7_Amd_skinit)
+{
+    IEMOP_MNEMONIC(skinit, "skinit");
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_skinit);
+}
+#else
+/** Opcode 0x0f 0x01 0xd8. */
+FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmrun);
+
+/** Opcode 0x0f 0x01 0xd9. */
+FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmmcall);
+
+/** Opcode 0x0f 0x01 0xda. */
+FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmload);
+
+/** Opcode 0x0f 0x01 0xdb. */
+FNIEMOP_UD_STUB(iemOp_Grp7_Amd_vmsave);
+
+/** Opcode 0x0f 0x01 0xdc. */
+FNIEMOP_UD_STUB(iemOp_Grp7_Amd_stgi);
+
+/** Opcode 0x0f 0x01 0xdd. */
+FNIEMOP_UD_STUB(iemOp_Grp7_Amd_clgi);
+
+/** Opcode 0x0f 0x01 0xdf. */
+FNIEMOP_UD_STUB(iemOp_Grp7_Amd_invlpga);
+
+/** Opcode 0x0f 0x01 0xde. */
+FNIEMOP_UD_STUB(iemOp_Grp7_Amd_skinit);
+#endif /* VBOX_WITH_NESTED_HWVIRT */
+
+/** Opcode 0x0f 0x01 /4. */
+FNIEMOP_DEF_1(iemOp_Grp7_smsw, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC(smsw, "smsw");
+    IEMOP_HLP_MIN_286();
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEMOP_HLP_SVM_READ_CR_INTERCEPT(pVCpu, /*cr*/ 0, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint16_t, u16Tmp);
+                IEM_MC_FETCH_CR0_U16(u16Tmp);
+                if (IEM_GET_TARGET_CPU(pVCpu) > IEMTARGETCPU_386)
+                { /* likely */ }
+                else if (IEM_GET_TARGET_CPU(pVCpu) >= IEMTARGETCPU_386)
+                    IEM_MC_OR_LOCAL_U16(u16Tmp, 0xffe0);
+                else
+                    IEM_MC_OR_LOCAL_U16(u16Tmp, 0xfff0);
+                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u16Tmp);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint32_t, u32Tmp);
+                IEM_MC_FETCH_CR0_U32(u32Tmp);
+                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tmp);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint64_t, u64Tmp);
+                IEM_MC_FETCH_CR0_U64(u64Tmp);
+                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tmp);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /* Ignore operand size here; memory refs are always 16-bit. */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint16_t, u16Tmp);
+        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEMOP_HLP_SVM_READ_CR_INTERCEPT(pVCpu, /*cr*/ 0, 0 /* uExitInfo1 */, 0 /* uExitInfo2 */);
+        IEM_MC_FETCH_CR0_U16(u16Tmp);
+        if (IEM_GET_TARGET_CPU(pVCpu) > IEMTARGETCPU_386)
+        { /* likely */ }
+        else if (pVCpu->iem.s.uTargetCpu >= IEMTARGETCPU_386)
+            IEM_MC_OR_LOCAL_U16(u16Tmp, 0xffe0);
+        else
+            IEM_MC_OR_LOCAL_U16(u16Tmp, 0xfff0);
+        IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Tmp);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+        return VINF_SUCCESS;
+    }
+}
+
+
+/** Opcode 0x0f 0x01 /6. */
+FNIEMOP_DEF_1(iemOp_Grp7_lmsw, uint8_t, bRm)
+{
+    /* The operand size is effectively ignored; everything is 16-bit and only
+       the lower 3 bits are used. */
+    IEMOP_MNEMONIC(lmsw, "lmsw");
+    IEMOP_HLP_MIN_286();
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(1, 0);
+        IEM_MC_ARG(uint16_t, u16Tmp, 0);
+        IEM_MC_FETCH_GREG_U16(u16Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_CIMPL_1(iemCImpl_lmsw, u16Tmp);
+        IEM_MC_END();
+    }
+    else
+    {
+        IEM_MC_BEGIN(1, 1);
+        IEM_MC_ARG(uint16_t, u16Tmp, 0);
+        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_FETCH_MEM_U16(u16Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+        IEM_MC_CALL_CIMPL_1(iemCImpl_lmsw, u16Tmp);
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x01 /7. */
+FNIEMOP_DEF_1(iemOp_Grp7_invlpg, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC(invlpg, "invlpg");
+    IEMOP_HLP_MIN_486();
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_BEGIN(1, 1);
+    IEM_MC_ARG(RTGCPTR, GCPtrEffDst, 0);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+    IEM_MC_CALL_CIMPL_1(iemCImpl_invlpg, GCPtrEffDst);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x01 /7. */
+FNIEMOP_DEF(iemOp_Grp7_swapgs)
+{
+    IEMOP_MNEMONIC(swapgs, "swapgs");
+    IEMOP_HLP_ONLY_64BIT();
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_swapgs);
+}
+
+
+/** Opcode 0x0f 0x01 /7. */
+FNIEMOP_DEF(iemOp_Grp7_rdtscp)
+{
+    IEMOP_MNEMONIC(rdtscp, "rdtscp");
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    /** @todo SVM intercept removal from here. */
+    IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_RDTSCP, SVM_EXIT_RDTSCP, 0, 0);
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rdtscp);
+}
+
+
+/**
+ * Group 7 jump table, memory variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnGroup7Mem[8] =
+{
+    iemOp_Grp7_sgdt,
+    iemOp_Grp7_sidt,
+    iemOp_Grp7_lgdt,
+    iemOp_Grp7_lidt,
+    iemOp_Grp7_smsw,
+    iemOp_InvalidWithRM,
+    iemOp_Grp7_lmsw,
+    iemOp_Grp7_invlpg
+};
+
+
+/** Opcode 0x0f 0x01. */
+FNIEMOP_DEF(iemOp_Grp7)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+        return FNIEMOP_CALL_1(g_apfnGroup7Mem[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK], bRm);
+
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0:
+            switch (bRm & X86_MODRM_RM_MASK)
+            {
+                case 1: return FNIEMOP_CALL(iemOp_Grp7_vmcall);
+                case 2: return FNIEMOP_CALL(iemOp_Grp7_vmlaunch);
+                case 3: return FNIEMOP_CALL(iemOp_Grp7_vmresume);
+                case 4: return FNIEMOP_CALL(iemOp_Grp7_vmxoff);
+            }
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        case 1:
+            switch (bRm & X86_MODRM_RM_MASK)
+            {
+                case 0: return FNIEMOP_CALL(iemOp_Grp7_monitor);
+                case 1: return FNIEMOP_CALL(iemOp_Grp7_mwait);
+            }
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        case 2:
+            switch (bRm & X86_MODRM_RM_MASK)
+            {
+                case 0: return FNIEMOP_CALL(iemOp_Grp7_xgetbv);
+                case 1: return FNIEMOP_CALL(iemOp_Grp7_xsetbv);
+            }
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        case 3:
+            switch (bRm & X86_MODRM_RM_MASK)
+            {
+                case 0: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmrun);
+                case 1: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmmcall);
+                case 2: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmload);
+                case 3: return FNIEMOP_CALL(iemOp_Grp7_Amd_vmsave);
+                case 4: return FNIEMOP_CALL(iemOp_Grp7_Amd_stgi);
+                case 5: return FNIEMOP_CALL(iemOp_Grp7_Amd_clgi);
+                case 6: return FNIEMOP_CALL(iemOp_Grp7_Amd_skinit);
+                case 7: return FNIEMOP_CALL(iemOp_Grp7_Amd_invlpga);
+                IEM_NOT_REACHED_DEFAULT_CASE_RET();
+            }
+
+        case 4:
+            return FNIEMOP_CALL_1(iemOp_Grp7_smsw, bRm);
+
+        case 5:
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        case 6:
+            return FNIEMOP_CALL_1(iemOp_Grp7_lmsw, bRm);
+
+        case 7:
+            switch (bRm & X86_MODRM_RM_MASK)
+            {
+                case 0: return FNIEMOP_CALL(iemOp_Grp7_swapgs);
+                case 1: return FNIEMOP_CALL(iemOp_Grp7_rdtscp);
+            }
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
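+
+#if 0 /* Editorial sketch, not upstream code: for the register forms handled
+         above, the entire ModR/M byte names a single instruction.  E.g.
+         vmrun is documented as 0x0f 0x01 0xd8, and 0xd8 splits into mod=3,
+         reg=3, rm=0, matching "case 3" / "case 0" in iemOp_Grp7.  A small
+         self-contained check of that arithmetic (Sketch* is hypothetical): */
+#include <assert.h>
+
+static void SketchCheckVmrunEncoding(void)
+{
+    unsigned const bRm = 0xd8;
+    assert( (bRm >> 6)      == 3);  /* mod == 3: register form          */
+    assert(((bRm >> 3) & 7) == 3);  /* reg == 3: the AMD SVM sub-group  */
+    assert( (bRm & 7)       == 0);  /* rm  == 0: vmrun within the group */
+}
+#endif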
+
+/** Common worker for opcodes 0x0f 0x02 (lar) and 0x0f 0x03 (lsl). */
+FNIEMOP_DEF_1(iemOpCommonLarLsl_Gv_Ew, bool, fIsLar)
+{
+    IEMOP_HLP_NO_REAL_OR_V86_MODE();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DECODED_NL_2(fIsLar ? OP_LAR : OP_LSL, IEMOPFORM_RM_REG, OP_PARM_Gv, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+            {
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *,  pu16Dst,           0);
+                IEM_MC_ARG(uint16_t,    u16Sel,            1);
+                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
+
+                IEM_MC_REF_GREG_U16(pu16Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u16, pu16Dst, u16Sel, fIsLarArg);
+
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            case IEMMODE_32BIT:
+            case IEMMODE_64BIT:
+            {
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *,  pu64Dst,           0);
+                IEM_MC_ARG(uint16_t,    u16Sel,            1);
+                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
+
+                IEM_MC_REF_GREG_U64(pu64Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_FETCH_GREG_U16(u16Sel, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u64, pu64Dst, u16Sel, fIsLarArg);
+
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+            {
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint16_t *,  pu16Dst,           0);
+                IEM_MC_ARG(uint16_t,    u16Sel,            1);
+                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
+                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+                IEMOP_HLP_DECODED_NL_2(fIsLar ? OP_LAR : OP_LSL, IEMOPFORM_RM_MEM, OP_PARM_Gv, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
+
+                IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+                IEM_MC_REF_GREG_U16(pu16Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u16, pu16Dst, u16Sel, fIsLarArg);
+
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            case IEMMODE_32BIT:
+            case IEMMODE_64BIT:
+            {
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint64_t *,  pu64Dst,           0);
+                IEM_MC_ARG(uint16_t,    u16Sel,            1);
+                IEM_MC_ARG_CONST(bool,  fIsLarArg, fIsLar, 2);
+                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+                IEMOP_HLP_DECODED_NL_2(fIsLar ? OP_LAR : OP_LSL, IEMOPFORM_RM_MEM, OP_PARM_Gv, OP_PARM_Ew, DISOPTYPE_DANGEROUS | DISOPTYPE_PRIVILEGED_NOTRAP);
+/** @todo testcase: make sure it's a 16-bit read. */
+
+                IEM_MC_FETCH_MEM_U16(u16Sel, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+                IEM_MC_REF_GREG_U64(pu64Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_CALL_CIMPL_3(iemCImpl_LarLsl_u64, pu64Dst, u16Sel, fIsLarArg);
+
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+
+/** Opcode 0x0f 0x02. */
+FNIEMOP_DEF(iemOp_lar_Gv_Ew)
+{
+    IEMOP_MNEMONIC(lar, "lar Gv,Ew");
+    return FNIEMOP_CALL_1(iemOpCommonLarLsl_Gv_Ew, true);
+}
+
+
+/** Opcode 0x0f 0x03. */
+FNIEMOP_DEF(iemOp_lsl_Gv_Ew)
+{
+    IEMOP_MNEMONIC(lsl, "lsl Gv,Ew");
+    return FNIEMOP_CALL_1(iemOpCommonLarLsl_Gv_Ew, false);
+}
+
+
+/** Opcode 0x0f 0x05. */
+FNIEMOP_DEF(iemOp_syscall)
+{
+    IEMOP_MNEMONIC(syscall, "syscall"); /** @todo 286 LOADALL   */
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_syscall);
+}
+
+
+/** Opcode 0x0f 0x06. */
+FNIEMOP_DEF(iemOp_clts)
+{
+    IEMOP_MNEMONIC(clts, "clts");
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_clts);
+}
+
+
+/** Opcode 0x0f 0x07. */
+FNIEMOP_DEF(iemOp_sysret)
+{
+    IEMOP_MNEMONIC(sysret, "sysret");  /** @todo 386 LOADALL   */
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_sysret);
+}
+
+
+/** Opcode 0x0f 0x08. */
+FNIEMOP_DEF(iemOp_invd)
+{
+    IEMOP_MNEMONIC(invd, "invd");
+#ifdef VBOX_WITH_NESTED_HWVIRT
+    IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO();
+    IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_INVD, SVM_EXIT_INVD, 0, 0);
+#else
+    RT_NOREF_PV(pVCpu);
+#endif
+    /** @todo implement invd for the regular case (above only handles nested SVM
+     *        exits). */
+    IEMOP_BITCH_ABOUT_STUB();
+    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+}
+
+/** @todo IEMOP_HLP_MIN_486(): presumably belongs in iemOp_invd above, invd being a 486+ instruction. */
+
+
+/** Opcode 0x0f 0x09. */
+FNIEMOP_DEF(iemOp_wbinvd)
+{
+    IEMOP_MNEMONIC(wbinvd, "wbinvd");
+    IEMOP_HLP_MIN_486();
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO();
+    IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_WBINVD, SVM_EXIT_WBINVD, 0, 0);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS; /* ignore for now */
+}
+
+
+/** Opcode 0x0f 0x0b. */
+FNIEMOP_DEF(iemOp_ud2)
+{
+    IEMOP_MNEMONIC(ud2, "ud2");
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+/** Opcode 0x0f 0x0d. */
+FNIEMOP_DEF(iemOp_nop_Ev_GrpP)
+{
+    /* AMD prefetch group; Intel implements this as NOP Ev (and so do we). */
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->f3DNowPrefetch)
+    {
+        IEMOP_MNEMONIC(GrpPNotSupported, "GrpP");
+        return IEMOP_RAISE_INVALID_OPCODE();
+    }
+
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_MNEMONIC(GrpPInvalid, "GrpP");
+        return IEMOP_RAISE_INVALID_OPCODE();
+    }
+
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 2: /* Aliased to /0 for the time being. */
+        case 4: /* Aliased to /0 for the time being. */
+        case 5: /* Aliased to /0 for the time being. */
+        case 6: /* Aliased to /0 for the time being. */
+        case 7: /* Aliased to /0 for the time being. */
+        case 0: IEMOP_MNEMONIC(prefetch, "prefetch"); break;
+        case 1: IEMOP_MNEMONIC(prefetchw_1, "prefetchw"); break;
+        case 3: IEMOP_MNEMONIC(prefetchw_3, "prefetchw"); break;
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+
+    IEM_MC_BEGIN(0, 1);
+    IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    /* Currently a NOP. */
+    NOREF(GCPtrEffSrc);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
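+
+
+/* Illustrative aside: GrpP above, like most handlers in this file, selects the
+ * instruction variant from the ModRM reg field.  Standalone sketch of the ModRM
+ * bit layout; the literals mirror what the X86_MODRM_* shift/mask constants are
+ * assumed to expand to (mod = bits 7:6, reg = bits 5:3, rm = bits 2:0):
+ */
+static inline void iemModRmSketch(uint8_t bRm, uint8_t *pbMod, uint8_t *pbReg, uint8_t *pbRm)
+{
+    *pbMod = (bRm >> 6) & 3; /* 3 means register-direct operand. */
+    *pbReg = (bRm >> 3) & 7; /* /r: register, or sub-opcode as in GrpP above. */
+    *pbRm  = bRm & 7;        /* base register or memory encoding. */
+}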
+
+
+/** Opcode 0x0f 0x0e. */
+FNIEMOP_STUB(iemOp_femms);
+
+
+/** Opcode 0x0f 0x0f. */
+FNIEMOP_DEF(iemOp_3Dnow)
+{
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->f3DNow)
+    {
+        IEMOP_MNEMONIC(Inv3Dnow, "3Dnow");
+        return IEMOP_RAISE_INVALID_OPCODE();
+    }
+
+#ifdef IEM_WITH_3DNOW
+    /* This is pretty sparse, use switch instead of table. */
+    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
+    return FNIEMOP_CALL_1(iemOp_3DNowDispatcher, b);
+#else
+    IEMOP_BITCH_ABOUT_STUB();
+    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+/** Opcode      0x0f 0x10 - movups Vps, Wps */
+FNIEMOP_STUB(iemOp_movups_Vps_Wps);
+/** Opcode 0x66 0x0f 0x10 - movupd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_movupd_Vpd_Wpd);
+
+
+/**
+ * @opcode      0x10
+ * @oppfx       0xf3
+ * @opcpuid     sse
+ * @opgroup     og_sse_simdfp_datamove
+ * @opxcpttype  5
+ * @optest      op1=1 op2=2 -> op1=2
+ * @optest      op1=0 op2=-22 -> op1=-22
+ */
+FNIEMOP_DEF(iemOp_movss_Vss_Wss)
+{
+    IEMOP_MNEMONIC2(RM, MOVSS, movss, VssZxReg, Wss, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(uint32_t,                  uSrc);
+
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_FETCH_XREG_U32(uSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_STORE_XREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint32_t,                  uSrc);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+
+        IEM_MC_FETCH_MEM_U32(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_STORE_XREG_U32_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
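+
+
+/* Illustrative aside: the subtle point in iemOp_movss_Vss_Wss above is the store
+ * width.  Register-to-register MOVSS merges into the low dword only, while the
+ * load form zeroes bits 127:32 of the destination (the _ZX_U128 store).  Sketch
+ * over a hypothetical four-dword XMM representation:
+ */
+static inline void iemMovssStoreSketch(uint32_t auDst[4], uint32_t uSrc, bool fFromMemory)
+{
+    auDst[0] = uSrc;                        /* The low dword is always written. */
+    if (fFromMemory)                        /* The load form zero-extends...    */
+        auDst[1] = auDst[2] = auDst[3] = 0;
+    /* ...while the register form leaves auDst[1..3] untouched. */
+}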
+
+
+/** Opcode 0xf2 0x0f 0x10 - movsd Vx, Wsd */
+FNIEMOP_STUB(iemOp_movsd_Vx_Wsd);
+
+
+/**
+ * @opcode      0x11
+ * @oppfx       none
+ * @opcpuid     sse
+ * @opgroup     og_sse_simdfp_datamove
+ * @opxcpttype  4UA
+ * @optest      op1=1 op2=2 -> op1=2
+ * @optest      op1=0 op2=-42 -> op1=-42
+ */
+FNIEMOP_DEF(iemOp_movups_Wps_Vps)
+{
+    IEMOP_MNEMONIC2(MR, MOVUPS, movups, Wps, Vps, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U128(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * @opcode      0x11
+ * @oppfx       0x66
+ * @opcpuid     sse2
+ * @opgroup     og_sse2_pcksclr_datamove
+ * @opxcpttype  4UA
+ * @optest      op1=1 op2=2 -> op1=2
+ * @optest      op1=0 op2=-42 -> op1=-42
+ */
+FNIEMOP_DEF(iemOp_movupd_Wpd_Vpd)
+{
+    IEMOP_MNEMONIC2(MR, MOVUPD, movupd, Wpd, Vpd, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U128(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * @opcode      0x11
+ * @oppfx       0xf3
+ * @opcpuid     sse
+ * @opgroup     og_sse_simdfp_datamove
+ * @opxcpttype  5
+ * @optest      op1=1 op2=2 -> op1=2
+ * @optest      op1=0 op2=-22 -> op1=-22
+ */
+FNIEMOP_DEF(iemOp_movss_Wss_Vss)
+{
+    IEMOP_MNEMONIC2(MR, MOVSS, movss, Wss, Vss, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(uint32_t,                  uSrc);
+
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_FETCH_XREG_U32(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_XREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint32_t,                  uSrc);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U32(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * @opcode      0x11
+ * @oppfx       0xf2
+ * @opcpuid     sse2
+ * @opgroup     og_sse2_pcksclr_datamove
+ * @opxcpttype  5
+ * @optest      op1=1 op2=2 -> op1=2
+ * @optest      op1=0 op2=-42 -> op1=-42
+ */
+FNIEMOP_DEF(iemOp_movsd_Wsd_Vsd)
+{
+    IEMOP_MNEMONIC2(MR, MOVSD, movsd, Wsd, Vsd, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_XREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+FNIEMOP_DEF(iemOp_movlps_Vq_Mq__movhlps)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /**
+         * @opcode      0x12
+         * @opcodesub   11 mr/reg
+         * @oppfx       none
+         * @opcpuid     sse
+         * @opgroup     og_sse_simdfp_datamove
+         * @opxcpttype  5
+         * @optest      op1=1 op2=2 -> op1=2
+         * @optest      op1=0 op2=-42 -> op1=-42
+         */
+        IEMOP_MNEMONIC2(RM_REG, MOVHLPS, movhlps, Vq, UqHi, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_FETCH_XREG_HI_U64(uSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_STORE_XREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /**
+         * @opdone
+         * @opcode      0x12
+         * @opcodesub   !11 mr/reg
+         * @oppfx       none
+         * @opcpuid     sse
+         * @opgroup     og_sse_simdfp_datamove
+         * @opxcpttype  5
+         * @optest      op1=1 op2=2 -> op1=2
+         * @optest      op1=0 op2=-42 -> op1=-42
+         * @opfunction  iemOp_movlps_Vq_Mq__vmovhlps
+         */
+        IEMOP_MNEMONIC2(RM_MEM, MOVLPS, movlps, Vq, Mq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+
+        IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_STORE_XREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
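+
+
+/* Illustrative aside: 0F 12 decodes to two mnemonics depending solely on the
+ * ModRM mod field, as the handler above shows.  Condensed model of the two data
+ * movements over a hypothetical two-qword XMM representation (for the memory
+ * form, uSrcLo stands for the qword just fetched):
+ */
+static inline void iemMovlpsMovhlpsSketch(uint64_t auDst[2], uint64_t uSrcLo, uint64_t uSrcHi, bool fRegForm)
+{
+    auDst[0] = fRegForm ? uSrcHi  /* MOVHLPS: high qword of source -> low qword. */
+             :            uSrcLo; /* MOVLPS:  64-bit memory operand -> low qword. */
+    /* The destination's high qword is preserved by both forms. */
+}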
+
+
+/**
+ * @opcode      0x12
+ * @opcodesub   !11 mr/reg
+ * @oppfx       0x66
+ * @opcpuid     sse2
+ * @opgroup     og_sse2_pcksclr_datamove
+ * @opxcpttype  5
+ * @optest      op1=1 op2=2 -> op1=2
+ * @optest      op1=0 op2=-42 -> op1=-42
+ */
+FNIEMOP_DEF(iemOp_movlpd_Vq_Mq)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_MNEMONIC2(RM_MEM, MOVLPD, movlpd, Vq, Mq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+
+        IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_STORE_XREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+        return VINF_SUCCESS;
+    }
+
+    /**
+     * @opdone
+     * @opmnemonic  ud660f12m3
+     * @opcode      0x12
+     * @opcodesub   11 mr/reg
+     * @oppfx       0x66
+     * @opunused    immediate
+     * @opcpuid     sse
+     * @optest      ->
+     */
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/**
+ * @opcode      0x12
+ * @oppfx       0xf3
+ * @opcpuid     sse3
+ * @opgroup     og_sse3_pcksclr_datamove
+ * @opxcpttype  4
+ * @optest      op1=-1 op2=0xdddddddd00000002eeeeeeee00000001 ->
+ *              op1=0x00000002000000020000000100000001
+ */
+FNIEMOP_DEF(iemOp_movsldup_Vdq_Wdq)
+{
+    IEMOP_MNEMONIC2(RM, MOVSLDUP, movsldup, Vdq, Wdq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(PRTUINT128U,                 puDst, 0);
+        IEM_MC_ARG(PCRTUINT128U,                puSrc, 1);
+
+        IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+
+        IEM_MC_REF_XREG_U128_CONST(puSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_REF_XREG_U128(puDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_movsldup, puDst, puSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(2, 2);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+        IEM_MC_ARG(PRTUINT128U,                 puDst, 0);
+        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      puSrc, uSrc, 1);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+
+        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_REF_XREG_U128(puDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_movsldup, puDst, puSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * @opcode      0x12
+ * @oppfx       0xf2
+ * @opcpuid     sse3
+ * @opgroup     og_sse3_pcksclr_datamove
+ * @opxcpttype  5
+ * @optest      op1=-1 op2=0xddddddddeeeeeeee2222222211111111 ->
+ *              op1=0x22222222111111112222222211111111
+ */
+FNIEMOP_DEF(iemOp_movddup_Vdq_Wdq)
+{
+    IEMOP_MNEMONIC2(RM, MOVDDUP, movddup, Vdq, Wdq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(PRTUINT128U,                 puDst, 0);
+        IEM_MC_ARG(uint64_t,                    uSrc, 1);
+
+        IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+
+        IEM_MC_FETCH_XREG_U64(uSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_REF_XREG_U128(puDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_movddup, puDst, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(2, 2);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+        IEM_MC_ARG(PRTUINT128U,                 puDst, 0);
+        IEM_MC_ARG(uint64_t,                    uSrc, 1);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+
+        IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_REF_XREG_U128(puDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_movddup, puDst, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
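+
+
+/* Illustrative aside: the @optest lines above encode the duplication patterns of
+ * movsldup and movddup.  Spelled out over little-endian dword/qword lanes:
+ */
+static inline void iemMovsldupSketch(uint32_t auDst[4], const uint32_t auSrc[4])
+{
+    auDst[0] = auDst[1] = auSrc[0]; /* Each even dword is duplicated upwards. */
+    auDst[2] = auDst[3] = auSrc[2];
+}
+
+static inline void iemMovddupSketch(uint64_t auDst[2], const uint64_t auSrc[2])
+{
+    auDst[0] = auDst[1] = auSrc[0]; /* The low qword fills both halves. */
+}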
+
+
+/** Opcode      0x0f 0x13 - movlps Mq, Vq */
+FNIEMOP_STUB(iemOp_movlps_Mq_Vq);
+
+/** Opcode 0x66 0x0f 0x13 - movlpd Mq, Vq */
+FNIEMOP_DEF(iemOp_movlpd_Mq_Vq)
+{
+    IEMOP_MNEMONIC(movlpd_Mq_Vq, "movlpd Mq,Vq");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+#if 0
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_XREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+#else
+        return IEMOP_RAISE_INVALID_OPCODE();
+#endif
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/*  Opcode 0xf3 0x0f 0x13 - invalid */
+/*  Opcode 0xf2 0x0f 0x13 - invalid */
+
+/** Opcode      0x0f 0x14 - unpcklps Vx, Wx*/
+FNIEMOP_STUB(iemOp_unpcklps_Vx_Wx);
+/** Opcode 0x66 0x0f 0x14 - unpcklpd Vx, Wx   */
+FNIEMOP_STUB(iemOp_unpcklpd_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0x14 - invalid */
+/*  Opcode 0xf2 0x0f 0x14 - invalid */
+/** Opcode      0x0f 0x15 - unpckhps Vx, Wx   */
+FNIEMOP_STUB(iemOp_unpckhps_Vx_Wx);
+/** Opcode 0x66 0x0f 0x15 - unpckhpd Vx, Wx   */
+FNIEMOP_STUB(iemOp_unpckhpd_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0x15 - invalid */
+/*  Opcode 0xf2 0x0f 0x15 - invalid */
+/** Opcode      0x0f 0x16 - movhpsv1 Vdq, Mq / movlhps Vdq, Uq   */
+FNIEMOP_STUB(iemOp_movhpsv1_Vdq_Mq__movlhps_Vdq_Uq);  //NEXT
+/** Opcode 0x66 0x0f 0x16 - movhpdv1 Vdq, Mq   */
+FNIEMOP_STUB(iemOp_movhpdv1_Vdq_Mq);  //NEXT
+/** Opcode 0xf3 0x0f 0x16 - movshdup Vx, Wx   */
+FNIEMOP_STUB(iemOp_movshdup_Vx_Wx); //NEXT
+/*  Opcode 0xf2 0x0f 0x16 - invalid */
+/** Opcode      0x0f 0x17 - movhpsv1 Mq, Vq   */
+FNIEMOP_STUB(iemOp_movhpsv1_Mq_Vq);  //NEXT
+/** Opcode 0x66 0x0f 0x17 - movhpdv1 Mq, Vq   */
+FNIEMOP_STUB(iemOp_movhpdv1_Mq_Vq);  //NEXT
+/*  Opcode 0xf3 0x0f 0x17 - invalid */
+/*  Opcode 0xf2 0x0f 0x17 - invalid */
+
+
+/** Opcode 0x0f 0x18. */
+FNIEMOP_DEF(iemOp_prefetch_Grp16)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+    {
+        switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+        {
+            case 4: /* Aliased to /0 for the time being according to AMD. */
+            case 5: /* Aliased to /0 for the time being according to AMD. */
+            case 6: /* Aliased to /0 for the time being according to AMD. */
+            case 7: /* Aliased to /0 for the time being according to AMD. */
+            case 0: IEMOP_MNEMONIC(prefetchNTA, "prefetchNTA m8"); break;
+            case 1: IEMOP_MNEMONIC(prefetchT0, "prefetchT0  m8"); break;
+            case 2: IEMOP_MNEMONIC(prefetchT1, "prefetchT1  m8"); break;
+            case 3: IEMOP_MNEMONIC(prefetchT2, "prefetchT2  m8"); break;
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        /* Currently a NOP. */
+        NOREF(GCPtrEffSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+        return VINF_SUCCESS;
+    }
+
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x19..0x1f. */
+FNIEMOP_DEF(iemOp_nop_Ev)
+{
+    IEMOP_MNEMONIC(nop_Ev, "nop Ev");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        /* Currently a NOP. */
+        NOREF(GCPtrEffSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x20. */
+FNIEMOP_DEF(iemOp_mov_Rd_Cd)
+{
+    /* Mod is ignored, as are operand-size overrides. */
+    IEMOP_MNEMONIC(mov_Rd_Cd, "mov Rd,Cd");
+    IEMOP_HLP_MIN_386();
+    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
+        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_64BIT;
+    else
+        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
+
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    uint8_t iCrReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg;
+    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK)
+    {
+        /* The lock prefix can be used to encode CR8 accesses on some CPUs. */
+        if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fMovCr8In32Bit)
+            return IEMOP_RAISE_INVALID_OPCODE(); /* #UD takes precedence over #GP(), see test. */
+        iCrReg |= 8;
+    }
+    switch (iCrReg)
+    {
+        case 0: case 2: case 3: case 4: case 8:
+            break;
+        default:
+            return IEMOP_RAISE_INVALID_OPCODE();
+    }
+    IEMOP_HLP_DONE_DECODING();
+
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Rd_Cd, (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB, iCrReg);
+}
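+
+
+/* Illustrative aside: the control register index above is assembled from the
+ * ModRM reg field, REX.R, and - on CPUs with the AMD alternate encoding - a
+ * LOCK prefix standing in for REX.R.  Hypothetical sketch (uRexR8 is the
+ * pre-shifted REX.R contribution, 0 or 8, like pVCpu->iem.s.uRexReg above;
+ * the fMovCr8In32Bit feature check and #UD path are omitted):
+ */
+static inline uint8_t iemCrIndexSketch(uint8_t bRm, uint8_t uRexR8, bool fLockPrefix)
+{
+    uint8_t iCrReg = (uint8_t)(((bRm >> 3) & 7) | uRexR8); /* reg + REX.R => 0..15 */
+    if (fLockPrefix)
+        iCrReg |= 8;        /* LOCK MOV CRx aliases CR8 on such CPUs. */
+    return iCrReg;          /* Only CR0, CR2, CR3, CR4 and CR8 are then accepted. */
+}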
+
+
+/** Opcode 0x0f 0x21. */
+FNIEMOP_DEF(iemOp_mov_Rd_Dd)
+{
+    IEMOP_MNEMONIC(mov_Rd_Dd, "mov Rd,Dd");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REX_R)
+        return IEMOP_RAISE_INVALID_OPCODE();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Rd_Dd,
+                                   (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB,
+                                   ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK));
+}
+
+
+/** Opcode 0x0f 0x22. */
+FNIEMOP_DEF(iemOp_mov_Cd_Rd)
+{
+    /* Mod is ignored, as are operand-size overrides. */
+    IEMOP_MNEMONIC(mov_Cd_Rd, "mov Cd,Rd");
+    IEMOP_HLP_MIN_386();
+    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
+        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_64BIT;
+    else
+        pVCpu->iem.s.enmEffOpSize = pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
+
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    uint8_t iCrReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg;
+    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK)
+    {
+        /* The lock prefix can be used to encode CR8 accesses on some CPUs. */
+        if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fMovCr8In32Bit)
+            return IEMOP_RAISE_INVALID_OPCODE(); /* #UD takes precedence over #GP(), see test. */
+        iCrReg |= 8;
+    }
+    switch (iCrReg)
+    {
+        case 0: case 2: case 3: case 4: case 8:
+            break;
+        default:
+            return IEMOP_RAISE_INVALID_OPCODE();
+    }
+    IEMOP_HLP_DONE_DECODING();
+
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Cd_Rd, iCrReg, (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB);
+}
+
+
+/** Opcode 0x0f 0x23. */
+FNIEMOP_DEF(iemOp_mov_Dd_Rd)
+{
+    IEMOP_MNEMONIC(mov_Dd_Rd, "mov Dd,Rd");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_REX_R)
+        return IEMOP_RAISE_INVALID_OPCODE();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Dd_Rd,
+                                   ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK),
+                                   (X86_MODRM_RM_MASK & bRm) | pVCpu->iem.s.uRexB);
+}
+
+
+/** Opcode 0x0f 0x24. */
+FNIEMOP_DEF(iemOp_mov_Rd_Td)
+{
+    IEMOP_MNEMONIC(mov_Rd_Td, "mov Rd,Td");
+    /** @todo works on 386 and 486. */
+    /* The RM byte is not considered, see testcase. */
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x26. */
+FNIEMOP_DEF(iemOp_mov_Td_Rd)
+{
+    IEMOP_MNEMONIC(mov_Td_Rd, "mov Td,Rd");
+    /** @todo works on 386 and 486. */
+    /* The RM byte is not considered, see testcase. */
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode      0x0f 0x28 - movaps Vps, Wps */
+FNIEMOP_DEF(iemOp_movaps_Vps_Wps)
+{
+    IEMOP_MNEMONIC(movaps_r_mr, "movaps r,mr");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
+                              (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+
+        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0x66 0x0f 0x28 - movapd Vpd, Wpd */
+FNIEMOP_DEF(iemOp_movapd_Vpd_Wpd)
+{
+    IEMOP_MNEMONIC(movapd_r_mr, "movapd r,mr");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
+                              (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+
+        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/*  Opcode 0xf3 0x0f 0x28 - invalid */
+/*  Opcode 0xf2 0x0f 0x28 - invalid */
+
+/** Opcode      0x0f 0x29 - movaps Wps, Vps */
+FNIEMOP_DEF(iemOp_movaps_Wps_Vps)
+{
+    IEMOP_MNEMONIC(movaps_mr_r, "movaps Wps,Vps");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0x66 0x0f 0x29 - movapd Wpd,Vpd */
+FNIEMOP_DEF(iemOp_movapd_Wpd_Vpd)
+{
+    IEMOP_MNEMONIC(movapd_mr_r, "movapd Wpd,Vpd");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/*  Opcode 0xf3 0x0f 0x29 - invalid */
+/*  Opcode 0xf2 0x0f 0x29 - invalid */
+
+
+/** Opcode      0x0f 0x2a - cvtpi2ps Vps, Qpi */
+FNIEMOP_STUB(iemOp_cvtpi2ps_Vps_Qpi); //NEXT
+/** Opcode 0x66 0x0f 0x2a - cvtpi2pd Vpd, Qpi */
+FNIEMOP_STUB(iemOp_cvtpi2pd_Vpd_Qpi); //NEXT
+/** Opcode 0xf3 0x0f 0x2a - cvtsi2ss Vss, Ey */
+FNIEMOP_STUB(iemOp_cvtsi2ss_Vss_Ey); //NEXT
+/** Opcode 0xf2 0x0f 0x2a - cvtsi2sd Vsd, Ey */
+FNIEMOP_STUB(iemOp_cvtsi2sd_Vsd_Ey); //NEXT
+
+
+/** Opcode      0x0f 0x2b - movntps Mps, Vps */
+FNIEMOP_DEF(iemOp_movntps_Mps_Vps)
+{
+    IEMOP_MNEMONIC(movntps_mr_r, "movntps Mps,Vps");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+
+        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    /* The register, register encoding is invalid. */
+    else
+        return IEMOP_RAISE_INVALID_OPCODE();
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0x66 0x0f 0x2b - movntpd Mpd, Vpd */
+FNIEMOP_DEF(iemOp_movntpd_Mpd_Vpd)
+{
+    IEMOP_MNEMONIC(movntpd_mr_r, "movntpd Mpd,Vpd");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+
+        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    /* The register, register encoding is invalid. */
+    else
+        return IEMOP_RAISE_INVALID_OPCODE();
+    return VINF_SUCCESS;
+}
+/*  Opcode 0xf3 0x0f 0x2b - invalid */
+/*  Opcode 0xf2 0x0f 0x2b - invalid */
+
+
+/** Opcode      0x0f 0x2c - cvttps2pi Ppi, Wps */
+FNIEMOP_STUB(iemOp_cvttps2pi_Ppi_Wps);
+/** Opcode 0x66 0x0f 0x2c - cvttpd2pi Ppi, Wpd */
+FNIEMOP_STUB(iemOp_cvttpd2pi_Ppi_Wpd);
+/** Opcode 0xf3 0x0f 0x2c - cvttss2si Gy, Wss */
+FNIEMOP_STUB(iemOp_cvttss2si_Gy_Wss);
+/** Opcode 0xf2 0x0f 0x2c - cvttsd2si Gy, Wsd */
+FNIEMOP_STUB(iemOp_cvttsd2si_Gy_Wsd);
+
+/** Opcode      0x0f 0x2d - cvtps2pi Ppi, Wps */
+FNIEMOP_STUB(iemOp_cvtps2pi_Ppi_Wps);
+/** Opcode 0x66 0x0f 0x2d - cvtpd2pi Ppi, Wpd */
+FNIEMOP_STUB(iemOp_cvtpd2pi_Qpi_Wpd);
+/** Opcode 0xf3 0x0f 0x2d - cvtss2si Gy, Wss */
+FNIEMOP_STUB(iemOp_cvtss2si_Gy_Wss);
+/** Opcode 0xf2 0x0f 0x2d - cvtsd2si Gy, Wsd */
+FNIEMOP_STUB(iemOp_cvtsd2si_Gy_Wsd);
+
+/** Opcode      0x0f 0x2e - ucomiss Vss, Wss */
+FNIEMOP_STUB(iemOp_ucomiss_Vss_Wss); // NEXT
+/** Opcode 0x66 0x0f 0x2e - ucomisd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_ucomisd_Vsd_Wsd); // NEXT
+/*  Opcode 0xf3 0x0f 0x2e - invalid */
+/*  Opcode 0xf2 0x0f 0x2e - invalid */
+
+/** Opcode      0x0f 0x2f - comiss Vss, Wss */
+FNIEMOP_STUB(iemOp_comiss_Vss_Wss);
+/** Opcode 0x66 0x0f 0x2f - comisd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_comisd_Vsd_Wsd);
+/*  Opcode 0xf3 0x0f 0x2f - invalid */
+/*  Opcode 0xf2 0x0f 0x2f - invalid */
+
+/** Opcode 0x0f 0x30. */
+FNIEMOP_DEF(iemOp_wrmsr)
+{
+    IEMOP_MNEMONIC(wrmsr, "wrmsr");
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_wrmsr);
+}
+
+
+/** Opcode 0x0f 0x31. */
+FNIEMOP_DEF(iemOp_rdtsc)
+{
+    IEMOP_MNEMONIC(rdtsc, "rdtsc");
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rdtsc);
+}
+
+
+/** Opcode 0x0f 0x32. */
+FNIEMOP_DEF(iemOp_rdmsr)
+{
+    IEMOP_MNEMONIC(rdmsr, "rdmsr");
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rdmsr);
+}
+
+
+/** Opcode 0x0f 0x33. */
+FNIEMOP_DEF(iemOp_rdpmc)
+{
+    IEMOP_MNEMONIC(rdpmc, "rdpmc");
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rdpmc);
+}
+
+
+/** Opcode 0x0f 0x34. */
+FNIEMOP_STUB(iemOp_sysenter);
+/** Opcode 0x0f 0x35. */
+FNIEMOP_STUB(iemOp_sysexit);
+/** Opcode 0x0f 0x37. */
+FNIEMOP_STUB(iemOp_getsec);
+
+
+/** Opcode 0x0f 0x38. */
+FNIEMOP_DEF(iemOp_3byte_Esc_0f_38)
+{
+#ifdef IEM_WITH_THREE_0F_38
+    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
+    return FNIEMOP_CALL(g_apfnThreeByte0f38[(uintptr_t)b * 4 + pVCpu->iem.s.idxPrefix]);
+#else
+    IEMOP_BITCH_ABOUT_STUB();
+    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+/** Opcode 0x0f 0x3a. */
+FNIEMOP_DEF(iemOp_3byte_Esc_0f_3a)
+{
+#ifdef IEM_WITH_THREE_0F_3A
+    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
+    return FNIEMOP_CALL(g_apfnThreeByte0f3a[(uintptr_t)b * 4 + pVCpu->iem.s.idxPrefix]);
+#else
+    IEMOP_BITCH_ABOUT_STUB();
+    return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+#endif
+}
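+
+
+/* Illustrative aside: both three-byte escapes above index a flat table with four
+ * entries per opcode byte, one per mandatory-prefix slot.  Assuming idxPrefix
+ * encodes none/0x66/0xF3/0xF2 as 0..3 (not spelled out here), the lookup is:
+ */
+static inline uintptr_t iemThreeByteIndexSketch(uint8_t bOpcode, uint8_t idxPrefix)
+{
+    return (uintptr_t)bOpcode * 4 + idxPrefix; /* 256 opcodes x 4 prefix slots. */
+}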
+
+
+/**
+ * Implements a conditional move.
+ *
+ * It would be nice to share this code and reduce the bloat, but there is no
+ * obvious way to do so.
+ *
+ * @param   a_Cnd       The conditional "microcode" operation.
+ */
+#define CMOV_X(a_Cnd) \
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm); \
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT)) \
+    { \
+        switch (pVCpu->iem.s.enmEffOpSize) \
+        { \
+            case IEMMODE_16BIT: \
+                IEM_MC_BEGIN(0, 1); \
+                IEM_MC_LOCAL(uint16_t, u16Tmp); \
+                a_Cnd { \
+                    IEM_MC_FETCH_GREG_U16(u16Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB); \
+                    IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Tmp); \
+                } IEM_MC_ENDIF(); \
+                IEM_MC_ADVANCE_RIP(); \
+                IEM_MC_END(); \
+                return VINF_SUCCESS; \
+    \
+            case IEMMODE_32BIT: \
+                IEM_MC_BEGIN(0, 1); \
+                IEM_MC_LOCAL(uint32_t, u32Tmp); \
+                a_Cnd { \
+                    IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB); \
+                    IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp); \
+                } IEM_MC_ELSE() { \
+                    IEM_MC_CLEAR_HIGH_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg); \
+                } IEM_MC_ENDIF(); \
+                IEM_MC_ADVANCE_RIP(); \
+                IEM_MC_END(); \
+                return VINF_SUCCESS; \
+    \
+            case IEMMODE_64BIT: \
+                IEM_MC_BEGIN(0, 1); \
+                IEM_MC_LOCAL(uint64_t, u64Tmp); \
+                a_Cnd { \
+                    IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB); \
+                    IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp); \
+                } IEM_MC_ENDIF(); \
+                IEM_MC_ADVANCE_RIP(); \
+                IEM_MC_END(); \
+                return VINF_SUCCESS; \
+    \
+            IEM_NOT_REACHED_DEFAULT_CASE_RET(); \
+        } \
+    } \
+    else \
+    { \
+        switch (pVCpu->iem.s.enmEffOpSize) \
+        { \
+            case IEMMODE_16BIT: \
+                IEM_MC_BEGIN(0, 2); \
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc); \
+                IEM_MC_LOCAL(uint16_t, u16Tmp); \
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
+                IEM_MC_FETCH_MEM_U16(u16Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
+                a_Cnd { \
+                    IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Tmp); \
+                } IEM_MC_ENDIF(); \
+                IEM_MC_ADVANCE_RIP(); \
+                IEM_MC_END(); \
+                return VINF_SUCCESS; \
+    \
+            case IEMMODE_32BIT: \
+                IEM_MC_BEGIN(0, 2); \
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc); \
+                IEM_MC_LOCAL(uint32_t, u32Tmp); \
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
+                IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
+                a_Cnd { \
+                    IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp); \
+                } IEM_MC_ELSE() { \
+                    IEM_MC_CLEAR_HIGH_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg); \
+                } IEM_MC_ENDIF(); \
+                IEM_MC_ADVANCE_RIP(); \
+                IEM_MC_END(); \
+                return VINF_SUCCESS; \
+    \
+            case IEMMODE_64BIT: \
+                IEM_MC_BEGIN(0, 2); \
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc); \
+                IEM_MC_LOCAL(uint64_t, u64Tmp); \
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
+                IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
+                a_Cnd { \
+                    IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp); \
+                } IEM_MC_ENDIF(); \
+                IEM_MC_ADVANCE_RIP(); \
+                IEM_MC_END(); \
+                return VINF_SUCCESS; \
+    \
+            IEM_NOT_REACHED_DEFAULT_CASE_RET(); \
+        } \
+    } do {} while (0)
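+
+
+/* Illustrative aside on the IEM_MC_ELSE() branches above: in 64-bit mode a
+ * 32-bit CMOVcc zero-extends the destination even when the condition is false,
+ * which is why only the 32-bit cases carry an else arm.  Minimal architectural
+ * model of that case:
+ */
+static inline uint64_t iemCmov32Sketch(uint64_t uDst, uint32_t uSrc, bool fCond)
+{
+    /* The register is written either way; a false condition keeps the old low
+       dword but still clears bits 63:32. */
+    return fCond ? (uint64_t)uSrc : (uint64_t)(uint32_t)uDst;
+}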
+
+
+
+/** Opcode 0x0f 0x40. */
+FNIEMOP_DEF(iemOp_cmovo_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovo_Gv_Ev, "cmovo Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF));
+}
+
+
+/** Opcode 0x0f 0x41. */
+FNIEMOP_DEF(iemOp_cmovno_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovno_Gv_Ev, "cmovno Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_OF));
+}
+
+
+/** Opcode 0x0f 0x42. */
+FNIEMOP_DEF(iemOp_cmovc_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovc_Gv_Ev, "cmovc Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF));
+}
+
+
+/** Opcode 0x0f 0x43. */
+FNIEMOP_DEF(iemOp_cmovnc_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovnc_Gv_Ev, "cmovnc Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_CF));
+}
+
+
+/** Opcode 0x0f 0x44. */
+FNIEMOP_DEF(iemOp_cmove_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmove_Gv_Ev, "cmove Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF));
+}
+
+
+/** Opcode 0x0f 0x45. */
+FNIEMOP_DEF(iemOp_cmovne_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovne_Gv_Ev, "cmovne Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_ZF));
+}
+
+
+/** Opcode 0x0f 0x46. */
+FNIEMOP_DEF(iemOp_cmovbe_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovbe_Gv_Ev, "cmovbe Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF));
+}
+
+
+/** Opcode 0x0f 0x47. */
+FNIEMOP_DEF(iemOp_cmovnbe_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovnbe_Gv_Ev, "cmovnbe Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_NO_BITS_SET(X86_EFL_CF | X86_EFL_ZF));
+}
+
+
+/** Opcode 0x0f 0x48. */
+FNIEMOP_DEF(iemOp_cmovs_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovs_Gv_Ev, "cmovs Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF));
+}
+
+
+/** Opcode 0x0f 0x49. */
+FNIEMOP_DEF(iemOp_cmovns_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovns_Gv_Ev, "cmovns Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_SF));
+}
+
+
+/** Opcode 0x0f 0x4a. */
+FNIEMOP_DEF(iemOp_cmovp_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovp_Gv_Ev, "cmovp Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF));
+}
+
+
+/** Opcode 0x0f 0x4b. */
+FNIEMOP_DEF(iemOp_cmovnp_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovnp_Gv_Ev, "cmovnp Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_PF));
+}
+
+
+/** Opcode 0x0f 0x4c. */
+FNIEMOP_DEF(iemOp_cmovl_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovl_Gv_Ev, "cmovl Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF));
+}
+
+
+/** Opcode 0x0f 0x4d. */
+FNIEMOP_DEF(iemOp_cmovnl_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovnl_Gv_Ev, "cmovnl Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BITS_EQ(X86_EFL_SF, X86_EFL_OF));
+}
+
+
+/** Opcode 0x0f 0x4e. */
+FNIEMOP_DEF(iemOp_cmovle_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovle_Gv_Ev, "cmovle Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF));
+}
+
+
+/** Opcode 0x0f 0x4f. */
+FNIEMOP_DEF(iemOp_cmovnle_Gv_Ev)
+{
+    IEMOP_MNEMONIC(cmovnle_Gv_Ev, "cmovnle Gv,Ev");
+    CMOV_X(IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF));
+}
+
+#undef CMOV_X
+
+/** Opcode      0x0f 0x50 - movmskps Gy, Ups */
+FNIEMOP_STUB(iemOp_movmskps_Gy_Ups);
+/** Opcode 0x66 0x0f 0x50 - movmskpd Gy, Upd */
+FNIEMOP_STUB(iemOp_movmskpd_Gy_Upd);
+/*  Opcode 0xf3 0x0f 0x50 - invalid */
+/*  Opcode 0xf2 0x0f 0x50 - invalid */
+
+/** Opcode      0x0f 0x51 - sqrtps Vps, Wps */
+FNIEMOP_STUB(iemOp_sqrtps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x51 - sqrtpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_sqrtpd_Vpd_Wpd);
+/** Opcode 0xf3 0x0f 0x51 - sqrtss Vss, Wss */
+FNIEMOP_STUB(iemOp_sqrtss_Vss_Wss);
+/** Opcode 0xf2 0x0f 0x51 - sqrtsd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_sqrtsd_Vsd_Wsd);
+
+/** Opcode      0x0f 0x52 - rsqrtps Vps, Wps */
+FNIEMOP_STUB(iemOp_rsqrtps_Vps_Wps);
+/*  Opcode 0x66 0x0f 0x52 - invalid */
+/** Opcode 0xf3 0x0f 0x52 - rsqrtss Vss, Wss */
+FNIEMOP_STUB(iemOp_rsqrtss_Vss_Wss);
+/*  Opcode 0xf2 0x0f 0x52 - invalid */
+
+/** Opcode      0x0f 0x53 - rcpps Vps, Wps */
+FNIEMOP_STUB(iemOp_rcpps_Vps_Wps);
+/*  Opcode 0x66 0x0f 0x53 - invalid */
+/** Opcode 0xf3 0x0f 0x53 - rcpss Vss, Wss */
+FNIEMOP_STUB(iemOp_rcpss_Vss_Wss);
+/*  Opcode 0xf2 0x0f 0x53 - invalid */
+
+/** Opcode      0x0f 0x54 - andps Vps, Wps */
+FNIEMOP_STUB(iemOp_andps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x54 - andpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_andpd_Vpd_Wpd);
+/*  Opcode 0xf3 0x0f 0x54 - invalid */
+/*  Opcode 0xf2 0x0f 0x54 - invalid */
+
+/** Opcode      0x0f 0x55 - andnps Vps, Wps */
+FNIEMOP_STUB(iemOp_andnps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x55 - andnpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_andnpd_Vpd_Wpd);
+/*  Opcode 0xf3 0x0f 0x55 - invalid */
+/*  Opcode 0xf2 0x0f 0x55 - invalid */
+
+/** Opcode      0x0f 0x56 - orps Vps, Wps */
+FNIEMOP_STUB(iemOp_orps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x56 - orpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_orpd_Vpd_Wpd);
+/*  Opcode 0xf3 0x0f 0x56 - invalid */
+/*  Opcode 0xf2 0x0f 0x56 - invalid */
+
+/** Opcode      0x0f 0x57 - xorps Vps, Wps */
+FNIEMOP_STUB(iemOp_xorps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x57 - xorpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_xorpd_Vpd_Wpd);
+/*  Opcode 0xf3 0x0f 0x57 - invalid */
+/*  Opcode 0xf2 0x0f 0x57 - invalid */
+
+/** Opcode      0x0f 0x58 - addps Vps, Wps */
+FNIEMOP_STUB(iemOp_addps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x58 - addpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_addpd_Vpd_Wpd);
+/** Opcode 0xf3 0x0f 0x58 - addss Vss, Wss */
+FNIEMOP_STUB(iemOp_addss_Vss_Wss);
+/** Opcode 0xf2 0x0f 0x58 - addsd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_addsd_Vsd_Wsd);
+
+/** Opcode      0x0f 0x59 - mulps Vps, Wps */
+FNIEMOP_STUB(iemOp_mulps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x59 - mulpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_mulpd_Vpd_Wpd);
+/** Opcode 0xf3 0x0f 0x59 - mulss Vss, Wss */
+FNIEMOP_STUB(iemOp_mulss_Vss_Wss);
+/** Opcode 0xf2 0x0f 0x59 - mulsd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_mulsd_Vsd_Wsd);
+
+/** Opcode      0x0f 0x5a - cvtps2pd Vpd, Wps */
+FNIEMOP_STUB(iemOp_cvtps2pd_Vpd_Wps);
+/** Opcode 0x66 0x0f 0x5a - cvtpd2ps Vps, Wpd */
+FNIEMOP_STUB(iemOp_cvtpd2ps_Vps_Wpd);
+/** Opcode 0xf3 0x0f 0x5a - cvtss2sd Vsd, Wss */
+FNIEMOP_STUB(iemOp_cvtss2sd_Vsd_Wss);
+/** Opcode 0xf2 0x0f 0x5a - cvtsd2ss Vss, Wsd */
+FNIEMOP_STUB(iemOp_cvtsd2ss_Vss_Wsd);
+
+/** Opcode      0x0f 0x5b - cvtdq2ps Vps, Wdq */
+FNIEMOP_STUB(iemOp_cvtdq2ps_Vps_Wdq);
+/** Opcode 0x66 0x0f 0x5b - cvtps2dq Vdq, Wps */
+FNIEMOP_STUB(iemOp_cvtps2dq_Vdq_Wps);
+/** Opcode 0xf3 0x0f 0x5b - cvttps2dq Vdq, Wps */
+FNIEMOP_STUB(iemOp_cvttps2dq_Vdq_Wps);
+/*  Opcode 0xf2 0x0f 0x5b - invalid */
+
+/** Opcode      0x0f 0x5c - subps Vps, Wps */
+FNIEMOP_STUB(iemOp_subps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x5c - subpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_subpd_Vpd_Wpd);
+/** Opcode 0xf3 0x0f 0x5c - subss Vss, Wss */
+FNIEMOP_STUB(iemOp_subss_Vss_Wss);
+/** Opcode 0xf2 0x0f 0x5c - subsd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_subsd_Vsd_Wsd);
+
+/** Opcode      0x0f 0x5d - minps Vps, Wps */
+FNIEMOP_STUB(iemOp_minps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x5d - minpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_minpd_Vpd_Wpd);
+/** Opcode 0xf3 0x0f 0x5d - minss Vss, Wss */
+FNIEMOP_STUB(iemOp_minss_Vss_Wss);
+/** Opcode 0xf2 0x0f 0x5d - minsd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_minsd_Vsd_Wsd);
+
+/** Opcode      0x0f 0x5e - divps Vps, Wps */
+FNIEMOP_STUB(iemOp_divps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x5e - divpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_divpd_Vpd_Wpd);
+/** Opcode 0xf3 0x0f 0x5e - divss Vss, Wss */
+FNIEMOP_STUB(iemOp_divss_Vss_Wss);
+/** Opcode 0xf2 0x0f 0x5e - divsd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_divsd_Vsd_Wsd);
+
+/** Opcode      0x0f 0x5f - maxps Vps, Wps */
+FNIEMOP_STUB(iemOp_maxps_Vps_Wps);
+/** Opcode 0x66 0x0f 0x5f - maxpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_maxpd_Vpd_Wpd);
+/** Opcode 0xf3 0x0f 0x5f - maxss Vss, Wss */
+FNIEMOP_STUB(iemOp_maxss_Vss_Wss);
+/** Opcode 0xf2 0x0f 0x5f - maxsd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_maxsd_Vsd_Wsd);
+
+/**
+ * Common worker for MMX instructions on the form:
+ *      pxxxx mm1, mm2/mem32
+ *
+ * The 2nd operand is the first half of a register, which in the memory case
+ * means a 32-bit memory access.
+ *
+ * Exceptions type 4.
+ */
+FNIEMOP_DEF_1(iemOpCommonMmx_LowLow_To_Full, PCIEMOPMEDIAF1L1, pImpl)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if (!pImpl->pfnU64)
+        return IEMOP_RAISE_INVALID_OPCODE();
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
+        /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(uint64_t *,          pDst, 0);
+        IEM_MC_ARG(uint32_t const *,    pSrc, 1);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_PREPARE_FPU_USAGE();
+        IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_REF_MREG_U32_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
+        IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(2, 2);
+        IEM_MC_ARG(uint64_t *,                  pDst,       0);
+        IEM_MC_LOCAL(uint32_t,                  uSrc);
+        IEM_MC_ARG_LOCAL_REF(uint32_t const *,  pSrc, uSrc, 1);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_FETCH_MEM_U32(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+
+        IEM_MC_PREPARE_FPU_USAGE();
+        IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Common worker for SSE2 instructions on the form:
+ *      pxxxx xmm1, xmm2/mem128
+ *
+ * The 2nd operand is the first half of a register, which in the memory case
+ * means a 128-bit aligned, 64-bit memory access.
+ *
+ * Exceptions type 4.
+ */
+FNIEMOP_DEF_1(iemOpCommonSse_LowLow_To_Full, PCIEMOPMEDIAF1L1, pImpl)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(PRTUINT128U,          pDst, 0);
+        IEM_MC_ARG(uint64_t const *,     pSrc, 1);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_REF_XREG_U64_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(2, 2);
+        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_FETCH_MEM_U64_ALIGN_U128(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
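+
+
+/* Illustrative aside: "LowLow To Full" means the low halves of both operands are
+ * interleaved into the full destination.  For the byte case used right below
+ * (punpcklbw), a sketch of the MMX form:
+ */
+static inline void iemPunpcklbwSketch(uint8_t abDst1[8], const uint8_t abSrc2[8])
+{
+    uint8_t abTmp[8];
+    for (unsigned i = 0; i < 4; i++)
+    {
+        abTmp[i * 2]     = abDst1[i]; /* First operand supplies the even bytes. */
+        abTmp[i * 2 + 1] = abSrc2[i]; /* Second operand supplies the odd bytes. */
+    }
+    for (unsigned i = 0; i < 8; i++)
+        abDst1[i] = abTmp[i];
+}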
+
+
+/** Opcode      0x0f 0x60 - punpcklbw Pq, Qd */
+FNIEMOP_DEF(iemOp_punpcklbw_Pq_Qd)
+{
+    IEMOP_MNEMONIC(punpcklbw, "punpcklbw Pq, Qd");
+    return FNIEMOP_CALL_1(iemOpCommonMmx_LowLow_To_Full, &g_iemAImpl_punpcklbw);
+}
+
+/** Opcode 0x66 0x0f 0x60 - punpcklbw Vx, Wx */
+FNIEMOP_DEF(iemOp_punpcklbw_Vx_Wx)
+{
+    IEMOP_MNEMONIC(punpcklbw_Vx_Wx, "punpcklbw Vx, Wx");
+    return FNIEMOP_CALL_1(iemOpCommonSse_LowLow_To_Full, &g_iemAImpl_punpcklbw);
+}
+
+/*  Opcode 0xf3 0x0f 0x60 - invalid */
+
+
+/** Opcode      0x0f 0x61 - punpcklwd Pq, Qd */
+FNIEMOP_DEF(iemOp_punpcklwd_Pq_Qd)
+{
+    IEMOP_MNEMONIC(punpcklwd, "punpcklwd Pq, Qd"); /** @todo AMD mark the MMX version as 3DNow!. Intel says MMX CPUID req. */
+    return FNIEMOP_CALL_1(iemOpCommonMmx_LowLow_To_Full, &g_iemAImpl_punpcklwd);
+}
+
+/** Opcode 0x66 0x0f 0x61 - punpcklwd Vx, Wx */
+FNIEMOP_DEF(iemOp_punpcklwd_Vx_Wx)
+{
+    IEMOP_MNEMONIC(punpcklwd_Vx_Wx, "punpcklwd Vx, Wx");
+    return FNIEMOP_CALL_1(iemOpCommonSse_LowLow_To_Full, &g_iemAImpl_punpcklwd);
+}
+
+/*  Opcode 0xf3 0x0f 0x61 - invalid */
+
+
+/** Opcode      0x0f 0x62 - punpckldq Pq, Qd */
+FNIEMOP_DEF(iemOp_punpckldq_Pq_Qd)
+{
+    IEMOP_MNEMONIC(punpckldq, "punpckldq Pq, Qd");
+    return FNIEMOP_CALL_1(iemOpCommonMmx_LowLow_To_Full, &g_iemAImpl_punpckldq);
+}
+
+/** Opcode 0x66 0x0f 0x62 - punpckldq Vx, Wx */
+FNIEMOP_DEF(iemOp_punpckldq_Vx_Wx)
+{
+    IEMOP_MNEMONIC(punpckldq_Vx_Wx, "punpckldq Vx, Wx");
+    return FNIEMOP_CALL_1(iemOpCommonSse_LowLow_To_Full, &g_iemAImpl_punpckldq);
+}
+
+/*  Opcode 0xf3 0x0f 0x62 - invalid */
+
+
+
+/** Opcode      0x0f 0x63 - packsswb Pq, Qq */
+FNIEMOP_STUB(iemOp_packsswb_Pq_Qq);
+/** Opcode 0x66 0x0f 0x63 - packsswb Vx, Wx */
+FNIEMOP_STUB(iemOp_packsswb_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0x63 - invalid */
+
+/** Opcode      0x0f 0x64 - pcmpgtb Pq, Qq */
+FNIEMOP_STUB(iemOp_pcmpgtb_Pq_Qq);
+/** Opcode 0x66 0x0f 0x64 - pcmpgtb Vx, Wx */
+FNIEMOP_STUB(iemOp_pcmpgtb_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0x64 - invalid */
+
+/** Opcode      0x0f 0x65 - pcmpgtw Pq, Qq */
+FNIEMOP_STUB(iemOp_pcmpgtw_Pq_Qq);
+/** Opcode 0x66 0x0f 0x65 - pcmpgtw Vx, Wx */
+FNIEMOP_STUB(iemOp_pcmpgtw_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0x65 - invalid */
+
+/** Opcode      0x0f 0x66 - pcmpgtd Pq, Qq */
+FNIEMOP_STUB(iemOp_pcmpgtd_Pq_Qq);
+/** Opcode 0x66 0x0f 0x66 - pcmpgtd Vx, Wx */
+FNIEMOP_STUB(iemOp_pcmpgtd_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0x66 - invalid */
+
+/** Opcode      0x0f 0x67 - packuswb Pq, Qq */
+FNIEMOP_STUB(iemOp_packuswb_Pq_Qq);
+/** Opcode 0x66 0x0f 0x67 - packuswb Vx, Wx */
+FNIEMOP_STUB(iemOp_packuswb_Vx_W);
+/*  Opcode 0xf3 0x0f 0x67 - invalid */
+
+
+/**
+ * Common worker for MMX instructions of the form:
+ *      pxxxx mm1, mm2/mem64
+ *
+ * The 2nd operand is the second half of a register, which in the memory case
+ * means a 64-bit memory access for MMX, and for SSE a 128-bit aligned access
+ * where it may read the full 128 bits or only the upper 64 bits.
+ *
+ * Exceptions type 4.
+ */
+FNIEMOP_DEF_1(iemOpCommonMmx_HighHigh_To_Full, PCIEMOPMEDIAF1H1, pImpl)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    AssertReturn(pImpl->pfnU64, IEMOP_RAISE_INVALID_OPCODE());
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
+        /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(uint64_t *,          pDst, 0);
+        IEM_MC_ARG(uint64_t const *,    pSrc, 1);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_PREPARE_FPU_USAGE();
+        IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
+        IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(2, 2);
+        IEM_MC_ARG(uint64_t *,                  pDst,       0);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+
+        IEM_MC_PREPARE_FPU_USAGE();
+        IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Common worker for SSE2 instructions of the form:
+ *      pxxxx xmm1, xmm2/mem128
+ *
+ * The 2nd operand is the second half of a register, which in the memory case
+ * means a 64-bit memory access for MMX, and for SSE a 128-bit aligned access
+ * where it may read the full 128 bits or only the upper 64 bits.
+ *
+ * Exceptions type 4.
+ */
+FNIEMOP_DEF_1(iemOpCommonSse_HighHigh_To_Full, PCIEMOPMEDIAF1H1, pImpl)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(PRTUINT128U,          pDst, 0);
+        IEM_MC_ARG(PCRTUINT128U,         pSrc, 1);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(2, 2);
+        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      pSrc, uSrc, 1);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); /* Most CPUs probably only read the high qword. */
+
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
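+/* Note: the _ALIGN_SSE fetch above enforces the 16 byte alignment required
+ * of legacy SSE memory operands (exception type 4); a misaligned operand
+ * raises #GP(0) instead of being read. */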
+
+/** Opcode      0x0f 0x68 - punpckhbw Pq, Qd */
+FNIEMOP_DEF(iemOp_punpckhbw_Pq_Qd)
+{
+    IEMOP_MNEMONIC(punpckhbw, "punpckhbw Pq, Qd");
+    return FNIEMOP_CALL_1(iemOpCommonMmx_HighHigh_To_Full, &g_iemAImpl_punpckhbw);
+}
+
+/** Opcode 0x66 0x0f 0x68 - punpckhbw Vx, Wx */
+FNIEMOP_DEF(iemOp_punpckhbw_Vx_Wx)
+{
+    IEMOP_MNEMONIC(punpckhbw_Vx_Wx, "punpckhbw Vx, Wx");
+    return FNIEMOP_CALL_1(iemOpCommonSse_HighHigh_To_Full, &g_iemAImpl_punpckhbw);
+}
+/*  Opcode 0xf3 0x0f 0x68 - invalid */
+
+
+/** Opcode      0x0f 0x69 - punpckhwd Pq, Qd */
+FNIEMOP_DEF(iemOp_punpckhwd_Pq_Qd)
+{
+    IEMOP_MNEMONIC(punpckhwd, "punpckhwd Pq, Qd");
+    return FNIEMOP_CALL_1(iemOpCommonMmx_HighHigh_To_Full, &g_iemAImpl_punpckhwd);
+}
+
+/** Opcode 0x66 0x0f 0x69 - punpckhwd Vx, Wx */
+FNIEMOP_DEF(iemOp_punpckhwd_Vx_Wx)
+{
+    IEMOP_MNEMONIC(punpckhwd_Vx_Wx, "punpckhwd Vx, Wx");
+    return FNIEMOP_CALL_1(iemOpCommonSse_HighHigh_To_Full, &g_iemAImpl_punpckhwd);
+}
+/*  Opcode 0xf3 0x0f 0x69 - invalid */
+
+
+/** Opcode      0x0f 0x6a - punpckhdq Pq, Qd */
+FNIEMOP_DEF(iemOp_punpckhdq_Pq_Qd)
+{
+    IEMOP_MNEMONIC(punpckhdq, "punpckhdq Pq, Qd");
+    return FNIEMOP_CALL_1(iemOpCommonMmx_HighHigh_To_Full, &g_iemAImpl_punpckhdq);
+}
+
+/** Opcode 0x66 0x0f 0x6a - punpckhdq Vx, Wx */
+FNIEMOP_DEF(iemOp_punpckhdq_Vx_W)
+{
+    IEMOP_MNEMONIC(punpckhdq_Vx_W, "punpckhdq Vx, Wx");
+    return FNIEMOP_CALL_1(iemOpCommonSse_HighHigh_To_Full, &g_iemAImpl_punpckhdq);
+}
+/*  Opcode 0xf3 0x0f 0x6a - invalid */
+
+
+/** Opcode      0x0f 0x6b - packssdw Pq, Qd */
+FNIEMOP_STUB(iemOp_packssdw_Pq_Qd);
+/** Opcode 0x66 0x0f 0x6b - packssdw Vx, Wx */
+FNIEMOP_STUB(iemOp_packssdw_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0x6b - invalid */
+
+
+/*  Opcode      0x0f 0x6c - invalid */
+
+/** Opcode 0x66 0x0f 0x6c - punpcklqdq Vx, Wx */
+FNIEMOP_DEF(iemOp_punpcklqdq_Vx_Wx)
+{
+    IEMOP_MNEMONIC(punpcklqdq, "punpcklqdq Vx, Wx");
+    return FNIEMOP_CALL_1(iemOpCommonSse_LowLow_To_Full, &g_iemAImpl_punpcklqdq);
+}
+
+/*  Opcode 0xf3 0x0f 0x6c - invalid */
+/*  Opcode 0xf2 0x0f 0x6c - invalid */
+
+
+/*  Opcode      0x0f 0x6d - invalid */
+
+/** Opcode 0x66 0x0f 0x6d - punpckhqdq Vx, Wx */
+FNIEMOP_DEF(iemOp_punpckhqdq_Vx_W)
+{
+    IEMOP_MNEMONIC(punpckhqdq_Vx_W, "punpckhqdq Vx,Wx");
+    return FNIEMOP_CALL_1(iemOpCommonSse_HighHigh_To_Full, &g_iemAImpl_punpckhqdq);
+}
+
+/*  Opcode 0xf3 0x0f 0x6d - invalid */
+
+
+/** Opcode      0x0f 0x6e - movd/q Pd, Ey */
+FNIEMOP_DEF(iemOp_movd_q_Pd_Ey)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        IEMOP_MNEMONIC(movq_Pq_Eq, "movq Pq,Eq");
+    else
+        IEMOP_MNEMONIC(movd_Pd_Ed, "movd Pd,Ed");
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* MMX, greg */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
+        IEM_MC_LOCAL(uint64_t, u64Tmp);
+        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+            IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        else
+            IEM_MC_FETCH_GREG_U32_ZX_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* MMX, [mem] */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
+        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        {
+            IEM_MC_LOCAL(uint64_t, u64Tmp);
+            IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+            IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
+        }
+        else
+        {
+            IEM_MC_LOCAL(uint32_t, u32Tmp);
+            IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+            IEM_MC_STORE_MREG_U32_ZX_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u32Tmp);
+        }
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0x66 0x0f 0x6e - movd/q Vy, Ey */
+FNIEMOP_DEF(iemOp_movd_q_Vy_Ey)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        IEMOP_MNEMONIC(movq_Vq_Eq, "movq Vq,Eq");
+    else
+        IEMOP_MNEMONIC(movd_Vd_Ed, "movd Vd,Ed");
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* XMM, greg*/
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        {
+            IEM_MC_LOCAL(uint64_t, u64Tmp);
+            IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+            IEM_MC_STORE_XREG_U64_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
+        }
+        else
+        {
+            IEM_MC_LOCAL(uint32_t, u32Tmp);
+            IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+            IEM_MC_STORE_XREG_U32_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
+        }
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* XMM, [mem] */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT(); /** @todo order */
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        {
+            IEM_MC_LOCAL(uint64_t, u64Tmp);
+            IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+            IEM_MC_STORE_XREG_U64_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
+        }
+        else
+        {
+            IEM_MC_LOCAL(uint32_t, u32Tmp);
+            IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+            IEM_MC_STORE_XREG_U32_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
+        }
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
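+/* Note: in both branches above the REX.W form moves a full 64-bit quantity
+ * while the plain form zero-extends a 32-bit one; either way the _ZX_U128
+ * stores clear the remaining bits of the destination XMM register, matching
+ * the architectural movd/movq-to-XMM semantics. */
+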
+/*  Opcode 0xf3 0x0f 0x6e - invalid */
+
+
+/** Opcode      0x0f 0x6f - movq Pq, Qq */
+FNIEMOP_DEF(iemOp_movq_Pq_Qq)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    IEMOP_MNEMONIC(movq_Pq_Qq, "movq Pq,Qq");
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
+        /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(uint64_t, u64Tmp);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
+        IEM_MC_FETCH_MREG_U64(u64Tmp, bRm & X86_MODRM_RM_MASK);
+        IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint64_t, u64Tmp);
+        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
+        IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_STORE_MREG_U64((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK, u64Tmp);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0x66 0x0f 0x6f - movdqa Vx, Wx */
+FNIEMOP_DEF(iemOp_movdqa_Vx_Wx)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    IEMOP_MNEMONIC(movdqa_Vdq_Wdq, "movdqa Vdq,Wdq");
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
+                              (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U, u128Tmp);
+        IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(u128Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u128Tmp);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0xf3 0x0f 0x6f - movdqu Vx, Wx */
+FNIEMOP_DEF(iemOp_movdqu_Vx_Wx)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    IEMOP_MNEMONIC(movdqu_Vdq_Wdq, "movdqu Vdq,Wdq");
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
+                              (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U, u128Tmp);
+        IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_FETCH_MEM_U128(u128Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u128Tmp);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
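+/* Note: the only difference between the movdqa and movdqu workers above is
+ * the memory fetch: IEM_MC_FETCH_MEM_U128_ALIGN_SSE faults on a misaligned
+ * 16 byte operand, whereas plain IEM_MC_FETCH_MEM_U128 accepts it.  The
+ * register-to-register paths are identical. */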
+
+/** Opcode      0x0f 0x70 - pshufw Pq, Qq, Ib */
+FNIEMOP_DEF(iemOp_pshufw_Pq_Qq_Ib)
+{
+    IEMOP_MNEMONIC(pshufw_Pq_Qq_Ib, "pshufw Pq,Qq,Ib");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(uint64_t *,          pDst, 0);
+        IEM_MC_ARG(uint64_t const *,    pSrc, 1);
+        IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT();
+        IEM_MC_PREPARE_FPU_USAGE();
+        IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
+        IEM_MC_CALL_MMX_AIMPL_3(iemAImpl_pshufw, pDst, pSrc, bEvilArg);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_ARG(uint64_t *,                  pDst,       0);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+        IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT();
+
+        IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_PREPARE_FPU_USAGE();
+        IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_CALL_MMX_AIMPL_3(iemAImpl_pshufw, pDst, pSrc, bEvilArg);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
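+/* Note: the immediate (bEvil) is the shuffle control: destination word i is
+ * taken from source word (bEvil >> (2*i)) & 3, so e.g. pshufw mm1, mm2, 0x1b
+ * reverses the four words.  The same imm8 encoding drives pshufd, pshufhw
+ * and pshuflw below. */
+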
+/** Opcode 0x66 0x0f 0x70 - pshufd Vx, Wx, Ib */
+FNIEMOP_DEF(iemOp_pshufd_Vx_Wx_Ib)
+{
+    IEMOP_MNEMONIC(pshufd_Vx_Wx_Ib, "pshufd Vx,Wx,Ib");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(PRTUINT128U,         pDst, 0);
+        IEM_MC_ARG(PCRTUINT128U,        pSrc, 1);
+        IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshufd, pDst, pSrc, bEvilArg);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      pSrc, uSrc, 1);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+        IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+
+        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshufd, pDst, pSrc, bEvilArg);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0xf3 0x0f 0x70 - pshufhw Vx, Wx, Ib */
+FNIEMOP_DEF(iemOp_pshufhw_Vx_Wx_Ib)
+{
+    IEMOP_MNEMONIC(pshufhw_Vx_Wx_Ib, "pshufhw Vx,Wx,Ib");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(PRTUINT128U,         pDst, 0);
+        IEM_MC_ARG(PCRTUINT128U,        pSrc, 1);
+        IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshufhw, pDst, pSrc, bEvilArg);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      pSrc, uSrc, 1);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+        IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+
+        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshufhw, pDst, pSrc, bEvilArg);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0xf2 0x0f 0x70 - pshuflw Vx, Wx, Ib */
+FNIEMOP_DEF(iemOp_pshuflw_Vx_Wx_Ib)
+{
+    IEMOP_MNEMONIC(pshuflw_Vx_Wx_Ib, "pshuflw Vx,Wx,Ib");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(PRTUINT128U,         pDst, 0);
+        IEM_MC_ARG(PCRTUINT128U,        pSrc, 1);
+        IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshuflw, pDst, pSrc, bEvilArg);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      pSrc, uSrc, 1);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+        IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+
+        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshuflw, pDst, pSrc, bEvilArg);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x71 11/2. */
+FNIEMOP_STUB_1(iemOp_Grp12_psrlw_Nq_Ib, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0x71 11/2. */
+FNIEMOP_STUB_1(iemOp_Grp12_psrlw_Ux_Ib, uint8_t, bRm);
+
+/** Opcode 0x0f 0x71 11/4. */
+FNIEMOP_STUB_1(iemOp_Grp12_psraw_Nq_Ib, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0x71 11/4. */
+FNIEMOP_STUB_1(iemOp_Grp12_psraw_Ux_Ib, uint8_t, bRm);
+
+/** Opcode 0x0f 0x71 11/6. */
+FNIEMOP_STUB_1(iemOp_Grp12_psllw_Nq_Ib, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0x71 11/6. */
+FNIEMOP_STUB_1(iemOp_Grp12_psllw_Ux_Ib, uint8_t, bRm);
+
+
+/**
+ * Group 12 jump table for register variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnGroup12RegReg[] =
+{
+    /* /0 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /1 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /2 */ iemOp_Grp12_psrlw_Nq_Ib,   iemOp_Grp12_psrlw_Ux_Ib,    iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /3 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /4 */ iemOp_Grp12_psraw_Nq_Ib,   iemOp_Grp12_psraw_Ux_Ib,    iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /5 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /6 */ iemOp_Grp12_psllw_Nq_Ib,   iemOp_Grp12_psllw_Ux_Ib,    iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /7 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8)
+};
+AssertCompile(RT_ELEMENTS(g_apfnGroup12RegReg) == 8*4);
+
+
+/** Opcode 0x0f 0x71. */
+FNIEMOP_DEF(iemOp_Grp12)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        /* register, register */
+        return FNIEMOP_CALL_1(g_apfnGroup12RegReg[ ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                                  + pVCpu->iem.s.idxPrefix], bRm);
+    return FNIEMOP_CALL_1(iemOp_InvalidWithRMNeedImm8, bRm);
+}
+
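+/* Note on the dispatch above: the table packs four entries per /r value,
+ * indexed as reg*4 + idxPrefix.  Judging from the table layout, idxPrefix
+ * encodes the active SIMD prefix as 0 = none, 1 = 0x66, 2 = 0xf3, 3 = 0xf2,
+ * so row /2 maps the unprefixed form to the MMX worker and the 0x66 form to
+ * the SSE one, with the 0xf3/0xf2 columns invalid.  Groups 13 and 14 below
+ * follow the same scheme. */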
+
+/** Opcode 0x0f 0x72 11/2. */
+FNIEMOP_STUB_1(iemOp_Grp13_psrld_Nq_Ib, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0x72 11/2. */
+FNIEMOP_STUB_1(iemOp_Grp13_psrld_Ux_Ib, uint8_t, bRm);
+
+/** Opcode 0x0f 0x72 11/4. */
+FNIEMOP_STUB_1(iemOp_Grp13_psrad_Nq_Ib, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0x72 11/4. */
+FNIEMOP_STUB_1(iemOp_Grp13_psrad_Ux_Ib, uint8_t, bRm);
+
+/** Opcode 0x0f 0x72 11/6. */
+FNIEMOP_STUB_1(iemOp_Grp13_pslld_Nq_Ib, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0x72 11/6. */
+FNIEMOP_STUB_1(iemOp_Grp13_pslld_Ux_Ib, uint8_t, bRm);
+
+
+/**
+ * Group 13 jump table for register variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnGroup13RegReg[] =
+{
+    /* /0 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /1 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /2 */ iemOp_Grp13_psrld_Nq_Ib,   iemOp_Grp13_psrld_Ux_Ib,    iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /3 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /4 */ iemOp_Grp13_psrad_Nq_Ib,   iemOp_Grp13_psrad_Ux_Ib,    iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /5 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /6 */ iemOp_Grp13_pslld_Nq_Ib,   iemOp_Grp13_pslld_Ux_Ib,    iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /7 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8)
+};
+AssertCompile(RT_ELEMENTS(g_apfnGroup13RegReg) == 8*4);
+
+/** Opcode 0x0f 0x72. */
+FNIEMOP_DEF(iemOp_Grp13)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        /* register, register */
+        return FNIEMOP_CALL_1(g_apfnGroup13RegReg[ ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                                  + pVCpu->iem.s.idxPrefix], bRm);
+    return FNIEMOP_CALL_1(iemOp_InvalidWithRMNeedImm8, bRm);
+}
+
+
+/** Opcode 0x0f 0x73 11/2. */
+FNIEMOP_STUB_1(iemOp_Grp14_psrlq_Nq_Ib, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0x73 11/2. */
+FNIEMOP_STUB_1(iemOp_Grp14_psrlq_Ux_Ib, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0x73 11/3. */
+FNIEMOP_STUB_1(iemOp_Grp14_psrldq_Ux_Ib, uint8_t, bRm); //NEXT
+
+/** Opcode 0x0f 0x73 11/6. */
+FNIEMOP_STUB_1(iemOp_Grp14_psllq_Nq_Ib, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0x73 11/6. */
+FNIEMOP_STUB_1(iemOp_Grp14_psllq_Ux_Ib, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0x73 11/7. */
+FNIEMOP_STUB_1(iemOp_Grp14_pslldq_Ux_Ib, uint8_t, bRm); //NEXT
+
+/**
+ * Group 14 jump table for register variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnGroup14RegReg[] =
+{
+    /* /0 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /1 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /2 */ iemOp_Grp14_psrlq_Nq_Ib,     iemOp_Grp14_psrlq_Ux_Ib,  iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /3 */ iemOp_InvalidWithRMNeedImm8, iemOp_Grp14_psrldq_Ux_Ib, iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /4 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /5 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /6 */ iemOp_Grp14_psllq_Nq_Ib,     iemOp_Grp14_psllq_Ux_Ib,  iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /7 */ iemOp_InvalidWithRMNeedImm8, iemOp_Grp14_pslldq_Ux_Ib, iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+};
+AssertCompile(RT_ELEMENTS(g_apfnGroup14RegReg) == 8*4);
+
+
+/** Opcode 0x0f 0x73. */
+FNIEMOP_DEF(iemOp_Grp14)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        /* register, register */
+        return FNIEMOP_CALL_1(g_apfnGroup14RegReg[ ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                                  + pVCpu->iem.s.idxPrefix], bRm);
+    return FNIEMOP_CALL_1(iemOp_InvalidWithRMNeedImm8, bRm);
+}
+
+
+/**
+ * Common worker for MMX instructions of the form:
+ *      pxxx    mm1, mm2/mem64
+ */
+FNIEMOP_DEF_1(iemOpCommonMmx_FullFull_To_Full, PCIEMOPMEDIAF2, pImpl)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
+        /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(uint64_t *,          pDst, 0);
+        IEM_MC_ARG(uint64_t const *,    pSrc, 1);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_PREPARE_FPU_USAGE();
+        IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
+        IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(2, 2);
+        IEM_MC_ARG(uint64_t *,                  pDst,       0);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_ARG_LOCAL_REF(uint64_t const *,  pSrc, uSrc, 1);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+
+        IEM_MC_PREPARE_FPU_USAGE();
+        IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
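+/* Note: this worker follows the usual two-operand MMX shape: the register
+ * form references both MMX registers directly, while the memory form
+ * calculates the effective address, completes decoding, fetches 64 bits and
+ * only then hands the pDst/pSrc pair to the assembly helper via
+ * IEM_MC_CALL_MMX_AIMPL_2. */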
+
+/**
+ * Common worker for SSE2 instructions of the form:
+ *      pxxx    xmm1, xmm2/mem128
+ *
+ * Proper alignment of the 128-bit operand is enforced.
+ * Exceptions type 4. SSE2 cpuid checks.
+ */
+FNIEMOP_DEF_1(iemOpCommonSse2_FullFull_To_Full, PCIEMOPMEDIAF2, pImpl)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(PRTUINT128U,          pDst, 0);
+        IEM_MC_ARG(PCRTUINT128U,         pSrc, 1);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(2, 2);
+        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      pSrc, uSrc, 1);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode      0x0f 0x74 - pcmpeqb Pq, Qq */
+FNIEMOP_DEF(iemOp_pcmpeqb_Pq_Qq)
+{
+    IEMOP_MNEMONIC(pcmpeqb, "pcmpeqb");
+    return FNIEMOP_CALL_1(iemOpCommonMmx_FullFull_To_Full, &g_iemAImpl_pcmpeqb);
+}
+
+/** Opcode 0x66 0x0f 0x74 - pcmpeqb Vx, Wx */
+FNIEMOP_DEF(iemOp_pcmpeqb_Vx_Wx)
+{
+    IEMOP_MNEMONIC(pcmpeqb_Vx_Wx, "pcmpeqb");
+    return FNIEMOP_CALL_1(iemOpCommonSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqb);
+}
+
+/*  Opcode 0xf3 0x0f 0x74 - invalid */
+/*  Opcode 0xf2 0x0f 0x74 - invalid */
+
+
+/** Opcode      0x0f 0x75 - pcmpeqw Pq, Qq */
+FNIEMOP_DEF(iemOp_pcmpeqw_Pq_Qq)
+{
+    IEMOP_MNEMONIC(pcmpeqw, "pcmpeqw");
+    return FNIEMOP_CALL_1(iemOpCommonMmx_FullFull_To_Full, &g_iemAImpl_pcmpeqw);
+}
+
+/** Opcode 0x66 0x0f 0x75 - pcmpeqw Vx, Wx */
+FNIEMOP_DEF(iemOp_pcmpeqw_Vx_Wx)
+{
+    IEMOP_MNEMONIC(pcmpeqw_Vx_Wx, "pcmpeqw");
+    return FNIEMOP_CALL_1(iemOpCommonSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqw);
+}
+
+/*  Opcode 0xf3 0x0f 0x75 - invalid */
+/*  Opcode 0xf2 0x0f 0x75 - invalid */
+
+
+/** Opcode      0x0f 0x76 - pcmpeqd Pq, Qq */
+FNIEMOP_DEF(iemOp_pcmpeqd_Pq_Qq)
+{
+    IEMOP_MNEMONIC(pcmpeqd, "pcmpeqd");
+    return FNIEMOP_CALL_1(iemOpCommonMmx_FullFull_To_Full, &g_iemAImpl_pcmpeqd);
+}
+
+/** Opcode 0x66 0x0f 0x76 - pcmpeqd Vx, Wx */
+FNIEMOP_DEF(iemOp_pcmpeqd_Vx_Wx)
+{
+    IEMOP_MNEMONIC(pcmpeqd_Vx_Wx, "pcmpeqd");
+    return FNIEMOP_CALL_1(iemOpCommonSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqd);
+}
+
+/*  Opcode 0xf3 0x0f 0x76 - invalid */
+/*  Opcode 0xf2 0x0f 0x76 - invalid */
+
+
+/** Opcode      0x0f 0x77 - emms (vex has vzeroall and vzeroupper here) */
+FNIEMOP_STUB(iemOp_emms);
+/*  Opcode 0x66 0x0f 0x77 - invalid */
+/*  Opcode 0xf3 0x0f 0x77 - invalid */
+/*  Opcode 0xf2 0x0f 0x77 - invalid */
+
+/** Opcode      0x0f 0x78 - VMREAD Ey, Gy */
+FNIEMOP_STUB(iemOp_vmread_Ey_Gy);
+/*  Opcode 0x66 0x0f 0x78 - AMD Group 17 */
+FNIEMOP_STUB(iemOp_AmdGrp17);
+/*  Opcode 0xf3 0x0f 0x78 - invalid */
+/*  Opcode 0xf2 0x0f 0x78 - invalid */
+
+/** Opcode      0x0f 0x79 - VMWRITE Gy, Ey */
+FNIEMOP_STUB(iemOp_vmwrite_Gy_Ey);
+/*  Opcode 0x66 0x0f 0x79 - invalid */
+/*  Opcode 0xf3 0x0f 0x79 - invalid */
+/*  Opcode 0xf2 0x0f 0x79 - invalid */
+
+/*  Opcode      0x0f 0x7a - invalid */
+/*  Opcode 0x66 0x0f 0x7a - invalid */
+/*  Opcode 0xf3 0x0f 0x7a - invalid */
+/*  Opcode 0xf2 0x0f 0x7a - invalid */
+
+/*  Opcode      0x0f 0x7b - invalid */
+/*  Opcode 0x66 0x0f 0x7b - invalid */
+/*  Opcode 0xf3 0x0f 0x7b - invalid */
+/*  Opcode 0xf2 0x0f 0x7b - invalid */
+
+/*  Opcode      0x0f 0x7c - invalid */
+/** Opcode 0x66 0x0f 0x7c - haddpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_haddpd_Vpd_Wpd);
+/*  Opcode 0xf3 0x0f 0x7c - invalid */
+/** Opcode 0xf2 0x0f 0x7c - haddps Vps, Wps */
+FNIEMOP_STUB(iemOp_haddps_Vps_Wps);
+
+/*  Opcode      0x0f 0x7d - invalid */
+/** Opcode 0x66 0x0f 0x7d - hsubpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_hsubpd_Vpd_Wpd);
+/*  Opcode 0xf3 0x0f 0x7d - invalid */
+/** Opcode 0xf2 0x0f 0x7d - hsubps Vps, Wps */
+FNIEMOP_STUB(iemOp_hsubps_Vps_Wps);
+
+
+/** Opcode      0x0f 0x7e - movd/q Ey, Pd */
+FNIEMOP_DEF(iemOp_movd_q_Ey_Pd)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        IEMOP_MNEMONIC(movq_Eq_Pq, "movq Eq,Pq");
+    else
+        IEMOP_MNEMONIC(movd_Ed_Pd, "movd Ed,Pd");
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* greg, MMX */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
+        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        {
+            IEM_MC_LOCAL(uint64_t, u64Tmp);
+            IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+            IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tmp);
+        }
+        else
+        {
+            IEM_MC_LOCAL(uint32_t, u32Tmp);
+            IEM_MC_FETCH_MREG_U32(u32Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+            IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tmp);
+        }
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* [mem], MMX */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
+        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        {
+            IEM_MC_LOCAL(uint64_t, u64Tmp);
+            IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+            IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u64Tmp);
+        }
+        else
+        {
+            IEM_MC_LOCAL(uint32_t, u32Tmp);
+            IEM_MC_FETCH_MREG_U32(u32Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+            IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u32Tmp);
+        }
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
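+/* Note: since movd/movq from an MMX register only reads MMX state, the
+ * worker above actualizes the FPU state FOR_READ, whereas the loading
+ * counterpart at opcode 0x0f 0x6e actualizes it FOR_CHANGE because it
+ * modifies the register file. */
+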
+/** Opcode 0x66 0x0f 0x7e - movd/q Ey, Vy */
+FNIEMOP_DEF(iemOp_movd_q_Ey_Vy)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        IEMOP_MNEMONIC(movq_Eq_Wq, "movq Eq,Wq");
+    else
+        IEMOP_MNEMONIC(movd_Ed_Wd, "movd Ed,Wd");
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* greg, XMM */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        {
+            IEM_MC_LOCAL(uint64_t, u64Tmp);
+            IEM_MC_FETCH_XREG_U64(u64Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+            IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tmp);
+        }
+        else
+        {
+            IEM_MC_LOCAL(uint32_t, u32Tmp);
+            IEM_MC_FETCH_XREG_U32(u32Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+            IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tmp);
+        }
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* [mem], XMM */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        {
+            IEM_MC_LOCAL(uint64_t, u64Tmp);
+            IEM_MC_FETCH_XREG_U64(u64Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+            IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u64Tmp);
+        }
+        else
+        {
+            IEM_MC_LOCAL(uint32_t, u32Tmp);
+            IEM_MC_FETCH_XREG_U32(u32Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+            IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u32Tmp);
+        }
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0xf3 0x0f 0x7e - movq Vq, Wq */
+FNIEMOP_STUB(iemOp_movq_Vq_Wq);
+/*  Opcode 0xf2 0x0f 0x7e - invalid */
+
+
+/** Opcode      0x0f 0x7f - movq Qq, Pq */
+FNIEMOP_DEF(iemOp_movq_Qq_Pq)
+{
+    IEMOP_MNEMONIC(movq_Qq_Pq, "movq Qq,Pq");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
+        /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(uint64_t, u64Tmp);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
+        IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_STORE_MREG_U64(bRm & X86_MODRM_RM_MASK, u64Tmp);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint64_t, u64Tmp);
+        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
+
+        IEM_MC_FETCH_MREG_U64(u64Tmp, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u64Tmp);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0x66 0x0f 0x7f - movdqa Wx,Vx */
+FNIEMOP_DEF(iemOp_movdqa_Wx_Vx)
+{
+    IEMOP_MNEMONIC(movdqa_Wdq_Vdq, "movdqa Wx,Vx");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U, u128Tmp);
+        IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U128(u128Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u128Tmp);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0xf3 0x0f 0x7f - movdqu Wx,Vx */
+FNIEMOP_DEF(iemOp_movdqu_Wx_Vx)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    IEMOP_MNEMONIC(movdqu_Wdq_Vdq, "movdqu Wx,Vx");
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U, u128Tmp);
+        IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U128(u128Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U128(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u128Tmp);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
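+/* Note: in the store forms above (movq Qq,Pq / movdqa Wx,Vx / movdqu Wx,Vx)
+ * the operand roles are reversed relative to the 0x0f 0x6f loads:
+ * IEM_MC_COPY_XREG_U128 takes (dst, src), so the r/m register is the
+ * destination, and the aligned variant enforces the 16 byte requirement on
+ * the memory store as well. */
+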
+/*  Opcode 0xf2 0x0f 0x7f - invalid */
+
+
+
+/** Opcode 0x0f 0x80. */
+FNIEMOP_DEF(iemOp_jo_Jv)
+{
+    IEMOP_MNEMONIC(jo_Jv, "jo  Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
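+/* Note: all the two-byte Jcc forms below share this pattern: the effective
+ * operand size selects a 16-bit or 32-bit signed displacement (64-bit mode
+ * defaults to the larger form via IEMOP_HLP_DEFAULT_64BIT_OP_SIZE), the
+ * relevant EFLAGS bits are tested, and the taken path performs a relative
+ * jump while the not-taken path merely advances RIP. */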
+
+/** Opcode 0x0f 0x81. */
+FNIEMOP_DEF(iemOp_jno_Jv)
+{
+    IEMOP_MNEMONIC(jno_Jv, "jno Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x82. */
+FNIEMOP_DEF(iemOp_jc_Jv)
+{
+    IEMOP_MNEMONIC(jc_Jv, "jc/jb/jnae Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x83. */
+FNIEMOP_DEF(iemOp_jnc_Jv)
+{
+    IEMOP_MNEMONIC(jnc_Jv, "jnc/jnb/jae Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x84. */
+FNIEMOP_DEF(iemOp_je_Jv)
+{
+    IEMOP_MNEMONIC(je_Jv, "je/jz Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x85. */
+FNIEMOP_DEF(iemOp_jne_Jv)
+{
+    IEMOP_MNEMONIC(jne_Jv, "jne/jnz Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x86. */
+FNIEMOP_DEF(iemOp_jbe_Jv)
+{
+    IEMOP_MNEMONIC(jbe_Jv, "jbe/jna Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x87. */
+FNIEMOP_DEF(iemOp_jnbe_Jv)
+{
+    IEMOP_MNEMONIC(ja_Jv, "jnbe/ja Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x88. */
+FNIEMOP_DEF(iemOp_js_Jv)
+{
+    IEMOP_MNEMONIC(js_Jv, "js  Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x89. */
+FNIEMOP_DEF(iemOp_jns_Jv)
+{
+    IEMOP_MNEMONIC(jns_Jv, "jns Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8a. */
+FNIEMOP_DEF(iemOp_jp_Jv)
+{
+    IEMOP_MNEMONIC(jp_Jv, "jp  Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8b. */
+FNIEMOP_DEF(iemOp_jnp_Jv)
+{
+    IEMOP_MNEMONIC(jnp_Jv, "jnp Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8c. */
+FNIEMOP_DEF(iemOp_jl_Jv)
+{
+    IEMOP_MNEMONIC(jl_Jv, "jl/jnge Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8d. */
+FNIEMOP_DEF(iemOp_jnl_Jv)
+{
+    IEMOP_MNEMONIC(jge_Jv, "jnl/jge Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8e. */
+FNIEMOP_DEF(iemOp_jle_Jv)
+{
+    IEMOP_MNEMONIC(jle_Jv, "jle/jng Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8f. */
+FNIEMOP_DEF(iemOp_jnle_Jv)
+{
+    IEMOP_MNEMONIC(jg_Jv, "jnle/jg Jv");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pVCpu->iem.s.enmEffOpSize == IEMMODE_16BIT)
+    {
+        int16_t i16Imm; IEM_OPCODE_GET_NEXT_S16(&i16Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16(i16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        int32_t i32Imm; IEM_OPCODE_GET_NEXT_S32(&i32Imm);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32(i32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x90. */
+FNIEMOP_DEF(iemOp_seto_Eb)
+{
+    IEMOP_MNEMONIC(seto_Eb, "seto Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
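+/*
+ * Editor's illustrative sketch, not part of upstream IEM: the ModR/M test
+ * shared by all the SETcc handlers in this block.  Mod == 3 selects a
+ * register target (the low r/m bits extended by REX.B); anything else is a
+ * memory target whose effective address IEM_MC_CALC_RM_EFF_ADDR computes.
+ * The helper name is hypothetical; only the X86_MODRM_* masks from
+ * iprt/x86.h are assumed.
+ */
+#if 0 /* illustration only */
+static bool iemSketchModRmIsRegTarget(uint8_t bRm, uint8_t uRexB, uint8_t *piGReg)
+{
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        *piGReg = (bRm & X86_MODRM_RM_MASK) | uRexB; /* register operand */
+        return true;
+    }
+    return false; /* memory operand; mod encodes the displacement size */
+}
+#endif
+
+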
+/** Opcode 0x0f 0x91. */
+FNIEMOP_DEF(iemOp_setno_Eb)
+{
+    IEMOP_MNEMONIC(setno_Eb, "setno Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x92. */
+FNIEMOP_DEF(iemOp_setc_Eb)
+{
+    IEMOP_MNEMONIC(setc_Eb, "setc Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x93. */
+FNIEMOP_DEF(iemOp_setnc_Eb)
+{
+    IEMOP_MNEMONIC(setnc_Eb, "setnc Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x94. */
+FNIEMOP_DEF(iemOp_sete_Eb)
+{
+    IEMOP_MNEMONIC(sete_Eb, "sete Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x95. */
+FNIEMOP_DEF(iemOp_setne_Eb)
+{
+    IEMOP_MNEMONIC(setne_Eb, "setne Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x96. */
+FNIEMOP_DEF(iemOp_setbe_Eb)
+{
+    IEMOP_MNEMONIC(setbe_Eb, "setbe Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x97. */
+FNIEMOP_DEF(iemOp_setnbe_Eb)
+{
+    IEMOP_MNEMONIC(setnbe_Eb, "setnbe Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x98. */
+FNIEMOP_DEF(iemOp_sets_Eb)
+{
+    IEMOP_MNEMONIC(sets_Eb, "sets Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x99. */
+FNIEMOP_DEF(iemOp_setns_Eb)
+{
+    IEMOP_MNEMONIC(setns_Eb, "setns Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x9a. */
+FNIEMOP_DEF(iemOp_setp_Eb)
+{
+    IEMOP_MNEMONIC(setp_Eb, "setp Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x9b. */
+FNIEMOP_DEF(iemOp_setnp_Eb)
+{
+    IEMOP_MNEMONIC(setnp_Eb, "setnp Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x9c. */
+FNIEMOP_DEF(iemOp_setl_Eb)
+{
+    IEMOP_MNEMONIC(setl_Eb, "setl Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x9d. */
+FNIEMOP_DEF(iemOp_setnl_Eb)
+{
+    IEMOP_MNEMONIC(setnl_Eb, "setnl Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x9e. */
+FNIEMOP_DEF(iemOp_setle_Eb)
+{
+    IEMOP_MNEMONIC(setle_Eb, "setle Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x9f. */
+FNIEMOP_DEF(iemOp_setnle_Eb)
+{
+    IEMOP_MNEMONIC(setnle_Eb, "setnle Eb");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Encoding test: Check if the 'reg' field is ignored or decoded in
+     *        any way. AMD says it's "unused", whatever that means.  We're
+     *        ignoring it for now. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_GREG_U8_CONST((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        } IEM_MC_ELSE() {
+            IEM_MC_STORE_MEM_U8_CONST(pVCpu->iem.s.iEffSeg, GCPtrEffDst, 1);
+        } IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Common 'push segment-register' helper.
+ */
+FNIEMOP_DEF_1(iemOpCommonPushSReg, uint8_t, iReg)
+{
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    Assert(iReg >= X86_SREG_FS || pVCpu->iem.s.enmCpuMode != IEMMODE_64BIT); /* Only FS/GS pushes are encodable in 64-bit mode. */
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    switch (pVCpu->iem.s.enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint16_t, u16Value);
+            IEM_MC_FETCH_SREG_U16(u16Value, iReg);
+            IEM_MC_PUSH_U16(u16Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint32_t, u32Value);
+            IEM_MC_FETCH_SREG_ZX_U32(u32Value, iReg);
+            IEM_MC_PUSH_U32_SREG(u32Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint64_t, u64Value);
+            IEM_MC_FETCH_SREG_ZX_U64(u64Value, iReg);
+            IEM_MC_PUSH_U64(u64Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
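+/*
+ * Editor's illustrative sketch, not part of upstream IEM: what the common
+ * helper above does for each effective operand size, assuming a flat stack
+ * buffer and ignoring segmentation and faults.  The 32-bit case goes through
+ * the dedicated IEM_MC_PUSH_U32_SREG microcode because real CPUs disagree on
+ * whether a 32-bit segment push writes the full dword or only the low word.
+ */
+#if 0 /* illustration only; memcpy from <string.h>, little-endian host assumed */
+static void iemSketchPushSReg(uint8_t *pbStack, uint64_t *puRsp, uint16_t uSel, IEMMODE enmOpSize)
+{
+    unsigned const cbPush = enmOpSize == IEMMODE_16BIT ? 2 : enmOpSize == IEMMODE_32BIT ? 4 : 8;
+    uint64_t       uValue = uSel;           /* zero extended selector */
+    *puRsp -= cbPush;                       /* stack grows downwards  */
+    memcpy(&pbStack[*puRsp], &uValue, cbPush);
+}
+#endif
+
+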
+/** Opcode 0x0f 0xa0. */
+FNIEMOP_DEF(iemOp_push_fs)
+{
+    IEMOP_MNEMONIC(push_fs, "push fs");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_FS);
+}
+
+
+/** Opcode 0x0f 0xa1. */
+FNIEMOP_DEF(iemOp_pop_fs)
+{
+    IEMOP_MNEMONIC(pop_fs, "pop fs");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_FS, pVCpu->iem.s.enmEffOpSize);
+}
+
+
+/** Opcode 0x0f 0xa2. */
+FNIEMOP_DEF(iemOp_cpuid)
+{
+    IEMOP_MNEMONIC(cpuid, "cpuid");
+    IEMOP_HLP_MIN_486(); /* not all 486s have it. */
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_cpuid);
+}
+
+
+/**
+ * Common worker for iemOp_bt_Ev_Gv, iemOp_btc_Ev_Gv, iemOp_btr_Ev_Gv and
+ * iemOp_bts_Ev_Gv.
+ */
+FNIEMOP_DEF_1(iemOpCommonBit_Ev_Gv, PCIEMOPBINSIZES, pImpl)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register destination. */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
+                IEM_MC_ARG(uint16_t,        u16Src,                 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+
+                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_AND_LOCAL_U16(u16Src, 0xf);
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
+                IEM_MC_ARG(uint32_t,        u32Src,                 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+
+                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_AND_LOCAL_U32(u32Src, 0x1f);
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
+                IEM_MC_ARG(uint64_t,        u64Src,                 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+
+                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_AND_LOCAL_U64(u64Src, 0x3f);
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /* memory destination. */
+
+        uint32_t fAccess;
+        if (pImpl->pfnLockedU16)
+            fAccess = IEM_ACCESS_DATA_RW;
+        else /* BT */
+            fAccess = IEM_ACCESS_DATA_R;
+
+        /** @todo test negative bit offsets! */
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
+                IEM_MC_ARG(uint16_t,                u16Src,                 1);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+                IEM_MC_LOCAL(int16_t,               i16AddrAdj);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                if (pImpl->pfnLockedU16)
+                    IEMOP_HLP_DONE_DECODING();
+                else
+                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_ASSIGN(i16AddrAdj, u16Src);
+                IEM_MC_AND_ARG_U16(u16Src, 0x0f);
+                IEM_MC_SAR_LOCAL_S16(i16AddrAdj, 4);
+                IEM_MC_SHL_LOCAL_S16(i16AddrAdj, 1);
+                IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(GCPtrEffDst, i16AddrAdj);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+
+                IEM_MC_MEM_MAP(pu16Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, fAccess);
+
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
+                IEM_MC_ARG(uint32_t,                u32Src,                 1);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+                IEM_MC_LOCAL(int32_t,               i32AddrAdj);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                if (pImpl->pfnLockedU16)
+                    IEMOP_HLP_DONE_DECODING();
+                else
+                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_ASSIGN(i32AddrAdj, u32Src);
+                IEM_MC_AND_ARG_U32(u32Src, 0x1f);
+                IEM_MC_SAR_LOCAL_S32(i32AddrAdj, 5);
+                IEM_MC_SHL_LOCAL_S32(i32AddrAdj, 2);
+                IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(GCPtrEffDst, i32AddrAdj);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+
+                IEM_MC_MEM_MAP(pu32Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, fAccess);
+
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
+                IEM_MC_ARG(uint64_t,                u64Src,                 1);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+                IEM_MC_LOCAL(int64_t,               i64AddrAdj);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                if (pImpl->pfnLockedU16)
+                    IEMOP_HLP_DONE_DECODING();
+                else
+                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_ASSIGN(i64AddrAdj, u64Src);
+                IEM_MC_AND_ARG_U64(u64Src, 0x3f);
+                IEM_MC_SAR_LOCAL_S64(i64AddrAdj, 6);
+                IEM_MC_SHL_LOCAL_S64(i64AddrAdj, 3);
+                IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(GCPtrEffDst, i64AddrAdj);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+
+                IEM_MC_MEM_MAP(pu64Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, fAccess);
+
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
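+/*
+ * Editor's worked example, not part of upstream IEM: the address adjustment
+ * the memory path above performs.  BT/BTS/BTR/BTC with a register bit offset
+ * may address bits outside the operand at GCPtrEffDst, so the offset is
+ * split into a displacement (arithmetic shift right by log2 of the operand
+ * bit width, then scaled back to bytes) and a bit index (the low bits).
+ * With a 16-bit operand at address X:
+ *     bit offset  19: 19 >> 4 = 1, << 1 = +2 bytes; 19 & 15 = 3
+ *                     -> bit 3 of the word at X + 2.
+ *     bit offset -13: -13 >> 4 = -1 (SAR rounds toward minus infinity),
+ *                     << 1 = -2 bytes; -13 & 15 = 3
+ *                     -> bit 3 of the word at X - 2, which is why the
+ *                     SAR/SHL pair is used rather than a plain division.
+ */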
+/** Opcode 0x0f 0xa3. */
+FNIEMOP_DEF(iemOp_bt_Ev_Gv)
+{
+    IEMOP_MNEMONIC(bt_Ev_Gv, "bt  Ev,Gv");
+    IEMOP_HLP_MIN_386();
+    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_bt);
+}
+
+
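+/*
+ * Editor's illustrative sketch, not part of upstream IEM: the core of the
+ * g_iemAImpl_bt register form dispatched above.  BT only sets CF from the
+ * selected bit; BTS/BTR/BTC additionally set, clear or toggle it.  The
+ * helper name is hypothetical.
+ */
+#if 0 /* illustration only */
+static uint32_t iemSketchBt32(uint32_t uDst, uint32_t uBitOffset, uint32_t *pfEFlags)
+{
+    uint32_t const iBit = uBitOffset & 31;          /* register forms mask the offset */
+    *pfEFlags = (*pfEFlags & ~X86_EFL_CF) | ((uDst >> iBit) & X86_EFL_CF);
+    return uDst;                                    /* BT leaves the operand untouched */
+}
+#endif
+
+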
+/**
+ * Common worker for iemOp_shrd_Ev_Gv_Ib and iemOp_shld_Ev_Gv_Ib.
+ */
+FNIEMOP_DEF_1(iemOpCommonShldShrd_Ib, PCIEMOPSHIFTDBLSIZES, pImpl)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF | X86_EFL_OF);
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(4, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
+                IEM_MC_ARG(uint16_t,        u16Src,                 1);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, /*=*/cShift, 2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
+
+                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(4, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
+                IEM_MC_ARG(uint32_t,        u32Src,                 1);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, /*=*/cShift, 2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
+
+                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
+
+                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(4, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
+                IEM_MC_ARG(uint64_t,        u64Src,                 1);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, /*=*/cShift, 2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
+
+                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(4, 2);
+                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
+                IEM_MC_ARG(uint16_t,                u16Src,                 1);
+                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
+                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
+                IEM_MC_ASSIGN(cShiftArg, cShift);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(4, 2);
+                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
+                IEM_MC_ARG(uint32_t,                u32Src,                 1);
+                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
+                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
+                IEM_MC_ASSIGN(cShiftArg, cShift);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(4, 2);
+                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
+                IEM_MC_ARG(uint64_t,                u64Src,                 1);
+                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
+                uint8_t cShift; IEM_OPCODE_GET_NEXT_U8(&cShift);
+                IEM_MC_ASSIGN(cShiftArg, cShift);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+/**
+ * Common worker for iemOp_shrd_Ev_Gv_CL and iemOp_shld_Ev_Gv_CL.
+ */
+FNIEMOP_DEF_1(iemOpCommonShldShrd_CL, PCIEMOPSHIFTDBLSIZES, pImpl)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_AF | X86_EFL_OF);
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(4, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
+                IEM_MC_ARG(uint16_t,        u16Src,                 1);
+                IEM_MC_ARG(uint8_t,         cShiftArg,              2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
+
+                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(4, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
+                IEM_MC_ARG(uint32_t,        u32Src,                 1);
+                IEM_MC_ARG(uint8_t,         cShiftArg,              2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
+
+                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
+
+                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(4, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
+                IEM_MC_ARG(uint64_t,        u64Src,                 1);
+                IEM_MC_ARG(uint8_t,         cShiftArg,              2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
+
+                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(4, 2);
+                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
+                IEM_MC_ARG(uint16_t,                u16Src,                 1);
+                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU16, pu16Dst, u16Src, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(4, 2);
+                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
+                IEM_MC_ARG(uint32_t,                u32Src,                 1);
+                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU32, pu32Dst, u32Src, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(4, 2);
+                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
+                IEM_MC_ARG(uint64_t,                u64Src,                 1);
+                IEM_MC_ARG(uint8_t,                 cShiftArg,              2);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        3);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                IEM_MC_CALL_VOID_AIMPL_4(pImpl->pfnNormalU64, pu64Dst, u64Src, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+
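+/*
+ * Editor's illustrative sketch, not part of upstream IEM: reference
+ * semantics of the double-precision shifts the two workers above dispatch,
+ * shown for the 32-bit case with the count masked modulo the operand width.
+ * The real g_iemAImpl_shld/shrd workers also compute the EFLAGS updates,
+ * which this sketch omits.  Note that the memory forms of the immediate
+ * variant pass cbImm=1 to IEM_MC_CALC_RM_EFF_ADDR so 64-bit RIP-relative
+ * addressing accounts for the trailing Ib byte still to be fetched.
+ */
+#if 0 /* illustration only */
+static uint32_t iemSketchShld32(uint32_t uDst, uint32_t uSrc, uint8_t cShift)
+{
+    cShift &= 31;
+    if (!cShift)
+        return uDst;                                    /* also avoids uSrc >> 32 */
+    return (uDst << cShift) | (uSrc >> (32 - cShift));  /* src fills from the right */
+}
+
+static uint32_t iemSketchShrd32(uint32_t uDst, uint32_t uSrc, uint8_t cShift)
+{
+    cShift &= 31;
+    if (!cShift)
+        return uDst;
+    return (uDst >> cShift) | (uSrc << (32 - cShift));  /* src fills from the left */
+}
+#endif
+
+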
+/** Opcode 0x0f 0xa4. */
+FNIEMOP_DEF(iemOp_shld_Ev_Gv_Ib)
+{
+    IEMOP_MNEMONIC(shld_Ev_Gv_Ib, "shld Ev,Gv,Ib");
+    IEMOP_HLP_MIN_386();
+    return FNIEMOP_CALL_1(iemOpCommonShldShrd_Ib, &g_iemAImpl_shld);
+}
+
+
+/** Opcode 0x0f 0xa5. */
+FNIEMOP_DEF(iemOp_shld_Ev_Gv_CL)
+{
+    IEMOP_MNEMONIC(shld_Ev_Gv_CL, "shld Ev,Gv,CL");
+    IEMOP_HLP_MIN_386();
+    return FNIEMOP_CALL_1(iemOpCommonShldShrd_CL, &g_iemAImpl_shld);
+}
+
+
+/** Opcode 0x0f 0xa8. */
+FNIEMOP_DEF(iemOp_push_gs)
+{
+    IEMOP_MNEMONIC(push_gs, "push gs");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_GS);
+}
+
+
+/** Opcode 0x0f 0xa9. */
+FNIEMOP_DEF(iemOp_pop_gs)
+{
+    IEMOP_MNEMONIC(pop_gs, "pop gs");
+    IEMOP_HLP_MIN_386();
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_pop_Sreg, X86_SREG_GS, pVCpu->iem.s.enmEffOpSize);
+}
+
+
+/** Opcode 0x0f 0xaa. */
+FNIEMOP_DEF(iemOp_rsm)
+{
+    IEMOP_MNEMONIC(rsm, "rsm");
+    IEMOP_HLP_SVM_CTRL_INTERCEPT(pVCpu, SVM_CTRL_INTERCEPT_RSM, SVM_EXIT_RSM, 0, 0);
+    /** @todo rsm - for the regular case (above handles only the SVM nested-guest
+     *        intercept). */
+    IEMOP_BITCH_ABOUT_STUB();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0xab. */
+FNIEMOP_DEF(iemOp_bts_Ev_Gv)
+{
+    IEMOP_MNEMONIC(bts_Ev_Gv, "bts Ev,Gv");
+    IEMOP_HLP_MIN_386();
+    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_bts);
+}
+
+
+/** Opcode 0x0f 0xac. */
+FNIEMOP_DEF(iemOp_shrd_Ev_Gv_Ib)
+{
+    IEMOP_MNEMONIC(shrd_Ev_Gv_Ib, "shrd Ev,Gv,Ib");
+    IEMOP_HLP_MIN_386();
+    return FNIEMOP_CALL_1(iemOpCommonShldShrd_Ib, &g_iemAImpl_shrd);
+}
+
+
+/** Opcode 0x0f 0xad. */
+FNIEMOP_DEF(iemOp_shrd_Ev_Gv_CL)
+{
+    IEMOP_MNEMONIC(shrd_Ev_Gv_CL, "shrd Ev,Gv,CL");
+    IEMOP_HLP_MIN_386();
+    return FNIEMOP_CALL_1(iemOpCommonShldShrd_CL, &g_iemAImpl_shrd);
+}
+
+
+/** Opcode 0x0f 0xae mem/0. */
+FNIEMOP_DEF_1(iemOp_Grp15_fxsave,   uint8_t, bRm)
+{
+    IEMOP_MNEMONIC(fxsave, "fxsave m512");
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fFxSaveRstor)
+        return IEMOP_RAISE_INVALID_OPCODE();
+
+    IEM_MC_BEGIN(3, 1);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
+    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_3(iemCImpl_fxsave, iEffSeg, GCPtrEff, enmEffOpSize);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0xae mem/1. */
+FNIEMOP_DEF_1(iemOp_Grp15_fxrstor,  uint8_t, bRm)
+{
+    IEMOP_MNEMONIC(fxrstor, "fxrstor m512");
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fFxSaveRstor)
+        return IEMOP_RAISE_INVALID_OPCODE();
+
+    IEM_MC_BEGIN(3, 1);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
+    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_3(iemCImpl_fxrstor, iEffSeg, GCPtrEff, enmEffOpSize);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
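+
+
+/*
+ * Both helpers above hand the C implementation a 512-byte m512 area.  For
+ * orientation, a minimal sketch of that area per the FXSAVE layout in the
+ * CPU manuals (non-64-bit form); the struct and field names are
+ * illustrative, not the types iemCImpl_fxsave/fxrstor actually use.
+ */
+#if 0 /* illustrative sketch, not built */
+#include <stdint.h>
+typedef struct FXSAVEAREASKETCH
+{
+    uint16_t u16Fcw;            /* 0x00: FPU control word. */
+    uint16_t u16Fsw;            /* 0x02: FPU status word. */
+    uint8_t  u8Ftw;             /* 0x04: abridged FPU tag word. */
+    uint8_t  u8Rsvd1;           /* 0x05 */
+    uint16_t u16Fop;            /* 0x06: last FPU opcode. */
+    uint32_t u32Fip;            /* 0x08: FPU instruction pointer. */
+    uint16_t u16Fcs, u16Rsvd2;  /* 0x0c */
+    uint32_t u32Fdp;            /* 0x10: FPU data pointer. */
+    uint16_t u16Fds, u16Rsvd3;  /* 0x14 */
+    uint32_t u32Mxcsr;          /* 0x18: MXCSR, cf. ldmxcsr/stmxcsr below. */
+    uint32_t u32MxcsrMask;      /* 0x1c: valid MXCSR bits. */
+    uint8_t  abSt[8][16];       /* 0x20: ST0..ST7 / MM0..MM7, 16 bytes each. */
+    uint8_t  abXmm[16][16];     /* 0xa0: XMM0..XMM15 (only 8 outside 64-bit mode). */
+    uint8_t  abRsvd[96];        /* 0x1a0: reserved/available. */
+} FXSAVEAREASKETCH;
+#endif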
+
+
+/**
+ * @opmaps      grp15
+ * @opcode      !11/2
+ * @oppfx       none
+ * @opcpuid     sse
+ * @opgroup     og_sse_mxcsrsm
+ * @opxcpttype  5
+ * @optest      op1=0      -> mxcsr=0
+ * @optest      op1=0x2083 -> mxcsr=0x2083
+ * @optest      op1=0xfffffffe -> value.xcpt=0xd
+ * @optest      op1=0x2083 cr0|=ts -> value.xcpt=0x7
+ * @optest      op1=0x2083 cr0|=em -> value.xcpt=0x6
+ * @optest      op1=0x2083 cr0|=mp -> mxcsr=0x2083
+ * @optest      op1=0x2083 cr4&~=osfxsr -> value.xcpt=0x6
+ * @optest      op1=0x2083 cr0|=ts,em -> value.xcpt=0x6
+ * @optest      op1=0x2083 cr0|=em cr4&~=osfxsr -> value.xcpt=0x6
+ * @optest      op1=0x2083 cr0|=ts,em cr4&~=osfxsr -> value.xcpt=0x6
+ * @optest      op1=0x2083 cr0|=ts,em,mp cr4&~=osfxsr -> value.xcpt=0x6
+ */
+FNIEMOP_DEF_1(iemOp_Grp15_ldmxcsr, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC1(M_MEM, LDMXCSR, ldmxcsr, MdRO, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse)
+        return IEMOP_RAISE_INVALID_OPCODE();
+
+    IEM_MC_BEGIN(2, 0);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_2(iemCImpl_ldmxcsr, iEffSeg, GCPtrEff);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * @opmaps      grp15
+ * @opcode      !11/3
+ * @oppfx       none
+ * @opcpuid     sse
+ * @opgroup     og_sse_mxcsrsm
+ * @opxcpttype  5
+ * @optest      mxcsr=0      -> op1=0
+ * @optest      mxcsr=0x2083 -> op1=0x2083
+ * @optest      mxcsr=0x2084 cr0|=ts -> value.xcpt=0x7
+ * @optest      mxcsr=0x2085 cr0|=em -> value.xcpt=0x6
+ * @optest      mxcsr=0x2086 cr0|=mp -> op1=0x2086
+ * @optest      mxcsr=0x2087 cr4&~=osfxsr -> value.xcpt=0x6
+ * @optest      mxcsr=0x2088 cr0|=ts,em -> value.xcpt=0x6
+ * @optest      mxcsr=0x2089 cr0|=em cr4&~=osfxsr -> value.xcpt=0x6
+ * @optest      mxcsr=0x208a cr0|=ts,em cr4&~=osfxsr -> value.xcpt=0x6
+ * @optest      mxcsr=0x208b cr0|=ts,em,mp cr4&~=osfxsr -> value.xcpt=0x6
+ */
+FNIEMOP_DEF_1(iemOp_Grp15_stmxcsr,  uint8_t, bRm)
+{
+    IEMOP_MNEMONIC1(M_MEM, STMXCSR, stmxcsr, MdWO, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse)
+        return IEMOP_RAISE_INVALID_OPCODE();
+
+    IEM_MC_BEGIN(2, 0);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_2(iemCImpl_stmxcsr, iEffSeg, GCPtrEff);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
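+
+
+/*
+ * The @optest rows above encode the fault ordering shared by ldmxcsr and
+ * stmxcsr.  A minimal sketch of those checks as a hypothetical C helper
+ * (not the IEM decoder path); the exception numbers follow the rows:
+ * 0x6 = #UD, 0x7 = #NM, 0xd = #GP.
+ */
+#if 0 /* illustrative sketch, not built */
+#include <stdint.h>
+static int sketchMxcsrXcpt(int fCr0Em, int fCr0Ts, int fCr4OsFxSR,
+                           uint32_t uNewMxcsr, uint32_t uMxcsrMask)
+{
+    if (!fCr4OsFxSR || fCr0Em)      /* CR4.OSFXSR clear or CR0.EM set */
+        return 0x6;                 /* -> #UD */
+    if (fCr0Ts)                     /* CR0.TS set */
+        return 0x7;                 /* -> #NM */
+    if (uNewMxcsr & ~uMxcsrMask)    /* reserved bits set (ldmxcsr only) */
+        return 0xd;                 /* -> #GP(0) */
+    return -1;                      /* no fault; CR0.MP does not matter */
+}
+#endif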
+
+
+/**
+ * @opmaps      grp15
+ * @opcode      !11/4
+ * @oppfx       none
+ * @opcpuid     xsave
+ * @opgroup     og_system
+ * @opxcpttype  none
+ */
+FNIEMOP_DEF_1(iemOp_Grp15_xsave,    uint8_t, bRm)
+{
+    IEMOP_MNEMONIC1(M_MEM, XSAVE, xsave, MRW, DISOPTYPE_HARMLESS, 0);
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fXSaveRstor)
+        return IEMOP_RAISE_INVALID_OPCODE();
+
+    IEM_MC_BEGIN(3, 0);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
+    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_3(iemCImpl_xsave, iEffSeg, GCPtrEff, enmEffOpSize);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * @opmaps      grp15
+ * @opcode      !11/5
+ * @oppfx       none
+ * @opcpuid     xsave
+ * @opgroup     og_system
+ * @opxcpttype  none
+ */
+FNIEMOP_DEF_1(iemOp_Grp15_xrstor,   uint8_t, bRm)
+{
+    IEMOP_MNEMONIC1(M_MEM, XRSTOR, xrstor, MRO, DISOPTYPE_HARMLESS, 0);
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fXSaveRstor)
+        return IEMOP_RAISE_INVALID_OPCODE();
+
+    IEM_MC_BEGIN(3, 0);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
+    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 2);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_3(iemCImpl_xrstor, iEffSeg, GCPtrEff, enmEffOpSize);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0xae mem/6. */
+FNIEMOP_UD_STUB_1(iemOp_Grp15_xsaveopt, uint8_t, bRm);
+
+/**
+ * @opmaps      grp15
+ * @opcode      !11/7
+ * @oppfx       none
+ * @opcpuid     clfsh
+ * @opgroup     og_cachectl
+ * @optest      op1=1 ->
+ */
+FNIEMOP_DEF_1(iemOp_Grp15_clflush,  uint8_t, bRm)
+{
+    IEMOP_MNEMONIC1(M_MEM, CLFLUSH, clflush, MbRO, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fClFlush)
+        return FNIEMOP_CALL_1(iemOp_InvalidWithRMAllNeeded, bRm);
+
+    IEM_MC_BEGIN(2, 0);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_2(iemCImpl_clflush_clflushopt, iEffSeg, GCPtrEff);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * @opmaps      grp15
+ * @opcode      !11/7
+ * @oppfx       0x66
+ * @opcpuid     clflushopt
+ * @opgroup     og_cachectl
+ * @optest      op1=1 ->
+ */
+FNIEMOP_DEF_1(iemOp_Grp15_clflushopt,  uint8_t, bRm)
+{
+    IEMOP_MNEMONIC1(M_MEM, CLFLUSHOPT, clflushopt, MbRO, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fClFlushOpt)
+        return FNIEMOP_CALL_1(iemOp_InvalidWithRMAllNeeded, bRm);
+
+    IEM_MC_BEGIN(2, 0);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_2(iemCImpl_clflush_clflushopt, iEffSeg, GCPtrEff);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0xae 11b/5. */
+FNIEMOP_DEF_1(iemOp_Grp15_lfence,   uint8_t, bRm)
+{
+    RT_NOREF_PV(bRm);
+    IEMOP_MNEMONIC(lfence, "lfence");
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
+        return IEMOP_RAISE_INVALID_OPCODE();
+
+    IEM_MC_BEGIN(0, 0);
+    if (IEM_GET_HOST_CPU_FEATURES(pVCpu)->fSse2)
+        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_lfence);
+    else
+        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_alt_mem_fence);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0xae 11b/6. */
+FNIEMOP_DEF_1(iemOp_Grp15_mfence,   uint8_t, bRm)
+{
+    RT_NOREF_PV(bRm);
+    IEMOP_MNEMONIC(mfence, "mfence");
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
+        return IEMOP_RAISE_INVALID_OPCODE();
+
+    IEM_MC_BEGIN(0, 0);
+    if (IEM_GET_HOST_CPU_FEATURES(pVCpu)->fSse2)
+        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_mfence);
+    else
+        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_alt_mem_fence);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0xae 11b/7. */
+FNIEMOP_DEF_1(iemOp_Grp15_sfence,   uint8_t, bRm)
+{
+    RT_NOREF_PV(bRm);
+    IEMOP_MNEMONIC(sfence, "sfence");
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
+        return IEMOP_RAISE_INVALID_OPCODE();
+
+    IEM_MC_BEGIN(0, 0);
+    if (IEM_GET_HOST_CPU_FEATURES(pVCpu)->fSse2)
+        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_sfence);
+    else
+        IEM_MC_CALL_VOID_AIMPL_0(iemAImpl_alt_mem_fence);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
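+
+
+/*
+ * All three fence decoders above fall back to iemAImpl_alt_mem_fence when
+ * the host CPU lacks SSE2.  The classic fence substitute on such CPUs is a
+ * LOCKed read-modify-write; the hypothetical gcc/clang sketch below
+ * illustrates the idea and is not the actual IEM assembly helper.
+ */
+#if 0 /* illustrative sketch, not built */
+static void sketchAltMemFence(void)
+{
+    int volatile iTmp = 0;
+    /* A LOCKed instruction orders all prior loads and stores on x86,
+       approximating mfence (and thus lfence/sfence as well). */
+    __asm__ __volatile__("lock; addl $0, %0" : "+m" (iTmp) : : "memory");
+}
+#endif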
+
+
+/** Opcode 0xf3 0x0f 0xae 11b/0. */
+FNIEMOP_UD_STUB_1(iemOp_Grp15_rdfsbase, uint8_t, bRm);
+
+/** Opcode 0xf3 0x0f 0xae 11b/1. */
+FNIEMOP_UD_STUB_1(iemOp_Grp15_rdgsbase, uint8_t, bRm);
+
+/** Opcode 0xf3 0x0f 0xae 11b/2. */
+FNIEMOP_UD_STUB_1(iemOp_Grp15_wrfsbase, uint8_t, bRm);
+
+/** Opcode 0xf3 0x0f 0xae 11b/3. */
+FNIEMOP_UD_STUB_1(iemOp_Grp15_wrgsbase, uint8_t, bRm);
+
+
+/**
+ * Group 15 jump table for register variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnGroup15RegReg[] =
+{   /* pfx:  none,                          066h,                           0f3h,                           0f2h */
+    /* /0 */ iemOp_InvalidWithRM,           iemOp_InvalidWithRM,            iemOp_Grp15_rdfsbase,           iemOp_InvalidWithRM,
+    /* /1 */ iemOp_InvalidWithRM,           iemOp_InvalidWithRM,            iemOp_Grp15_rdgsbase,           iemOp_InvalidWithRM,
+    /* /2 */ iemOp_InvalidWithRM,           iemOp_InvalidWithRM,            iemOp_Grp15_wrfsbase,           iemOp_InvalidWithRM,
+    /* /3 */ iemOp_InvalidWithRM,           iemOp_InvalidWithRM,            iemOp_Grp15_wrgsbase,           iemOp_InvalidWithRM,
+    /* /4 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /5 */ iemOp_Grp15_lfence,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /6 */ iemOp_Grp15_mfence,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /7 */ iemOp_Grp15_sfence,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+};
+AssertCompile(RT_ELEMENTS(g_apfnGroup15RegReg) == 8*4);
+
+
+/**
+ * Group 15 jump table for memory variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnGroup15MemReg[] =
+{   /* pfx:  none,                          066h,                           0f3h,                           0f2h */
+    /* /0 */ iemOp_Grp15_fxsave,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /1 */ iemOp_Grp15_fxrstor,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /2 */ iemOp_Grp15_ldmxcsr,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /3 */ iemOp_Grp15_stmxcsr,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /4 */ iemOp_Grp15_xsave,             iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /5 */ iemOp_Grp15_xrstor,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /6 */ iemOp_Grp15_xsaveopt,          iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /7 */ iemOp_Grp15_clflush,           iemOp_Grp15_clflushopt,         iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+};
+AssertCompile(RT_ELEMENTS(g_apfnGroup15MemReg) == 8*4);
+
+
+/** Opcode 0x0f 0xae. */
+FNIEMOP_DEF(iemOp_Grp15)
+{
+    IEMOP_HLP_MIN_586(); /* Not entirely accurate nor needed, but useful for debugging 286 code. */
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        /* register, register */
+        return FNIEMOP_CALL_1(g_apfnGroup15RegReg[ ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                                  + pVCpu->iem.s.idxPrefix], bRm);
+    /* memory, register */
+    return FNIEMOP_CALL_1(g_apfnGroup15MemReg[ ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                              + pVCpu->iem.s.idxPrefix], bRm);
+}
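+
+
+/*
+ * Both group 15 tables above are flat 8*4 arrays: one row per ModR/M reg
+ * value, one column per mandatory prefix (none, 066h, 0f3h, 0f2h), which is
+ * what pVCpu->iem.s.idxPrefix encodes.  A minimal standalone sketch of the
+ * index computation the dispatcher uses:
+ */
+#if 0 /* illustrative sketch, not built */
+#include <stdint.h>
+static unsigned sketchGroup15Index(uint8_t bRm, unsigned idxPrefix)
+{
+    unsigned const iReg = (bRm >> 3) & 7;   /* X86_MODRM_REG_SHIFT/SMASK */
+    return iReg * 4 + idxPrefix;            /* flat index into the 8*4 table */
+}
+/* E.g. bRm=0xe8 (mod=3, reg=5) with no prefix -> index 20 -> lfence. */
+#endif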
+
+
+/** Opcode 0x0f 0xaf. */
+FNIEMOP_DEF(iemOp_imul_Gv_Ev)
+{
+    IEMOP_MNEMONIC(imul_Gv_Ev, "imul Gv,Ev");
+    IEMOP_HLP_MIN_386();
+    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_imul_two);
+}
+
+
+/** Opcode 0x0f 0xb0. */
+FNIEMOP_DEF(iemOp_cmpxchg_Eb_Gb)
+{
+    IEMOP_MNEMONIC(cmpxchg_Eb_Gb, "cmpxchg Eb,Gb");
+    IEMOP_HLP_MIN_486();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING();
+        IEM_MC_BEGIN(4, 0);
+        IEM_MC_ARG(uint8_t *,       pu8Dst,                 0);
+        IEM_MC_ARG(uint8_t *,       pu8Al,                  1);
+        IEM_MC_ARG(uint8_t,         u8Src,                  2);
+        IEM_MC_ARG(uint32_t *,      pEFlags,                3);
+
+        IEM_MC_FETCH_GREG_U8(u8Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_REF_GREG_U8(pu8Al, X86_GREG_xAX);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8, pu8Dst, pu8Al, u8Src, pEFlags);
+        else
+            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8_locked, pu8Dst, pu8Al, u8Src, pEFlags);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        IEM_MC_BEGIN(4, 3);
+        IEM_MC_ARG(uint8_t *,       pu8Dst,                 0);
+        IEM_MC_ARG(uint8_t *,       pu8Al,                  1);
+        IEM_MC_ARG(uint8_t,         u8Src,                  2);
+        IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
+        IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
+        IEM_MC_LOCAL(uint8_t,       u8Al);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING();
+        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+        IEM_MC_FETCH_GREG_U8(u8Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_FETCH_GREG_U8(u8Al, X86_GREG_xAX);
+        IEM_MC_FETCH_EFLAGS(EFlags);
+        IEM_MC_REF_LOCAL(pu8Al, u8Al);
+        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8, pu8Dst, pu8Al, u8Src, pEFlags);
+        else
+            IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u8_locked, pu8Dst, pu8Al, u8Src, pEFlags);
+
+        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
+        IEM_MC_COMMIT_EFLAGS(EFlags);
+        IEM_MC_STORE_GREG_U8(X86_GREG_xAX, u8Al);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
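+
+
+/*
+ * Reference semantics of the cmpxchg helpers called above, as a minimal
+ * non-atomic C sketch.  The real iemAImpl_cmpxchg_u8* workers are assembly
+ * and also set the arithmetic flags the way CMP would; the sketch name and
+ * the explicit ZF out-parameter are illustrative only.
+ */
+#if 0 /* illustrative sketch, not built */
+#include <stdint.h>
+static void sketchCmpXchgU8(uint8_t *pu8Dst, uint8_t *pu8Al, uint8_t u8Src, int *pfZf)
+{
+    if (*pu8Al == *pu8Dst)
+    {
+        *pu8Dst = u8Src;        /* equal: store the source operand, ZF=1 */
+        *pfZf   = 1;
+    }
+    else
+    {
+        *pu8Al  = *pu8Dst;      /* not equal: AL gets the destination, ZF=0 */
+        *pfZf   = 0;
+    }
+}
+#endif
+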
+
+/** Opcode 0x0f 0xb1. */
+FNIEMOP_DEF(iemOp_cmpxchg_Ev_Gv)
+{
+    IEMOP_MNEMONIC(cmpxchg_Ev_Gv, "cmpxchg Ev,Gv");
+    IEMOP_HLP_MIN_486();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING();
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(4, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
+                IEM_MC_ARG(uint16_t *,      pu16Ax,                 1);
+                IEM_MC_ARG(uint16_t,        u16Src,                 2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
+
+                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_GREG_U16(pu16Ax, X86_GREG_xAX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16, pu16Dst, pu16Ax, u16Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16_locked, pu16Dst, pu16Ax, u16Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(4, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
+                IEM_MC_ARG(uint32_t *,      pu32Eax,                1);
+                IEM_MC_ARG(uint32_t,        u32Src,                 2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
+
+                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_GREG_U32(pu32Eax, X86_GREG_xAX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32, pu32Dst, pu32Eax, u32Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32_locked, pu32Dst, pu32Eax, u32Src, pEFlags);
+
+                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Eax);
+                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(4, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
+                IEM_MC_ARG(uint64_t *,      pu64Rax,                1);
+#ifdef RT_ARCH_X86
+                IEM_MC_ARG(uint64_t *,      pu64Src,                2);
+#else
+                IEM_MC_ARG(uint64_t,        u64Src,                 2);
+#endif
+                IEM_MC_ARG(uint32_t *,      pEFlags,                3);
+
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_GREG_U64(pu64Rax, X86_GREG_xAX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+#ifdef RT_ARCH_X86
+                IEM_MC_REF_GREG_U64(pu64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, pu64Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, pu64Src, pEFlags);
+#else
+                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, u64Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, u64Src, pEFlags);
+#endif
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(4, 3);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
+                IEM_MC_ARG(uint16_t *,      pu16Ax,                 1);
+                IEM_MC_ARG(uint16_t,        u16Src,                 2);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
+                IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
+                IEM_MC_LOCAL(uint16_t,      u16Ax);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING();
+                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_FETCH_GREG_U16(u16Ax, X86_GREG_xAX);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_REF_LOCAL(pu16Ax, u16Ax);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16, pu16Dst, pu16Ax, u16Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u16_locked, pu16Dst, pu16Ax, u16Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Ax);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(4, 3);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
+                IEM_MC_ARG(uint32_t *,      pu32Eax,                1);
+                IEM_MC_ARG(uint32_t,        u32Src,                 2);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
+                IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
+                IEM_MC_LOCAL(uint32_t,      u32Eax);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING();
+                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_FETCH_GREG_U32(u32Eax, X86_GREG_xAX);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_REF_LOCAL(pu32Eax, u32Eax);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32, pu32Dst, pu32Eax, u32Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u32_locked, pu32Dst, pu32Eax, u32Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_STORE_GREG_U32(X86_GREG_xAX, u32Eax);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(4, 3);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
+                IEM_MC_ARG(uint64_t *,      pu64Rax,                1);
+#ifdef RT_ARCH_X86
+                IEM_MC_ARG(uint64_t *,      pu64Src,                2);
+#else
+                IEM_MC_ARG(uint64_t,        u64Src,                 2);
+#endif
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        3);
+                IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
+                IEM_MC_LOCAL(uint64_t,      u64Rax);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING();
+                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                IEM_MC_FETCH_GREG_U64(u64Rax, X86_GREG_xAX);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_REF_LOCAL(pu64Rax, u64Rax);
+#ifdef RT_ARCH_X86
+                IEM_MC_REF_GREG_U64(pu64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, pu64Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, pu64Src, pEFlags);
+#else
+                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64, pu64Dst, pu64Rax, u64Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg_u64_locked, pu64Dst, pu64Rax, u64Src, pEFlags);
+#endif
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_STORE_GREG_U64(X86_GREG_xAX, u64Rax);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+FNIEMOP_DEF_2(iemOpCommonLoadSRegAndGreg, uint8_t, iSegReg, uint8_t, bRm)
+{
+    Assert((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT)); /* Caller checks this */
+    uint8_t const iGReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg;
+
+    switch (pVCpu->iem.s.enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(5, 1);
+            IEM_MC_ARG(uint16_t,        uSel,                                    0);
+            IEM_MC_ARG(uint16_t,        offSeg,                                  1);
+            IEM_MC_ARG_CONST(uint8_t,   iSegRegArg,/*=*/iSegReg,                 2);
+            IEM_MC_ARG_CONST(uint8_t,   iGRegArg,  /*=*/iGReg,                   3);
+            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 4);
+            IEM_MC_LOCAL(RTGCPTR,       GCPtrEff);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+            IEM_MC_FETCH_MEM_U16(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
+            IEM_MC_FETCH_MEM_U16_DISP(uSel, pVCpu->iem.s.iEffSeg, GCPtrEff, 2);
+            IEM_MC_CALL_CIMPL_5(iemCImpl_load_SReg_Greg, uSel, offSeg, iSegRegArg, iGRegArg, enmEffOpSize);
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(5, 1);
+            IEM_MC_ARG(uint16_t,        uSel,                                    0);
+            IEM_MC_ARG(uint32_t,        offSeg,                                  1);
+            IEM_MC_ARG_CONST(uint8_t,   iSegRegArg,/*=*/iSegReg,                 2);
+            IEM_MC_ARG_CONST(uint8_t,   iGRegArg,  /*=*/iGReg,                   3);
+            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 4);
+            IEM_MC_LOCAL(RTGCPTR,       GCPtrEff);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+            IEM_MC_FETCH_MEM_U32(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
+            IEM_MC_FETCH_MEM_U16_DISP(uSel, pVCpu->iem.s.iEffSeg, GCPtrEff, 4);
+            IEM_MC_CALL_CIMPL_5(iemCImpl_load_SReg_Greg, uSel, offSeg, iSegRegArg, iGRegArg, enmEffOpSize);
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(5, 1);
+            IEM_MC_ARG(uint16_t,        uSel,                                    0);
+            IEM_MC_ARG(uint64_t,        offSeg,                                  1);
+            IEM_MC_ARG_CONST(uint8_t,   iSegRegArg,/*=*/iSegReg,                 2);
+            IEM_MC_ARG_CONST(uint8_t,   iGRegArg,  /*=*/iGReg,                   3);
+            IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSize,/*=*/pVCpu->iem.s.enmEffOpSize, 4);
+            IEM_MC_LOCAL(RTGCPTR,       GCPtrEff);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+            if (IEM_IS_GUEST_CPU_AMD(pVCpu)) /** @todo testcase: rev 3.15 of the amd manuals claims it only loads a 32-bit greg. */
+                IEM_MC_FETCH_MEM_U32_SX_U64(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
+            else
+                IEM_MC_FETCH_MEM_U64(offSeg, pVCpu->iem.s.iEffSeg, GCPtrEff);
+            IEM_MC_FETCH_MEM_U16_DISP(uSel, pVCpu->iem.s.iEffSeg, GCPtrEff, 8);
+            IEM_MC_CALL_CIMPL_5(iemCImpl_load_SReg_Greg, uSel, offSeg, iSegRegArg, iGRegArg, enmEffOpSize);
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
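+
+
+/*
+ * The common worker above reads an Mp far pointer: the offset first, then a
+ * 16-bit selector at displacement 2, 4 or 8 depending on the operand size.
+ * A minimal sketch of the 32-bit layout; the packed struct is illustrative,
+ * not an IEM type.
+ */
+#if 0 /* illustrative sketch, not built */
+#include <stdint.h>
+#pragma pack(1)
+typedef struct FARPTR32SKETCH
+{
+    uint32_t off;   /* +0: offset, loaded into the general register */
+    uint16_t sel;   /* +4: selector, loaded into SS/FS/GS by lss/lfs/lgs */
+} FARPTR32SKETCH;
+#pragma pack()
+#endif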
+
+
+/** Opcode 0x0f 0xb2. */
+FNIEMOP_DEF(iemOp_lss_Gv_Mp)
+{
+    IEMOP_MNEMONIC(lss_Gv_Mp, "lss Gv,Mp");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        return IEMOP_RAISE_INVALID_OPCODE();
+    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_SS, bRm);
+}
+
+
+/** Opcode 0x0f 0xb3. */
+FNIEMOP_DEF(iemOp_btr_Ev_Gv)
+{
+    IEMOP_MNEMONIC(btr_Ev_Gv, "btr Ev,Gv");
+    IEMOP_HLP_MIN_386();
+    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_btr);
+}
+
+
+/** Opcode 0x0f 0xb4. */
+FNIEMOP_DEF(iemOp_lfs_Gv_Mp)
+{
+    IEMOP_MNEMONIC(lfs_Gv_Mp, "lfs Gv,Mp");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        return IEMOP_RAISE_INVALID_OPCODE();
+    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_FS, bRm);
+}
+
+
+/** Opcode 0x0f 0xb5. */
+FNIEMOP_DEF(iemOp_lgs_Gv_Mp)
+{
+    IEMOP_MNEMONIC(lgs_Gv_Mp, "lgs Gv,Mp");
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        return IEMOP_RAISE_INVALID_OPCODE();
+    return FNIEMOP_CALL_2(iemOpCommonLoadSRegAndGreg, X86_SREG_GS, bRm);
+}
+
+
+/** Opcode 0x0f 0xb6. */
+FNIEMOP_DEF(iemOp_movzx_Gv_Eb)
+{
+    IEMOP_MNEMONIC(movzx_Gv_Eb, "movzx Gv,Eb");
+    IEMOP_HLP_MIN_386();
+
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /*
+     * If rm denotes a register, no further instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_FETCH_GREG_U8_ZX_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_FETCH_GREG_U8_ZX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_FETCH_GREG_U8_ZX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /*
+         * We're loading a register from memory.
+         */
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_MEM_U8_ZX_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_MEM_U8_ZX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_MEM_U8_ZX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+/** Opcode 0x0f 0xb7. */
+FNIEMOP_DEF(iemOp_movzx_Gv_Ew)
+{
+    IEMOP_MNEMONIC(movzx_Gv_Ew, "movzx Gv,Ew");
+    IEMOP_HLP_MIN_386();
+
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Not entirely sure how the operand size prefix is handled here,
+     *        assuming that it will be ignored. Would be nice to have a few
+     *        tests for this. */
+    /*
+     * If rm denotes a register, no further instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
+        {
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint32_t, u32Value);
+            IEM_MC_FETCH_GREG_U16_ZX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+        }
+        else
+        {
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint64_t, u64Value);
+            IEM_MC_FETCH_GREG_U16_ZX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+        }
+    }
+    else
+    {
+        /*
+         * We're loading a register from memory.
+         */
+        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
+        {
+            IEM_MC_BEGIN(0, 2);
+            IEM_MC_LOCAL(uint32_t, u32Value);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+            IEM_MC_FETCH_MEM_U16_ZX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+        }
+        else
+        {
+            IEM_MC_BEGIN(0, 2);
+            IEM_MC_LOCAL(uint64_t, u64Value);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+            IEM_MC_FETCH_MEM_U16_ZX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+        }
+    }
+    return VINF_SUCCESS;
+}
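+
+
+/*
+ * As the @todo above notes, the decoder treats every non-64-bit effective
+ * operand size as a 32-bit destination; either way the 16-bit source is
+ * zero extended.  A minimal sketch of the data transformation, under the
+ * same assumption as the code above that a 066h prefix is simply ignored:
+ */
+#if 0 /* illustrative sketch, not built */
+#include <stdint.h>
+static uint64_t sketchMovzxU16(uint16_t u16Src, int f64BitOpSize)
+{
+    return f64BitOpSize
+         ? (uint64_t)u16Src     /* movzx r64, r/m16 */
+         : (uint32_t)u16Src;    /* movzx r32, r/m16; a 32-bit write clears bits 63:32 anyway */
+}
+#endif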
+
+
+/** Opcode      0x0f 0xb8 - JMPE (reserved for emulator on IPF) */
+FNIEMOP_UD_STUB(iemOp_jmpe);
+/** Opcode 0xf3 0x0f 0xb8 - POPCNT Gv, Ev */
+FNIEMOP_STUB(iemOp_popcnt_Gv_Ev);
+
+
+/**
+ * @opcode      0xb9
+ * @opinvalid   intel-modrm
+ * @optest      ->
+ */
+FNIEMOP_DEF(iemOp_Grp10)
+{
+    /*
+     * AMD does not decode beyond the 0xb9 opcode byte, whereas Intel decodes
+     * the ModR/M byte as well.  See bs3-cpu-decoder-1.c32.  So, we can
+     * forward to iemOp_InvalidNeedRM.
+     */
+    Log(("iemOp_Grp10 aka UD1 -> #UD\n"));
+    IEMOP_MNEMONIC2EX(ud1, "ud1", RM, UD1, ud1, Gb, Eb, DISOPTYPE_INVALID, IEMOPHINT_IGNORES_OP_SIZE); /* just picked Gb,Eb here. */
+    return FNIEMOP_CALL(iemOp_InvalidNeedRM);
+}
+
+
+/** Opcode 0x0f 0xba. */
+FNIEMOP_DEF(iemOp_Grp8)
+{
+    IEMOP_HLP_MIN_386();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    PCIEMOPBINSIZES pImpl;
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: case 1: case 2: case 3:
+            /* Both AMD and Intel want full modr/m decoding and imm8. */
+            return FNIEMOP_CALL_1(iemOp_InvalidWithRMAllNeedImm8, bRm);
+        case 4: pImpl = &g_iemAImpl_bt;  IEMOP_MNEMONIC(bt_Ev_Ib,  "bt  Ev,Ib"); break;
+        case 5: pImpl = &g_iemAImpl_bts; IEMOP_MNEMONIC(bts_Ev_Ib, "bts Ev,Ib"); break;
+        case 6: pImpl = &g_iemAImpl_btr; IEMOP_MNEMONIC(btr_Ev_Ib, "btr Ev,Ib"); break;
+        case 7: pImpl = &g_iemAImpl_btc; IEMOP_MNEMONIC(btc_Ev_Ib, "btc Ev,Ib"); break;
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register destination. */
+        uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
+                IEM_MC_ARG_CONST(uint16_t,  u16Src, /*=*/ u8Bit & 0x0f, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
+                IEM_MC_ARG_CONST(uint32_t,  u32Src, /*=*/ u8Bit & 0x1f, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
+                IEM_MC_ARG_CONST(uint64_t,  u64Src, /*=*/ u8Bit & 0x3f, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /* memory destination. */
+
+        uint32_t fAccess;
+        if (pImpl->pfnLockedU16)
+            fAccess = IEM_ACCESS_DATA_RW;
+        else /* BT */
+            fAccess = IEM_ACCESS_DATA_R;
+
+        /** @todo test negative bit offsets! */
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint16_t *,              pu16Dst,                0);
+                IEM_MC_ARG(uint16_t,                u16Src,                 1);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
+                uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
+                IEM_MC_ASSIGN(u16Src, u8Bit & 0x0f);
+                if (pImpl->pfnLockedU16)
+                    IEMOP_HLP_DONE_DECODING();
+                else
+                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_MEM_MAP(pu16Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, fAccess);
+
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint32_t *,              pu32Dst,                0);
+                IEM_MC_ARG(uint32_t,                u32Src,                 1);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
+                uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
+                IEM_MC_ASSIGN(u32Src, u8Bit & 0x1f);
+                if (pImpl->pfnLockedU16)
+                    IEMOP_HLP_DONE_DECODING();
+                else
+                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_MEM_MAP(pu32Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, fAccess);
+
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint64_t *,              pu64Dst,                0);
+                IEM_MC_ARG(uint64_t,                u64Src,                 1);
+                IEM_MC_ARG_LOCAL_EFLAGS(            pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR,               GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 1);
+                uint8_t u8Bit; IEM_OPCODE_GET_NEXT_U8(&u8Bit);
+                IEM_MC_ASSIGN(u64Src, u8Bit & 0x3f);
+                if (pImpl->pfnLockedU16)
+                    IEMOP_HLP_DONE_DECODING();
+                else
+                    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_MEM_MAP(pu64Dst, fAccess, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, fAccess);
+
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
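+
+
+/*
+ * In both the register and the memory forms above, the imm8 bit offset is
+ * masked down to the operand width, so unlike the bt/bts/btr/btc Ev,Gv
+ * forms it can never address outside the operand.  A minimal sketch of
+ * that masking:
+ */
+#if 0 /* illustrative sketch, not built */
+#include <stdint.h>
+static unsigned sketchGrp8BitOffset(uint8_t u8Bit, unsigned cOpBits /* 16, 32 or 64 */)
+{
+    return u8Bit & (cOpBits - 1);   /* i.e. & 0x0f, & 0x1f or & 0x3f */
+}
+/* E.g. bt ax, 17 tests bit 17 & 0x0f = bit 1. */
+#endif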
+
+
+/** Opcode 0x0f 0xbb. */
+FNIEMOP_DEF(iemOp_btc_Ev_Gv)
+{
+    IEMOP_MNEMONIC(btc_Ev_Gv, "btc Ev,Gv");
+    IEMOP_HLP_MIN_386();
+    return FNIEMOP_CALL_1(iemOpCommonBit_Ev_Gv, &g_iemAImpl_btc);
+}
+
+
+/** Opcode 0x0f 0xbc. */
+FNIEMOP_DEF(iemOp_bsf_Gv_Ev)
+{
+    IEMOP_MNEMONIC(bsf_Gv_Ev, "bsf Gv,Ev");
+    IEMOP_HLP_MIN_386();
+    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF);
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_bsf);
+}
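+
+
+/*
+ * Reference semantics of the bsf worker invoked above, as a minimal C
+ * sketch.  Leaving the destination untouched on a zero source matches
+ * common hardware behaviour, though architecturally the destination is
+ * undefined in that case; the sketch name is illustrative.
+ */
+#if 0 /* illustrative sketch, not built */
+#include <stdint.h>
+static void sketchBsfU32(uint32_t *pu32Dst, uint32_t u32Src, int *pfZf)
+{
+    if (!u32Src)
+        *pfZf = 1;              /* zero source: ZF=1, destination undefined */
+    else
+    {
+        unsigned iBit = 0;
+        while (!(u32Src & 1))   /* scan for the least significant set bit */
+        {
+            u32Src >>= 1;
+            iBit++;
+        }
+        *pu32Dst = iBit;
+        *pfZf    = 0;
+    }
+}
+#endif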
+
+
+/** Opcode 0xf3 0x0f 0xbc - TZCNT Gv, Ev */
+FNIEMOP_STUB(iemOp_tzcnt_Gv_Ev);
+
+
+/** Opcode 0x0f 0xbd. */
+FNIEMOP_DEF(iemOp_bsr_Gv_Ev)
+{
+    IEMOP_MNEMONIC(bsr_Gv_Ev, "bsr Gv,Ev");
+    IEMOP_HLP_MIN_386();
+    IEMOP_VERIFICATION_UNDEFINED_EFLAGS(X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF);
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_bsr);
+}
+
+
+/** Opcode 0xf3 0x0f 0xbd - LZCNT Gv, Ev */
+FNIEMOP_STUB(iemOp_lzcnt_Gv_Ev);
+
+
+/** Opcode 0x0f 0xbe. */
+FNIEMOP_DEF(iemOp_movsx_Gv_Eb)
+{
+    IEMOP_MNEMONIC(movsx_Gv_Eb, "movsx Gv,Eb");
+    IEMOP_HLP_MIN_386();
+
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /*
+     * If rm denotes a register, no further instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_FETCH_GREG_U8_SX_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_FETCH_GREG_U8_SX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_FETCH_GREG_U8_SX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /*
+         * We're loading a register from memory.
+         */
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_MEM_U8_SX_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_MEM_U8_SX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                IEM_MC_FETCH_MEM_U8_SX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+/** Opcode 0x0f 0xbf. */
+FNIEMOP_DEF(iemOp_movsx_Gv_Ew)
+{
+    IEMOP_MNEMONIC(movsx_Gv_Ew, "movsx Gv,Ew");
+    IEMOP_HLP_MIN_386();
+
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /** @todo Not entirely sure how the operand size prefix is handled here,
+     *        assuming that it will be ignored. Would be nice to have a few
+     *        tests for this. */
+    /*
+     * If rm denotes a register, no further instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
+        {
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint32_t, u32Value);
+            IEM_MC_FETCH_GREG_U16_SX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+        }
+        else
+        {
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint64_t, u64Value);
+            IEM_MC_FETCH_GREG_U16_SX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+        }
+    }
+    else
+    {
+        /*
+         * We're loading a register from memory.
+         */
+        if (pVCpu->iem.s.enmEffOpSize != IEMMODE_64BIT)
+        {
+            IEM_MC_BEGIN(0, 2);
+            IEM_MC_LOCAL(uint32_t, u32Value);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+            IEM_MC_FETCH_MEM_U16_SX_U32(u32Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+        }
+        else
+        {
+            IEM_MC_BEGIN(0, 2);
+            IEM_MC_LOCAL(uint64_t, u64Value);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+            IEM_MC_FETCH_MEM_U16_SX_U64(u64Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
+            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0xc0. */
+FNIEMOP_DEF(iemOp_xadd_Eb_Gb)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    IEMOP_HLP_MIN_486();
+    IEMOP_MNEMONIC(xadd_Eb_Gb, "xadd Eb,Gb");
+
+    /*
+     * If rm denotes a register, no further instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(uint8_t *,  pu8Dst,  0);
+        IEM_MC_ARG(uint8_t *,  pu8Reg,  1);
+        IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_REF_GREG_U8(pu8Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u8, pu8Dst, pu8Reg, pEFlags);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * We're accessing memory.
+         */
+        IEM_MC_BEGIN(3, 3);
+        IEM_MC_ARG(uint8_t *,   pu8Dst,          0);
+        IEM_MC_ARG(uint8_t *,   pu8Reg,          1);
+        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+        IEM_MC_LOCAL(uint8_t,  u8RegCopy);
+        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING();
+        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
+        IEM_MC_FETCH_GREG_U8(u8RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_REF_LOCAL(pu8Reg, u8RegCopy);
+        IEM_MC_FETCH_EFLAGS(EFlags);
+        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+            IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u8, pu8Dst, pu8Reg, pEFlags);
+        else
+            IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u8_locked, pu8Dst, pu8Reg, pEFlags);
+
+        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
+        IEM_MC_COMMIT_EFLAGS(EFlags);
+        IEM_MC_STORE_GREG_U8(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u8RegCopy);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
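+
+
+/*
+ * Reference semantics of the xadd helpers used above, as a minimal
+ * non-atomic C sketch.  The real iemAImpl_xadd_u8* workers are assembly and
+ * set the flags the way ADD would; the sketch name is illustrative.
+ */
+#if 0 /* illustrative sketch, not built */
+#include <stdint.h>
+static void sketchXaddU8(uint8_t *pu8Dst, uint8_t *pu8Reg)
+{
+    uint8_t const u8Tmp = *pu8Dst;  /* remember the old destination */
+    *pu8Dst = *pu8Dst + *pu8Reg;    /* the destination becomes the sum */
+    *pu8Reg = u8Tmp;                /* the register gets the old destination */
+}
+#endif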
+
+
+/** Opcode 0x0f 0xc1. */
+FNIEMOP_DEF(iemOp_xadd_Ev_Gv)
+{
+    IEMOP_MNEMONIC(xadd_Ev_Gv, "xadd Ev,Gv");
+    IEMOP_HLP_MIN_486();
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /*
+     * If rm denotes a register, no further instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *, pu16Dst,  0);
+                IEM_MC_ARG(uint16_t *, pu16Reg,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_GREG_U16(pu16Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u16, pu16Dst, pu16Reg, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *, pu32Dst,  0);
+                IEM_MC_ARG(uint32_t *, pu32Reg,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_GREG_U32(pu32Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u32, pu32Dst, pu32Reg, pEFlags);
+
+                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
+                IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Reg);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *, pu64Dst,  0);
+                IEM_MC_ARG(uint64_t *, pu64Reg,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+                IEM_MC_REF_GREG_U64(pu64Reg, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u64, pu64Dst, pu64Reg, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /*
+         * We're accessing memory.
+         */
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 3);
+                IEM_MC_ARG(uint16_t *,  pu16Dst,         0);
+                IEM_MC_ARG(uint16_t *,  pu16Reg,         1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(uint16_t,  u16RegCopy);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_GREG_U16(u16RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_LOCAL(pu16Reg, u16RegCopy);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u16, pu16Dst, pu16Reg, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u16_locked, pu16Dst, pu16Reg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u16RegCopy);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 3);
+                IEM_MC_ARG(uint32_t *,  pu32Dst,         0);
+                IEM_MC_ARG(uint32_t *,  pu32Reg,         1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(uint32_t,  u32RegCopy);
+                IEM_MC_LOCAL(RTGCPTR,   GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_GREG_U32(u32RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_LOCAL(pu32Reg, u32RegCopy);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u32, pu32Dst, pu32Reg, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u32_locked, pu32Dst, pu32Reg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32RegCopy);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 3);
+                IEM_MC_ARG(uint64_t *,  pu64Dst,         0);
+                IEM_MC_ARG(uint64_t *,  pu64Reg,         1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(uint64_t,  u64RegCopy);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_GREG_U64(u64RegCopy, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_REF_LOCAL(pu64Reg, u64RegCopy);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u64, pu64Dst, pu64Reg, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_xadd_u64_locked, pu64Dst, pu64Reg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64RegCopy);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+/** Opcode      0x0f 0xc2 - cmpps Vps,Wps,Ib */
+FNIEMOP_STUB(iemOp_cmpps_Vps_Wps_Ib);
+/** Opcode 0x66 0x0f 0xc2 - cmppd Vpd,Wpd,Ib */
+FNIEMOP_STUB(iemOp_cmppd_Vpd_Wpd_Ib);
+/** Opcode 0xf3 0x0f 0xc2 - cmpss Vss,Wss,Ib */
+FNIEMOP_STUB(iemOp_cmpss_Vss_Wss_Ib);
+/** Opcode 0xf2 0x0f 0xc2 - cmpsd Vsd,Wsd,Ib */
+FNIEMOP_STUB(iemOp_cmpsd_Vsd_Wsd_Ib);
+
+
+/** Opcode 0x0f 0xc3. */
+FNIEMOP_DEF(iemOp_movnti_My_Gy)
+{
+    IEMOP_MNEMONIC(movnti_My_Gy, "movnti My,Gy");
+
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    /* Only the register -> memory form makes sense, assuming #UD for the other form. */
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+    {
+        switch (pVCpu->iem.s.enmEffOpSize)
+        {
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
+                    return IEMOP_RAISE_INVALID_OPCODE();
+
+                IEM_MC_FETCH_GREG_U32(u32Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+                IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+                if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse2)
+                    return IEMOP_RAISE_INVALID_OPCODE();
+
+                IEM_MC_FETCH_GREG_U64(u64Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+                IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_16BIT:
+                /** @todo check this form.   */
+                return IEMOP_RAISE_INVALID_OPCODE();
+        }
+    }
+    else
+        return IEMOP_RAISE_INVALID_OPCODE();
+    return VINF_SUCCESS;
+}
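+
+#if 0 /* Illustrative sketch (not built): architecturally movnti is just a
+         plain store; the non-temporal hint only affects caching behaviour,
+         which IEM does not model.  The helper name is made up. */
+static void movntiU32Sketch(uint32_t *puDst, uint32_t uSrc)
+{
+    *puDst = uSrc; /* ordinary store; the NT hint has no architectural effect */
+}
+#endif
+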
+/*  Opcode 0x66 0x0f 0xc3 - invalid */
+/*  Opcode 0xf3 0x0f 0xc3 - invalid */
+/*  Opcode 0xf2 0x0f 0xc3 - invalid */
+
+/** Opcode      0x0f 0xc4 - pinsrw Pq, Ry/Mw,Ib */
+FNIEMOP_STUB(iemOp_pinsrw_Pq_RyMw_Ib);
+/** Opcode 0x66 0x0f 0xc4 - pinsrw Vdq, Ry/Mw,Ib */
+FNIEMOP_STUB(iemOp_pinsrw_Vdq_RyMw_Ib);
+/*  Opcode 0xf3 0x0f 0xc4 - invalid */
+/*  Opcode 0xf2 0x0f 0xc4 - invalid */
+
+/** Opcode      0x0f 0xc5 - pextrw Gd, Nq, Ib */
+FNIEMOP_STUB(iemOp_pextrw_Gd_Nq_Ib);
+/** Opcode 0x66 0x0f 0xc5 - pextrw Gd, Udq, Ib */
+FNIEMOP_STUB(iemOp_pextrw_Gd_Udq_Ib);
+/*  Opcode 0xf3 0x0f 0xc5 - invalid */
+/*  Opcode 0xf2 0x0f 0xc5 - invalid */
+
+/** Opcode      0x0f 0xc6 - shufps Vps, Wps, Ib */
+FNIEMOP_STUB(iemOp_shufps_Vps_Wps_Ib);
+/** Opcode 0x66 0x0f 0xc6 - shufpd Vpd, Wpd, Ib */
+FNIEMOP_STUB(iemOp_shufpd_Vpd_Wpd_Ib);
+/*  Opcode 0xf3 0x0f 0xc6 - invalid */
+/*  Opcode 0xf2 0x0f 0xc6 - invalid */
+
+
+/** Opcode 0x0f 0xc7 !11/1. */
+FNIEMOP_DEF_1(iemOp_Grp9_cmpxchg8b_Mq, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC(cmpxchg8b, "cmpxchg8b Mq");
+
+    IEM_MC_BEGIN(4, 3);
+    IEM_MC_ARG(uint64_t *, pu64MemDst,     0);
+    IEM_MC_ARG(PRTUINT64U, pu64EaxEdx,     1);
+    IEM_MC_ARG(PRTUINT64U, pu64EbxEcx,     2);
+    IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 3);
+    IEM_MC_LOCAL(RTUINT64U, u64EaxEdx);
+    IEM_MC_LOCAL(RTUINT64U, u64EbxEcx);
+    IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+    IEMOP_HLP_DONE_DECODING();
+    IEM_MC_MEM_MAP(pu64MemDst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
+
+    IEM_MC_FETCH_GREG_U32(u64EaxEdx.s.Lo, X86_GREG_xAX);
+    IEM_MC_FETCH_GREG_U32(u64EaxEdx.s.Hi, X86_GREG_xDX);
+    IEM_MC_REF_LOCAL(pu64EaxEdx, u64EaxEdx);
+
+    IEM_MC_FETCH_GREG_U32(u64EbxEcx.s.Lo, X86_GREG_xBX);
+    IEM_MC_FETCH_GREG_U32(u64EbxEcx.s.Hi, X86_GREG_xCX);
+    IEM_MC_REF_LOCAL(pu64EbxEcx, u64EbxEcx);
+
+    IEM_MC_FETCH_EFLAGS(EFlags);
+    if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+        IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg8b, pu64MemDst, pu64EaxEdx, pu64EbxEcx, pEFlags);
+    else
+        IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg8b_locked, pu64MemDst, pu64EaxEdx, pu64EbxEcx, pEFlags);
+
+    IEM_MC_MEM_COMMIT_AND_UNMAP(pu64MemDst, IEM_ACCESS_DATA_RW);
+    IEM_MC_COMMIT_EFLAGS(EFlags);
+    IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_ZF)
+        /** @todo Testcase: Check effect of cmpxchg8b on bits 63:32 in rax and rdx. */
+        IEM_MC_STORE_GREG_U32(X86_GREG_xAX, u64EaxEdx.s.Lo);
+        IEM_MC_STORE_GREG_U32(X86_GREG_xDX, u64EaxEdx.s.Hi);
+    IEM_MC_ENDIF();
+    IEM_MC_ADVANCE_RIP();
+
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
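+
+#if 0 /* Illustrative sketch (not built): the cmpxchg8b semantics the
+         microcode above implements, in plain C.  The helper name is made
+         up; the real worker must also be atomic when LOCK is in effect. */
+static void cmpxchg8bSketch(uint64_t *puMem, uint64_t *puEdxEax, uint64_t uEcxEbx, bool *pfZF)
+{
+    if (*puMem == *puEdxEax)
+    {
+        *puMem = uEcxEbx;       /* match: store ECX:EBX and set ZF */
+        *pfZF  = true;
+    }
+    else
+    {
+        *puEdxEax = *puMem;     /* mismatch: load memory into EDX:EAX, clear ZF */
+        *pfZF     = false;
+    }
+}
+#endif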
+
+
+/** Opcode REX.W 0x0f 0xc7 !11/1. */
+FNIEMOP_DEF_1(iemOp_Grp9_cmpxchg16b_Mdq, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC(cmpxchg16b, "cmpxchg16b Mdq");
+    if (IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fMovCmpXchg16b)
+    {
+#if 0
+        RT_NOREF(bRm);
+        IEMOP_BITCH_ABOUT_STUB();
+        return VERR_IEM_INSTR_NOT_IMPLEMENTED;
+#else
+        IEM_MC_BEGIN(4, 3);
+        IEM_MC_ARG(PRTUINT128U, pu128MemDst,     0);
+        IEM_MC_ARG(PRTUINT128U, pu128RaxRdx,     1);
+        IEM_MC_ARG(PRTUINT128U, pu128RbxRcx,     2);
+        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 3);
+        IEM_MC_LOCAL(RTUINT128U, u128RaxRdx);
+        IEM_MC_LOCAL(RTUINT128U, u128RbxRcx);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
+        IEMOP_HLP_DONE_DECODING();
+        IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(GCPtrEffDst, 16);
+        IEM_MC_MEM_MAP(pu128MemDst, IEM_ACCESS_DATA_RW, pVCpu->iem.s.iEffSeg, GCPtrEffDst, 0 /*arg*/);
+
+        IEM_MC_FETCH_GREG_U64(u128RaxRdx.s.Lo, X86_GREG_xAX);
+        IEM_MC_FETCH_GREG_U64(u128RaxRdx.s.Hi, X86_GREG_xDX);
+        IEM_MC_REF_LOCAL(pu128RaxRdx, u128RaxRdx);
+
+        IEM_MC_FETCH_GREG_U64(u128RbxRcx.s.Lo, X86_GREG_xBX);
+        IEM_MC_FETCH_GREG_U64(u128RbxRcx.s.Hi, X86_GREG_xCX);
+        IEM_MC_REF_LOCAL(pu128RbxRcx, u128RbxRcx);
+
+        IEM_MC_FETCH_EFLAGS(EFlags);
+# ifdef RT_ARCH_AMD64
+        if (IEM_GET_HOST_CPU_FEATURES(pVCpu)->fMovCmpXchg16b)
+        {
+            if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_LOCK))
+                IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg16b, pu128MemDst, pu128RaxRdx, pu128RbxRcx, pEFlags);
+            else
+                IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg16b_locked, pu128MemDst, pu128RaxRdx, pu128RbxRcx, pEFlags);
+        }
+        else
+# endif
+        {
+            /* Note! The fallback for 32-bit hosts and hosts without CX16 uses
+                     multiple accesses and is not at all atomic, which works fine
+                     in a uni-CPU guest configuration (ignoring DMA).  If guest SMP
+                     is active we have no choice but to use a rendezvous callback
+                     here.  Sigh. */
+            if (pVCpu->CTX_SUFF(pVM)->cCpus == 1)
+                IEM_MC_CALL_VOID_AIMPL_4(iemAImpl_cmpxchg16b_fallback, pu128MemDst, pu128RaxRdx, pu128RbxRcx, pEFlags);
+            else
+            {
+                IEM_MC_CALL_CIMPL_4(iemCImpl_cmpxchg16b_fallback_rendezvous, pu128MemDst, pu128RaxRdx, pu128RbxRcx, pEFlags);
+                /* Does not get here; the tail code is duplicated in iemCImpl_cmpxchg16b_fallback_rendezvous. */
+            }
+        }
+
+        IEM_MC_MEM_COMMIT_AND_UNMAP(pu128MemDst, IEM_ACCESS_DATA_RW);
+        IEM_MC_COMMIT_EFLAGS(EFlags);
+        IEM_MC_IF_EFL_BIT_NOT_SET(X86_EFL_ZF)
+            IEM_MC_STORE_GREG_U64(X86_GREG_xAX, u128RaxRdx.s.Lo);
+            IEM_MC_STORE_GREG_U64(X86_GREG_xDX, u128RaxRdx.s.Hi);
+        IEM_MC_ENDIF();
+        IEM_MC_ADVANCE_RIP();
+
+        IEM_MC_END();
+        return VINF_SUCCESS;
+#endif
+    }
+    Log(("cmpxchg16b -> #UD\n"));
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+FNIEMOP_DEF_1(iemOp_Grp9_cmpxchg8bOr16b, uint8_t, bRm)
+{
+    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+        return FNIEMOP_CALL_1(iemOp_Grp9_cmpxchg16b_Mdq, bRm);
+    return FNIEMOP_CALL_1(iemOp_Grp9_cmpxchg8b_Mq, bRm);
+}
+
+/** Opcode 0x0f 0xc7 11/6. */
+FNIEMOP_UD_STUB_1(iemOp_Grp9_rdrand_Rv, uint8_t, bRm);
+
+/** Opcode 0x0f 0xc7 !11/6. */
+FNIEMOP_UD_STUB_1(iemOp_Grp9_vmptrld_Mq, uint8_t, bRm);
+
+/** Opcode 0x66 0x0f 0xc7 !11/6. */
+FNIEMOP_UD_STUB_1(iemOp_Grp9_vmclear_Mq, uint8_t, bRm);
+
+/** Opcode 0xf3 0x0f 0xc7 !11/6. */
+FNIEMOP_UD_STUB_1(iemOp_Grp9_vmxon_Mq, uint8_t, bRm);
+
+/** Opcode [0xf3] 0x0f 0xc7 !11/7. */
+FNIEMOP_UD_STUB_1(iemOp_Grp9_vmptrst_Mq, uint8_t, bRm);
+
+/** Opcode 0x0f 0xc7 11/7. */
+FNIEMOP_UD_STUB_1(iemOp_Grp9_rdseed_Rv, uint8_t, bRm);
+
+
+/**
+ * Group 9 jump table for register variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnGroup9RegReg[] =
+{   /* pfx:  none,                          066h,                           0f3h,                           0f2h */
+    /* /0 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /1 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /2 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /3 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /4 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /5 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /6 */ iemOp_Grp9_rdrand_Rv,          iemOp_Grp9_rdrand_Rv,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /7 */ iemOp_Grp9_rdseed_Rv,          iemOp_Grp9_rdseed_Rv,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+};
+AssertCompile(RT_ELEMENTS(g_apfnGroup9RegReg) == 8*4);
+
+
+/**
+ * Group 9 jump table for memory variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnGroup9MemReg[] =
+{   /* pfx:  none,                          066h,                           0f3h,                           0f2h */
+    /* /0 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /1 */ iemOp_Grp9_cmpxchg8bOr16b,     iemOp_Grp9_cmpxchg8bOr16b,      iemOp_Grp9_cmpxchg8bOr16b,      iemOp_Grp9_cmpxchg8bOr16b, /* see bs3-cpu-decoding-1 */
+    /* /2 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /3 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /4 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /5 */ IEMOP_X4(iemOp_InvalidWithRM),
+    /* /6 */ iemOp_Grp9_vmptrld_Mq,         iemOp_Grp9_vmclear_Mq,          iemOp_Grp9_vmxon_Mq,            iemOp_InvalidWithRM,
+    /* /7 */ iemOp_Grp9_vmptrst_Mq,         iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+};
+AssertCompile(RT_ELEMENTS(g_apfnGroup9MemReg) == 8*4);
+
+
+/** Opcode 0x0f 0xc7. */
+FNIEMOP_DEF(iemOp_Grp9)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        /* register, register */
+        return FNIEMOP_CALL_1(g_apfnGroup9RegReg[ ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                                 + pVCpu->iem.s.idxPrefix], bRm);
+    /* memory, register */
+    return FNIEMOP_CALL_1(g_apfnGroup9MemReg[ ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                             + pVCpu->iem.s.idxPrefix], bRm);
+}
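+
+#if 0 /* Illustrative sketch (not built): how the group 9 tables above are
+         indexed.  Each /reg value owns four consecutive slots, one per
+         mandatory prefix (none, 066h, 0f3h, 0f2h), so the lookup is a
+         single multiply-add.  The helper name is made up. */
+static PFNIEMOPRM grp9LookupSketch(PFNIEMOPRM const *papfnTable, uint8_t bRm, uint8_t idxPrefix)
+{
+    uint8_t const iReg = (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK; /* 0..7 */
+    return papfnTable[iReg * 4 + idxPrefix];                                 /* idxPrefix is 0..3 */
+}
+#endif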
+
+
+/**
+ * Common 'bswap register' helper.
+ */
+FNIEMOP_DEF_1(iemOpCommonBswapGReg, uint8_t, iReg)
+{
+    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+    switch (pVCpu->iem.s.enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(1, 0);
+            IEM_MC_ARG(uint32_t *,  pu32Dst, 0);
+            IEM_MC_REF_GREG_U32(pu32Dst, iReg);     /* Don't clear the high dword! */
+            IEM_MC_CALL_VOID_AIMPL_1(iemAImpl_bswap_u16, pu32Dst);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(1, 0);
+            IEM_MC_ARG(uint32_t *,  pu32Dst, 0);
+            IEM_MC_REF_GREG_U32(pu32Dst, iReg);
+            IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF(pu32Dst);
+            IEM_MC_CALL_VOID_AIMPL_1(iemAImpl_bswap_u32, pu32Dst);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(1, 0);
+            IEM_MC_ARG(uint64_t *,  pu64Dst, 0);
+            IEM_MC_REF_GREG_U64(pu64Dst, iReg);
+            IEM_MC_CALL_VOID_AIMPL_1(iemAImpl_bswap_u64, pu64Dst);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
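+
+#if 0 /* Illustrative sketch (not built): the byte swap the 32-bit worker
+         above performs.  For a 16-bit operand the architectural result is
+         undefined, hence the careful handling there.  Made-up helper name. */
+static uint32_t bswapU32Sketch(uint32_t u)
+{
+    return ((u & UINT32_C(0x000000ff)) << 24)   /* byte 0 -> byte 3 */
+         | ((u & UINT32_C(0x0000ff00)) <<  8)   /* byte 1 -> byte 2 */
+         | ((u & UINT32_C(0x00ff0000)) >>  8)   /* byte 2 -> byte 1 */
+         | ((u & UINT32_C(0xff000000)) >> 24);  /* byte 3 -> byte 0 */
+}
+#endif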
+
+
+/** Opcode 0x0f 0xc8. */
+FNIEMOP_DEF(iemOp_bswap_rAX_r8)
+{
+    IEMOP_MNEMONIC(bswap_rAX_r8, "bswap rAX/r8");
+    /* Note! The Intel manual states that R8-R15 can be accessed by using a REX.X
+             prefix; REX.B appears to be the correct prefix, however.  For a
+             parallel case, see iemOp_mov_AL_Ib and iemOp_mov_eAX_Iv. */
+    IEMOP_HLP_MIN_486();
+    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xAX | pVCpu->iem.s.uRexB);
+}
+
+
+/** Opcode 0x0f 0xc9. */
+FNIEMOP_DEF(iemOp_bswap_rCX_r9)
+{
+    IEMOP_MNEMONIC(bswap_rCX_r9, "bswap rCX/r9");
+    IEMOP_HLP_MIN_486();
+    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xCX | pVCpu->iem.s.uRexB);
+}
+
+
+/** Opcode 0x0f 0xca. */
+FNIEMOP_DEF(iemOp_bswap_rDX_r10)
+{
+    IEMOP_MNEMONIC(bswap_rDX_r10, "bswap rDX/r10");
+    IEMOP_HLP_MIN_486();
+    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xDX | pVCpu->iem.s.uRexB);
+}
+
+
+/** Opcode 0x0f 0xcb. */
+FNIEMOP_DEF(iemOp_bswap_rBX_r11)
+{
+    IEMOP_MNEMONIC(bswap_rBX_r11, "bswap rBX/r11");
+    IEMOP_HLP_MIN_486();
+    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xBX | pVCpu->iem.s.uRexB);
+}
+
+
+/** Opcode 0x0f 0xcc. */
+FNIEMOP_DEF(iemOp_bswap_rSP_r12)
+{
+    IEMOP_MNEMONIC(bswap_rSP_r12, "bswap rSP/r12");
+    IEMOP_HLP_MIN_486();
+    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xSP | pVCpu->iem.s.uRexB);
+}
+
+
+/** Opcode 0x0f 0xcd. */
+FNIEMOP_DEF(iemOp_bswap_rBP_r13)
+{
+    IEMOP_MNEMONIC(bswap_rBP_r13, "bswap rBP/r13");
+    IEMOP_HLP_MIN_486();
+    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xBP | pVCpu->iem.s.uRexB);
+}
+
+
+/** Opcode 0x0f 0xce. */
+FNIEMOP_DEF(iemOp_bswap_rSI_r14)
+{
+    IEMOP_MNEMONIC(bswap_rSI_r14, "bswap rSI/r14");
+    IEMOP_HLP_MIN_486();
+    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xSI | pVCpu->iem.s.uRexB);
+}
+
+
+/** Opcode 0x0f 0xcf. */
+FNIEMOP_DEF(iemOp_bswap_rDI_r15)
+{
+    IEMOP_MNEMONIC(bswap_rDI_r15, "bswap rDI/r15");
+    IEMOP_HLP_MIN_486();
+    return FNIEMOP_CALL_1(iemOpCommonBswapGReg, X86_GREG_xDI | pVCpu->iem.s.uRexB);
+}
+
+
+/*  Opcode      0x0f 0xd0 - invalid */
+/** Opcode 0x66 0x0f 0xd0 - addsubpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_addsubpd_Vpd_Wpd);
+/*  Opcode 0xf3 0x0f 0xd0 - invalid */
+/** Opcode 0xf2 0x0f 0xd0 - addsubps Vps, Wps */
+FNIEMOP_STUB(iemOp_addsubps_Vps_Wps);
+
+/** Opcode      0x0f 0xd1 - psrlw Pq, Qq */
+FNIEMOP_STUB(iemOp_psrlw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xd1 - psrlw Vx, W */
+FNIEMOP_STUB(iemOp_psrlw_Vx_W);
+/*  Opcode 0xf3 0x0f 0xd1 - invalid */
+/*  Opcode 0xf2 0x0f 0xd1 - invalid */
+
+/** Opcode      0x0f 0xd2 - psrld Pq, Qq */
+FNIEMOP_STUB(iemOp_psrld_Pq_Qq);
+/** Opcode 0x66 0x0f 0xd2 - psrld Vx, Wx */
+FNIEMOP_STUB(iemOp_psrld_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xd2 - invalid */
+/*  Opcode 0xf2 0x0f 0xd2 - invalid */
+
+/** Opcode      0x0f 0xd3 - psrlq Pq, Qq */
+FNIEMOP_STUB(iemOp_psrlq_Pq_Qq);
+/** Opcode 0x66 0x0f 0xd3 - psrlq Vx, Wx */
+FNIEMOP_STUB(iemOp_psrlq_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xd3 - invalid */
+/*  Opcode 0xf2 0x0f 0xd3 - invalid */
+
+/** Opcode      0x0f 0xd4 - paddq Pq, Qq */
+FNIEMOP_STUB(iemOp_paddq_Pq_Qq);
+/** Opcode 0x66 0x0f 0xd4 - paddq Vx, W */
+FNIEMOP_STUB(iemOp_paddq_Vx_W);
+/*  Opcode 0xf3 0x0f 0xd4 - invalid */
+/*  Opcode 0xf2 0x0f 0xd4 - invalid */
+
+/** Opcode      0x0f 0xd5 - pmullw Pq, Qq */
+FNIEMOP_STUB(iemOp_pmullw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xd5 - pmullw Vx, Wx */
+FNIEMOP_STUB(iemOp_pmullw_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xd5 - invalid */
+/*  Opcode 0xf2 0x0f 0xd5 - invalid */
+
+/*  Opcode      0x0f 0xd6 - invalid */
+
+/**
+ * @opcode      0xd6
+ * @oppfx       0x66
+ * @opcpuid     sse2
+ * @opgroup     og_sse2_pcksclr_datamove
+ * @opxcpttype  none
+ * @optest      op1=-1 op2=2 -> op1=2
+ * @optest      op1=0 op2=-42 -> op1=-42
+ */
+FNIEMOP_DEF(iemOp_movq_Wq_Vq)
+{
+    IEMOP_MNEMONIC2(MR, MOVQ, movq, WqZxReg, Vq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+
+        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_XREG_U64_ZX_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Memory, register.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
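+
+#if 0 /* Illustrative sketch (not built): the register form above copies the
+         low quadword and zero-extends it to the full 128-bit destination,
+         i.e. roughly (made-up helper name): */
+static void movqWqVqRegSketch(RTUINT128U *puDst, RTUINT128U const *puSrc)
+{
+    puDst->s.Lo = puSrc->s.Lo;  /* copy the low 64 bits */
+    puDst->s.Hi = 0;            /* zero the high 64 bits */
+}
+#endif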
+
+
+/** Opcode 0xf3 0x0f 0xd6 - movq2dq Vdq, Nq */
+FNIEMOP_STUB(iemOp_movq2dq_Vdq_Nq);
+/** Opcode 0xf2 0x0f 0xd6 - movdq2q Pq, Uq */
+FNIEMOP_STUB(iemOp_movdq2q_Pq_Uq);
+#if 0
+FNIEMOP_DEF(iemOp_movq_Wq_Vq__movq2dq_Vdq_Nq__movdq2q_Pq_Uq)
+{
+    /* Docs say register only. */
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+
+    switch (pVCpu->iem.s.fPrefixes & (IEM_OP_PRF_SIZE_OP | IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
+    {
+        case IEM_OP_PRF_SIZE_OP: /* SSE */
+            IEMOP_MNEMONIC(movq_Wq_Vq, "movq Wq,Vq");
+            IEMOP_HLP_DECODED_NL_2(OP_PMOVMSKB, IEMOPFORM_RM_REG, OP_PARM_Gd, OP_PARM_Vdq, DISOPTYPE_SSE | DISOPTYPE_HARMLESS);
+            IEM_MC_BEGIN(2, 0);
+            IEM_MC_ARG(uint64_t *,           pDst, 0);
+            IEM_MC_ARG(PCRTUINT128U,         pSrc, 1);
+            IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+            IEM_MC_PREPARE_SSE_USAGE();
+            IEM_MC_REF_GREG_U64(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+            IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+            IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_pmovmskb_u128, pDst, pSrc);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case 0: /* MMX */
+            IEMOP_MNEMONIC(pmovmskb_Gd_Udq, "pmovmskb Gd,Udq");
+            IEMOP_HLP_DECODED_NL_2(OP_PMOVMSKB, IEMOPFORM_RM_REG, OP_PARM_Gd, OP_PARM_Vdq, DISOPTYPE_MMX | DISOPTYPE_HARMLESS);
+            IEM_MC_BEGIN(2, 0);
+            IEM_MC_ARG(uint64_t *,          pDst, 0);
+            IEM_MC_ARG(uint64_t const *,    pSrc, 1);
+            IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT();
+            IEM_MC_PREPARE_FPU_USAGE();
+            IEM_MC_REF_GREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+            IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
+            IEM_MC_CALL_MMX_AIMPL_2(iemAImpl_pmovmskb_u64, pDst, pSrc);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        default:
+            return IEMOP_RAISE_INVALID_OPCODE();
+    }
+}
+#endif
+
+
+/** Opcode      0x0f 0xd7 - pmovmskb Gd, Nq */
+FNIEMOP_DEF(iemOp_pmovmskb_Gd_Nq)
+{
+    /* Note! Taking the lazy approach here wrt the high 32 bits of the GREG. */
+    /** @todo testcase: Check that the instruction implicitly clears the high
+     *        bits in 64-bit mode.  REX.W only becomes necessary once VLMAX > 256
+     *        and opcode modifications are made to work with the whole width (not
+     *        just 128 bits). */
+    IEMOP_MNEMONIC(pmovmskb_Gd_Nq, "pmovmskb Gd,Nq");
+    /* Docs say register only. */
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT)) /** @todo test that this is registers only. */
+    {
+        IEMOP_HLP_DECODED_NL_2(OP_PMOVMSKB, IEMOPFORM_RM_REG, OP_PARM_Gd, OP_PARM_Vdq, DISOPTYPE_MMX | DISOPTYPE_HARMLESS);
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(uint64_t *,          pDst, 0);
+        IEM_MC_ARG(uint64_t const *,    pSrc, 1);
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT();
+        IEM_MC_PREPARE_FPU_USAGE();
+        IEM_MC_REF_GREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_REF_MREG_U64_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
+        IEM_MC_CALL_MMX_AIMPL_2(iemAImpl_pmovmskb_u64, pDst, pSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+        return VINF_SUCCESS;
+    }
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
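+
+#if 0 /* Illustrative sketch (not built): what pmovmskb computes for the MMX
+         form above - the most significant bit of each source byte, packed
+         into the low bits of the destination.  Made-up helper name. */
+static uint64_t pmovmskbU64Sketch(uint64_t uSrc)
+{
+    uint64_t uDst = 0;
+    for (unsigned iByte = 0; iByte < 8; iByte++)
+        uDst |= ((uSrc >> (iByte * 8 + 7)) & 1) << iByte; /* MSB of byte i -> bit i */
+    return uDst;
+}
+#endif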
+
+/** Opcode 0x66 0x0f 0xd7 - pmovmskb Gd, Ux */
+FNIEMOP_DEF(iemOp_pmovmskb_Gd_Ux)
+{
+    /* Note! Taking the lazy approach here wrt the high 32 bits of the GREG. */
+    /** @todo testcase: Check that the instruction implicitly clears the high
+     *        bits in 64-bit mode.  REX.W only becomes necessary once VLMAX > 256
+     *        and opcode modifications are made to work with the whole width (not
+     *        just 128 bits). */
+    IEMOP_MNEMONIC(pmovmskb_Gd_Ux, "pmovmskb Gd,Ux");
+    /* Docs say register only. */
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT)) /** @todo test that this is registers only. */
+    {
+        IEMOP_HLP_DECODED_NL_2(OP_PMOVMSKB, IEMOPFORM_RM_REG, OP_PARM_Gd, OP_PARM_Vdq, DISOPTYPE_SSE | DISOPTYPE_HARMLESS);
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(uint64_t *,           pDst, 0);
+        IEM_MC_ARG(PCRTUINT128U,         pSrc, 1);
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_GREG_U64(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+        IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_pmovmskb_u128, pDst, pSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+        return VINF_SUCCESS;
+    }
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+/*  Opcode 0xf3 0x0f 0xd7 - invalid */
+/*  Opcode 0xf2 0x0f 0xd7 - invalid */
+
+
+/** Opcode      0x0f 0xd8 - psubusb Pq, Qq */
+FNIEMOP_STUB(iemOp_psubusb_Pq_Qq);
+/** Opcode 0x66 0x0f 0xd8 - psubusb Vx, W */
+FNIEMOP_STUB(iemOp_psubusb_Vx_W);
+/*  Opcode 0xf3 0x0f 0xd8 - invalid */
+/*  Opcode 0xf2 0x0f 0xd8 - invalid */
+
+/** Opcode      0x0f 0xd9 - psubusw Pq, Qq */
+FNIEMOP_STUB(iemOp_psubusw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xd9 - psubusw Vx, Wx */
+FNIEMOP_STUB(iemOp_psubusw_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xd9 - invalid */
+/*  Opcode 0xf2 0x0f 0xd9 - invalid */
+
+/** Opcode      0x0f 0xda - pminub Pq, Qq */
+FNIEMOP_STUB(iemOp_pminub_Pq_Qq);
+/** Opcode 0x66 0x0f 0xda - pminub Vx, Wx */
+FNIEMOP_STUB(iemOp_pminub_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xda - invalid */
+/*  Opcode 0xf2 0x0f 0xda - invalid */
+
+/** Opcode      0x0f 0xdb - pand Pq, Qq */
+FNIEMOP_STUB(iemOp_pand_Pq_Qq);
+/** Opcode 0x66 0x0f 0xdb - pand Vx, W */
+FNIEMOP_STUB(iemOp_pand_Vx_W);
+/*  Opcode 0xf3 0x0f 0xdb - invalid */
+/*  Opcode 0xf2 0x0f 0xdb - invalid */
+
+/** Opcode      0x0f 0xdc - paddusb Pq, Qq */
+FNIEMOP_STUB(iemOp_paddusb_Pq_Qq);
+/** Opcode 0x66 0x0f 0xdc - paddusb Vx, Wx */
+FNIEMOP_STUB(iemOp_paddusb_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xdc - invalid */
+/*  Opcode 0xf2 0x0f 0xdc - invalid */
+
+/** Opcode      0x0f 0xdd - paddusw Pq, Qq */
+FNIEMOP_STUB(iemOp_paddusw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xdd - paddusw Vx, Wx */
+FNIEMOP_STUB(iemOp_paddusw_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xdd - invalid */
+/*  Opcode 0xf2 0x0f 0xdd - invalid */
+
+/** Opcode      0x0f 0xde - pmaxub Pq, Qq */
+FNIEMOP_STUB(iemOp_pmaxub_Pq_Qq);
+/** Opcode 0x66 0x0f 0xde - pmaxub Vx, W */
+FNIEMOP_STUB(iemOp_pmaxub_Vx_W);
+/*  Opcode 0xf3 0x0f 0xde - invalid */
+/*  Opcode 0xf2 0x0f 0xde - invalid */
+
+/** Opcode      0x0f 0xdf - pandn Pq, Qq */
+FNIEMOP_STUB(iemOp_pandn_Pq_Qq);
+/** Opcode 0x66 0x0f 0xdf - pandn Vx, Wx */
+FNIEMOP_STUB(iemOp_pandn_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xdf - invalid */
+/*  Opcode 0xf2 0x0f 0xdf - invalid */
+
+/** Opcode      0x0f 0xe0 - pavgb Pq, Qq */
+FNIEMOP_STUB(iemOp_pavgb_Pq_Qq);
+/** Opcode 0x66 0x0f 0xe0 - pavgb Vx, Wx */
+FNIEMOP_STUB(iemOp_pavgb_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xe0 - invalid */
+/*  Opcode 0xf2 0x0f 0xe0 - invalid */
+
+/** Opcode      0x0f 0xe1 - psraw Pq, Qq */
+FNIEMOP_STUB(iemOp_psraw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xe1 - psraw Vx, W */
+FNIEMOP_STUB(iemOp_psraw_Vx_W);
+/*  Opcode 0xf3 0x0f 0xe1 - invalid */
+/*  Opcode 0xf2 0x0f 0xe1 - invalid */
+
+/** Opcode      0x0f 0xe2 - psrad Pq, Qq */
+FNIEMOP_STUB(iemOp_psrad_Pq_Qq);
+/** Opcode 0x66 0x0f 0xe2 - psrad Vx, Wx */
+FNIEMOP_STUB(iemOp_psrad_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xe2 - invalid */
+/*  Opcode 0xf2 0x0f 0xe2 - invalid */
+
+/** Opcode      0x0f 0xe3 - pavgw Pq, Qq */
+FNIEMOP_STUB(iemOp_pavgw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xe3 - pavgw Vx, Wx */
+FNIEMOP_STUB(iemOp_pavgw_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xe3 - invalid */
+/*  Opcode 0xf2 0x0f 0xe3 - invalid */
+
+/** Opcode      0x0f 0xe4 - pmulhuw Pq, Qq */
+FNIEMOP_STUB(iemOp_pmulhuw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xe4 - pmulhuw Vx, W */
+FNIEMOP_STUB(iemOp_pmulhuw_Vx_W);
+/*  Opcode 0xf3 0x0f 0xe4 - invalid */
+/*  Opcode 0xf2 0x0f 0xe4 - invalid */
+
+/** Opcode      0x0f 0xe5 - pmulhw Pq, Qq */
+FNIEMOP_STUB(iemOp_pmulhw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xe5 - pmulhw Vx, Wx */
+FNIEMOP_STUB(iemOp_pmulhw_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xe5 - invalid */
+/*  Opcode 0xf2 0x0f 0xe5 - invalid */
+
+/*  Opcode      0x0f 0xe6 - invalid */
+/** Opcode 0x66 0x0f 0xe6 - cvttpd2dq Vx, Wpd */
+FNIEMOP_STUB(iemOp_cvttpd2dq_Vx_Wpd);
+/** Opcode 0xf3 0x0f 0xe6 - cvtdq2pd Vx, Wpd */
+FNIEMOP_STUB(iemOp_cvtdq2pd_Vx_Wpd);
+/** Opcode 0xf2 0x0f 0xe6 - cvtpd2dq Vx, Wpd */
+FNIEMOP_STUB(iemOp_cvtpd2dq_Vx_Wpd);
+
+
+/** Opcode      0x0f 0xe7 - movntq Mq, Pq */
+FNIEMOP_DEF(iemOp_movntq_Mq_Pq)
+{
+    IEMOP_MNEMONIC(movntq_Mq_Pq, "movntq Mq,Pq");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* Register, memory. */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint64_t,                  uSrc);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
+
+        IEM_MC_FETCH_MREG_U64(uSrc, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+        IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+        return VINF_SUCCESS;
+    }
+    /* The register, register encoding is invalid. */
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+/** Opcode 0x66 0x0f 0xe7 - movntdq Mx, Vx */
+FNIEMOP_DEF(iemOp_movntdq_Mx_Vx)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* Register, memory. */
+        IEMOP_MNEMONIC(movntdq_Mx_Vx, "movntdq Mx,Vx");
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+
+        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+        return VINF_SUCCESS;
+    }
+
+    /* The register, register encoding is invalid. */
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+/*  Opcode 0xf3 0x0f 0xe7 - invalid */
+/*  Opcode 0xf2 0x0f 0xe7 - invalid */
+
+
+/** Opcode      0x0f 0xe8 - psubsb Pq, Qq */
+FNIEMOP_STUB(iemOp_psubsb_Pq_Qq);
+/** Opcode 0x66 0x0f 0xe8 - psubsb Vx, W */
+FNIEMOP_STUB(iemOp_psubsb_Vx_W);
+/*  Opcode 0xf3 0x0f 0xe8 - invalid */
+/*  Opcode 0xf2 0x0f 0xe8 - invalid */
+
+/** Opcode      0x0f 0xe9 - psubsw Pq, Qq */
+FNIEMOP_STUB(iemOp_psubsw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xe9 - psubsw Vx, Wx */
+FNIEMOP_STUB(iemOp_psubsw_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xe9 - invalid */
+/*  Opcode 0xf2 0x0f 0xe9 - invalid */
+
+/** Opcode      0x0f 0xea - pminsw Pq, Qq */
+FNIEMOP_STUB(iemOp_pminsw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xea - pminsw Vx, Wx */
+FNIEMOP_STUB(iemOp_pminsw_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xea - invalid */
+/*  Opcode 0xf2 0x0f 0xea - invalid */
+
+/** Opcode      0x0f 0xeb - por Pq, Qq */
+FNIEMOP_STUB(iemOp_por_Pq_Qq);
+/** Opcode 0x66 0x0f 0xeb - por Vx, W */
+FNIEMOP_STUB(iemOp_por_Vx_W);
+/*  Opcode 0xf3 0x0f 0xeb - invalid */
+/*  Opcode 0xf2 0x0f 0xeb - invalid */
+
+/** Opcode      0x0f 0xec - paddsb Pq, Qq */
+FNIEMOP_STUB(iemOp_paddsb_Pq_Qq);
+/** Opcode 0x66 0x0f 0xec - paddsb Vx, Wx */
+FNIEMOP_STUB(iemOp_paddsb_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xec - invalid */
+/*  Opcode 0xf2 0x0f 0xec - invalid */
+
+/** Opcode      0x0f 0xed - paddsw Pq, Qq */
+FNIEMOP_STUB(iemOp_paddsw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xed - paddsw Vx, Wx */
+FNIEMOP_STUB(iemOp_paddsw_Vx_Wx);
+/*  Opcode 0xf3 0x0f 0xed - invalid */
+/*  Opcode 0xf2 0x0f 0xed - invalid */
+
+/** Opcode      0x0f 0xee - pmaxsw Pq, Qq */
+FNIEMOP_STUB(iemOp_pmaxsw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xee - pmaxsw Vx, W */
+FNIEMOP_STUB(iemOp_pmaxsw_Vx_W);
+/*  Opcode 0xf3 0x0f 0xee - invalid */
+/*  Opcode 0xf2 0x0f 0xee - invalid */
+
+
+/** Opcode      0x0f 0xef - pxor Pq, Qq */
+FNIEMOP_DEF(iemOp_pxor_Pq_Qq)
+{
+    IEMOP_MNEMONIC(pxor, "pxor");
+    return FNIEMOP_CALL_1(iemOpCommonMmx_FullFull_To_Full, &g_iemAImpl_pxor);
+}
+
+/** Opcode 0x66 0x0f 0xef - pxor Vx, Wx */
+FNIEMOP_DEF(iemOp_pxor_Vx_Wx)
+{
+    IEMOP_MNEMONIC(pxor_Vx_Wx, "pxor");
+    return FNIEMOP_CALL_1(iemOpCommonSse2_FullFull_To_Full, &g_iemAImpl_pxor);
+}
+
+/*  Opcode 0xf3 0x0f 0xef - invalid */
+/*  Opcode 0xf2 0x0f 0xef - invalid */
+
+/*  Opcode      0x0f 0xf0 - invalid */
+/*  Opcode 0x66 0x0f 0xf0 - invalid */
+/** Opcode 0xf2 0x0f 0xf0 - lddqu Vx, Mx */
+FNIEMOP_STUB(iemOp_lddqu_Vx_Mx);
+
+/** Opcode      0x0f 0xf1 - psllw Pq, Qq */
+FNIEMOP_STUB(iemOp_psllw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xf1 - psllw Vx, W */
+FNIEMOP_STUB(iemOp_psllw_Vx_W);
+/*  Opcode 0xf2 0x0f 0xf1 - invalid */
+
+/** Opcode      0x0f 0xf2 - pslld Pq, Qq */
+FNIEMOP_STUB(iemOp_pslld_Pq_Qq);
+/** Opcode 0x66 0x0f 0xf2 - pslld Vx, Wx */
+FNIEMOP_STUB(iemOp_pslld_Vx_Wx);
+/*  Opcode 0xf2 0x0f 0xf2 - invalid */
+
+/** Opcode      0x0f 0xf3 - psllq Pq, Qq */
+FNIEMOP_STUB(iemOp_psllq_Pq_Qq);
+/** Opcode 0x66 0x0f 0xf3 - psllq Vx, Wx */
+FNIEMOP_STUB(iemOp_psllq_Vx_Wx);
+/*  Opcode 0xf2 0x0f 0xf3 - invalid */
+
+/** Opcode      0x0f 0xf4 - pmuludq Pq, Qq */
+FNIEMOP_STUB(iemOp_pmuludq_Pq_Qq);
+/** Opcode 0x66 0x0f 0xf4 - pmuludq Vx, W */
+FNIEMOP_STUB(iemOp_pmuludq_Vx_W);
+/*  Opcode 0xf2 0x0f 0xf4 - invalid */
+
+/** Opcode      0x0f 0xf5 - pmaddwd Pq, Qq */
+FNIEMOP_STUB(iemOp_pmaddwd_Pq_Qq);
+/** Opcode 0x66 0x0f 0xf5 - pmaddwd Vx, Wx */
+FNIEMOP_STUB(iemOp_pmaddwd_Vx_Wx);
+/*  Opcode 0xf2 0x0f 0xf5 - invalid */
+
+/** Opcode      0x0f 0xf6 - psadbw Pq, Qq */
+FNIEMOP_STUB(iemOp_psadbw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xf6 - psadbw Vx, Wx */
+FNIEMOP_STUB(iemOp_psadbw_Vx_Wx);
+/*  Opcode 0xf2 0x0f 0xf6 - invalid */
+
+/** Opcode      0x0f 0xf7 - maskmovq Pq, Nq */
+FNIEMOP_STUB(iemOp_maskmovq_Pq_Nq);
+/** Opcode 0x66 0x0f 0xf7 - maskmovdqu Vdq, Udq */
+FNIEMOP_STUB(iemOp_maskmovdqu_Vdq_Udq);
+/*  Opcode 0xf2 0x0f 0xf7 - invalid */
+
+/** Opcode      0x0f 0xf8 - psubb Pq, Qq */
+FNIEMOP_STUB(iemOp_psubb_Pq_Qq);
+/** Opcode 0x66 0x0f 0xf8 - psubb Vx, W */
+FNIEMOP_STUB(iemOp_psubb_Vx_W);
+/*  Opcode 0xf2 0x0f 0xf8 - invalid */
+
+/** Opcode      0x0f 0xf9 - psubw Pq, Qq */
+FNIEMOP_STUB(iemOp_psubw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xf9 - psubw Vx, Wx */
+FNIEMOP_STUB(iemOp_psubw_Vx_Wx);
+/*  Opcode 0xf2 0x0f 0xf9 - invalid */
+
+/** Opcode      0x0f 0xfa - psubd Pq, Qq */
+FNIEMOP_STUB(iemOp_psubd_Pq_Qq);
+/** Opcode 0x66 0x0f 0xfa - psubd Vx, Wx */
+FNIEMOP_STUB(iemOp_psubd_Vx_Wx);
+/*  Opcode 0xf2 0x0f 0xfa - invalid */
+
+/** Opcode      0x0f 0xfb - psubq Pq, Qq */
+FNIEMOP_STUB(iemOp_psubq_Pq_Qq);
+/** Opcode 0x66 0x0f 0xfb - psubq Vx, W */
+FNIEMOP_STUB(iemOp_psubq_Vx_W);
+/*  Opcode 0xf2 0x0f 0xfb - invalid */
+
+/** Opcode      0x0f 0xfc - paddb Pq, Qq */
+FNIEMOP_STUB(iemOp_paddb_Pq_Qq);
+/** Opcode 0x66 0x0f 0xfc - paddb Vx, Wx */
+FNIEMOP_STUB(iemOp_paddb_Vx_Wx);
+/*  Opcode 0xf2 0x0f 0xfc - invalid */
+
+/** Opcode      0x0f 0xfd - paddw Pq, Qq */
+FNIEMOP_STUB(iemOp_paddw_Pq_Qq);
+/** Opcode 0x66 0x0f 0xfd - paddw Vx, Wx */
+FNIEMOP_STUB(iemOp_paddw_Vx_Wx);
+/*  Opcode 0xf2 0x0f 0xfd - invalid */
+
+/** Opcode      0x0f 0xfe - paddd Pq, Qq */
+FNIEMOP_STUB(iemOp_paddd_Pq_Qq);
+/** Opcode 0x66 0x0f 0xfe - paddd Vx, W */
+FNIEMOP_STUB(iemOp_paddd_Vx_W);
+/*  Opcode 0xf2 0x0f 0xfe - invalid */
+
+
+/** Opcode **** 0x0f 0xff - UD0 */
+FNIEMOP_DEF(iemOp_ud0)
+{
+    IEMOP_MNEMONIC(ud0, "ud0");
+    if (pVCpu->iem.s.enmCpuVendor == CPUMCPUVENDOR_INTEL)
+    {
+        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm); RT_NOREF(bRm);
+#ifndef TST_IEM_CHECK_MC
+        RTGCPTR      GCPtrEff;
+        VBOXSTRICTRC rcStrict = iemOpHlpCalcRmEffAddr(pVCpu, bRm, 0, &GCPtrEff);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+#endif
+        IEMOP_HLP_DONE_DECODING();
+    }
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+
+/**
+ * Two byte opcode map, first byte 0x0f.
+ *
+ * @remarks The g_apfnVexMap1 table is currently a subset of this one, so please
+ *          check if it needs updating as well when making changes.
+ */
+IEM_STATIC const PFNIEMOP g_apfnTwoByteMap[] =
+{
+    /*          no prefix,                  066h prefix,                f3h prefix,                 f2h prefix */
+    /* 0x00 */  IEMOP_X4(iemOp_Grp6),
+    /* 0x01 */  IEMOP_X4(iemOp_Grp7),
+    /* 0x02 */  IEMOP_X4(iemOp_lar_Gv_Ew),
+    /* 0x03 */  IEMOP_X4(iemOp_lsl_Gv_Ew),
+    /* 0x04 */  IEMOP_X4(iemOp_Invalid),
+    /* 0x05 */  IEMOP_X4(iemOp_syscall),
+    /* 0x06 */  IEMOP_X4(iemOp_clts),
+    /* 0x07 */  IEMOP_X4(iemOp_sysret),
+    /* 0x08 */  IEMOP_X4(iemOp_invd),
+    /* 0x09 */  IEMOP_X4(iemOp_wbinvd),
+    /* 0x0a */  IEMOP_X4(iemOp_Invalid),
+    /* 0x0b */  IEMOP_X4(iemOp_ud2),
+    /* 0x0c */  IEMOP_X4(iemOp_Invalid),
+    /* 0x0d */  IEMOP_X4(iemOp_nop_Ev_GrpP),
+    /* 0x0e */  IEMOP_X4(iemOp_femms),
+    /* 0x0f */  IEMOP_X4(iemOp_3Dnow),
+
+    /* 0x10 */  iemOp_movups_Vps_Wps,       iemOp_movupd_Vpd_Wpd,      iemOp_movss_Vss_Wss,        iemOp_movsd_Vx_Wsd,
+    /* 0x11 */  iemOp_movups_Wps_Vps,       iemOp_movupd_Wpd_Vpd,      iemOp_movss_Wss_Vss,        iemOp_movsd_Wsd_Vsd,
+    /* 0x12 */  iemOp_movlps_Vq_Mq__movhlps, iemOp_movlpd_Vq_Mq,       iemOp_movsldup_Vdq_Wdq,     iemOp_movddup_Vdq_Wdq,
+    /* 0x13 */  iemOp_movlps_Mq_Vq,         iemOp_movlpd_Mq_Vq,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x14 */  iemOp_unpcklps_Vx_Wx,       iemOp_unpcklpd_Vx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x15 */  iemOp_unpckhps_Vx_Wx,       iemOp_unpckhpd_Vx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x16 */  iemOp_movhpsv1_Vdq_Mq__movlhps_Vdq_Uq,  iemOp_movhpdv1_Vdq_Mq, iemOp_movshdup_Vx_Wx, iemOp_InvalidNeedRM,
+    /* 0x17 */  iemOp_movhpsv1_Mq_Vq,       iemOp_movhpdv1_Mq_Vq,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x18 */  IEMOP_X4(iemOp_prefetch_Grp16),
+    /* 0x19 */  IEMOP_X4(iemOp_nop_Ev),
+    /* 0x1a */  IEMOP_X4(iemOp_nop_Ev),
+    /* 0x1b */  IEMOP_X4(iemOp_nop_Ev),
+    /* 0x1c */  IEMOP_X4(iemOp_nop_Ev),
+    /* 0x1d */  IEMOP_X4(iemOp_nop_Ev),
+    /* 0x1e */  IEMOP_X4(iemOp_nop_Ev),
+    /* 0x1f */  IEMOP_X4(iemOp_nop_Ev),
+
+    /* 0x20 */  iemOp_mov_Rd_Cd,            iemOp_mov_Rd_Cd,            iemOp_mov_Rd_Cd,            iemOp_mov_Rd_Cd,
+    /* 0x21 */  iemOp_mov_Rd_Dd,            iemOp_mov_Rd_Dd,            iemOp_mov_Rd_Dd,            iemOp_mov_Rd_Dd,
+    /* 0x22 */  iemOp_mov_Cd_Rd,            iemOp_mov_Cd_Rd,            iemOp_mov_Cd_Rd,            iemOp_mov_Cd_Rd,
+    /* 0x23 */  iemOp_mov_Dd_Rd,            iemOp_mov_Dd_Rd,            iemOp_mov_Dd_Rd,            iemOp_mov_Dd_Rd,
+    /* 0x24 */  iemOp_mov_Rd_Td,            iemOp_mov_Rd_Td,            iemOp_mov_Rd_Td,            iemOp_mov_Rd_Td,
+    /* 0x25 */  iemOp_Invalid,              iemOp_Invalid,              iemOp_Invalid,              iemOp_Invalid,
+    /* 0x26 */  iemOp_mov_Td_Rd,            iemOp_mov_Td_Rd,            iemOp_mov_Td_Rd,            iemOp_mov_Td_Rd,
+    /* 0x27 */  iemOp_Invalid,              iemOp_Invalid,              iemOp_Invalid,              iemOp_Invalid,
+    /* 0x28 */  iemOp_movaps_Vps_Wps,       iemOp_movapd_Vpd_Wpd,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x29 */  iemOp_movaps_Wps_Vps,       iemOp_movapd_Wpd_Vpd,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2a */  iemOp_cvtpi2ps_Vps_Qpi,     iemOp_cvtpi2pd_Vpd_Qpi,     iemOp_cvtsi2ss_Vss_Ey,      iemOp_cvtsi2sd_Vsd_Ey,
+    /* 0x2b */  iemOp_movntps_Mps_Vps,      iemOp_movntpd_Mpd_Vpd,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2c */  iemOp_cvttps2pi_Ppi_Wps,    iemOp_cvttpd2pi_Ppi_Wpd,    iemOp_cvttss2si_Gy_Wss,     iemOp_cvttsd2si_Gy_Wsd,
+    /* 0x2d */  iemOp_cvtps2pi_Ppi_Wps,     iemOp_cvtpd2pi_Qpi_Wpd,     iemOp_cvtss2si_Gy_Wss,      iemOp_cvtsd2si_Gy_Wsd,
+    /* 0x2e */  iemOp_ucomiss_Vss_Wss,      iemOp_ucomisd_Vsd_Wsd,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2f */  iemOp_comiss_Vss_Wss,       iemOp_comisd_Vsd_Wsd,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0x30 */  IEMOP_X4(iemOp_wrmsr),
+    /* 0x31 */  IEMOP_X4(iemOp_rdtsc),
+    /* 0x32 */  IEMOP_X4(iemOp_rdmsr),
+    /* 0x33 */  IEMOP_X4(iemOp_rdpmc),
+    /* 0x34 */  IEMOP_X4(iemOp_sysenter),
+    /* 0x35 */  IEMOP_X4(iemOp_sysexit),
+    /* 0x36 */  IEMOP_X4(iemOp_Invalid),
+    /* 0x37 */  IEMOP_X4(iemOp_getsec),
+    /* 0x38 */  IEMOP_X4(iemOp_3byte_Esc_0f_38),
+    /* 0x39 */  IEMOP_X4(iemOp_InvalidNeed3ByteEscRM),
+    /* 0x3a */  IEMOP_X4(iemOp_3byte_Esc_0f_3a),
+    /* 0x3b */  IEMOP_X4(iemOp_InvalidNeed3ByteEscRMImm8),
+    /* 0x3c */  IEMOP_X4(iemOp_InvalidNeed3ByteEscRM),
+    /* 0x3d */  IEMOP_X4(iemOp_InvalidNeed3ByteEscRM),
+    /* 0x3e */  IEMOP_X4(iemOp_InvalidNeed3ByteEscRMImm8),
+    /* 0x3f */  IEMOP_X4(iemOp_InvalidNeed3ByteEscRMImm8),
+
+    /* 0x40 */  IEMOP_X4(iemOp_cmovo_Gv_Ev),
+    /* 0x41 */  IEMOP_X4(iemOp_cmovno_Gv_Ev),
+    /* 0x42 */  IEMOP_X4(iemOp_cmovc_Gv_Ev),
+    /* 0x43 */  IEMOP_X4(iemOp_cmovnc_Gv_Ev),
+    /* 0x44 */  IEMOP_X4(iemOp_cmove_Gv_Ev),
+    /* 0x45 */  IEMOP_X4(iemOp_cmovne_Gv_Ev),
+    /* 0x46 */  IEMOP_X4(iemOp_cmovbe_Gv_Ev),
+    /* 0x47 */  IEMOP_X4(iemOp_cmovnbe_Gv_Ev),
+    /* 0x48 */  IEMOP_X4(iemOp_cmovs_Gv_Ev),
+    /* 0x49 */  IEMOP_X4(iemOp_cmovns_Gv_Ev),
+    /* 0x4a */  IEMOP_X4(iemOp_cmovp_Gv_Ev),
+    /* 0x4b */  IEMOP_X4(iemOp_cmovnp_Gv_Ev),
+    /* 0x4c */  IEMOP_X4(iemOp_cmovl_Gv_Ev),
+    /* 0x4d */  IEMOP_X4(iemOp_cmovnl_Gv_Ev),
+    /* 0x4e */  IEMOP_X4(iemOp_cmovle_Gv_Ev),
+    /* 0x4f */  IEMOP_X4(iemOp_cmovnle_Gv_Ev),
+
+    /* 0x50 */  iemOp_movmskps_Gy_Ups,      iemOp_movmskpd_Gy_Upd,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x51 */  iemOp_sqrtps_Vps_Wps,       iemOp_sqrtpd_Vpd_Wpd,       iemOp_sqrtss_Vss_Wss,       iemOp_sqrtsd_Vsd_Wsd,
+    /* 0x52 */  iemOp_rsqrtps_Vps_Wps,      iemOp_InvalidNeedRM,        iemOp_rsqrtss_Vss_Wss,      iemOp_InvalidNeedRM,
+    /* 0x53 */  iemOp_rcpps_Vps_Wps,        iemOp_InvalidNeedRM,        iemOp_rcpss_Vss_Wss,        iemOp_InvalidNeedRM,
+    /* 0x54 */  iemOp_andps_Vps_Wps,        iemOp_andpd_Vpd_Wpd,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x55 */  iemOp_andnps_Vps_Wps,       iemOp_andnpd_Vpd_Wpd,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x56 */  iemOp_orps_Vps_Wps,         iemOp_orpd_Vpd_Wpd,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x57 */  iemOp_xorps_Vps_Wps,        iemOp_xorpd_Vpd_Wpd,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x58 */  iemOp_addps_Vps_Wps,        iemOp_addpd_Vpd_Wpd,        iemOp_addss_Vss_Wss,        iemOp_addsd_Vsd_Wsd,
+    /* 0x59 */  iemOp_mulps_Vps_Wps,        iemOp_mulpd_Vpd_Wpd,        iemOp_mulss_Vss_Wss,        iemOp_mulsd_Vsd_Wsd,
+    /* 0x5a */  iemOp_cvtps2pd_Vpd_Wps,     iemOp_cvtpd2ps_Vps_Wpd,     iemOp_cvtss2sd_Vsd_Wss,     iemOp_cvtsd2ss_Vss_Wsd,
+    /* 0x5b */  iemOp_cvtdq2ps_Vps_Wdq,     iemOp_cvtps2dq_Vdq_Wps,     iemOp_cvttps2dq_Vdq_Wps,    iemOp_InvalidNeedRM,
+    /* 0x5c */  iemOp_subps_Vps_Wps,        iemOp_subpd_Vpd_Wpd,        iemOp_subss_Vss_Wss,        iemOp_subsd_Vsd_Wsd,
+    /* 0x5d */  iemOp_minps_Vps_Wps,        iemOp_minpd_Vpd_Wpd,        iemOp_minss_Vss_Wss,        iemOp_minsd_Vsd_Wsd,
+    /* 0x5e */  iemOp_divps_Vps_Wps,        iemOp_divpd_Vpd_Wpd,        iemOp_divss_Vss_Wss,        iemOp_divsd_Vsd_Wsd,
+    /* 0x5f */  iemOp_maxps_Vps_Wps,        iemOp_maxpd_Vpd_Wpd,        iemOp_maxss_Vss_Wss,        iemOp_maxsd_Vsd_Wsd,
+
+    /* 0x60 */  iemOp_punpcklbw_Pq_Qd,      iemOp_punpcklbw_Vx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x61 */  iemOp_punpcklwd_Pq_Qd,      iemOp_punpcklwd_Vx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x62 */  iemOp_punpckldq_Pq_Qd,      iemOp_punpckldq_Vx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x63 */  iemOp_packsswb_Pq_Qq,       iemOp_packsswb_Vx_Wx,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x64 */  iemOp_pcmpgtb_Pq_Qq,        iemOp_pcmpgtb_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x65 */  iemOp_pcmpgtw_Pq_Qq,        iemOp_pcmpgtw_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x66 */  iemOp_pcmpgtd_Pq_Qq,        iemOp_pcmpgtd_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x67 */  iemOp_packuswb_Pq_Qq,       iemOp_packuswb_Vx_W,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x68 */  iemOp_punpckhbw_Pq_Qd,      iemOp_punpckhbw_Vx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x69 */  iemOp_punpckhwd_Pq_Qd,      iemOp_punpckhwd_Vx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6a */  iemOp_punpckhdq_Pq_Qd,      iemOp_punpckhdq_Vx_W,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6b */  iemOp_packssdw_Pq_Qd,       iemOp_packssdw_Vx_Wx,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6c */  iemOp_InvalidNeedRM,        iemOp_punpcklqdq_Vx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6d */  iemOp_InvalidNeedRM,        iemOp_punpckhqdq_Vx_W,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6e */  iemOp_movd_q_Pd_Ey,         iemOp_movd_q_Vy_Ey,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6f */  iemOp_movq_Pq_Qq,           iemOp_movdqa_Vx_Wx,         iemOp_movdqu_Vx_Wx,         iemOp_InvalidNeedRM,
+
+    /* 0x70 */  iemOp_pshufw_Pq_Qq_Ib,      iemOp_pshufd_Vx_Wx_Ib,      iemOp_pshufhw_Vx_Wx_Ib,     iemOp_pshuflw_Vx_Wx_Ib,
+    /* 0x71 */  IEMOP_X4(iemOp_Grp12),
+    /* 0x72 */  IEMOP_X4(iemOp_Grp13),
+    /* 0x73 */  IEMOP_X4(iemOp_Grp14),
+    /* 0x74 */  iemOp_pcmpeqb_Pq_Qq,        iemOp_pcmpeqb_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x75 */  iemOp_pcmpeqw_Pq_Qq,        iemOp_pcmpeqw_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x76 */  iemOp_pcmpeqd_Pq_Qq,        iemOp_pcmpeqd_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x77 */  iemOp_emms,                 iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0x78 */  iemOp_vmread_Ey_Gy,         iemOp_AmdGrp17,             iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x79 */  iemOp_vmwrite_Gy_Ey,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x7a */  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x7b */  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x7c */  iemOp_InvalidNeedRM,        iemOp_haddpd_Vpd_Wpd,       iemOp_InvalidNeedRM,        iemOp_haddps_Vps_Wps,
+    /* 0x7d */  iemOp_InvalidNeedRM,        iemOp_hsubpd_Vpd_Wpd,       iemOp_InvalidNeedRM,        iemOp_hsubps_Vps_Wps,
+    /* 0x7e */  iemOp_movd_q_Ey_Pd,         iemOp_movd_q_Ey_Vy,         iemOp_movq_Vq_Wq,           iemOp_InvalidNeedRM,
+    /* 0x7f */  iemOp_movq_Qq_Pq,           iemOp_movdqa_Wx_Vx,         iemOp_movdqu_Wx_Vx,         iemOp_InvalidNeedRM,
+
+    /* 0x80 */  IEMOP_X4(iemOp_jo_Jv),
+    /* 0x81 */  IEMOP_X4(iemOp_jno_Jv),
+    /* 0x82 */  IEMOP_X4(iemOp_jc_Jv),
+    /* 0x83 */  IEMOP_X4(iemOp_jnc_Jv),
+    /* 0x84 */  IEMOP_X4(iemOp_je_Jv),
+    /* 0x85 */  IEMOP_X4(iemOp_jne_Jv),
+    /* 0x86 */  IEMOP_X4(iemOp_jbe_Jv),
+    /* 0x87 */  IEMOP_X4(iemOp_jnbe_Jv),
+    /* 0x88 */  IEMOP_X4(iemOp_js_Jv),
+    /* 0x89 */  IEMOP_X4(iemOp_jns_Jv),
+    /* 0x8a */  IEMOP_X4(iemOp_jp_Jv),
+    /* 0x8b */  IEMOP_X4(iemOp_jnp_Jv),
+    /* 0x8c */  IEMOP_X4(iemOp_jl_Jv),
+    /* 0x8d */  IEMOP_X4(iemOp_jnl_Jv),
+    /* 0x8e */  IEMOP_X4(iemOp_jle_Jv),
+    /* 0x8f */  IEMOP_X4(iemOp_jnle_Jv),
+
+    /* 0x90 */  IEMOP_X4(iemOp_seto_Eb),
+    /* 0x91 */  IEMOP_X4(iemOp_setno_Eb),
+    /* 0x92 */  IEMOP_X4(iemOp_setc_Eb),
+    /* 0x93 */  IEMOP_X4(iemOp_setnc_Eb),
+    /* 0x94 */  IEMOP_X4(iemOp_sete_Eb),
+    /* 0x95 */  IEMOP_X4(iemOp_setne_Eb),
+    /* 0x96 */  IEMOP_X4(iemOp_setbe_Eb),
+    /* 0x97 */  IEMOP_X4(iemOp_setnbe_Eb),
+    /* 0x98 */  IEMOP_X4(iemOp_sets_Eb),
+    /* 0x99 */  IEMOP_X4(iemOp_setns_Eb),
+    /* 0x9a */  IEMOP_X4(iemOp_setp_Eb),
+    /* 0x9b */  IEMOP_X4(iemOp_setnp_Eb),
+    /* 0x9c */  IEMOP_X4(iemOp_setl_Eb),
+    /* 0x9d */  IEMOP_X4(iemOp_setnl_Eb),
+    /* 0x9e */  IEMOP_X4(iemOp_setle_Eb),
+    /* 0x9f */  IEMOP_X4(iemOp_setnle_Eb),
+
+    /* 0xa0 */  IEMOP_X4(iemOp_push_fs),
+    /* 0xa1 */  IEMOP_X4(iemOp_pop_fs),
+    /* 0xa2 */  IEMOP_X4(iemOp_cpuid),
+    /* 0xa3 */  IEMOP_X4(iemOp_bt_Ev_Gv),
+    /* 0xa4 */  IEMOP_X4(iemOp_shld_Ev_Gv_Ib),
+    /* 0xa5 */  IEMOP_X4(iemOp_shld_Ev_Gv_CL),
+    /* 0xa6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa8 */  IEMOP_X4(iemOp_push_gs),
+    /* 0xa9 */  IEMOP_X4(iemOp_pop_gs),
+    /* 0xaa */  IEMOP_X4(iemOp_rsm),
+    /* 0xab */  IEMOP_X4(iemOp_bts_Ev_Gv),
+    /* 0xac */  IEMOP_X4(iemOp_shrd_Ev_Gv_Ib),
+    /* 0xad */  IEMOP_X4(iemOp_shrd_Ev_Gv_CL),
+    /* 0xae */  IEMOP_X4(iemOp_Grp15),
+    /* 0xaf */  IEMOP_X4(iemOp_imul_Gv_Ev),
+
+    /* 0xb0 */  IEMOP_X4(iemOp_cmpxchg_Eb_Gb),
+    /* 0xb1 */  IEMOP_X4(iemOp_cmpxchg_Ev_Gv),
+    /* 0xb2 */  IEMOP_X4(iemOp_lss_Gv_Mp),
+    /* 0xb3 */  IEMOP_X4(iemOp_btr_Ev_Gv),
+    /* 0xb4 */  IEMOP_X4(iemOp_lfs_Gv_Mp),
+    /* 0xb5 */  IEMOP_X4(iemOp_lgs_Gv_Mp),
+    /* 0xb6 */  IEMOP_X4(iemOp_movzx_Gv_Eb),
+    /* 0xb7 */  IEMOP_X4(iemOp_movzx_Gv_Ew),
+    /* 0xb8 */  iemOp_jmpe,                 iemOp_InvalidNeedRM,        iemOp_popcnt_Gv_Ev,         iemOp_InvalidNeedRM,
+    /* 0xb9 */  IEMOP_X4(iemOp_Grp10),
+    /* 0xba */  IEMOP_X4(iemOp_Grp8),
+    /* 0xbb */  IEMOP_X4(iemOp_btc_Ev_Gv), // 0xf3?
+    /* 0xbc */  iemOp_bsf_Gv_Ev,            iemOp_bsf_Gv_Ev,            iemOp_tzcnt_Gv_Ev,          iemOp_bsf_Gv_Ev,
+    /* 0xbd */  iemOp_bsr_Gv_Ev,            iemOp_bsr_Gv_Ev,            iemOp_lzcnt_Gv_Ev,          iemOp_bsr_Gv_Ev,
+    /* 0xbe */  IEMOP_X4(iemOp_movsx_Gv_Eb),
+    /* 0xbf */  IEMOP_X4(iemOp_movsx_Gv_Ew),
+
+    /* 0xc0 */  IEMOP_X4(iemOp_xadd_Eb_Gb),
+    /* 0xc1 */  IEMOP_X4(iemOp_xadd_Ev_Gv),
+    /* 0xc2 */  iemOp_cmpps_Vps_Wps_Ib,     iemOp_cmppd_Vpd_Wpd_Ib,     iemOp_cmpss_Vss_Wss_Ib,     iemOp_cmpsd_Vsd_Wsd_Ib,
+    /* 0xc3 */  iemOp_movnti_My_Gy,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xc4 */  iemOp_pinsrw_Pq_RyMw_Ib,    iemOp_pinsrw_Vdq_RyMw_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0xc5 */  iemOp_pextrw_Gd_Nq_Ib,      iemOp_pextrw_Gd_Udq_Ib,     iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0xc6 */  iemOp_shufps_Vps_Wps_Ib,    iemOp_shufpd_Vpd_Wpd_Ib,    iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0xc7 */  IEMOP_X4(iemOp_Grp9),
+    /* 0xc8 */  IEMOP_X4(iemOp_bswap_rAX_r8),
+    /* 0xc9 */  IEMOP_X4(iemOp_bswap_rCX_r9),
+    /* 0xca */  IEMOP_X4(iemOp_bswap_rDX_r10),
+    /* 0xcb */  IEMOP_X4(iemOp_bswap_rBX_r11),
+    /* 0xcc */  IEMOP_X4(iemOp_bswap_rSP_r12),
+    /* 0xcd */  IEMOP_X4(iemOp_bswap_rBP_r13),
+    /* 0xce */  IEMOP_X4(iemOp_bswap_rSI_r14),
+    /* 0xcf */  IEMOP_X4(iemOp_bswap_rDI_r15),
+
+    /* 0xd0 */  iemOp_InvalidNeedRM,        iemOp_addsubpd_Vpd_Wpd,     iemOp_InvalidNeedRM,        iemOp_addsubps_Vps_Wps,
+    /* 0xd1 */  iemOp_psrlw_Pq_Qq,          iemOp_psrlw_Vx_W,           iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd2 */  iemOp_psrld_Pq_Qq,          iemOp_psrld_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd3 */  iemOp_psrlq_Pq_Qq,          iemOp_psrlq_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd4 */  iemOp_paddq_Pq_Qq,          iemOp_paddq_Vx_W,           iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd5 */  iemOp_pmullw_Pq_Qq,         iemOp_pmullw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd6 */  iemOp_InvalidNeedRM,        iemOp_movq_Wq_Vq,           iemOp_movq2dq_Vdq_Nq,       iemOp_movdq2q_Pq_Uq,
+    /* 0xd7 */  iemOp_pmovmskb_Gd_Nq,       iemOp_pmovmskb_Gd_Ux,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd8 */  iemOp_psubusb_Pq_Qq,        iemOp_psubusb_Vx_W,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd9 */  iemOp_psubusw_Pq_Qq,        iemOp_psubusw_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xda */  iemOp_pminub_Pq_Qq,         iemOp_pminub_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdb */  iemOp_pand_Pq_Qq,           iemOp_pand_Vx_W,            iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdc */  iemOp_paddusb_Pq_Qq,        iemOp_paddusb_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdd */  iemOp_paddusw_Pq_Qq,        iemOp_paddusw_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xde */  iemOp_pmaxub_Pq_Qq,         iemOp_pmaxub_Vx_W,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdf */  iemOp_pandn_Pq_Qq,          iemOp_pandn_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0xe0 */  iemOp_pavgb_Pq_Qq,          iemOp_pavgb_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe1 */  iemOp_psraw_Pq_Qq,          iemOp_psraw_Vx_W,           iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe2 */  iemOp_psrad_Pq_Qq,          iemOp_psrad_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe3 */  iemOp_pavgw_Pq_Qq,          iemOp_pavgw_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe4 */  iemOp_pmulhuw_Pq_Qq,        iemOp_pmulhuw_Vx_W,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe5 */  iemOp_pmulhw_Pq_Qq,         iemOp_pmulhw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe6 */  iemOp_InvalidNeedRM,        iemOp_cvttpd2dq_Vx_Wpd,     iemOp_cvtdq2pd_Vx_Wpd,      iemOp_cvtpd2dq_Vx_Wpd,
+    /* 0xe7 */  iemOp_movntq_Mq_Pq,         iemOp_movntdq_Mx_Vx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe8 */  iemOp_psubsb_Pq_Qq,         iemOp_psubsb_Vx_W,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe9 */  iemOp_psubsw_Pq_Qq,         iemOp_psubsw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xea */  iemOp_pminsw_Pq_Qq,         iemOp_pminsw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xeb */  iemOp_por_Pq_Qq,            iemOp_por_Vx_W,             iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xec */  iemOp_paddsb_Pq_Qq,         iemOp_paddsb_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xed */  iemOp_paddsw_Pq_Qq,         iemOp_paddsw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xee */  iemOp_pmaxsw_Pq_Qq,         iemOp_pmaxsw_Vx_W,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xef */  iemOp_pxor_Pq_Qq,           iemOp_pxor_Vx_Wx,           iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0xf0 */  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_lddqu_Vx_Mx,
+    /* 0xf1 */  iemOp_psllw_Pq_Qq,          iemOp_psllw_Vx_W,           iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf2 */  iemOp_pslld_Pq_Qq,          iemOp_pslld_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf3 */  iemOp_psllq_Pq_Qq,          iemOp_psllq_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf4 */  iemOp_pmuludq_Pq_Qq,        iemOp_pmuludq_Vx_W,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf5 */  iemOp_pmaddwd_Pq_Qq,        iemOp_pmaddwd_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf6 */  iemOp_psadbw_Pq_Qq,         iemOp_psadbw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf7 */  iemOp_maskmovq_Pq_Nq,       iemOp_maskmovdqu_Vdq_Udq,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf8 */  iemOp_psubb_Pq_Qq,          iemOp_psubb_Vx_W,           iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf9 */  iemOp_psubw_Pq_Qq,          iemOp_psubw_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xfa */  iemOp_psubd_Pq_Qq,          iemOp_psubd_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xfb */  iemOp_psubq_Pq_Qq,          iemOp_psubq_Vx_W,           iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xfc */  iemOp_paddb_Pq_Qq,          iemOp_paddb_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xfd */  iemOp_paddw_Pq_Qq,          iemOp_paddw_Vx_Wx,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xfe */  iemOp_paddd_Pq_Qq,          iemOp_paddd_Vx_W,           iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xff */  IEMOP_X4(iemOp_ud0),
+};
+AssertCompile(RT_ELEMENTS(g_apfnTwoByteMap) == 1024);
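+
+/*
+ * Each opcode byte above has four dispatch columns, one per mandatory-prefix
+ * state (none, 0x66, 0xF3, 0xF2); 256 opcodes times 4 columns gives the 1024
+ * entries asserted above.  A minimal sketch of the lookup, assuming the
+ * decoder keeps the current prefix state as an index 0..3 (the idxPrefix
+ * name here is illustrative, not a confirmed member):
+ *
+ *     uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
+ *     return FNIEMOP_CALL(g_apfnTwoByteMap[(uintptr_t)b * 4 + idxPrefix]);
+ */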
+
+/** @} */
+
diff --git a/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap1.cpp.h b/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap1.cpp.h
new file mode 100644
index 0000000..4bd8e2b
--- /dev/null
+++ b/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap1.cpp.h
@@ -0,0 +1,3065 @@
+/* $Id: IEMAllInstructionsVexMap1.cpp.h $ */
+/** @file
+ * IEM - Instruction Decoding and Emulation.
+ *
+ * @remarks IEMAllInstructionsTwoByte0f.cpp.h is a legacy mirror of this file.
+ *          Any update here is likely needed in that file too.
+ */
+
+/*
+ * Copyright (C) 2011-2016 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @name VEX Opcode Map 1
+ * @{
+ */
+
+
+/*  Opcode VEX.0F 0x00 - invalid */
+/*  Opcode VEX.0F 0x01 - invalid */
+/*  Opcode VEX.0F 0x02 - invalid */
+/*  Opcode VEX.0F 0x03 - invalid */
+/*  Opcode VEX.0F 0x04 - invalid */
+/*  Opcode VEX.0F 0x05 - invalid */
+/*  Opcode VEX.0F 0x06 - invalid */
+/*  Opcode VEX.0F 0x07 - invalid */
+/*  Opcode VEX.0F 0x08 - invalid */
+/*  Opcode VEX.0F 0x09 - invalid */
+/*  Opcode VEX.0F 0x0a - invalid */
+
+/** Opcode VEX.0F 0x0b. */
+FNIEMOP_DEF(iemOp_vud2)
+{
+    IEMOP_MNEMONIC(vud2, "vud2");
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+/*  Opcode VEX.0F 0x0c - invalid */
+/*  Opcode VEX.0F 0x0d - invalid */
+/*  Opcode VEX.0F 0x0e - invalid */
+/*  Opcode VEX.0F 0x0f - invalid */
+
+
+/** Opcode VEX.0F 0x10 - vmovups Vps, Wps */
+FNIEMOP_STUB(iemOp_vmovups_Vps_Wps);
+/** Opcode VEX.66.0F 0x10 - vmovupd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_vmovupd_Vpd_Wpd);
+
+
+/** Opcode VEX.F3.0F 0x10 - vmovss Vx, Hx, Wss */
+/**
+ * @ opcode      0x10
+ * @ oppfx       0xf3
+ * @ opcpuid     sse
+ * @ opgroup     og_sse_simdfp_datamove
+ * @ opxcpttype  5
+ * @ optest      op1=1 op2=2 -> op1=2
+ * @ optest      op1=0 op2=-22 -> op1=-22
+ * @ oponly
+ */
+FNIEMOP_STUB(iemOp_vmovss_Vx_Hx_Wss);
+//FNIEMOP_DEF(iemOp_movss_Vss_Wss)
+//{
+//    I E M O P _ M N E M O N I C 2(RM, VMOVSS, vmovss, VssZxReg, Wss, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 1);
+//        IEM_MC_LOCAL(uint32_t,                  uSrc);
+//
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_FETCH_XREG_U32(uSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_STORE_XREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(uint32_t,                  uSrc);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//
+//        IEM_MC_FETCH_MEM_U32(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_STORE_XREG_U32_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
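+
+/*
+ * The disabled body above shows the decode pattern repeated throughout this
+ * file: the ModRM byte is split into mod (bits 7:6), reg (bits 5:3) and r/m
+ * (bits 2:0), with mod == 3 selecting the register form and anything else
+ * the memory form.  A standalone sketch of that split (plain C, no IEM
+ * state; the REX/VEX extension bits are OR'ed in separately as above):
+ *
+ *     uint8_t const iMod = (bRm >> 6) & 3;    // 3 = register operand
+ *     uint8_t const iReg = (bRm >> 3) & 7;    // extended by REX.R / VEX.R
+ *     uint8_t const iRm  =  bRm       & 7;    // extended by REX.B / VEX.B
+ */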
+
+/** Opcode VEX.F2.0F 0x10 - vmovsd Vx, Hx, Wsd */
+FNIEMOP_STUB(iemOp_vmovsd_Vx_Hx_Wsd);
+
+
+/**
+ * @ opcode      0x11
+ * @ oppfx       none
+ * @ opcpuid     sse
+ * @ opgroup     og_sse_simdfp_datamove
+ * @ opxcpttype  4UA
+ * @ optest      op1=1 op2=2 -> op1=2
+ * @ optest      op1=0 op2=-42 -> op1=-42
+ */
+FNIEMOP_STUB(iemOp_vmovups_Wps_Vps);
+//FNIEMOP_DEF(iemOp_vmovups_Wps_Vps)
+//{
+//    IEMOP_MNEMONIC2(MR, VMOVUPS, vmovups, Wps, Vps, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 0);
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+//                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U128(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+
+/**
+ * @ opcode      0x11
+ * @ oppfx       0x66
+ * @ opcpuid     sse2
+ * @ opgroup     og_sse2_pcksclr_datamove
+ * @ opxcpttype  4UA
+ * @ optest      op1=1 op2=2 -> op1=2
+ * @ optest      op1=0 op2=-42 -> op1=-42
+ */
+FNIEMOP_STUB(iemOp_vmovupd_Wpd_Vpd);
+//FNIEMOP_DEF(iemOp_vmovupd_Wpd_Vpd)
+//{
+//    IEMOP_MNEMONIC2(MR, VMOVUPD, vmovupd, Wpd, Vpd, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 0);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+//                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U128(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+
+/**
+ * @ opcode      0x11
+ * @ oppfx       0xf3
+ * @ opcpuid     sse
+ * @ opgroup     og_sse_simdfp_datamove
+ * @ opxcpttype  5
+ * @ optest      op1=1 op2=2 -> op1=2
+ * @ optest      op1=0 op2=-22 -> op1=-22
+ */
+FNIEMOP_STUB(iemOp_vmovss_Wss_Hx_Vss);
+//FNIEMOP_DEF(iemOp_vmovss_Wss_Hx_Vss)
+//{
+//    IEMOP_MNEMONIC2(MR, VMOVSS, vmovss, Wss, Vss, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 1);
+//        IEM_MC_LOCAL(uint32_t,                  uSrc);
+//
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_FETCH_XREG_U32(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_XREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(uint32_t,                  uSrc);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U32(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+
+/**
+ * @ opcode      0x11
+ * @ oppfx       0xf2
+ * @ opcpuid     sse2
+ * @ opgroup     og_sse2_pcksclr_datamove
+ * @ opxcpttype  5
+ * @ optest      op1=1 op2=2 -> op1=2
+ * @ optest      op1=0 op2=-42 -> op1=-42
+ */
+FNIEMOP_STUB(iemOp_vmovsd_Wsd_Hx_Vsd);
+//FNIEMOP_DEF(iemOp_vmovsd_Wsd_Hx_Vsd)
+//{
+//    IEMOP_MNEMONIC2(MR, VMOVSD, vmovsd, Wsd, Vsd, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 1);
+//        IEM_MC_LOCAL(uint64_t,                  uSrc);
+//
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_XREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(uint64_t,                  uSrc);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+
+FNIEMOP_STUB(iemOp_vmovlps_Vq_Hq_Mq__vmovhlps);
+//FNIEMOP_DEF(iemOp_vmovlps_Vq_Hq_Mq__vmovhlps)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /**
+//         * @ opcode      0x12
+//         * @ opcodesub   11 mr/reg
+//         * @ oppfx       none
+//         * @ opcpuid     sse
+//         * @ opgroup     og_sse_simdfp_datamove
+//         * @ opxcpttype  5
+//         * @ optest      op1=1 op2=2 -> op1=2
+//         * @ optest      op1=0 op2=-42 -> op1=-42
+//         */
+//        IEMOP_MNEMONIC2(RM_REG, VMOVHLPS, vmovhlps, Vq, UqHi, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 1);
+//        IEM_MC_LOCAL(uint64_t,                  uSrc);
+//
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_FETCH_XREG_HI_U64(uSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_STORE_XREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /**
+//         * @ opdone
+//         * @ opcode      0x12
+//         * @ opcodesub   !11 mr/reg
+//         * @ oppfx       none
+//         * @ opcpuid     sse
+//         * @ opgroup     og_sse_simdfp_datamove
+//         * @ opxcpttype  5
+//         * @ optest      op1=1 op2=2 -> op1=2
+//         * @ optest      op1=0 op2=-42 -> op1=-42
+//         * @ opfunction  iemOp_vmovlps_Vq_Hq_Mq__vmovhlps
+//         */
+//        IEMOP_MNEMONIC2(RM_MEM, VMOVLPS, vmovlps, Vq, Mq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(uint64_t,                  uSrc);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//
+//        IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_STORE_XREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+
+/**
+ * @ opcode      0x12
+ * @ opcodesub   !11 mr/reg
+ * @ oppfx       0x66
+ * @ opcpuid     sse2
+ * @ opgroup     og_sse2_pcksclr_datamove
+ * @ opxcpttype  5
+ * @ optest      op1=1 op2=2 -> op1=2
+ * @ optest      op1=0 op2=-42 -> op1=-42
+ */
+FNIEMOP_STUB(iemOp_vmovlpd_Vq_Hq_Mq);
+//FNIEMOP_DEF(iemOp_vmovlpd_Vq_Hq_Mq)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        IEMOP_MNEMONIC2(RM_MEM, VMOVLPD, vmovlpd, Vq, Mq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(uint64_t,                  uSrc);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//
+//        IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_STORE_XREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//        return VINF_SUCCESS;
+//    }
+//
+//    /**
+//     * @ opdone
+//     * @ opmnemonic  ud660f12m3
+//     * @ opcode      0x12
+//     * @ opcodesub   11 mr/reg
+//     * @ oppfx       0x66
+//     * @ opunused    immediate
+//     * @ opcpuid     sse
+//     * @ optest      ->
+//     */
+//    return IEMOP_RAISE_INVALID_OPCODE();
+//}
+
+
+/**
+ * @ opcode      0x12
+ * @ oppfx       0xf3
+ * @ opcpuid     sse3
+ * @ opgroup     og_sse3_pcksclr_datamove
+ * @ opxcpttype  4
+ * @ optest      op1=-1 op2=0xdddddddd00000002eeeeeeee00000001 ->
+ *               op1=0x00000002000000020000000100000001
+ */
+FNIEMOP_STUB(iemOp_vmovsldup_Vx_Wx);
+//FNIEMOP_DEF(iemOp_vmovsldup_Vx_Wx)
+//{
+//    IEMOP_MNEMONIC2(RM, VMOVSLDUP, vmovsldup, Vdq, Wdq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(2, 0);
+//        IEM_MC_ARG(PRTUINT128U,                 puDst, 0);
+//        IEM_MC_ARG(PCRTUINT128U,                puSrc, 1);
+//
+//        IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT();
+//        IEM_MC_PREPARE_SSE_USAGE();
+//
+//        IEM_MC_REF_XREG_U128_CONST(puSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_REF_XREG_U128(puDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_movsldup, puDst, puSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(2, 2);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//        IEM_MC_ARG(PRTUINT128U,                 puDst, 0);
+//        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      puSrc, uSrc, 1);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT();
+//        IEM_MC_PREPARE_SSE_USAGE();
+//
+//        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_REF_XREG_U128(puDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_movsldup, puDst, puSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+
+/**
+ * @ opcode      0x12
+ * @ oppfx       0xf2
+ * @ opcpuid     sse3
+ * @ opgroup     og_sse3_pcksclr_datamove
+ * @ opxcpttype  5
+ * @ optest      op1=-1 op2=0xddddddddeeeeeeee2222222211111111 ->
+ *               op1=0x22222222111111112222222211111111
+ */
+FNIEMOP_STUB(iemOp_vmovddup_Vx_Wx);
+//FNIEMOP_DEF(iemOp_vmovddup_Vx_Wx)
+//{
+//    IEMOP_MNEMONIC2(RM, VMOVDDUP, vmovddup, Vdq, Wdq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(2, 0);
+//        IEM_MC_ARG(PRTUINT128U,                 puDst, 0);
+//        IEM_MC_ARG(uint64_t,                    uSrc, 1);
+//
+//        IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT();
+//        IEM_MC_PREPARE_SSE_USAGE();
+//
+//        IEM_MC_FETCH_XREG_U64(uSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_REF_XREG_U128(puDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_movddup, puDst, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(2, 2);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//        IEM_MC_ARG(PRTUINT128U,                 puDst, 0);
+//        IEM_MC_ARG(uint64_t,                    uSrc, 1);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT();
+//        IEM_MC_PREPARE_SSE_USAGE();
+//
+//        IEM_MC_FETCH_MEM_U64(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_REF_XREG_U128(puDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_movddup, puDst, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+
+/** Opcode VEX.0F 0x13 - vmovlps Mq, Vq */
+FNIEMOP_STUB(iemOp_vmovlps_Mq_Vq);
+
+/** Opcode VEX.66.0F 0x13 - vmovlpd Mq, Vq */
+FNIEMOP_STUB(iemOp_vmovlpd_Mq_Vq);
+//FNIEMOP_DEF(iemOp_vmovlpd_Mq_Vq)
+//{
+//    IEMOP_MNEMONIC(vmovlpd_Mq_Vq, "movlpd Mq,Vq");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//#if 0
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 1);
+//        IEM_MC_LOCAL(uint64_t,                  uSrc);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_XREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//#else
+//        return IEMOP_RAISE_INVALID_OPCODE();
+//#endif
+//    }
+//    else
+//    {
+//        /*
+//         * Memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(uint64_t,                  uSrc);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/*  Opcode VEX.F3.0F 0x13 - invalid */
+/*  Opcode VEX.F2.0F 0x13 - invalid */
+
+/** Opcode VEX.0F 0x14 - vunpcklps Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vunpcklps_Vx_Hx_Wx);
+/** Opcode VEX.66.0F 0x14 - vunpcklpd Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vunpcklpd_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0x14 - invalid */
+/*  Opcode VEX.F2.0F 0x14 - invalid */
+/** Opcode VEX.0F 0x15 - vunpckhps Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vunpckhps_Vx_Hx_Wx);
+/** Opcode VEX.66.0F 0x15 - vunpckhpd Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vunpckhpd_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0x15 - invalid */
+/*  Opcode VEX.F2.0F 0x15 - invalid */
+/** Opcode VEX.0F 0x16 - vmovhpsv1 Vdq, Hq, Mq / vmovlhps Vdq, Hq, Uq */
+FNIEMOP_STUB(iemOp_vmovhpsv1_Vdq_Hq_Mq__vmovlhps_Vdq_Hq_Uq);  //NEXT
+/** Opcode VEX.66.0F 0x16 - vmovhpdv1 Vdq, Hq, Mq   */
+FNIEMOP_STUB(iemOp_vmovhpdv1_Vdq_Hq_Mq);  //NEXT
+/** Opcode VEX.F3.0F 0x16 - vmovshdup Vx, Wx   */
+FNIEMOP_STUB(iemOp_vmovshdup_Vx_Wx); //NEXT
+/*  Opcode VEX.F2.0F 0x16 - invalid */
+/** Opcode VEX.0F 0x17 - vmovhpsv1 Mq, Vq   */
+FNIEMOP_STUB(iemOp_vmovhpsv1_Mq_Vq);  //NEXT
+/** Opcode VEX.66.0F 0x17 - vmovhpdv1 Mq, Vq   */
+FNIEMOP_STUB(iemOp_vmovhpdv1_Mq_Vq);  //NEXT
+/*  Opcode VEX.F3.0F 0x17 - invalid */
+/*  Opcode VEX.F2.0F 0x17 - invalid */
+
+
+/*  Opcode VEX.0F 0x18 - invalid */
+/*  Opcode VEX.0F 0x19 - invalid */
+/*  Opcode VEX.0F 0x1a - invalid */
+/*  Opcode VEX.0F 0x1b - invalid */
+/*  Opcode VEX.0F 0x1c - invalid */
+/*  Opcode VEX.0F 0x1d - invalid */
+/*  Opcode VEX.0F 0x1e - invalid */
+/*  Opcode VEX.0F 0x1f - invalid */
+
+/*  Opcode VEX.0F 0x20 - invalid */
+/*  Opcode VEX.0F 0x21 - invalid */
+/*  Opcode VEX.0F 0x22 - invalid */
+/*  Opcode VEX.0F 0x23 - invalid */
+/*  Opcode VEX.0F 0x24 - invalid */
+/*  Opcode VEX.0F 0x25 - invalid */
+/*  Opcode VEX.0F 0x26 - invalid */
+/*  Opcode VEX.0F 0x27 - invalid */
+
+/** Opcode VEX.0F 0x28 - vmovaps Vps, Wps */
+FNIEMOP_STUB(iemOp_vmovaps_Vps_Wps);
+//FNIEMOP_DEF(iemOp_vmovaps_Vps_Wps)
+//{
+//    IEMOP_MNEMONIC(vmovaps_Vps_Wps, "vmovaps Vps,Wps");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 0);
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
+//                              (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//
+//        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/** Opcode VEX.66.0F 0x28 - vmovapd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_vmovapd_Vpd_Wpd);
+//FNIEMOP_DEF(iemOp_vmovapd_Vpd_Wpd)
+//{
+//    IEMOP_MNEMONIC(vmovapd_Vpd_Wpd, "vmovapd Vpd,Wpd");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 0);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
+//                              (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//
+//        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/*  Opcode VEX.F3.0F 0x28 - invalid */
+/*  Opcode VEX.F2.0F 0x28 - invalid */
+
+/** Opcode VEX.0F 0x29 - vmovaps Wps, Vps */
+FNIEMOP_STUB(iemOp_vmovaps_Wps_Vps);
+//FNIEMOP_DEF(iemOp_vmovaps_Wps_Vps)
+//{
+//    IEMOP_MNEMONIC(vmovaps_Wps_Vps, "vmovaps Wps,Vps");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 0);
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+//                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/** Opcode VEX.66.0F 0x29 - vmovapd Wpd,Vpd */
+FNIEMOP_STUB(iemOp_vmovapd_Wpd_Vpd);
+//FNIEMOP_DEF(iemOp_vmovapd_Wpd_Vpd)
+//{
+//    IEMOP_MNEMONIC(vmovapd_Wpd_Vpd, "movapd Wpd,Vpd");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 0);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+//                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/*  Opcode VEX.F3.0F 0x29 - invalid */
+/*  Opcode VEX.F2.0F 0x29 - invalid */
+
+
+/** Opcode VEX.0F 0x2a - invalid */
+/** Opcode VEX.66.0F 0x2a - invalid */
+/** Opcode VEX.F3.0F 0x2a - vcvtsi2ss Vss, Hss, Ey */
+FNIEMOP_STUB(iemOp_vcvtsi2ss_Vss_Hss_Ey);
+/** Opcode VEX.F2.0F 0x2a - vcvtsi2sd Vsd, Hsd, Ey */
+FNIEMOP_STUB(iemOp_vcvtsi2sd_Vsd_Hsd_Ey);
+
+
+/** Opcode VEX.0F 0x2b - vmovntps Mps, Vps */
+FNIEMOP_STUB(iemOp_vmovntps_Mps_Vps);
+//FNIEMOP_DEF(iemOp_vmovntps_Mps_Vps)
+//{
+//    IEMOP_MNEMONIC(vmovntps_Mps_Vps, "movntps Mps,Vps");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//
+//        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    /* The register, register encoding is invalid. */
+//    else
+//        return IEMOP_RAISE_INVALID_OPCODE();
+//    return VINF_SUCCESS;
+//}
+
+/** Opcode VEX.66.0F 0x2b - vmovntpd Mpd, Vpd */
+FNIEMOP_STUB(iemOp_vmovntpd_Mpd_Vpd);
+//FNIEMOP_DEF(iemOp_vmovntpd_Mpd_Vpd)
+//{
+//    IEMOP_MNEMONIC(vmovntpd_Mpd_Vpd, "movntpd Mdq,Vpd");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc); /** @todo optimize this one day... */
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//
+//        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    /* The register, register encoding is invalid. */
+//    else
+//        return IEMOP_RAISE_INVALID_OPCODE();
+//    return VINF_SUCCESS;
+//}
+/*  Opcode VEX.F3.0F 0x2b - invalid */
+/*  Opcode VEX.F2.0F 0x2b - invalid */
+
+
+/*  Opcode VEX.0F 0x2c - invalid */
+/*  Opcode VEX.66.0F 0x2c - invalid */
+/** Opcode VEX.F3.0F 0x2c - vcvttss2si Gy, Wss */
+FNIEMOP_STUB(iemOp_vcvttss2si_Gy_Wss);
+/** Opcode VEX.F2.0F 0x2c - vcvttsd2si Gy, Wsd */
+FNIEMOP_STUB(iemOp_vcvttsd2si_Gy_Wsd);
+
+/*  Opcode VEX.0F 0x2d - invalid */
+/*  Opcode VEX.66.0F 0x2d - invalid */
+/** Opcode VEX.F3.0F 0x2d - vcvtss2si Gy, Wss */
+FNIEMOP_STUB(iemOp_vcvtss2si_Gy_Wss);
+/** Opcode VEX.F2.0F 0x2d - vcvtsd2si Gy, Wsd */
+FNIEMOP_STUB(iemOp_vcvtsd2si_Gy_Wsd);
+
+/** Opcode VEX.0F 0x2e - vucomiss Vss, Wss */
+FNIEMOP_STUB(iemOp_vucomiss_Vss_Wss);
+/** Opcode VEX.66.0F 0x2e - vucomisd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_vucomisd_Vsd_Wsd);
+/*  Opcode VEX.F3.0F 0x2e - invalid */
+/*  Opcode VEX.F2.0F 0x2e - invalid */
+
+/** Opcode VEX.0F 0x2f - vcomiss Vss, Wss */
+FNIEMOP_STUB(iemOp_vcomiss_Vss_Wss);
+/** Opcode VEX.66.0F 0x2f - vcomisd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_vcomisd_Vsd_Wsd);
+/*  Opcode VEX.F3.0F 0x2f - invalid */
+/*  Opcode VEX.F2.0F 0x2f - invalid */
+
+/*  Opcode VEX.0F 0x30 - invalid */
+/*  Opcode VEX.0F 0x31 - invalid */
+/*  Opcode VEX.0F 0x32 - invalid */
+/*  Opcode VEX.0F 0x33 - invalid */
+/*  Opcode VEX.0F 0x34 - invalid */
+/*  Opcode VEX.0F 0x35 - invalid */
+/*  Opcode VEX.0F 0x36 - invalid */
+/*  Opcode VEX.0F 0x37 - invalid */
+/*  Opcode VEX.0F 0x38 - invalid */
+/*  Opcode VEX.0F 0x39 - invalid */
+/*  Opcode VEX.0F 0x3a - invalid */
+/*  Opcode VEX.0F 0x3b - invalid */
+/*  Opcode VEX.0F 0x3c - invalid */
+/*  Opcode VEX.0F 0x3d - invalid */
+/*  Opcode VEX.0F 0x3e - invalid */
+/*  Opcode VEX.0F 0x3f - invalid */
+/*  Opcode VEX.0F 0x40 - invalid */
+/*  Opcode VEX.0F 0x41 - invalid */
+/*  Opcode VEX.0F 0x42 - invalid */
+/*  Opcode VEX.0F 0x43 - invalid */
+/*  Opcode VEX.0F 0x44 - invalid */
+/*  Opcode VEX.0F 0x45 - invalid */
+/*  Opcode VEX.0F 0x46 - invalid */
+/*  Opcode VEX.0F 0x47 - invalid */
+/*  Opcode VEX.0F 0x48 - invalid */
+/*  Opcode VEX.0F 0x49 - invalid */
+/*  Opcode VEX.0F 0x4a - invalid */
+/*  Opcode VEX.0F 0x4b - invalid */
+/*  Opcode VEX.0F 0x4c - invalid */
+/*  Opcode VEX.0F 0x4d - invalid */
+/*  Opcode VEX.0F 0x4e - invalid */
+/*  Opcode VEX.0F 0x4f - invalid */
+
+/** Opcode VEX.0F 0x50 - vmovmskps Gy, Ups */
+FNIEMOP_STUB(iemOp_vmovmskps_Gy_Ups);
+/** Opcode VEX.66.0F 0x50 - vmovmskpd Gy,Upd */
+FNIEMOP_STUB(iemOp_vmovmskpd_Gy_Upd);
+/*  Opcode VEX.F3.0F 0x50 - invalid */
+/*  Opcode VEX.F2.0F 0x50 - invalid */
+
+/** Opcode VEX.0F 0x51 - vsqrtps Vps, Wps */
+FNIEMOP_STUB(iemOp_vsqrtps_Vps_Wps);
+/** Opcode VEX.66.0F 0x51 - vsqrtpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_vsqrtpd_Vpd_Wpd);
+/** Opcode VEX.F3.0F 0x51 - vsqrtss Vss, Hss, Wss */
+FNIEMOP_STUB(iemOp_vsqrtss_Vss_Hss_Wss);
+/** Opcode VEX.F2.0F 0x51 - vsqrtsd Vsd, Hsd, Wsd */
+FNIEMOP_STUB(iemOp_vsqrtsd_Vsd_Hsd_Wsd);
+
+/** Opcode VEX.0F 0x52 - vrsqrtps Vps, Wps */
+FNIEMOP_STUB(iemOp_vrsqrtps_Vps_Wps);
+/*  Opcode VEX.66.0F 0x52 - invalid */
+/** Opcode VEX.F3.0F 0x52 - vrsqrtss Vss, Hss, Wss */
+FNIEMOP_STUB(iemOp_vrsqrtss_Vss_Hss_Wss);
+/*  Opcode VEX.F2.0F 0x52 - invalid */
+
+/** Opcode VEX.0F 0x53 - vrcpps Vps, Wps */
+FNIEMOP_STUB(iemOp_vrcpps_Vps_Wps);
+/*  Opcode VEX.66.0F 0x53 - invalid */
+/** Opcode VEX.F3.0F 0x53 - vrcpss Vss, Hss, Wss */
+FNIEMOP_STUB(iemOp_vrcpss_Vss_Hss_Wss);
+/*  Opcode VEX.F2.0F 0x53 - invalid */
+
+/** Opcode VEX.0F 0x54 - vandps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vandps_Vps_Hps_Wps);
+/** Opcode VEX.66.0F 0x54 - vandpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vandpd_Vpd_Hpd_Wpd);
+/*  Opcode VEX.F3.0F 0x54 - invalid */
+/*  Opcode VEX.F2.0F 0x54 - invalid */
+
+/** Opcode VEX.0F 0x55 - vandnps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vandnps_Vps_Hps_Wps);
+/** Opcode VEX.66.0F 0x55 - vandnpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vandnpd_Vpd_Hpd_Wpd);
+/*  Opcode VEX.F3.0F 0x55 - invalid */
+/*  Opcode VEX.F2.0F 0x55 - invalid */
+
+/** Opcode VEX.0F 0x56 - vorps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vorps_Vps_Hps_Wps);
+/** Opcode VEX.66.0F 0x56 - vorpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vorpd_Vpd_Hpd_Wpd);
+/*  Opcode VEX.F3.0F 0x56 - invalid */
+/*  Opcode VEX.F2.0F 0x56 - invalid */
+
+/** Opcode VEX.0F 0x57 - vxorps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vxorps_Vps_Hps_Wps);
+/** Opcode VEX.66.0F 0x57 - vxorpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vxorpd_Vpd_Hpd_Wpd);
+/*  Opcode VEX.F3.0F 0x57 - invalid */
+/*  Opcode VEX.F2.0F 0x57 - invalid */
+
+/** Opcode VEX.0F 0x58 - vaddps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vaddps_Vps_Hps_Wps);
+/** Opcode VEX.66.0F 0x58 - vaddpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vaddpd_Vpd_Hpd_Wpd);
+/** Opcode VEX.F3.0F 0x58 - vaddss Vss, Hss, Wss */
+FNIEMOP_STUB(iemOp_vaddss_Vss_Hss_Wss);
+/** Opcode VEX.F2.0F 0x58 - vaddsd Vsd, Hsd, Wsd */
+FNIEMOP_STUB(iemOp_vaddsd_Vsd_Hsd_Wsd);
+
+/** Opcode VEX.0F 0x59 - vmulps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vmulps_Vps_Hps_Wps);
+/** Opcode VEX.66.0F 0x59 - vmulpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vmulpd_Vpd_Hpd_Wpd);
+/** Opcode VEX.F3.0F 0x59 - vmulss Vss, Hss, Wss */
+FNIEMOP_STUB(iemOp_vmulss_Vss_Hss_Wss);
+/** Opcode VEX.F2.0F 0x59 - vmulsd Vsd, Hsd, Wsd */
+FNIEMOP_STUB(iemOp_vmulsd_Vsd_Hsd_Wsd);
+
+/** Opcode VEX.0F 0x5a - vcvtps2pd Vpd, Wps */
+FNIEMOP_STUB(iemOp_vcvtps2pd_Vpd_Wps);
+/** Opcode VEX.66.0F 0x5a - vcvtpd2ps Vps, Wpd */
+FNIEMOP_STUB(iemOp_vcvtpd2ps_Vps_Wpd);
+/** Opcode VEX.F3.0F 0x5a - vcvtss2sd Vsd, Hx, Wss */
+FNIEMOP_STUB(iemOp_vcvtss2sd_Vsd_Hx_Wss);
+/** Opcode VEX.F2.0F 0x5a - vcvtsd2ss Vss, Hx, Wsd */
+FNIEMOP_STUB(iemOp_vcvtsd2ss_Vss_Hx_Wsd);
+
+/** Opcode VEX.0F 0x5b - vcvtdq2ps Vps, Wdq */
+FNIEMOP_STUB(iemOp_vcvtdq2ps_Vps_Wdq);
+/** Opcode VEX.66.0F 0x5b - vcvtps2dq Vdq, Wps */
+FNIEMOP_STUB(iemOp_vcvtps2dq_Vdq_Wps);
+/** Opcode VEX.F3.0F 0x5b - vcvttps2dq Vdq, Wps */
+FNIEMOP_STUB(iemOp_vcvttps2dq_Vdq_Wps);
+/*  Opcode VEX.F2.0F 0x5b - invalid */
+
+/** Opcode VEX.0F 0x5c - vsubps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vsubps_Vps_Hps_Wps);
+/** Opcode VEX.66.0F 0x5c - vsubpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vsubpd_Vpd_Hpd_Wpd);
+/** Opcode VEX.F3.0F 0x5c - vsubss Vss, Hss, Wss */
+FNIEMOP_STUB(iemOp_vsubss_Vss_Hss_Wss);
+/** Opcode VEX.F2.0F 0x5c - vsubsd Vsd, Hsd, Wsd */
+FNIEMOP_STUB(iemOp_vsubsd_Vsd_Hsd_Wsd);
+
+/** Opcode VEX.0F 0x5d - vminps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vminps_Vps_Hps_Wps);
+/** Opcode VEX.66.0F 0x5d - vminpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vminpd_Vpd_Hpd_Wpd);
+/** Opcode VEX.F3.0F 0x5d - vminss Vss, Hss, Wss */
+FNIEMOP_STUB(iemOp_vminss_Vss_Hss_Wss);
+/** Opcode VEX.F2.0F 0x5d - vminsd Vsd, Hsd, Wsd */
+FNIEMOP_STUB(iemOp_vminsd_Vsd_Hsd_Wsd);
+
+/** Opcode VEX.0F 0x5e - vdivps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vdivps_Vps_Hps_Wps);
+/** Opcode VEX.66.0F 0x5e - vdivpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vdivpd_Vpd_Hpd_Wpd);
+/** Opcode VEX.F3.0F 0x5e - vdivss Vss, Hss, Wss */
+FNIEMOP_STUB(iemOp_vdivss_Vss_Hss_Wss);
+/** Opcode VEX.F2.0F 0x5e - vdivsd Vsd, Hsd, Wsd */
+FNIEMOP_STUB(iemOp_vdivsd_Vsd_Hsd_Wsd);
+
+/** Opcode VEX.0F 0x5f - vmaxps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vmaxps_Vps_Hps_Wps);
+/** Opcode VEX.66.0F 0x5f - vmaxpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vmaxpd_Vpd_Hpd_Wpd);
+/** Opcode VEX.F3.0F 0x5f - vmaxss Vss, Hss, Wss */
+FNIEMOP_STUB(iemOp_vmaxss_Vss_Hss_Wss);
+/** Opcode VEX.F2.0F 0x5f - vmaxsd Vsd, Hsd, Wsd */
+FNIEMOP_STUB(iemOp_vmaxsd_Vsd_Hsd_Wsd);
+
+
+///**
+// * Common worker for SSE2 instructions on the forms:
+// *      pxxxx xmm1, xmm2/mem128
+// *
+// * The 2nd operand is the first half of a register, which in the memory case
+// * means a 32-bit memory access for MMX, and for SSE a 128-bit aligned access
+// * where it may read the full 128 bits or only the lower 64 bits.
+// *
+// * Exceptions type 4.
+// */
+//FNIEMOP_DEF_1(iemOpCommonSse_LowLow_To_Full, PCIEMOPMEDIAF1L1, pImpl)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if (!pImpl->pfnU64)
+//        return IEMOP_RAISE_INVALID_OPCODE();
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        /** @todo testcase: REX.B / REX.R and MMX register indexing. Ignored? */
+//        /** @todo testcase: REX.B / REX.R and segment register indexing. Ignored? */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(2, 0);
+//        IEM_MC_ARG(uint64_t *,          pDst, 0);
+//        IEM_MC_ARG(uint32_t const *,    pSrc, 1);
+//        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+//        IEM_MC_PREPARE_FPU_USAGE();
+//        IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+//        IEM_MC_REF_MREG_U32_CONST(pSrc, bRm & X86_MODRM_RM_MASK);
+//        IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(2, 2);
+//        IEM_MC_ARG(uint64_t *,                  pDst,       0);
+//        IEM_MC_LOCAL(uint32_t,                  uSrc);
+//        IEM_MC_ARG_LOCAL_REF(uint32_t const *,  pSrc, uSrc, 1);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT();
+//        IEM_MC_FETCH_MEM_U32(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//
+//        IEM_MC_PREPARE_FPU_USAGE();
+//        IEM_MC_REF_MREG_U64(pDst, (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+//        IEM_MC_CALL_MMX_AIMPL_2(pImpl->pfnU64, pDst, pSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
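+
+/*
+ * For reference, the "LowLow_To_Full" shape above matches the punpckl*
+ * semantics: the low halves of destination and source are interleaved into
+ * the full destination.  An illustrative 64-bit punpcklbw helper under that
+ * reading (a sketch, not the actual iemAImpl_punpcklbw implementation):
+ *
+ *     static void punpcklbw_u64(uint64_t *puDst, uint32_t const *puSrc)
+ *     {
+ *         uint64_t uDst = 0;
+ *         for (unsigned i = 0; i < 4; i++)
+ *         {
+ *             uDst |= ((*puDst >> (i * 8)) & UINT64_C(0xff)) << (i * 16);     // even bytes from dst
+ *             uDst |= (uint64_t)((*puSrc >> (i * 8)) & 0xff) << (i * 16 + 8); // odd bytes from src
+ *         }
+ *         *puDst = uDst;
+ *     }
+ */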
+
+
+/*  Opcode VEX.0F 0x60 - invalid */
+
+/** Opcode VEX.66.0F 0x60 - vpunpcklbw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpunpcklbw_Vx_Hx_Wx);
+//FNIEMOP_DEF(iemOp_vpunpcklbw_Vx_Hx_Wx)
+//{
+//    IEMOP_MNEMONIC(vpunpcklbw, "vpunpcklbw Vx, Hx, Wx");
+//    return FNIEMOP_CALL_1(iemOpCommonSse_LowLow_To_Full, &g_iemAImpl_punpcklbw);
+//}
+
+/*  Opcode VEX.F3.0F 0x60 - invalid */
+
+
+/*  Opcode VEX.0F 0x61 - invalid */
+
+/** Opcode VEX.66.0F 0x61 - vpunpcklwd Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpunpcklwd_Vx_Hx_Wx);
+//FNIEMOP_DEF(iemOp_vpunpcklwd_Vx_Hx_Wx)
+//{
+//    IEMOP_MNEMONIC(vpunpcklwd, "vpunpcklwd Vx, Hx, Wx");
+//    return FNIEMOP_CALL_1(iemOpCommonSse_LowLow_To_Full, &g_iemAImpl_punpcklwd);
+//}
+
+/*  Opcode VEX.F3.0F 0x61 - invalid */
+
+
+/*  Opcode VEX.0F 0x62 - invalid */
+
+/** Opcode VEX.66.0F 0x62 - vpunpckldq Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpunpckldq_Vx_Hx_Wx);
+//FNIEMOP_DEF(iemOp_vpunpckldq_Vx_Hx_Wx)
+//{
+//    IEMOP_MNEMONIC(vpunpckldq, "vpunpckldq Vx, Hx, Wx");
+//    return FNIEMOP_CALL_1(iemOpCommonSse_LowLow_To_Full, &g_iemAImpl_punpckldq);
+//}
+
+/*  Opcode VEX.F3.0F 0x62 - invalid */
+
+
+
+/*  Opcode VEX.0F 0x63 - invalid */
+/** Opcode VEX.66.0F 0x63 - vpacksswb Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpacksswb_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0x63 - invalid */
+
+/*  Opcode VEX.0F 0x64 - invalid */
+/** Opcode VEX.66.0F 0x64 - vpcmpgtb Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpcmpgtb_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0x64 - invalid */
+
+/*  Opcode VEX.0F 0x65 - invalid */
+/** Opcode VEX.66.0F 0x65 - vpcmpgtw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpcmpgtw_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0x65 - invalid */
+
+/*  Opcode VEX.0F 0x66 - invalid */
+/** Opcode VEX.66.0F 0x66 - vpcmpgtd Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpcmpgtd_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0x66 - invalid */
+
+/*  Opcode VEX.0F 0x67 - invalid */
+/** Opcode VEX.66.0F 0x67 - vpackuswb Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpackuswb_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0x67 - invalid */
+
+
+///**
+// * Common worker for SSE2 instructions on the form:
+// *      pxxxx xmm1, xmm2/mem128
+// *
+// * The 2nd operand is the second half of a register, which in the memory case
+// * means a 64-bit memory access for MMX, and for SSE a 128-bit aligned access
+// * where it may read the full 128 bits or only the upper 64 bits.
+// *
+// * Exceptions type 4.
+// */
+//FNIEMOP_DEF_1(iemOpCommonSse_HighHigh_To_Full, PCIEMOPMEDIAF1H1, pImpl)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(2, 0);
+//        IEM_MC_ARG(PRTUINT128U,          pDst, 0);
+//        IEM_MC_ARG(PCRTUINT128U,         pSrc, 1);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(2, 2);
+//        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+//        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      pSrc, uSrc, 1);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); /* Most CPUs probably only read the high qword. */
+//
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
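+
+/*
+ * The "HighHigh_To_Full" shape is the punpckh* mirror of the sketch after
+ * the LowLow worker: the same interleave, but taken from the upper halves,
+ * e.g. for a 64-bit operand (illustrative):
+ *
+ *     bDst = (uOldDst >> (32 + i * 8)) & 0xff;   // bytes 4..7 of dst
+ *     bSrc = (uSrc    >> (32 + i * 8)) & 0xff;   // bytes 4..7 of src
+ */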
+
+
+/*  Opcode VEX.0F 0x68 - invalid */
+
+/** Opcode VEX.66.0F 0x68 - vpunpckhbw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpunpckhbw_Vx_Hx_Wx);
+//FNIEMOP_DEF(iemOp_vpunpckhbw_Vx_Hx_Wx)
+//{
+//    IEMOP_MNEMONIC(vpunpckhbw, "vpunpckhbw Vx, Hx, Wx");
+//    return FNIEMOP_CALL_1(iemOpCommonSse_HighHigh_To_Full, &g_iemAImpl_punpckhbw);
+//}
+/*  Opcode VEX.F3.0F 0x68 - invalid */
+
+
+/*  Opcode VEX.0F 0x69 - invalid */
+
+/** Opcode VEX.66.0F 0x69 - vpunpckhwd Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpunpckhwd_Vx_Hx_Wx);
+//FNIEMOP_DEF(iemOp_vpunpckhwd_Vx_Hx_Wx)
+//{
+//    IEMOP_MNEMONIC(vpunpckhwd, "vpunpckhwd Vx, Hx, Wx");
+//    return FNIEMOP_CALL_1(iemOpCommonSse_HighHigh_To_Full, &g_iemAImpl_punpckhwd);
+//}
+/*  Opcode VEX.F3.0F 0x69 - invalid */
+
+
+/*  Opcode VEX.0F 0x6a - invalid */
+
+/** Opcode VEX.66.0F 0x6a - vpunpckhdq Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpunpckhdq_Vx_Hx_W);
+//FNIEMOP_DEF(iemOp_vpunpckhdq_Vx_Hx_W)
+//{
+//    IEMOP_MNEMONIC(vpunpckhdq, "vpunpckhdq Vx, Hx, W");
+//    return FNIEMOP_CALL_1(iemOpCommonSse_HighHigh_To_Full, &g_iemAImpl_punpckhdq);
+//}
+/*  Opcode VEX.F3.0F 0x6a - invalid */
+
+
+/*  Opcode VEX.0F 0x6b - invalid */
+/** Opcode VEX.66.0F 0x6b - vpackssdw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpackssdw_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0x6b - invalid */
+
+
+/*  Opcode VEX.0F 0x6c - invalid */
+
+/** Opcode VEX.66.0F 0x6c - vpunpcklqdq Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpunpcklqdq_Vx_Hx_Wx);
+//FNIEMOP_DEF(iemOp_vpunpcklqdq_Vx_Hx_Wx)
+//{
+//    IEMOP_MNEMONIC(vpunpcklqdq, "vpunpcklqdq Vx, Hx, Wx");
+//    return FNIEMOP_CALL_1(iemOpCommonSse_LowLow_To_Full, &g_iemAImpl_punpcklqdq);
+//}
+
+/*  Opcode VEX.F3.0F 0x6c - invalid */
+/*  Opcode VEX.F2.0F 0x6c - invalid */
+
+
+/*  Opcode VEX.0F 0x6d - invalid */
+
+/** Opcode VEX.66.0F 0x6d - vpunpckhqdq Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpunpckhqdq_Vx_Hx_W);
+//FNIEMOP_DEF(iemOp_vpunpckhqdq_Vx_Hx_W)
+//{
+//    IEMOP_MNEMONIC(punpckhqdq, "punpckhqdq");
+//    return FNIEMOP_CALL_1(iemOpCommonSse_HighHigh_To_Full, &g_iemAImpl_punpckhqdq);
+//}
+
+/*  Opcode VEX.F3.0F 0x6d - invalid */
+
+
+/*  Opcode VEX.0F 0x6e - invalid */
+
+/** Opcode VEX.66.0F 0x6e - vmovd/q Vy, Ey */
+FNIEMOP_STUB(iemOp_vmovd_q_Vy_Ey);
+//FNIEMOP_DEF(iemOp_vmovd_q_Vy_Ey)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+//        IEMOP_MNEMONIC(vmovdq_Wq_Eq, "vmovq Wq,Eq");
+//    else
+//        IEMOP_MNEMONIC(vmovdq_Wd_Ed, "vmovd Wd,Ed");
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /* XMM, greg*/
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 1);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+//        {
+//            IEM_MC_LOCAL(uint64_t, u64Tmp);
+//            IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//            IEM_MC_STORE_XREG_U64_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
+//        }
+//        else
+//        {
+//            IEM_MC_LOCAL(uint32_t, u32Tmp);
+//            IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//            IEM_MC_STORE_XREG_U32_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
+//        }
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /* XMM, [mem] */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT(); /** @todo order */
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+//        {
+//            IEM_MC_LOCAL(uint64_t, u64Tmp);
+//            IEM_MC_FETCH_MEM_U64(u64Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//            IEM_MC_STORE_XREG_U64_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u64Tmp);
+//        }
+//        else
+//        {
+//            IEM_MC_LOCAL(uint32_t, u32Tmp);
+//            IEM_MC_FETCH_MEM_U32(u32Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//            IEM_MC_STORE_XREG_U32_ZX_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u32Tmp);
+//        }
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/*  Opcode VEX.F3.0F 0x6e - invalid */
+
+
+/*  Opcode VEX.0F 0x6f - invalid */
+
+/** Opcode VEX.66.0F 0x6f - vmovdqa Vx, Wx */
+FNIEMOP_STUB(iemOp_vmovdqa_Vx_Wx);
+//FNIEMOP_DEF(iemOp_vmovdqa_Vx_Wx)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    IEMOP_MNEMONIC(vmovdqa_Vdq_Wdq, "movdqa Vdq,Wdq");
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 0);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
+//                              (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U, u128Tmp);
+//        IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(u128Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u128Tmp);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/** Opcode VEX.F3.0F 0x6f - vmovdqu Vx, Wx */
+FNIEMOP_STUB(iemOp_vmovdqu_Vx_Wx);
+//FNIEMOP_DEF(iemOp_vmovdqu_Vx_Wx)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    IEMOP_MNEMONIC(vmovdqu_Vdq_Wdq, "movdqu Vdq,Wdq");
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 0);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_COPY_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg,
+//                              (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U, u128Tmp);
+//        IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_FETCH_MEM_U128(u128Tmp, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_STORE_XREG_U128(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg, u128Tmp);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+
+/*  Opcode VEX.0F 0x70 - invalid */
+
+/** Opcode VEX.66.0F 0x70 - vpshufd Vx, Wx, Ib */
+FNIEMOP_STUB(iemOp_vpshufd_Vx_Wx_Ib);
+//FNIEMOP_DEF(iemOp_vpshufd_Vx_Wx_Ib)
+//{
+//    IEMOP_MNEMONIC(vpshufd_Vx_Wx_Ib, "vpshufd Vx,Wx,Ib");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//
+//        IEM_MC_BEGIN(3, 0);
+//        IEM_MC_ARG(PRTUINT128U,         pDst, 0);
+//        IEM_MC_ARG(PCRTUINT128U,        pSrc, 1);
+//        IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshufd, pDst, pSrc, bEvilArg);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(3, 2);
+//        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+//        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      pSrc, uSrc, 1);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+//        IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//
+//        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshufd, pDst, pSrc, bEvilArg);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/** Opcode VEX.F3.0F 0x70 - vpshufhw Vx, Wx, Ib */
+FNIEMOP_STUB(iemOp_vpshufhw_Vx_Wx_Ib);
+//FNIEMOP_DEF(iemOp_vpshufhw_Vx_Wx_Ib)
+//{
+//    IEMOP_MNEMONIC(vpshufhw_Vx_Wx_Ib, "vpshufhw Vx,Wx,Ib");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//
+//        IEM_MC_BEGIN(3, 0);
+//        IEM_MC_ARG(PRTUINT128U,         pDst, 0);
+//        IEM_MC_ARG(PCRTUINT128U,        pSrc, 1);
+//        IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshufhw, pDst, pSrc, bEvilArg);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(3, 2);
+//        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+//        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      pSrc, uSrc, 1);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+//        IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//
+//        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshufhw, pDst, pSrc, bEvilArg);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/** Opcode VEX.F2.0F 0x70 - vpshuflw Vx, Wx, Ib */
+FNIEMOP_STUB(iemOp_vpshuflw_Vx_Wx_Ib);
+//FNIEMOP_DEF(iemOp_vpshuflw_Vx_Wx_Ib)
+//{
+//    IEMOP_MNEMONIC(vpshuflw_Vx_Wx_Ib, "vpshuflw Vx,Wx,Ib");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//
+//        IEM_MC_BEGIN(3, 0);
+//        IEM_MC_ARG(PRTUINT128U,         pDst, 0);
+//        IEM_MC_ARG(PCRTUINT128U,        pSrc, 1);
+//        IEM_MC_ARG_CONST(uint8_t,       bEvilArg, /*=*/ bEvil, 2);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshuflw, pDst, pSrc, bEvilArg);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(3, 2);
+//        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+//        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      pSrc, uSrc, 1);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        uint8_t bEvil; IEM_OPCODE_GET_NEXT_U8(&bEvil);
+//        IEM_MC_ARG_CONST(uint8_t,               bEvilArg, /*=*/ bEvil, 2);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//
+//        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_CALL_SSE_AIMPL_3(iemAImpl_pshuflw, pDst, pSrc, bEvilArg);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+
+/*  Opcode VEX.0F 0x71 11/2 - invalid. */
+/** Opcode VEX.66.0F 0x71 11/2. */
+FNIEMOP_STUB_1(iemOp_VGrp12_vpsrlw_Hx_Ux_Ib, uint8_t, bRm);
+
+/*  Opcode VEX.0F 0x71 11/4 - invalid */
+/** Opcode VEX.66.0F 0x71 11/4. */
+FNIEMOP_STUB_1(iemOp_VGrp12_vpsraw_Hx_Ux_Ib, uint8_t, bRm);
+
+/*  Opcode VEX.0F 0x71 11/6 - invalid */
+/** Opcode VEX.66.0F 0x71 11/6. */
+FNIEMOP_STUB_1(iemOp_VGrp12_vpsllw_Hx_Ux_Ib, uint8_t, bRm);
+
+
+/**
+ * VEX Group 12 jump table for register variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnVexGroup12RegReg[] =
+{
+    /* /0 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /1 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /2 */ iemOp_InvalidWithRMNeedImm8,   iemOp_VGrp12_vpsrlw_Hx_Ux_Ib, iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /3 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /4 */ iemOp_InvalidWithRMNeedImm8,   iemOp_VGrp12_vpsraw_Hx_Ux_Ib, iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /5 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /6 */ iemOp_InvalidWithRMNeedImm8,   iemOp_VGrp12_vpsllw_Hx_Ux_Ib, iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /7 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8)
+};
+AssertCompile(RT_ELEMENTS(g_apfnVexGroup12RegReg) == 8*4);
+
+
+/** Opcode VEX.0F 0x71. */
+FNIEMOP_DEF(iemOp_VGrp12)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        /* register, register */
+        return FNIEMOP_CALL_1(g_apfnVexGroup12RegReg[  ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                                     + pVCpu->iem.s.idxPrefix], bRm);
+    return FNIEMOP_CALL_1(iemOp_InvalidWithRMNeedImm8, bRm);
+}
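+
+/* Note: this and the following VGrp13/VGrp14 register tables are laid out as
+ * 8 ModR/M.reg rows by 4 prefix columns (none, 066h, 0f3h, 0f2h), so the
+ * dispatch index is reg*4 + idxPrefix.  A minimal sketch of the lookup,
+ * assuming idxPrefix follows that column order (0=none, 1=066h, 2=0f3h,
+ * 3=0f2h):
+ *
+ *     unsigned const   iReg  = (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK;
+ *     PFNIEMOPRM const pfnOp = g_apfnVexGroup12RegReg[iReg * 4 + pVCpu->iem.s.idxPrefix];
+ *     return FNIEMOP_CALL_1(pfnOp, bRm);
+ */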
+
+
+/*  Opcode VEX.0F 0x72 11/2 - invalid. */
+/** Opcode VEX.66.0F 0x72 11/2. */
+FNIEMOP_STUB_1(iemOp_VGrp13_vpsrld_Hx_Ux_Ib, uint8_t, bRm);
+
+/*  Opcode VEX.0F 0x72 11/4 - invalid. */
+/** Opcode VEX.66.0F 0x72 11/4. */
+FNIEMOP_STUB_1(iemOp_VGrp13_vpsrad_Hx_Ux_Ib, uint8_t, bRm);
+
+/*  Opcode VEX.0F 0x72 11/6 - invalid. */
+/** Opcode VEX.66.0F 0x72 11/6. */
+FNIEMOP_STUB_1(iemOp_VGrp13_vpslld_Hx_Ux_Ib, uint8_t, bRm);
+
+
+/**
+ * VEX Group 13 jump table for register variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnVexGroup13RegReg[] =
+{
+    /* /0 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /1 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /2 */ iemOp_InvalidWithRMNeedImm8,   iemOp_VGrp13_vpsrld_Hx_Ux_Ib, iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /3 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /4 */ iemOp_InvalidWithRMNeedImm8,   iemOp_VGrp13_vpsrad_Hx_Ux_Ib, iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /5 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /6 */ iemOp_InvalidWithRMNeedImm8,   iemOp_VGrp13_vpslld_Hx_Ux_Ib, iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /7 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8)
+};
+AssertCompile(RT_ELEMENTS(g_apfnVexGroup13RegReg) == 8*4);
+
+/** Opcode VEX.0F 0x72. */
+FNIEMOP_DEF(iemOp_VGrp13)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        /* register, register */
+        return FNIEMOP_CALL_1(g_apfnVexGroup13RegReg[ ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                                     + pVCpu->iem.s.idxPrefix], bRm);
+    return FNIEMOP_CALL_1(iemOp_InvalidWithRMNeedImm8, bRm);
+}
+
+
+/*  Opcode VEX.0F 0x73 11/2 - invalid. */
+/** Opcode VEX.66.0F 0x73 11/2. */
+FNIEMOP_STUB_1(iemOp_VGrp14_vpsrlq_Hx_Ux_Ib, uint8_t, bRm);
+
+/*  Opcode VEX.0F 0x73 11/3 - invalid. */
+/** Opcode VEX.66.0F 0x73 11/3. */
+FNIEMOP_STUB_1(iemOp_VGrp14_vpsrldq_Hx_Ux_Ib, uint8_t, bRm);
+
+/*  Opcode VEX.0F 0x73 11/6 - invalid. */
+/** Opcode VEX.66.0F 0x73 11/6. */
+FNIEMOP_STUB_1(iemOp_VGrp14_vpsllq_Hx_Ux_Ib, uint8_t, bRm);
+
+/*  Opcode VEX.0F 0x73 11/7 - invalid. */
+/** Opcode VEX.66.0F 0x73 11/7. */
+FNIEMOP_STUB_1(iemOp_VGrp14_vpslldq_Hx_Ux_Ib, uint8_t, bRm);
+
+/**
+ * VEX Group 14 jump table for register variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnVexGroup14RegReg[] =
+{
+    /* /0 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /1 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /2 */ iemOp_InvalidWithRMNeedImm8, iemOp_VGrp14_vpsrlq_Hx_Ux_Ib,  iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /3 */ iemOp_InvalidWithRMNeedImm8, iemOp_VGrp14_vpsrldq_Hx_Ux_Ib, iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /4 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /5 */ IEMOP_X4(iemOp_InvalidWithRMNeedImm8),
+    /* /6 */ iemOp_InvalidWithRMNeedImm8, iemOp_VGrp14_vpsllq_Hx_Ux_Ib,  iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+    /* /7 */ iemOp_InvalidWithRMNeedImm8, iemOp_VGrp14_vpslldq_Hx_Ux_Ib, iemOp_InvalidWithRMNeedImm8, iemOp_InvalidWithRMNeedImm8,
+};
+AssertCompile(RT_ELEMENTS(g_apfnVexGroup14RegReg) == 8*4);
+
+
+/** Opcode VEX.0F 0x73. */
+FNIEMOP_DEF(iemOp_VGrp14)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        /* register, register */
+        return FNIEMOP_CALL_1(g_apfnVexGroup14RegReg[ ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                                     + pVCpu->iem.s.idxPrefix], bRm);
+    return FNIEMOP_CALL_1(iemOp_InvalidWithRMNeedImm8, bRm);
+}
+
+
+///**
+// * Common worker for SSE2 instructions on the forms:
+// *      pxxx    xmm1, xmm2/mem128
+// *
+// * Proper alignment of the 128-bit operand is enforced.
+// * Exceptions type 4. SSE2 cpuid checks.
+// */
+//FNIEMOP_DEF_1(iemOpCommonSse2_FullFull_To_Full, PCIEMOPMEDIAF2, pImpl)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(2, 0);
+//        IEM_MC_ARG(PRTUINT128U,          pDst, 0);
+//        IEM_MC_ARG(PCRTUINT128U,         pSrc, 1);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(2, 2);
+//        IEM_MC_ARG(PRTUINT128U,                 pDst,       0);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+//        IEM_MC_ARG_LOCAL_REF(PCRTUINT128U,      pSrc, uSrc, 1);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_FETCH_MEM_U128_ALIGN_SSE(uSrc, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+//
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_XREG_U128(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_CALL_SSE_AIMPL_2(pImpl->pfnU128, pDst, pSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
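+
+/* The worker above is kept only as commented-out reference; an active
+ * definition must exist elsewhere, since iemOp_vpxor_Vx_Hx_Wx further down
+ * calls it.  Its memory path uses IEM_MC_FETCH_MEM_U128_ALIGN_SSE, which is
+ * what backs the "proper alignment of the 128-bit operand is enforced" note
+ * in the doc comment: going by the MC macro name, a misaligned operand is
+ * expected to raise an exception (type 4) rather than be loaded.
+ */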
+
+
+/*  Opcode VEX.0F 0x74 - invalid */
+
+/** Opcode VEX.66.0F 0x74 - vpcmpeqb Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpcmpeqb_Vx_Hx_Wx);
+//FNIEMOP_DEF(iemOp_vpcmpeqb_Vx_Hx_Wx)
+//{
+//    IEMOP_MNEMONIC(vpcmpeqb, "vpcmpeqb");
+//    return FNIEMOP_CALL_1(iemOpCommonSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqb);
+//}
+
+/*  Opcode VEX.F3.0F 0x74 - invalid */
+/*  Opcode VEX.F2.0F 0x74 - invalid */
+
+
+/*  Opcode VEX.0F 0x75 - invalid */
+
+/** Opcode VEX.66.0F 0x75 - vpcmpeqw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpcmpeqw_Vx_Hx_Wx);
+//FNIEMOP_DEF(iemOp_vpcmpeqw_Vx_Hx_Wx)
+//{
+//    IEMOP_MNEMONIC(vpcmpeqw, "vpcmpeqw");
+//    return FNIEMOP_CALL_1(iemOpCommonSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqw);
+//}
+
+/*  Opcode VEX.F3.0F 0x75 - invalid */
+/*  Opcode VEX.F2.0F 0x75 - invalid */
+
+
+/*  Opcode VEX.0F 0x76 - invalid */
+
+/** Opcode VEX.66.0F 0x76 - vpcmpeqd Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpcmpeqd_Vx_Hx_Wx);
+//FNIEMOP_DEF(iemOp_vpcmpeqd_Vx_Hx_Wx)
+//{
+//    IEMOP_MNEMONIC(vpcmpeqd, "vpcmpeqd");
+//    return FNIEMOP_CALL_1(iemOpCommonSse2_FullFull_To_Full, &g_iemAImpl_pcmpeqd);
+//}
+
+/*  Opcode VEX.F3.0F 0x76 - invalid */
+/*  Opcode VEX.F2.0F 0x76 - invalid */
+
+
+/** Opcode VEX.0F 0x77 - vzeroupperv vzeroallv */
+FNIEMOP_STUB(iemOp_vzeroupperv__vzeroallv);
+/*  Opcode VEX.66.0F 0x77 - invalid */
+/*  Opcode VEX.F3.0F 0x77 - invalid */
+/*  Opcode VEX.F2.0F 0x77 - invalid */
+
+/*  Opcode VEX.0F 0x78 - invalid */
+/*  Opcode VEX.66.0F 0x78 - invalid */
+/*  Opcode VEX.F3.0F 0x78 - invalid */
+/*  Opcode VEX.F2.0F 0x78 - invalid */
+
+/*  Opcode VEX.0F 0x79 - invalid */
+/*  Opcode VEX.66.0F 0x79 - invalid */
+/*  Opcode VEX.F3.0F 0x79 - invalid */
+/*  Opcode VEX.F2.0F 0x79 - invalid */
+
+/*  Opcode VEX.0F 0x7a - invalid */
+/*  Opcode VEX.66.0F 0x7a - invalid */
+/*  Opcode VEX.F3.0F 0x7a - invalid */
+/*  Opcode VEX.F2.0F 0x7a - invalid */
+
+/*  Opcode VEX.0F 0x7b - invalid */
+/*  Opcode VEX.66.0F 0x7b - invalid */
+/*  Opcode VEX.F3.0F 0x7b - invalid */
+/*  Opcode VEX.F2.0F 0x7b - invalid */
+
+/*  Opcode VEX.0F 0x7c - invalid */
+/** Opcode VEX.66.0F 0x7c - vhaddpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vhaddpd_Vpd_Hpd_Wpd);
+/*  Opcode VEX.F3.0F 0x7c - invalid */
+/** Opcode VEX.F2.0F 0x7c - vhaddps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vhaddps_Vps_Hps_Wps);
+
+/*  Opcode VEX.0F 0x7d - invalid */
+/** Opcode VEX.66.0F 0x7d - vhsubpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vhsubpd_Vpd_Hpd_Wpd);
+/*  Opcode VEX.F3.0F 0x7d - invalid */
+/** Opcode VEX.F2.0F 0x7d - vhsubps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vhsubps_Vps_Hps_Wps);
+
+
+/*  Opcode VEX.0F 0x7e - invalid */
+
+/** Opcode VEX.66.0F 0x7e - vmovd/q Ey, Vy */
+FNIEMOP_STUB(iemOp_vmovd_q_Ey_Vy);
+//FNIEMOP_DEF(iemOp_vmovd_q_Ey_Vy)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+//        IEMOP_MNEMONIC(vmovq_Eq_Wq, "vmovq Eq,Wq");
+//    else
+//        IEMOP_MNEMONIC(vmovd_Ed_Wd, "vmovd Ed,Wd");
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /* greg, XMM */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 1);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+//        {
+//            IEM_MC_LOCAL(uint64_t, u64Tmp);
+//            IEM_MC_FETCH_XREG_U64(u64Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//            IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u64Tmp);
+//        }
+//        else
+//        {
+//            IEM_MC_LOCAL(uint32_t, u32Tmp);
+//            IEM_MC_FETCH_XREG_U32(u32Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//            IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, u32Tmp);
+//        }
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /* [mem], XMM */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 1);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//        if (pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SIZE_REX_W)
+//        {
+//            IEM_MC_LOCAL(uint64_t, u64Tmp);
+//            IEM_MC_FETCH_XREG_U64(u64Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//            IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u64Tmp);
+//        }
+//        else
+//        {
+//            IEM_MC_LOCAL(uint32_t, u32Tmp);
+//            IEM_MC_FETCH_XREG_U32(u32Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//            IEM_MC_STORE_MEM_U32(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u32Tmp);
+//        }
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/** Opcode VEX.F3.0F 0x7e - vmovq Vq, Wq */
+FNIEMOP_STUB(iemOp_vmovq_Vq_Wq);
+/*  Opcode VEX.F2.0F 0x7e - invalid */
+
+
+/*  Opcode VEX.0F 0x7f - invalid */
+
+/** Opcode VEX.66.0F 0x7f - vmovdqa Wx,Vx */
+FNIEMOP_STUB(iemOp_vmovdqa_Wx_Vx);
+//FNIEMOP_DEF(iemOp_vmovdqa_Wx_Vx)
+//{
+//    IEMOP_MNEMONIC(vmovdqa_Wdq_Vdq, "vmovdqa Wx,Vx");
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 0);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+//                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U, u128Tmp);
+//        IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U128(u128Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u128Tmp);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/** Opcode VEX.F3.0F 0x7f - vmovdqu Wx,Vx */
+FNIEMOP_STUB(iemOp_vmovdqu_Wx_Vx);
+//FNIEMOP_DEF(iemOp_vmovdqu_Wx_Vx)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    IEMOP_MNEMONIC(vmovdqu_Wdq_Vdq, "vmovdqu Wx,Vx");
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 0);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//        IEM_MC_COPY_XREG_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB,
+//                              ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Register, memory.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U, u128Tmp);
+//        IEM_MC_LOCAL(RTGCPTR,    GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U128(u128Tmp, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U128(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, u128Tmp);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/*  Opcode VEX.F2.0F 0x7f - invalid */
+
+
+/*  Opcode VEX.0F 0x80 - invalid  */
+/*  Opcode VEX.0F 0x81 - invalid  */
+/*  Opcode VEX.0F 0x82 - invalid  */
+/*  Opcode VEX.0F 0x83 - invalid  */
+/*  Opcode VEX.0F 0x84 - invalid  */
+/*  Opcode VEX.0F 0x85 - invalid  */
+/*  Opcode VEX.0F 0x86 - invalid  */
+/*  Opcode VEX.0F 0x87 - invalid  */
+/*  Opcode VEX.0F 0x88 - invalid  */
+/*  Opcode VEX.0F 0x89 - invalid  */
+/*  Opcode VEX.0F 0x8a - invalid  */
+/*  Opcode VEX.0F 0x8b - invalid  */
+/*  Opcode VEX.0F 0x8c - invalid  */
+/*  Opcode VEX.0F 0x8d - invalid  */
+/*  Opcode VEX.0F 0x8e - invalid  */
+/*  Opcode VEX.0F 0x8f - invalid  */
+/*  Opcode VEX.0F 0x90 - invalid  */
+/*  Opcode VEX.0F 0x91 - invalid  */
+/*  Opcode VEX.0F 0x92 - invalid  */
+/*  Opcode VEX.0F 0x93 - invalid  */
+/*  Opcode VEX.0F 0x94 - invalid  */
+/*  Opcode VEX.0F 0x95 - invalid  */
+/*  Opcode VEX.0F 0x96 - invalid  */
+/*  Opcode VEX.0F 0x97 - invalid  */
+/*  Opcode VEX.0F 0x98 - invalid  */
+/*  Opcode VEX.0F 0x99 - invalid  */
+/*  Opcode VEX.0F 0x9a - invalid  */
+/*  Opcode VEX.0F 0x9b - invalid  */
+/*  Opcode VEX.0F 0x9c - invalid  */
+/*  Opcode VEX.0F 0x9d - invalid  */
+/*  Opcode VEX.0F 0x9e - invalid  */
+/*  Opcode VEX.0F 0x9f - invalid  */
+/*  Opcode VEX.0F 0xa0 - invalid  */
+/*  Opcode VEX.0F 0xa1 - invalid  */
+/*  Opcode VEX.0F 0xa2 - invalid  */
+/*  Opcode VEX.0F 0xa3 - invalid  */
+/*  Opcode VEX.0F 0xa4 - invalid  */
+/*  Opcode VEX.0F 0xa5 - invalid  */
+/*  Opcode VEX.0F 0xa6 - invalid  */
+/*  Opcode VEX.0F 0xa7 - invalid  */
+/*  Opcode VEX.0F 0xa8 - invalid  */
+/*  Opcode VEX.0F 0xa9 - invalid  */
+/*  Opcode VEX.0F 0xaa - invalid  */
+/*  Opcode VEX.0F 0xab - invalid  */
+/*  Opcode VEX.0F 0xac - invalid  */
+/*  Opcode VEX.0F 0xad - invalid  */
+
+
+/*  Opcode VEX.0F 0xae mem/0 - invalid. */
+/*  Opcode VEX.0F 0xae mem/1 - invalid. */
+
+/**
+ * @ opmaps      vexgrp15
+ * @ opcode      !11/2
+ * @ oppfx       none
+ * @ opcpuid     sse
+ * @ opgroup     og_sse_mxcsrsm
+ * @ opxcpttype  5
+ * @ optest      op1=0      -> mxcsr=0
+ * @ optest      op1=0x2083 -> mxcsr=0x2083
+ * @ optest      op1=0xfffffffe -> value.xcpt=0xd
+ * @ optest      op1=0x2083 cr0|=ts -> value.xcpt=0x7
+ * @ optest      op1=0x2083 cr0|=em -> value.xcpt=0x6
+ * @ optest      op1=0x2083 cr0|=mp -> mxcsr=0x2083
+ * @ optest      op1=0x2083 cr4&~=osfxsr -> value.xcpt=0x6
+ * @ optest      op1=0x2083 cr0|=ts,em -> value.xcpt=0x6
+ * @ optest      op1=0x2083 cr0|=em cr4&~=osfxsr -> value.xcpt=0x6
+ * @ optest      op1=0x2083 cr0|=ts,em cr4&~=osfxsr -> value.xcpt=0x6
+ * @ optest      op1=0x2083 cr0|=ts,em,mp cr4&~=osfxsr -> value.xcpt=0x6
+ */
+FNIEMOP_STUB_1(iemOp_VGrp15_vldmxcsr, uint8_t, bRm);
+//FNIEMOP_DEF_1(iemOp_VGrp15_vldmxcsr, uint8_t, bRm)
+//{
+//    IEMOP_MNEMONIC1(M_MEM, VLDMXCSR, vldmxcsr, MdRO, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSse)
+//        return IEMOP_RAISE_INVALID_OPCODE();
+//
+//    IEM_MC_BEGIN(2, 0);
+//    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
+//    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
+//    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+//    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//    IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+//    IEM_MC_CALL_CIMPL_2(iemCImpl_ldmxcsr, iEffSeg, GCPtrEff);
+//    IEM_MC_END();
+//    return VINF_SUCCESS;
+//}
+
+
+/**
+ * @opmaps      vexgrp15
+ * @opcode      !11/3
+ * @oppfx       none
+ * @opcpuid     avx
+ * @opgroup     og_avx_mxcsrsm
+ * @opxcpttype  5
+ * @optest      mxcsr=0      -> op1=0
+ * @optest      mxcsr=0x2083 -> op1=0x2083
+ * @optest      mxcsr=0x2084 cr0|=ts -> value.xcpt=0x7
+ * @optest      !amd / mxcsr=0x2085 cr0|=em -> op1=0x2085
+ * @optest       amd / mxcsr=0x2085 cr0|=em -> value.xcpt=0x6
+ * @optest      mxcsr=0x2086 cr0|=mp -> op1=0x2086
+ * @optest      mxcsr=0x2087 cr4&~=osfxsr -> op1=0x2087
+ * @optest      mxcsr=0x208f cr4&~=osxsave -> value.xcpt=0x6
+ * @optest      mxcsr=0x2087 cr4&~=osfxsr,osxsave -> value.xcpt=0x6
+ * @optest      !amd / mxcsr=0x2088 cr0|=ts,em -> value.xcpt=0x7
+ * @optest      amd  / mxcsr=0x2088 cr0|=ts,em -> value.xcpt=0x6
+ * @optest      !amd / mxcsr=0x2089 cr0|=em cr4&~=osfxsr -> op1=0x2089
+ * @optest      amd  / mxcsr=0x2089 cr0|=em cr4&~=osfxsr -> value.xcpt=0x6
+ * @optest      !amd / mxcsr=0x208a cr0|=ts,em cr4&~=osfxsr -> value.xcpt=0x7
+ * @optest      amd  / mxcsr=0x208a cr0|=ts,em cr4&~=osfxsr -> value.xcpt=0x6
+ * @optest      !amd / mxcsr=0x208b cr0|=ts,em,mp cr4&~=osfxsr -> value.xcpt=0x7
+ * @optest      amd  / mxcsr=0x208b cr0|=ts,em,mp cr4&~=osfxsr -> value.xcpt=0x6
+ * @optest      !amd / mxcsr=0x208c xcr0&~=all_avx -> value.xcpt=0x6
+ * @optest      amd  / mxcsr=0x208c xcr0&~=all_avx -> op1=0x208c
+ * @optest      !amd / mxcsr=0x208d xcr0&~=all_avx_sse -> value.xcpt=0x6
+ * @optest      amd  / mxcsr=0x208d xcr0&~=all_avx_sse -> op1=0x208d
+ * @optest      !amd / mxcsr=0x208e xcr0&~=all_avx cr0|=ts -> value.xcpt=0x6
+ * @optest      amd  / mxcsr=0x208e xcr0&~=all_avx cr0|=ts -> value.xcpt=0x7
+ * @optest      mxcsr=0x2082 cr0|=ts cr4&~=osxsave -> value.xcpt=0x6
+ * @optest      mxcsr=0x2081 xcr0&~=all_avx cr0|=ts cr4&~=osxsave
+ *              -> value.xcpt=0x6
+ * @remarks     AMD Jaguar CPU (f0x16,m0,s1) \#UD when CR0.EM is set.  It also
+ *              doesn't seem to check XCR0[2:1] != 11b.  This does not match the
+ *              APMv4 rev 3.17 page 509.
+ * @todo        Test this instruction on AMD Ryzen.
+ */
+FNIEMOP_DEF_1(iemOp_VGrp15_vstmxcsr,  uint8_t, bRm)
+{
+    IEMOP_MNEMONIC1(VEX_M_MEM, VSTMXCSR, vstmxcsr, MdWO, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+    if (!IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fAvx)
+        return IEMOP_RAISE_INVALID_OPCODE();
+
+    IEM_MC_BEGIN(2, 0);
+    IEM_MC_ARG(uint8_t,         iEffSeg,                                 0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEff,                                1);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm, 0);
+    IEMOP_HLP_DONE_VEX_DECODING_L_ZERO_NO_VVV();
+    IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+    IEM_MC_ASSIGN(iEffSeg, pVCpu->iem.s.iEffSeg);
+    IEM_MC_CALL_CIMPL_2(iemCImpl_vstmxcsr, iEffSeg, GCPtrEff);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
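+
+/* Unlike the legacy-decoding vldmxcsr sketch above, vstmxcsr uses
+ * IEMOP_HLP_DONE_VEX_DECODING_L_ZERO_NO_VVV which, going by the helper's
+ * name, rejects encodings with VEX.L=1 or a non-default VEX.vvvv, raising
+ * #UD before the memory operand is touched.
+ */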
+
+/*  Opcode VEX.0F 0xae mem/4 - invalid. */
+/*  Opcode VEX.0F 0xae mem/5 - invalid. */
+/*  Opcode VEX.0F 0xae mem/6 - invalid. */
+/*  Opcode VEX.0F 0xae mem/7 - invalid. */
+
+/*  Opcode VEX.0F 0xae 11b/0 - invalid. */
+/*  Opcode VEX.0F 0xae 11b/1 - invalid. */
+/*  Opcode VEX.0F 0xae 11b/2 - invalid. */
+/*  Opcode VEX.0F 0xae 11b/3 - invalid. */
+/*  Opcode VEX.0F 0xae 11b/4 - invalid. */
+/*  Opcode VEX.0F 0xae 11b/5 - invalid. */
+/*  Opcode VEX.0F 0xae 11b/6 - invalid. */
+/*  Opcode VEX.0F 0xae 11b/7 - invalid. */
+
+/**
+ * VEX Group 15 jump table for memory variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnVexGroup15MemReg[] =
+{   /* pfx:  none,                          066h,                           0f3h,                           0f2h */
+    /* /0 */ iemOp_InvalidWithRM,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /1 */ iemOp_InvalidWithRM,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /2 */ iemOp_VGrp15_vldmxcsr,         iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /3 */ iemOp_VGrp15_vstmxcsr,         iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /4 */ iemOp_InvalidWithRM,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /5 */ iemOp_InvalidWithRM,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /6 */ iemOp_InvalidWithRM,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+    /* /7 */ iemOp_InvalidWithRM,           iemOp_InvalidWithRM,            iemOp_InvalidWithRM,            iemOp_InvalidWithRM,
+};
+AssertCompile(RT_ELEMENTS(g_apfnVexGroup15MemReg) == 8*4);
+
+
+/** Opcode VEX.0F 0xae. */
+FNIEMOP_DEF(iemOp_VGrp15)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        /* register, register */
+        return FNIEMOP_CALL_1(iemOp_InvalidWithRM, bRm);
+
+    /* memory, register */
+    return FNIEMOP_CALL_1(g_apfnVexGroup15MemReg[ ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) * 4
+                                                 + pVCpu->iem.s.idxPrefix], bRm);
+}
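+
+/* For VEX group 15 only the memory forms dispatch through the table above;
+ * all register (mod=3) forms are invalid.  Of the memory rows, just /2
+ * (vldmxcsr, still stubbed) and /3 (vstmxcsr, implemented above) are wired
+ * up.
+ */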
+
+
+/*  Opcode VEX.0F 0xaf - invalid. */
+
+/*  Opcode VEX.0F 0xb0 - invalid. */
+/*  Opcode VEX.0F 0xb1 - invalid. */
+/*  Opcode VEX.0F 0xb2 - invalid. */
+/*  Opcode VEX.0F 0xb3 - invalid. */
+/*  Opcode VEX.0F 0xb4 - invalid. */
+/*  Opcode VEX.0F 0xb5 - invalid. */
+/*  Opcode VEX.0F 0xb6 - invalid. */
+/*  Opcode VEX.0F 0xb7 - invalid. */
+/*  Opcode VEX.0F 0xb8 - invalid. */
+/*  Opcode VEX.0F 0xb9 - invalid. */
+/*  Opcode VEX.0F 0xba - invalid. */
+/*  Opcode VEX.0F 0xbb - invalid. */
+/*  Opcode VEX.0F 0xbc - invalid. */
+/*  Opcode VEX.0F 0xbd - invalid. */
+/*  Opcode VEX.0F 0xbe - invalid. */
+/*  Opcode VEX.0F 0xbf - invalid. */
+
+/*  Opcode VEX.0F 0xc0 - invalid. */
+/*  Opcode VEX.66.0F 0xc0 - invalid. */
+/*  Opcode VEX.F3.0F 0xc0 - invalid. */
+/*  Opcode VEX.F2.0F 0xc0 - invalid. */
+
+/*  Opcode VEX.0F 0xc1 - invalid. */
+/*  Opcode VEX.66.0F 0xc1 - invalid. */
+/*  Opcode VEX.F3.0F 0xc1 - invalid. */
+/*  Opcode VEX.F2.0F 0xc1 - invalid. */
+
+/** Opcode VEX.0F 0xc2 - vcmpps Vps,Hps,Wps,Ib */
+FNIEMOP_STUB(iemOp_vcmpps_Vps_Hps_Wps_Ib);
+/** Opcode VEX.66.0F 0xc2 - vcmppd Vpd,Hpd,Wpd,Ib */
+FNIEMOP_STUB(iemOp_vcmppd_Vpd_Hpd_Wpd_Ib);
+/** Opcode VEX.F3.0F 0xc2 - vcmpss Vss,Hss,Wss,Ib */
+FNIEMOP_STUB(iemOp_vcmpss_Vss_Hss_Wss_Ib);
+/** Opcode VEX.F2.0F 0xc2 - vcmpsd Vsd,Hsd,Wsd,Ib */
+FNIEMOP_STUB(iemOp_vcmpsd_Vsd_Hsd_Wsd_Ib);
+
+/*  Opcode VEX.0F 0xc3 - invalid */
+/*  Opcode VEX.66.0F 0xc3 - invalid */
+/*  Opcode VEX.F3.0F 0xc3 - invalid */
+/*  Opcode VEX.F2.0F 0xc3 - invalid */
+
+/*  Opcode VEX.0F 0xc4 - invalid */
+/** Opcode VEX.66.0F 0xc4 - vpinsrw Vdq,Hdq,Ry/Mw,Ib */
+FNIEMOP_STUB(iemOp_vpinsrw_Vdq_Hdq_RyMw_Ib);
+/*  Opcode VEX.F3.0F 0xc4 - invalid */
+/*  Opcode VEX.F2.0F 0xc4 - invalid */
+
+/*  Opcode VEX.0F 0xc5 - invalid */
+/** Opcode VEX.66.0F 0xc5 - vpextrw Gd, Udq, Ib */
+FNIEMOP_STUB(iemOp_vpextrw_Gd_Udq_Ib);
+/*  Opcode VEX.F3.0F 0xc5 - invalid */
+/*  Opcode VEX.F2.0F 0xc5 - invalid */
+
+/** Opcode VEX.0F 0xc6 - vshufps Vps,Hps,Wps,Ib */
+FNIEMOP_STUB(iemOp_vshufps_Vps_Hps_Wps_Ib);
+/** Opcode VEX.66.0F 0xc6 - vshufpd Vpd,Hpd,Wpd,Ib */
+FNIEMOP_STUB(iemOp_vshufpd_Vpd_Hpd_Wpd_Ib);
+/*  Opcode VEX.F3.0F 0xc6 - invalid */
+/*  Opcode VEX.F2.0F 0xc6 - invalid */
+
+/*  Opcode VEX.0F 0xc7 - invalid */
+/*  Opcode VEX.66.0F 0xc7 - invalid */
+/*  Opcode VEX.F3.0F 0xc7 - invalid */
+/*  Opcode VEX.F2.0F 0xc7 - invalid */
+
+/*  Opcode VEX.0F 0xc8 - invalid */
+/*  Opcode VEX.0F 0xc9 - invalid */
+/*  Opcode VEX.0F 0xca - invalid */
+/*  Opcode VEX.0F 0xcb - invalid */
+/*  Opcode VEX.0F 0xcc - invalid */
+/*  Opcode VEX.0F 0xcd - invalid */
+/*  Opcode VEX.0F 0xce - invalid */
+/*  Opcode VEX.0F 0xcf - invalid */
+
+
+/*  Opcode VEX.0F 0xd0 - invalid */
+/** Opcode VEX.66.0F 0xd0 - vaddsubpd Vpd, Hpd, Wpd */
+FNIEMOP_STUB(iemOp_vaddsubpd_Vpd_Hpd_Wpd);
+/*  Opcode VEX.F3.0F 0xd0 - invalid */
+/** Opcode VEX.F2.0F 0xd0 - vaddsubps Vps, Hps, Wps */
+FNIEMOP_STUB(iemOp_vaddsubps_Vps_Hps_Wps);
+
+/*  Opcode VEX.0F 0xd1 - invalid */
+/** Opcode VEX.66.0F 0xd1 - vpsrlw Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpsrlw_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0xd1 - invalid */
+/*  Opcode VEX.F2.0F 0xd1 - invalid */
+
+/*  Opcode VEX.0F 0xd2 - invalid */
+/** Opcode VEX.66.0F 0xd2 - vpsrld Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpsrld_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xd2 - invalid */
+/*  Opcode VEX.F2.0F 0xd2 - invalid */
+
+/*  Opcode VEX.0F 0xd3 - invalid */
+/** Opcode VEX.66.0F 0xd3 - vpsrlq Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpsrlq_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xd3 - invalid */
+/*  Opcode VEX.F2.0F 0xd3 - invalid */
+
+/*  Opcode VEX.0F 0xd4 - invalid */
+/** Opcode VEX.66.0F 0xd4 - vpaddq Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpaddq_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0xd4 - invalid */
+/*  Opcode VEX.F2.0F 0xd4 - invalid */
+
+/*  Opcode VEX.0F 0xd5 - invalid */
+/** Opcode VEX.66.0F 0xd5 - vpmullw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpmullw_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xd5 - invalid */
+/*  Opcode VEX.F2.0F 0xd5 - invalid */
+
+/*  Opcode VEX.0F 0xd6 - invalid */
+
+/**
+ * @ opcode      0xd6
+ * @ oppfx       0x66
+ * @ opcpuid     sse2
+ * @ opgroup     og_sse2_pcksclr_datamove
+ * @ opxcpttype  none
+ * @ optest      op1=-1 op2=2 -> op1=2
+ * @ optest      op1=0 op2=-42 -> op1=-42
+ */
+FNIEMOP_STUB(iemOp_vmovq_Wq_Vq);
+//FNIEMOP_DEF(iemOp_vmovq_Wq_Vq)
+//{
+//    IEMOP_MNEMONIC2(MR, VMOVQ, vmovq, WqZxReg, Vq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZE);
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /*
+//         * Register, register.
+//         */
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(uint64_t,                  uSrc);
+//
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE();
+//
+//        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_XREG_U64_ZX_U128((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    else
+//    {
+//        /*
+//         * Memory, register.
+//         */
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(uint64_t,                  uSrc);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U64(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U64(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//    }
+//    return VINF_SUCCESS;
+//}
+
+/*  Opcode VEX.F3.0F 0xd6 - invalid */
+/*  Opcode VEX.F2.0F 0xd6 - invalid */
+
+
+/*  Opcode VEX.0F 0xd7 - invalid */
+
+/** Opcode VEX.66.0F 0xd7 - vpmovmskb Gd, Ux */
+FNIEMOP_STUB(iemOp_vpmovmskb_Gd_Ux);
+//FNIEMOP_DEF(iemOp_vpmovmskb_Gd_Ux)
+//{
+//    /* Note! Taking the lazy approach wrt the high 32-bits of the GREG. */
+//    /** @todo testcase: Check that the instruction implicitly clears the high
+//     *        bits in 64-bit mode.  The REX.W is first necessary when VLMAX > 256
+//     *        and opcode modifications are made to work with the whole width (not
+//     *        just 128). */
+//    IEMOP_MNEMONIC(vpmovmskb_Gd_Ux, "vpmovmskb Gd, Ux");
+//    /* Docs says register only. */
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT)) /** @todo test that this is registers only. */
+//    {
+//        IEMOP_HLP_DECODED_NL_2(OP_PMOVMSKB, IEMOPFORM_RM_REG, OP_PARM_Gd, OP_PARM_Vdq, DISOPTYPE_SSE | DISOPTYPE_HARMLESS);
+//        IEM_MC_BEGIN(2, 0);
+//        IEM_MC_ARG(uint64_t *,           pDst, 0);
+//        IEM_MC_ARG(PCRTUINT128U,         pSrc, 1);
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_PREPARE_SSE_USAGE();
+//        IEM_MC_REF_GREG_U64(pDst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_REF_XREG_U128_CONST(pSrc, (bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB);
+//        IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_pmovmskb_u128, pDst, pSrc);
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//        return VINF_SUCCESS;
+//    }
+//    return IEMOP_RAISE_INVALID_OPCODE();
+//}
+
+/*  Opcode VEX.F3.0F 0xd7 - invalid */
+/*  Opcode VEX.F2.0F 0xd7 - invalid */
+
+
+/*  Opcode VEX.0F 0xd8 - invalid */
+/** Opcode VEX.66.0F 0xd8 - vpsubusb Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpsubusb_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0xd8 - invalid */
+/*  Opcode VEX.F2.0F 0xd8 - invalid */
+
+/*  Opcode VEX.0F 0xd9 - invalid */
+/** Opcode VEX.66.0F 0xd9 - vpsubusw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpsubusw_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xd9 - invalid */
+/*  Opcode VEX.F2.0F 0xd9 - invalid */
+
+/*  Opcode VEX.0F 0xda - invalid */
+/** Opcode VEX.66.0F 0xda - vpminub Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpminub_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xda - invalid */
+/*  Opcode VEX.F2.0F 0xda - invalid */
+
+/*  Opcode VEX.0F 0xdb - invalid */
+/** Opcode VEX.66.0F 0xdb - vpand Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpand_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0xdb - invalid */
+/*  Opcode VEX.F2.0F 0xdb - invalid */
+
+/*  Opcode VEX.0F 0xdc - invalid */
+/** Opcode VEX.66.0F 0xdc - vpaddusb Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpaddusb_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xdc - invalid */
+/*  Opcode VEX.F2.0F 0xdc - invalid */
+
+/*  Opcode VEX.0F 0xdd - invalid */
+/** Opcode VEX.66.0F 0xdd - vpaddusw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpaddusw_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xdd - invalid */
+/*  Opcode VEX.F2.0F 0xdd - invalid */
+
+/*  Opcode VEX.0F 0xde - invalid */
+/** Opcode VEX.66.0F 0xde - vpmaxub Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpmaxub_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0xde - invalid */
+/*  Opcode VEX.F2.0F 0xde - invalid */
+
+/*  Opcode VEX.0F 0xdf - invalid */
+/** Opcode VEX.66.0F 0xdf - vpandn Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpandn_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xdf - invalid */
+/*  Opcode VEX.F2.0F 0xdf - invalid */
+
+/*  Opcode VEX.0F 0xe0 - invalid */
+/** Opcode VEX.66.0F 0xe0 - vpavgb Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpavgb_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xe0 - invalid */
+/*  Opcode VEX.F2.0F 0xe0 - invalid */
+
+/*  Opcode VEX.0F 0xe1 - invalid */
+/** Opcode VEX.66.0F 0xe1 - vpsraw Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpsraw_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0xe1 - invalid */
+/*  Opcode VEX.F2.0F 0xe1 - invalid */
+
+/*  Opcode VEX.0F 0xe2 - invalid */
+/** Opcode VEX.66.0F 0xe2 - vpsrad Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpsrad_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xe2 - invalid */
+/*  Opcode VEX.F2.0F 0xe2 - invalid */
+
+/*  Opcode VEX.0F 0xe3 - invalid */
+/** Opcode VEX.66.0F 0xe3 - vpavgw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpavgw_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xe3 - invalid */
+/*  Opcode VEX.F2.0F 0xe3 - invalid */
+
+/*  Opcode VEX.0F 0xe4 - invalid */
+/** Opcode VEX.66.0F 0xe4 - vpmulhuw Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpmulhuw_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0xe4 - invalid */
+/*  Opcode VEX.F2.0F 0xe4 - invalid */
+
+/*  Opcode VEX.0F 0xe5 - invalid */
+/** Opcode VEX.66.0F 0xe5 - vpmulhw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpmulhw_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xe5 - invalid */
+/*  Opcode VEX.F2.0F 0xe5 - invalid */
+
+/*  Opcode VEX.0F 0xe6 - invalid */
+/** Opcode VEX.66.0F 0xe6 - vcvttpd2dq Vx, Wpd */
+FNIEMOP_STUB(iemOp_vcvttpd2dq_Vx_Wpd);
+/** Opcode VEX.F3.0F 0xe6 - vcvtdq2pd Vx, Wpd */
+FNIEMOP_STUB(iemOp_vcvtdq2pd_Vx_Wpd);
+/** Opcode VEX.F2.0F 0xe6 - vcvtpd2dq Vx, Wpd */
+FNIEMOP_STUB(iemOp_vcvtpd2dq_Vx_Wpd);
+
+
+/*  Opcode VEX.0F 0xe7 - invalid */
+
+/** Opcode VEX.66.0F 0xe7 - vmovntdq Mx, Vx */
+FNIEMOP_STUB(iemOp_vmovntdq_Mx_Vx);
+//FNIEMOP_DEF(iemOp_vmovntdq_Mx_Vx)
+//{
+//    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+//    if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+//    {
+//        /* Register, memory. */
+//        IEMOP_MNEMONIC(vmovntdq_Mx_Vx, "vmovntdq Mx,Vx");
+//        IEM_MC_BEGIN(0, 2);
+//        IEM_MC_LOCAL(RTUINT128U,                uSrc);
+//        IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc);
+//
+//        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+//        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+//        IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT();
+//        IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ();
+//
+//        IEM_MC_FETCH_XREG_U128(uSrc, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pVCpu->iem.s.uRexReg);
+//        IEM_MC_STORE_MEM_U128_ALIGN_SSE(pVCpu->iem.s.iEffSeg, GCPtrEffSrc, uSrc);
+//
+//        IEM_MC_ADVANCE_RIP();
+//        IEM_MC_END();
+//        return VINF_SUCCESS;
+//    }
+//
+//    /* The register, register encoding is invalid. */
+//    return IEMOP_RAISE_INVALID_OPCODE();
+//}
+
+/*  Opcode VEX.F3.0F 0xe7 - invalid */
+/*  Opcode VEX.F2.0F 0xe7 - invalid */
+
+
+/*  Opcode VEX.0F 0xe8 - invalid */
+/** Opcode VEX.66.0F 0xe8 - vpsubsb Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpsubsb_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0xe8 - invalid */
+/*  Opcode VEX.F2.0F 0xe8 - invalid */
+
+/*  Opcode VEX.0F 0xe9 - invalid */
+/** Opcode VEX.66.0F 0xe9 - vpsubsw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpsubsw_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xe9 - invalid */
+/*  Opcode VEX.F2.0F 0xe9 - invalid */
+
+/*  Opcode VEX.0F 0xea - invalid */
+/** Opcode VEX.66.0F 0xea - vpminsw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpminsw_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xea - invalid */
+/*  Opcode VEX.F2.0F 0xea - invalid */
+
+/*  Opcode VEX.0F 0xeb - invalid */
+/** Opcode VEX.66.0F 0xeb - vpor Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpor_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0xeb - invalid */
+/*  Opcode VEX.F2.0F 0xeb - invalid */
+
+/*  Opcode VEX.0F 0xec - invalid */
+/** Opcode VEX.66.0F 0xec - vpaddsb Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpaddsb_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xec - invalid */
+/*  Opcode VEX.F2.0F 0xec - invalid */
+
+/*  Opcode VEX.0F 0xed - invalid */
+/** Opcode VEX.66.0F 0xed - vpaddsw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpaddsw_Vx_Hx_Wx);
+/*  Opcode VEX.F3.0F 0xed - invalid */
+/*  Opcode VEX.F2.0F 0xed - invalid */
+
+/*  Opcode VEX.0F 0xee - invalid */
+/** Opcode VEX.66.0F 0xee - vpmaxsw Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpmaxsw_Vx_Hx_W);
+/*  Opcode VEX.F3.0F 0xee - invalid */
+/*  Opcode VEX.F2.0F 0xee - invalid */
+
+
+/*  Opcode VEX.0F 0xef - invalid */
+
+/** Opcode VEX.66.0F 0xef - vpxor Vx, Hx, Wx */
+FNIEMOP_DEF(iemOp_vpxor_Vx_Hx_Wx)
+{
+    IEMOP_MNEMONIC(vpxor, "vpxor");
+    return FNIEMOP_CALL_1(iemOpCommonSse2_FullFull_To_Full, &g_iemAImpl_pxor);
+}
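+
+/* Note: this reuses the two-operand SSE2 worker, which only references the
+ * ModR/M reg and r/m operands.  As far as one can tell from the worker code
+ * above, VEX.vvvv (the Hx operand) is therefore not consulted yet, so the
+ * instruction currently behaves like legacy pxor (dst ^= src) rather than
+ * dst = Hx ^ Wx.
+ */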
+
+/*  Opcode VEX.F3.0F 0xef - invalid */
+/*  Opcode VEX.F2.0F 0xef - invalid */
+
+/*  Opcode VEX.0F 0xf0 - invalid */
+/*  Opcode VEX.66.0F 0xf0 - invalid */
+/** Opcode VEX.F2.0F 0xf0 - vlddqu Vx, Mx */
+FNIEMOP_STUB(iemOp_vlddqu_Vx_Mx);
+
+/*  Opcode VEX.0F 0xf1 - invalid */
+/** Opcode VEX.66.0F 0xf1 - vpsllw Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpsllw_Vx_Hx_W);
+/*  Opcode VEX.F2.0F 0xf1 - invalid */
+
+/*  Opcode VEX.0F 0xf2 - invalid */
+/** Opcode VEX.66.0F 0xf2 - vpslld Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpslld_Vx_Hx_Wx);
+/*  Opcode VEX.F2.0F 0xf2 - invalid */
+
+/*  Opcode VEX.0F 0xf3 - invalid */
+/** Opcode VEX.66.0F 0xf3 - vpsllq Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpsllq_Vx_Hx_Wx);
+/*  Opcode VEX.F2.0F 0xf3 - invalid */
+
+/*  Opcode VEX.0F 0xf4 - invalid */
+/** Opcode VEX.66.0F 0xf4 - vpmuludq Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpmuludq_Vx_Hx_W);
+/*  Opcode VEX.F2.0F 0xf4 - invalid */
+
+/*  Opcode VEX.0F 0xf5 - invalid */
+/** Opcode VEX.66.0F 0xf5 - vpmaddwd Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpmaddwd_Vx_Hx_Wx);
+/*  Opcode VEX.F2.0F 0xf5 - invalid */
+
+/*  Opcode VEX.0F 0xf6 - invalid */
+/** Opcode VEX.66.0F 0xf6 - vpsadbw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpsadbw_Vx_Hx_Wx);
+/*  Opcode VEX.F2.0F 0xf6 - invalid */
+
+/*  Opcode VEX.0F 0xf7 - invalid */
+/** Opcode VEX.66.0F 0xf7 - vmaskmovdqu Vdq, Udq */
+FNIEMOP_STUB(iemOp_vmaskmovdqu_Vdq_Udq);
+/*  Opcode VEX.F2.0F 0xf7 - invalid */
+
+/*  Opcode VEX.0F 0xf8 - invalid */
+/** Opcode VEX.66.0F 0xf8 - vpsubb Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpsubb_Vx_Hx_W);
+/*  Opcode VEX.F2.0F 0xf8 - invalid */
+
+/*  Opcode VEX.0F 0xf9 - invalid */
+/** Opcode VEX.66.0F 0xf9 - vpsubw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpsubw_Vx_Hx_Wx);
+/*  Opcode VEX.F2.0F 0xf9 - invalid */
+
+/*  Opcode VEX.0F 0xfa - invalid */
+/** Opcode VEX.66.0F 0xfa - vpsubd Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpsubd_Vx_Hx_Wx);
+/*  Opcode VEX.F2.0F 0xfa - invalid */
+
+/*  Opcode VEX.0F 0xfb - invalid */
+/** Opcode VEX.66.0F 0xfb - vpsubq Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpsubq_Vx_Hx_W);
+/*  Opcode VEX.F2.0F 0xfb - invalid */
+
+/*  Opcode VEX.0F 0xfc - invalid */
+/** Opcode VEX.66.0F 0xfc - vpaddb Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpaddb_Vx_Hx_Wx);
+/*  Opcode VEX.F2.0F 0xfc - invalid */
+
+/*  Opcode VEX.0F 0xfd - invalid */
+/** Opcode VEX.66.0F 0xfd - vpaddw Vx, Hx, Wx */
+FNIEMOP_STUB(iemOp_vpaddw_Vx_Hx_Wx);
+/*  Opcode VEX.F2.0F 0xfd - invalid */
+
+/*  Opcode VEX.0F 0xfe - invalid */
+/** Opcode VEX.66.0F 0xfe - vpaddd Vx, Hx, W */
+FNIEMOP_STUB(iemOp_vpaddd_Vx_Hx_W);
+/*  Opcode VEX.F2.0F 0xfe - invalid */
+
+
+/** Opcode **** 0x0f 0xff - UD0 */
+FNIEMOP_DEF(iemOp_vud0)
+{
+    IEMOP_MNEMONIC(vud0, "vud0");
+    if (pVCpu->iem.s.enmCpuVendor == CPUMCPUVENDOR_INTEL)
+    {
+        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm); RT_NOREF(bRm);
+#ifndef TST_IEM_CHECK_MC
+        RTGCPTR      GCPtrEff;
+        VBOXSTRICTRC rcStrict = iemOpHlpCalcRmEffAddr(pVCpu, bRm, 0, &GCPtrEff);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+#endif
+        IEMOP_HLP_DONE_DECODING();
+    }
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
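+
+/* On the Intel path the ModR/M byte and any effective-address bytes are
+ * decoded before #UD is raised, presumably so the reported instruction
+ * length matches what Intel hardware consumes for UD0; on other vendors the
+ * opcode is rejected without fetching a ModR/M byte.
+ */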
+
+
+
+/**
+ * VEX opcode map \#1.
+ *
+ * @sa  g_apfnTwoByteMap
+ */
+IEM_STATIC const PFNIEMOP g_apfnVexMap1[] =
+{
+    /*          no prefix,                  066h prefix,                f3h prefix,                 f2h prefix */
+    /* 0x00 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x01 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x02 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x03 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x04 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x05 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x06 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x07 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x08 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x09 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x0a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x0b */  IEMOP_X4(iemOp_vud2), /* ?? */
+    /* 0x0c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x0d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x0e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x0f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x10 */  iemOp_vmovups_Vps_Wps,      iemOp_vmovupd_Vpd_Wpd,      iemOp_vmovss_Vx_Hx_Wss,     iemOp_vmovsd_Vx_Hx_Wsd,
+    /* 0x11 */  iemOp_vmovups_Wps_Vps,      iemOp_vmovupd_Wpd_Vpd,      iemOp_vmovss_Wss_Hx_Vss,    iemOp_vmovsd_Wsd_Hx_Vsd,
+    /* 0x12 */  iemOp_vmovlps_Vq_Hq_Mq__vmovhlps, iemOp_vmovlpd_Vq_Hq_Mq, iemOp_vmovsldup_Vx_Wx,    iemOp_vmovddup_Vx_Wx,
+    /* 0x13 */  iemOp_vmovlps_Mq_Vq,        iemOp_vmovlpd_Mq_Vq,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x14 */  iemOp_vunpcklps_Vx_Hx_Wx,   iemOp_vunpcklpd_Vx_Hx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x15 */  iemOp_vunpckhps_Vx_Hx_Wx,   iemOp_vunpckhpd_Vx_Hx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x16 */  iemOp_vmovhpsv1_Vdq_Hq_Mq__vmovlhps_Vdq_Hq_Uq, iemOp_vmovhpdv1_Vdq_Hq_Mq, iemOp_vmovshdup_Vx_Wx, iemOp_InvalidNeedRM,
+    /* 0x17 */  iemOp_vmovhpsv1_Mq_Vq,      iemOp_vmovhpdv1_Mq_Vq,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x18 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x19 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x1a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x1b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x1c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x1d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x1e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x1f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x20 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x21 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x22 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x23 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x24 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x25 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x26 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x27 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x28 */  iemOp_vmovaps_Vps_Wps,      iemOp_vmovapd_Vpd_Wpd,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x29 */  iemOp_vmovaps_Wps_Vps,      iemOp_vmovapd_Wpd_Vpd,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2a */  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_vcvtsi2ss_Vss_Hss_Ey, iemOp_vcvtsi2sd_Vsd_Hsd_Ey,
+    /* 0x2b */  iemOp_vmovntps_Mps_Vps,     iemOp_vmovntpd_Mpd_Vpd,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2c */  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_vcvttss2si_Gy_Wss,    iemOp_vcvttsd2si_Gy_Wsd,
+    /* 0x2d */  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_vcvtss2si_Gy_Wss,     iemOp_vcvtsd2si_Gy_Wsd,
+    /* 0x2e */  iemOp_vucomiss_Vss_Wss,     iemOp_vucomisd_Vsd_Wsd,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2f */  iemOp_vcomiss_Vss_Wss,      iemOp_vcomisd_Vsd_Wsd,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0x30 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x31 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x32 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x33 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x34 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x35 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x36 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x37 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x38 */  IEMOP_X4(iemOp_InvalidNeedRM),  /** @todo check that there is no escape table stuff here */
+    /* 0x39 */  IEMOP_X4(iemOp_InvalidNeedRM),  /** @todo check that there is no escape table stuff here */
+    /* 0x3a */  IEMOP_X4(iemOp_InvalidNeedRM),  /** @todo check that there is no escape table stuff here */
+    /* 0x3b */  IEMOP_X4(iemOp_InvalidNeedRM),  /** @todo check that there is no escape table stuff here */
+    /* 0x3c */  IEMOP_X4(iemOp_InvalidNeedRM),  /** @todo check that there is no escape table stuff here */
+    /* 0x3d */  IEMOP_X4(iemOp_InvalidNeedRM),  /** @todo check that there is no escape table stuff here */
+    /* 0x3e */  IEMOP_X4(iemOp_InvalidNeedRM),  /** @todo check that there is no escape table stuff here */
+    /* 0x3f */  IEMOP_X4(iemOp_InvalidNeedRM),  /** @todo check that there is no escape table stuff here */
+
+    /* 0x40 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x41 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x42 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x43 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x44 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x45 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x46 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x47 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x48 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x49 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x50 */  iemOp_vmovmskps_Gy_Ups,     iemOp_vmovmskpd_Gy_Upd,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x51 */  iemOp_vsqrtps_Vps_Wps,      iemOp_vsqrtpd_Vpd_Wpd,      iemOp_vsqrtss_Vss_Hss_Wss,  iemOp_vsqrtsd_Vsd_Hsd_Wsd,
+    /* 0x52 */  iemOp_vrsqrtps_Vps_Wps,     iemOp_InvalidNeedRM,        iemOp_vrsqrtss_Vss_Hss_Wss, iemOp_InvalidNeedRM,
+    /* 0x53 */  iemOp_vrcpps_Vps_Wps,       iemOp_InvalidNeedRM,        iemOp_vrcpss_Vss_Hss_Wss,   iemOp_InvalidNeedRM,
+    /* 0x54 */  iemOp_vandps_Vps_Hps_Wps,   iemOp_vandpd_Vpd_Hpd_Wpd,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x55 */  iemOp_vandnps_Vps_Hps_Wps,  iemOp_vandnpd_Vpd_Hpd_Wpd,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x56 */  iemOp_vorps_Vps_Hps_Wps,    iemOp_vorpd_Vpd_Hpd_Wpd,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x57 */  iemOp_vxorps_Vps_Hps_Wps,   iemOp_vxorpd_Vpd_Hpd_Wpd,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x58 */  iemOp_vaddps_Vps_Hps_Wps,   iemOp_vaddpd_Vpd_Hpd_Wpd,   iemOp_vaddss_Vss_Hss_Wss,   iemOp_vaddsd_Vsd_Hsd_Wsd,
+    /* 0x59 */  iemOp_vmulps_Vps_Hps_Wps,   iemOp_vmulpd_Vpd_Hpd_Wpd,   iemOp_vmulss_Vss_Hss_Wss,   iemOp_vmulsd_Vsd_Hsd_Wsd,
+    /* 0x5a */  iemOp_vcvtps2pd_Vpd_Wps,    iemOp_vcvtpd2ps_Vps_Wpd,    iemOp_vcvtss2sd_Vsd_Hx_Wss, iemOp_vcvtsd2ss_Vss_Hx_Wsd,
+    /* 0x5b */  iemOp_vcvtdq2ps_Vps_Wdq,    iemOp_vcvtps2dq_Vdq_Wps,    iemOp_vcvttps2dq_Vdq_Wps,   iemOp_InvalidNeedRM,
+    /* 0x5c */  iemOp_vsubps_Vps_Hps_Wps,   iemOp_vsubpd_Vpd_Hpd_Wpd,   iemOp_vsubss_Vss_Hss_Wss,   iemOp_vsubsd_Vsd_Hsd_Wsd,
+    /* 0x5d */  iemOp_vminps_Vps_Hps_Wps,   iemOp_vminpd_Vpd_Hpd_Wpd,   iemOp_vminss_Vss_Hss_Wss,   iemOp_vminsd_Vsd_Hsd_Wsd,
+    /* 0x5e */  iemOp_vdivps_Vps_Hps_Wps,   iemOp_vdivpd_Vpd_Hpd_Wpd,   iemOp_vdivss_Vss_Hss_Wss,   iemOp_vdivsd_Vsd_Hsd_Wsd,
+    /* 0x5f */  iemOp_vmaxps_Vps_Hps_Wps,   iemOp_vmaxpd_Vpd_Hpd_Wpd,   iemOp_vmaxss_Vss_Hss_Wss,   iemOp_vmaxsd_Vsd_Hsd_Wsd,
+
+    /* 0x60 */  iemOp_InvalidNeedRM,        iemOp_vpunpcklbw_Vx_Hx_Wx,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x61 */  iemOp_InvalidNeedRM,        iemOp_vpunpcklwd_Vx_Hx_Wx,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x62 */  iemOp_InvalidNeedRM,        iemOp_vpunpckldq_Vx_Hx_Wx,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x63 */  iemOp_InvalidNeedRM,        iemOp_vpacksswb_Vx_Hx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x64 */  iemOp_InvalidNeedRM,        iemOp_vpcmpgtb_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x65 */  iemOp_InvalidNeedRM,        iemOp_vpcmpgtw_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x66 */  iemOp_InvalidNeedRM,        iemOp_vpcmpgtd_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x67 */  iemOp_InvalidNeedRM,        iemOp_vpackuswb_Vx_Hx_W,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x68 */  iemOp_InvalidNeedRM,        iemOp_vpunpckhbw_Vx_Hx_Wx,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x69 */  iemOp_InvalidNeedRM,        iemOp_vpunpckhwd_Vx_Hx_Wx,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6a */  iemOp_InvalidNeedRM,        iemOp_vpunpckhdq_Vx_Hx_W,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6b */  iemOp_InvalidNeedRM,        iemOp_vpackssdw_Vx_Hx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6c */  iemOp_InvalidNeedRM,        iemOp_vpunpcklqdq_Vx_Hx_Wx, iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6d */  iemOp_InvalidNeedRM,        iemOp_vpunpckhqdq_Vx_Hx_W,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6e */  iemOp_InvalidNeedRM,        iemOp_vmovd_q_Vy_Ey,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x6f */  iemOp_InvalidNeedRM,        iemOp_vmovdqa_Vx_Wx,        iemOp_vmovdqu_Vx_Wx,        iemOp_InvalidNeedRM,
+
+    /* 0x70 */  iemOp_InvalidNeedRM,        iemOp_vpshufd_Vx_Wx_Ib,     iemOp_vpshufhw_Vx_Wx_Ib,    iemOp_vpshuflw_Vx_Wx_Ib,
+    /* 0x71 */  iemOp_InvalidNeedRM,        iemOp_VGrp12,               iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x72 */  iemOp_InvalidNeedRM,        iemOp_VGrp13,               iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x73 */  iemOp_InvalidNeedRM,        iemOp_VGrp14,               iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x74 */  iemOp_InvalidNeedRM,        iemOp_vpcmpeqb_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x75 */  iemOp_InvalidNeedRM,        iemOp_vpcmpeqw_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x76 */  iemOp_InvalidNeedRM,        iemOp_vpcmpeqd_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x77 */  iemOp_vzeroupperv__vzeroallv, iemOp_InvalidNeedRM,      iemOp_InvalidNeedRM,       iemOp_InvalidNeedRM,
+    /* 0x78 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x79 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7c */  iemOp_InvalidNeedRM,        iemOp_vhaddpd_Vpd_Hpd_Wpd,  iemOp_InvalidNeedRM,        iemOp_vhaddps_Vps_Hps_Wps,
+    /* 0x7d */  iemOp_InvalidNeedRM,        iemOp_vhsubpd_Vpd_Hpd_Wpd,  iemOp_InvalidNeedRM,        iemOp_vhsubps_Vps_Hps_Wps,
+    /* 0x7e */  iemOp_InvalidNeedRM,        iemOp_vmovd_q_Ey_Vy,        iemOp_vmovq_Vq_Wq,          iemOp_InvalidNeedRM,
+    /* 0x7f */  iemOp_InvalidNeedRM,        iemOp_vmovdqa_Wx_Vx,        iemOp_vmovdqu_Wx_Vx,        iemOp_InvalidNeedRM,
+
+    /* 0x80 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x81 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x82 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x83 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x84 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x85 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x86 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x87 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x88 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x89 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x90 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x91 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x92 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x93 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x94 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x95 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x96 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x97 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x98 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x99 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x9f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xa0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xaa */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xab */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xac */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xad */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xae */  IEMOP_X4(iemOp_VGrp15),
+    /* 0xaf */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xb0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xba */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xbb */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xbc */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xbd */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xbe */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xbf */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xc0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc2 */  iemOp_vcmpps_Vps_Hps_Wps_Ib, iemOp_vcmppd_Vpd_Hpd_Wpd_Ib, iemOp_vcmpss_Vss_Hss_Wss_Ib, iemOp_vcmpsd_Vsd_Hsd_Wsd_Ib,
+    /* 0xc3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc4 */  iemOp_InvalidNeedRM,        iemOp_vpinsrw_Vdq_Hdq_RyMw_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0xc5 */  iemOp_InvalidNeedRM,        iemOp_vpextrw_Gd_Udq_Ib,       iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0xc6 */  iemOp_vshufps_Vps_Hps_Wps_Ib, iemOp_vshufpd_Vpd_Hpd_Wpd_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0xc7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xca */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xcb */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xcc */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xcd */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xce */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xcf */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xd0 */  iemOp_InvalidNeedRM,        iemOp_vaddsubpd_Vpd_Hpd_Wpd, iemOp_InvalidNeedRM,       iemOp_vaddsubps_Vps_Hps_Wps,
+    /* 0xd1 */  iemOp_InvalidNeedRM,        iemOp_vpsrlw_Vx_Hx_W,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd2 */  iemOp_InvalidNeedRM,        iemOp_vpsrld_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd3 */  iemOp_InvalidNeedRM,        iemOp_vpsrlq_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd4 */  iemOp_InvalidNeedRM,        iemOp_vpaddq_Vx_Hx_W,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd5 */  iemOp_InvalidNeedRM,        iemOp_vpmullw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd6 */  iemOp_InvalidNeedRM,        iemOp_vmovq_Wq_Vq,          iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd7 */  iemOp_InvalidNeedRM,        iemOp_vpmovmskb_Gd_Ux,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd8 */  iemOp_InvalidNeedRM,        iemOp_vpsubusb_Vx_Hx_W,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xd9 */  iemOp_InvalidNeedRM,        iemOp_vpsubusw_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xda */  iemOp_InvalidNeedRM,        iemOp_vpminub_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdb */  iemOp_InvalidNeedRM,        iemOp_vpand_Vx_Hx_W,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdc */  iemOp_InvalidNeedRM,        iemOp_vpaddusb_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdd */  iemOp_InvalidNeedRM,        iemOp_vpaddusw_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xde */  iemOp_InvalidNeedRM,        iemOp_vpmaxub_Vx_Hx_W,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdf */  iemOp_InvalidNeedRM,        iemOp_vpandn_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0xe0 */  iemOp_InvalidNeedRM,        iemOp_vpavgb_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe1 */  iemOp_InvalidNeedRM,        iemOp_vpsraw_Vx_Hx_W,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe2 */  iemOp_InvalidNeedRM,        iemOp_vpsrad_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe3 */  iemOp_InvalidNeedRM,        iemOp_vpavgw_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe4 */  iemOp_InvalidNeedRM,        iemOp_vpmulhuw_Vx_Hx_W,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe5 */  iemOp_InvalidNeedRM,        iemOp_vpmulhw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe6 */  iemOp_InvalidNeedRM,        iemOp_vcvttpd2dq_Vx_Wpd,    iemOp_vcvtdq2pd_Vx_Wpd,     iemOp_vcvtpd2dq_Vx_Wpd,
+    /* 0xe7 */  iemOp_InvalidNeedRM,        iemOp_vmovntdq_Mx_Vx,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe8 */  iemOp_InvalidNeedRM,        iemOp_vpsubsb_Vx_Hx_W,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xe9 */  iemOp_InvalidNeedRM,        iemOp_vpsubsw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xea */  iemOp_InvalidNeedRM,        iemOp_vpminsw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xeb */  iemOp_InvalidNeedRM,        iemOp_vpor_Vx_Hx_W,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xec */  iemOp_InvalidNeedRM,        iemOp_vpaddsb_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xed */  iemOp_InvalidNeedRM,        iemOp_vpaddsw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xee */  iemOp_InvalidNeedRM,        iemOp_vpmaxsw_Vx_Hx_W,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xef */  iemOp_InvalidNeedRM,        iemOp_vpxor_Vx_Hx_Wx,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0xf0 */  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_vlddqu_Vx_Mx,
+    /* 0xf1 */  iemOp_InvalidNeedRM,        iemOp_vpsllw_Vx_Hx_W,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf2 */  iemOp_InvalidNeedRM,        iemOp_vpslld_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf3 */  iemOp_InvalidNeedRM,        iemOp_vpsllq_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf4 */  iemOp_InvalidNeedRM,        iemOp_vpmuludq_Vx_Hx_W,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf5 */  iemOp_InvalidNeedRM,        iemOp_vpmaddwd_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf6 */  iemOp_InvalidNeedRM,        iemOp_vpsadbw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf7 */  iemOp_InvalidNeedRM,        iemOp_vmaskmovdqu_Vdq_Udq,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf8 */  iemOp_InvalidNeedRM,        iemOp_vpsubb_Vx_Hx_W,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf9 */  iemOp_InvalidNeedRM,        iemOp_vpsubw_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xfa */  iemOp_InvalidNeedRM,        iemOp_vpsubd_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xfb */  iemOp_InvalidNeedRM,        iemOp_vpsubq_Vx_Hx_W,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xfc */  iemOp_InvalidNeedRM,        iemOp_vpaddb_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xfd */  iemOp_InvalidNeedRM,        iemOp_vpaddw_Vx_Hx_Wx,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xfe */  iemOp_InvalidNeedRM,        iemOp_vpaddd_Vx_Hx_W,       iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xff */  IEMOP_X4(iemOp_vud0) /* ?? */
+};
+AssertCompile(RT_ELEMENTS(g_apfnVexMap1) == 1024);
+/** @}  */
+
+
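For reference, each g_apfnVexMap* table above is a flat array of 256 opcode
rows by 4 mandatory-prefix columns (none, 066h, 0f3h, 0f2h), which is what
the AssertCompile(... == 1024) guards. A minimal stand-alone sketch of that
lookup scheme, using hypothetical names rather than the real IEM types:

    #include <stdint.h>
    #include <stdio.h>

    typedef int (*PFNOP)(void);                    /* stand-in for PFNIEMOP */

    static int opInvalid(void) { puts("invalid"); return -1; }
    static int opVmovups(void) { puts("vmovups"); return  0; }

    /* 256 opcode rows x 4 prefix columns (none, 066h, 0f3h, 0f2h). */
    static const PFNOP g_apfnDemoMap[256 * 4] = {
        [0x10 * 4 + 0] = opVmovups,                /* VEX.0F 0x10, no prefix */
    };

    static int dispatch(uint8_t bOpcode, unsigned idxPrefix)
    {
        PFNOP pfn = g_apfnDemoMap[(unsigned)bOpcode * 4 + (idxPrefix & 3)];
        return pfn ? pfn() : opInvalid();          /* unset rows stay NULL */
    }

    int main(void)
    {
        dispatch(0x10, 0);   /* no prefix -> vmovups */
        dispatch(0x10, 3);   /* 0f2h column unset in this demo -> invalid */
        return 0;
    }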
diff --git a/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap2.cpp.h b/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap2.cpp.h
new file mode 100644
index 0000000..958295a
--- /dev/null
+++ b/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap2.cpp.h
@@ -0,0 +1,855 @@
+/* $Id: IEMAllInstructionsVexMap2.cpp.h $ */
+/** @file
+ * IEM - Instruction Decoding and Emulation, 0x0f 0x38 map.
+ *
+ * @remarks IEMAllInstructionsThree0f38.cpp.h is a legacy mirror of this file.
+ *          Any update here is likely needed in that file too.
+ */
+
+/*
+ * Copyright (C) 2011-2017 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @name VEX Opcode Map 2
+ * @{
+ */
+
+/*  Opcode VEX.0F38 0x00 - invalid. */
+/** Opcode VEX.66.0F38 0x00. */
+FNIEMOP_STUB(iemOp_vpshufb_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x01 - invalid. */
+/** Opcode VEX.66.0F38 0x01. */
+FNIEMOP_STUB(iemOp_vphaddw_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x02 - invalid. */
+/** Opcode VEX.66.0F38 0x02. */
+FNIEMOP_STUB(iemOp_vphaddd_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x03 - invalid. */
+/** Opcode VEX.66.0F38 0x03. */
+FNIEMOP_STUB(iemOp_vphaddsw_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x04 - invalid. */
+/** Opcode VEX.66.0F38 0x04. */
+FNIEMOP_STUB(iemOp_vpmaddubsw_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x05 - invalid. */
+/** Opcode VEX.66.0F38 0x05. */
+FNIEMOP_STUB(iemOp_vphsubw_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x06 - invalid. */
+/** Opcode VEX.66.0F38 0x06. */
+FNIEMOP_STUB(iemOp_vphsubd_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x07 - invalid. */
+/** Opcode VEX.66.0F38 0x07. */
+FNIEMOP_STUB(iemOp_vphsubsw_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x08 - invalid. */
+/** Opcode VEX.66.0F38 0x08. */
+FNIEMOP_STUB(iemOp_vpsignb_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x09 - invalid. */
+/** Opcode VEX.66.0F38 0x09. */
+FNIEMOP_STUB(iemOp_vpsignw_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x0a - invalid. */
+/** Opcode VEX.66.0F38 0x0a. */
+FNIEMOP_STUB(iemOp_vpsignd_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x0b - invalid. */
+/** Opcode VEX.66.0F38 0x0b. */
+FNIEMOP_STUB(iemOp_vpmulhrsw_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x0c - invalid. */
+/** Opcode VEX.66.0F38 0x0c. */
+FNIEMOP_STUB(iemOp_vpermilps_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x0d - invalid. */
+/** Opcode VEX.66.0F38 0x0d. */
+FNIEMOP_STUB(iemOp_vpermilpd_Vx_Hx_Wx);
+/*  Opcode VEX.0F38 0x0e - invalid. */
+/** Opcode VEX.66.0F38 0x0e. */
+FNIEMOP_STUB(iemOp_vtestps_Vx_Wx);
+/*  Opcode VEX.0F38 0x0f - invalid. */
+/** Opcode VEX.66.0F38 0x0f. */
+FNIEMOP_STUB(iemOp_vtestpd_Vx_Wx);
+
+
+/*  Opcode VEX.0F38 0x10 - invalid */
+/*  Opcode VEX.66.0F38 0x10 - invalid (legacy only). */
+/*  Opcode VEX.0F38 0x11 - invalid */
+/*  Opcode VEX.66.0F38 0x11 - invalid */
+/*  Opcode VEX.0F38 0x12 - invalid */
+/*  Opcode VEX.66.0F38 0x12 - invalid */
+/*  Opcode VEX.0F38 0x13 - invalid */
+/*  Opcode VEX.66.0F38 0x13 - invalid (vex only). */
+/*  Opcode VEX.0F38 0x14 - invalid */
+/*  Opcode VEX.66.0F38 0x14 - invalid (legacy only). */
+/*  Opcode VEX.0F38 0x15 - invalid */
+/*  Opcode VEX.66.0F38 0x15 - invalid (legacy only). */
+/*  Opcode VEX.0F38 0x16 - invalid */
+/** Opcode VEX.66.0F38 0x16. */
+FNIEMOP_STUB(iemOp_vpermps_Vqq_Hqq_Wqq);
+/*  Opcode VEX.0F38 0x17 - invalid */
+/** Opcode VEX.66.0F38 0x17. */
+FNIEMOP_STUB(iemOp_vptest_Vx_Wx);
+/*  Opcode VEX.0F38 0x18 - invalid */
+/** Opcode VEX.66.0F38 0x18. */
+FNIEMOP_STUB(iemOp_vbroadcastss_Vx_Wd);
+/*  Opcode VEX.0F38 0x19 - invalid */
+/** Opcode VEX.66.0F38 0x19. */
+FNIEMOP_STUB(iemOp_vbroadcastsd_Vqq_Wq);
+/*  Opcode VEX.0F38 0x1a - invalid */
+/** Opcode VEX.66.0F38 0x1a. */
+FNIEMOP_STUB(iemOp_vbroadcastf128_Vqq_Mdq);
+/*  Opcode VEX.0F38 0x1b - invalid */
+/*  Opcode VEX.66.0F38 0x1b - invalid */
+/*  Opcode VEX.0F38 0x1c - invalid. */
+/** Opcode VEX.66.0F38 0x1c. */
+FNIEMOP_STUB(iemOp_vpabsb_Vx_Wx);
+/*  Opcode VEX.0F38 0x1d - invalid. */
+/** Opcode VEX.66.0F38 0x1d. */
+FNIEMOP_STUB(iemOp_vpabsw_Vx_Wx);
+/*  Opcode VEX.0F38 0x1e - invalid. */
+/** Opcode VEX.66.0F38 0x1e. */
+FNIEMOP_STUB(iemOp_vpabsd_Vx_Wx);
+/*  Opcode VEX.0F38 0x1f - invalid */
+/*  Opcode VEX.66.0F38 0x1f - invalid */
+
+
+/** Opcode VEX.66.0F38 0x20. */
+FNIEMOP_STUB(iemOp_vpmovsxbw_Vx_UxMq);
+/** Opcode VEX.66.0F38 0x21. */
+FNIEMOP_STUB(iemOp_vpmovsxbd_Vx_UxMd);
+/** Opcode VEX.66.0F38 0x22. */
+FNIEMOP_STUB(iemOp_vpmovsxbq_Vx_UxMw);
+/** Opcode VEX.66.0F38 0x23. */
+FNIEMOP_STUB(iemOp_vpmovsxwd_Vx_UxMq);
+/** Opcode VEX.66.0F38 0x24. */
+FNIEMOP_STUB(iemOp_vpmovsxwq_Vx_UxMd);
+/** Opcode VEX.66.0F38 0x25. */
+FNIEMOP_STUB(iemOp_vpmovsxdq_Vx_UxMq);
+/*  Opcode VEX.66.0F38 0x26 - invalid */
+/*  Opcode VEX.66.0F38 0x27 - invalid */
+/** Opcode VEX.66.0F38 0x28. */
+FNIEMOP_STUB(iemOp_vpmuldq_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x29. */
+FNIEMOP_STUB(iemOp_vpcmpeqq_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x2a. */
+FNIEMOP_STUB(iemOp_vmovntdqa_Vx_Hx_Mx);
+/** Opcode VEX.66.0F38 0x2b. */
+FNIEMOP_STUB(iemOp_vpackusdw_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x2c. */
+FNIEMOP_STUB(iemOp_vmaskmovps_Vx_Hx_Mx);
+/** Opcode VEX.66.0F38 0x2d. */
+FNIEMOP_STUB(iemOp_vmaskmovpd_Vx_Hx_Mx);
+/** Opcode VEX.66.0F38 0x2e. */
+FNIEMOP_STUB(iemOp_vmaskmovps_Mx_Hx_Vx);
+/** Opcode VEX.66.0F38 0x2f. */
+FNIEMOP_STUB(iemOp_vmaskmovpd_Mx_Hx_Vx);
+
+/** Opcode VEX.66.0F38 0x30. */
+FNIEMOP_STUB(iemOp_vpmovzxbw_Vx_UxMq);
+/** Opcode VEX.66.0F38 0x31. */
+FNIEMOP_STUB(iemOp_vpmovzxbd_Vx_UxMd);
+/** Opcode VEX.66.0F38 0x32. */
+FNIEMOP_STUB(iemOp_vpmovzxbq_Vx_UxMw);
+/** Opcode VEX.66.0F38 0x33. */
+FNIEMOP_STUB(iemOp_vpmovzxwd_Vx_UxMq);
+/** Opcode VEX.66.0F38 0x34. */
+FNIEMOP_STUB(iemOp_vpmovzxwq_Vx_UxMd);
+/** Opcode VEX.66.0F38 0x35. */
+FNIEMOP_STUB(iemOp_vpmovzxdq_Vx_UxMq);
+/** Opcode VEX.66.0F38 0x36. */
+FNIEMOP_STUB(iemOp_vpermd_Vqq_Hqq_Wqq);
+/** Opcode VEX.66.0F38 0x37. */
+FNIEMOP_STUB(iemOp_vpcmpgtq_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x38. */
+FNIEMOP_STUB(iemOp_vpminsb_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x39. */
+FNIEMOP_STUB(iemOp_vpminsd_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x3a. */
+FNIEMOP_STUB(iemOp_vpminuw_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x3b. */
+FNIEMOP_STUB(iemOp_vpminud_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x3c. */
+FNIEMOP_STUB(iemOp_vpmaxsb_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x3d. */
+FNIEMOP_STUB(iemOp_vpmaxsd_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x3e. */
+FNIEMOP_STUB(iemOp_vpmaxuw_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x3f. */
+FNIEMOP_STUB(iemOp_vpmaxud_Vx_Hx_Wx);
+
+
+/** Opcode VEX.66.0F38 0x40. */
+FNIEMOP_STUB(iemOp_vpmulld_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x41. */
+FNIEMOP_STUB(iemOp_vphminposuw_Vdq_Wdq);
+/*  Opcode VEX.66.0F38 0x42 - invalid. */
+/*  Opcode VEX.66.0F38 0x43 - invalid. */
+/*  Opcode VEX.66.0F38 0x44 - invalid. */
+/** Opcode VEX.66.0F38 0x45. */
+FNIEMOP_STUB(iemOp_vpsrlvd_q_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x46. */
+FNIEMOP_STUB(iemOp_vpsravd_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x47. */
+FNIEMOP_STUB(iemOp_vpsllvd_q_Vx_Hx_Wx);
+/*  Opcode VEX.66.0F38 0x48 - invalid. */
+/*  Opcode VEX.66.0F38 0x49 - invalid. */
+/*  Opcode VEX.66.0F38 0x4a - invalid. */
+/*  Opcode VEX.66.0F38 0x4b - invalid. */
+/*  Opcode VEX.66.0F38 0x4c - invalid. */
+/*  Opcode VEX.66.0F38 0x4d - invalid. */
+/*  Opcode VEX.66.0F38 0x4e - invalid. */
+/*  Opcode VEX.66.0F38 0x4f - invalid. */
+
+/*  Opcode VEX.66.0F38 0x50 - invalid. */
+/*  Opcode VEX.66.0F38 0x51 - invalid. */
+/*  Opcode VEX.66.0F38 0x52 - invalid. */
+/*  Opcode VEX.66.0F38 0x53 - invalid. */
+/*  Opcode VEX.66.0F38 0x54 - invalid. */
+/*  Opcode VEX.66.0F38 0x55 - invalid. */
+/*  Opcode VEX.66.0F38 0x56 - invalid. */
+/*  Opcode VEX.66.0F38 0x57 - invalid. */
+/** Opcode VEX.66.0F38 0x58. */
+FNIEMOP_STUB(iemOp_vpbroadcastd_Vx_Wx);
+/** Opcode VEX.66.0F38 0x59. */
+FNIEMOP_STUB(iemOp_vpbroadcastq_Vx_Wx);
+/** Opcode VEX.66.0F38 0x5a. */
+FNIEMOP_STUB(iemOp_vbroadcasti128_Vqq_Mdq);
+/*  Opcode VEX.66.0F38 0x5b - invalid. */
+/*  Opcode VEX.66.0F38 0x5c - invalid. */
+/*  Opcode VEX.66.0F38 0x5d - invalid. */
+/*  Opcode VEX.66.0F38 0x5e - invalid. */
+/*  Opcode VEX.66.0F38 0x5f - invalid. */
+
+/*  Opcode VEX.66.0F38 0x60 - invalid. */
+/*  Opcode VEX.66.0F38 0x61 - invalid. */
+/*  Opcode VEX.66.0F38 0x62 - invalid. */
+/*  Opcode VEX.66.0F38 0x63 - invalid. */
+/*  Opcode VEX.66.0F38 0x64 - invalid. */
+/*  Opcode VEX.66.0F38 0x65 - invalid. */
+/*  Opcode VEX.66.0F38 0x66 - invalid. */
+/*  Opcode VEX.66.0F38 0x67 - invalid. */
+/*  Opcode VEX.66.0F38 0x68 - invalid. */
+/*  Opcode VEX.66.0F38 0x69 - invalid. */
+/*  Opcode VEX.66.0F38 0x6a - invalid. */
+/*  Opcode VEX.66.0F38 0x6b - invalid. */
+/*  Opcode VEX.66.0F38 0x6c - invalid. */
+/*  Opcode VEX.66.0F38 0x6d - invalid. */
+/*  Opcode VEX.66.0F38 0x6e - invalid. */
+/*  Opcode VEX.66.0F38 0x6f - invalid. */
+
+/*  Opcode VEX.66.0F38 0x70 - invalid. */
+/*  Opcode VEX.66.0F38 0x71 - invalid. */
+/*  Opcode VEX.66.0F38 0x72 - invalid. */
+/*  Opcode VEX.66.0F38 0x73 - invalid. */
+/*  Opcode VEX.66.0F38 0x74 - invalid. */
+/*  Opcode VEX.66.0F38 0x75 - invalid. */
+/*  Opcode VEX.66.0F38 0x76 - invalid. */
+/*  Opcode VEX.66.0F38 0x77 - invalid. */
+/** Opcode VEX.66.0F38 0x78. */
+FNIEMOP_STUB(iemOp_vpbroadcastb_Vx_Wx);
+/** Opcode VEX.66.0F38 0x79. */
+FNIEMOP_STUB(iemOp_vpbroadcastw_Vx_Wx);
+/*  Opcode VEX.66.0F38 0x7a - invalid. */
+/*  Opcode VEX.66.0F38 0x7b - invalid. */
+/*  Opcode VEX.66.0F38 0x7c - invalid. */
+/*  Opcode VEX.66.0F38 0x7d - invalid. */
+/*  Opcode VEX.66.0F38 0x7e - invalid. */
+/*  Opcode VEX.66.0F38 0x7f - invalid. */
+
+/*  Opcode VEX.66.0F38 0x80 - invalid (legacy only). */
+/*  Opcode VEX.66.0F38 0x81 - invalid (legacy only). */
+/*  Opcode VEX.66.0F38 0x82 - invalid (legacy only). */
+/*  Opcode VEX.66.0F38 0x83 - invalid. */
+/*  Opcode VEX.66.0F38 0x84 - invalid. */
+/*  Opcode VEX.66.0F38 0x85 - invalid. */
+/*  Opcode VEX.66.0F38 0x86 - invalid. */
+/*  Opcode VEX.66.0F38 0x87 - invalid. */
+/*  Opcode VEX.66.0F38 0x88 - invalid. */
+/*  Opcode VEX.66.0F38 0x89 - invalid. */
+/*  Opcode VEX.66.0F38 0x8a - invalid. */
+/*  Opcode VEX.66.0F38 0x8b - invalid. */
+/** Opcode VEX.66.0F38 0x8c. */
+FNIEMOP_STUB(iemOp_vpmaskmovd_q_Vx_Hx_Mx);
+/*  Opcode VEX.66.0F38 0x8d - invalid. */
+/** Opcode VEX.66.0F38 0x8e. */
+FNIEMOP_STUB(iemOp_vpmaskmovd_q_Mx_Vx_Hx);
+/*  Opcode VEX.66.0F38 0x8f - invalid. */
+
+/** Opcode VEX.66.0F38 0x90 (vex only). */
+FNIEMOP_STUB(iemOp_vgatherdd_q_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x91 (vex only). */
+FNIEMOP_STUB(iemOp_vgatherqd_q_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x92 (vex only). */
+FNIEMOP_STUB(iemOp_vgatherdps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x93 (vex only). */
+FNIEMOP_STUB(iemOp_vgatherqps_d_Vx_Hx_Wx);
+/*  Opcode VEX.66.0F38 0x94 - invalid. */
+/*  Opcode VEX.66.0F38 0x95 - invalid. */
+/** Opcode VEX.66.0F38 0x96 (vex only). */
+FNIEMOP_STUB(iemOp_vfmaddsub132ps_q_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x97 (vex only). */
+FNIEMOP_STUB(iemOp_vfmsubadd132ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x98 (vex only). */
+FNIEMOP_STUB(iemOp_vfmadd132ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x99 (vex only). */
+FNIEMOP_STUB(iemOp_vfmadd132ss_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x9a (vex only). */
+FNIEMOP_STUB(iemOp_vfmsub132ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x9b (vex only). */
+FNIEMOP_STUB(iemOp_vfmsub132ss_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x9c (vex only). */
+FNIEMOP_STUB(iemOp_vfnmadd132ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x9d (vex only). */
+FNIEMOP_STUB(iemOp_vfnmadd132ss_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x9e (vex only). */
+FNIEMOP_STUB(iemOp_vfnmsub132ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0x9f (vex only). */
+FNIEMOP_STUB(iemOp_vfnmsub132ss_d_Vx_Hx_Wx);
+
+/*  Opcode VEX.66.0F38 0xa0 - invalid. */
+/*  Opcode VEX.66.0F38 0xa1 - invalid. */
+/*  Opcode VEX.66.0F38 0xa2 - invalid. */
+/*  Opcode VEX.66.0F38 0xa3 - invalid. */
+/*  Opcode VEX.66.0F38 0xa4 - invalid. */
+/*  Opcode VEX.66.0F38 0xa5 - invalid. */
+/** Opcode VEX.66.0F38 0xa6 (vex only). */
+FNIEMOP_STUB(iemOp_vfmaddsub213ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xa7 (vex only). */
+FNIEMOP_STUB(iemOp_vfmsubadd213ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xa8 (vex only). */
+FNIEMOP_STUB(iemOp_vfmadd213ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xa9 (vex only). */
+FNIEMOP_STUB(iemOp_vfmadd213ss_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xaa (vex only). */
+FNIEMOP_STUB(iemOp_vfmsub213ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xab (vex only). */
+FNIEMOP_STUB(iemOp_vfmsub213ss_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xac (vex only). */
+FNIEMOP_STUB(iemOp_vfnmadd213ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xad (vex only). */
+FNIEMOP_STUB(iemOp_vfnmadd213ss_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xae (vex only). */
+FNIEMOP_STUB(iemOp_vfnmsub213ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xaf (vex only). */
+FNIEMOP_STUB(iemOp_vfnmsub213ss_d_Vx_Hx_Wx);
+
+/*  Opcode VEX.66.0F38 0xb0 - invalid. */
+/*  Opcode VEX.66.0F38 0xb1 - invalid. */
+/*  Opcode VEX.66.0F38 0xb2 - invalid. */
+/*  Opcode VEX.66.0F38 0xb3 - invalid. */
+/*  Opcode VEX.66.0F38 0xb4 - invalid. */
+/*  Opcode VEX.66.0F38 0xb5 - invalid. */
+/** Opcode VEX.66.0F38 0xb6 (vex only). */
+FNIEMOP_STUB(iemOp_vfmaddsub231ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xb7 (vex only). */
+FNIEMOP_STUB(iemOp_vfmsubadd231ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xb8 (vex only). */
+FNIEMOP_STUB(iemOp_vfmadd231ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xb9 (vex only). */
+FNIEMOP_STUB(iemOp_vfmadd231ss_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xba (vex only). */
+FNIEMOP_STUB(iemOp_vfmsub231ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xbb (vex only). */
+FNIEMOP_STUB(iemOp_vfmsub231ss_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xbc (vex only). */
+FNIEMOP_STUB(iemOp_vfnmadd231ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xbd (vex only). */
+FNIEMOP_STUB(iemOp_vfnmadd231ss_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xbe (vex only). */
+FNIEMOP_STUB(iemOp_vfnmsub231ps_d_Vx_Hx_Wx);
+/** Opcode VEX.66.0F38 0xbf (vex only). */
+FNIEMOP_STUB(iemOp_vfnmsub231ss_d_Vx_Hx_Wx);
+
+/*  Opcode VEX.0F38 0xc0 - invalid. */
+/*  Opcode VEX.66.0F38 0xc0 - invalid. */
+/*  Opcode VEX.0F38 0xc1 - invalid. */
+/*  Opcode VEX.66.0F38 0xc1 - invalid. */
+/*  Opcode VEX.0F38 0xc2 - invalid. */
+/*  Opcode VEX.66.0F38 0xc2 - invalid. */
+/*  Opcode VEX.0F38 0xc3 - invalid. */
+/*  Opcode VEX.66.0F38 0xc3 - invalid. */
+/*  Opcode VEX.0F38 0xc4 - invalid. */
+/*  Opcode VEX.66.0F38 0xc4 - invalid. */
+/*  Opcode VEX.0F38 0xc5 - invalid. */
+/*  Opcode VEX.66.0F38 0xc5 - invalid. */
+/*  Opcode VEX.0F38 0xc6 - invalid. */
+/*  Opcode VEX.66.0F38 0xc6 - invalid. */
+/*  Opcode VEX.0F38 0xc7 - invalid. */
+/*  Opcode VEX.66.0F38 0xc7 - invalid. */
+/** Opcode VEX.0F38 0xc8. */
+FNIEMOP_STUB(iemOp_vsha1nexte_Vdq_Wdq);
+/*  Opcode VEX.66.0F38 0xc8 - invalid. */
+/** Opcode VEX.0F38 0xc9. */
+FNIEMOP_STUB(iemOp_vsha1msg1_Vdq_Wdq);
+/*  Opcode VEX.66.0F38 0xc9 - invalid. */
+/** Opcode VEX.0F38 0xca. */
+FNIEMOP_STUB(iemOp_vsha1msg2_Vdq_Wdq);
+/*  Opcode VEX.66.0F38 0xca - invalid. */
+/** Opcode VEX.0F38 0xcb. */
+FNIEMOP_STUB(iemOp_vsha256rnds2_Vdq_Wdq);
+/*  Opcode VEX.66.0F38 0xcb - invalid. */
+/** Opcode VEX.0F38 0xcc. */
+FNIEMOP_STUB(iemOp_vsha256msg1_Vdq_Wdq);
+/*  Opcode VEX.66.0F38 0xcc - invalid. */
+/** Opcode VEX.0F38 0xcd. */
+FNIEMOP_STUB(iemOp_vsha256msg2_Vdq_Wdq);
+/*  Opcode VEX.66.0F38 0xcd - invalid. */
+/*  Opcode VEX.0F38 0xce - invalid. */
+/*  Opcode VEX.66.0F38 0xce - invalid. */
+/*  Opcode VEX.0F38 0xcf - invalid. */
+/*  Opcode VEX.66.0F38 0xcf - invalid. */
+
+/*  Opcode VEX.66.0F38 0xd0 - invalid. */
+/*  Opcode VEX.66.0F38 0xd1 - invalid. */
+/*  Opcode VEX.66.0F38 0xd2 - invalid. */
+/*  Opcode VEX.66.0F38 0xd3 - invalid. */
+/*  Opcode VEX.66.0F38 0xd4 - invalid. */
+/*  Opcode VEX.66.0F38 0xd5 - invalid. */
+/*  Opcode VEX.66.0F38 0xd6 - invalid. */
+/*  Opcode VEX.66.0F38 0xd7 - invalid. */
+/*  Opcode VEX.66.0F38 0xd8 - invalid. */
+/*  Opcode VEX.66.0F38 0xd9 - invalid. */
+/*  Opcode VEX.66.0F38 0xda - invalid. */
+/** Opcode VEX.66.0F38 0xdb. */
+FNIEMOP_STUB(iemOp_vaesimc_Vdq_Wdq);
+/** Opcode VEX.66.0F38 0xdc. */
+FNIEMOP_STUB(iemOp_vaesenc_Vdq_Wdq);
+/** Opcode VEX.66.0F38 0xdd. */
+FNIEMOP_STUB(iemOp_vaesenclast_Vdq_Wdq);
+/** Opcode VEX.66.0F38 0xde. */
+FNIEMOP_STUB(iemOp_vaesdec_Vdq_Wdq);
+/** Opcode VEX.66.0F38 0xdf. */
+FNIEMOP_STUB(iemOp_vaesdeclast_Vdq_Wdq);
+
+/*  Opcode VEX.66.0F38 0xe0 - invalid. */
+/*  Opcode VEX.66.0F38 0xe1 - invalid. */
+/*  Opcode VEX.66.0F38 0xe2 - invalid. */
+/*  Opcode VEX.66.0F38 0xe3 - invalid. */
+/*  Opcode VEX.66.0F38 0xe4 - invalid. */
+/*  Opcode VEX.66.0F38 0xe5 - invalid. */
+/*  Opcode VEX.66.0F38 0xe6 - invalid. */
+/*  Opcode VEX.66.0F38 0xe7 - invalid. */
+/*  Opcode VEX.66.0F38 0xe8 - invalid. */
+/*  Opcode VEX.66.0F38 0xe9 - invalid. */
+/*  Opcode VEX.66.0F38 0xea - invalid. */
+/*  Opcode VEX.66.0F38 0xeb - invalid. */
+/*  Opcode VEX.66.0F38 0xec - invalid. */
+/*  Opcode VEX.66.0F38 0xed - invalid. */
+/*  Opcode VEX.66.0F38 0xee - invalid. */
+/*  Opcode VEX.66.0F38 0xef - invalid. */
+
+
+/*  Opcode VEX.0F38 0xf0 - invalid (legacy only). */
+/*  Opcode VEX.66.0F38 0xf0 - invalid (legacy only). */
+/*  Opcode VEX.F3.0F38 0xf0 - invalid. */
+/*  Opcode VEX.F2.0F38 0xf0 - invalid (legacy only). */
+
+/*  Opcode VEX.0F38 0xf1 - invalid (legacy only). */
+/*  Opcode VEX.66.0F38 0xf1 - invalid (legacy only). */
+/*  Opcode VEX.F3.0F38 0xf1 - invalid. */
+/*  Opcode VEX.F2.0F38 0xf1 - invalid (legacy only). */
+
+/** Opcode VEX.0F38 0xf2 (vex only). */
+FNIEMOP_STUB(iemOp_andn_Gy_By_Ey);
+/*  Opcode VEX.66.0F38 0xf2 - invalid. */
+/*  Opcode VEX.F3.0F38 0xf2 - invalid. */
+/*  Opcode VEX.F2.0F38 0xf2 - invalid. */
+
+
+/*  Opcode VEX.0F38 0xf3 - invalid. */
+/*  Opcode VEX.66.0F38 0xf3 - invalid. */
+
+/*  Opcode VEX.F3.0F38 0xf3 /0 - invalid. */
+/** Opcode VEX.F3.0F38 0xf3 /1. */
+FNIEMOP_STUB_1(iemOp_VGrp17_blsr_By_Ey, uint8_t, bRm);
+/** Opcode VEX.F3.0F38 0xf3 /2. */
+FNIEMOP_STUB_1(iemOp_VGrp17_blsmsk_By_Ey, uint8_t, bRm);
+/** Opcode VEX.F3.0F38 0xf3 /3. */
+FNIEMOP_STUB_1(iemOp_VGrp17_blsi_By_Ey, uint8_t, bRm);
+/*  Opcode VEX.F3.0F38 0xf3 /4 - invalid. */
+/*  Opcode VEX.F3.0F38 0xf3 /5 - invalid. */
+/*  Opcode VEX.F3.0F38 0xf3 /6 - invalid. */
+/*  Opcode VEX.F3.0F38 0xf3 /7 - invalid. */
+
+/**
+ * Group 17 jump table for the VEX.F3 variant.
+ */
+IEM_STATIC const PFNIEMOPRM g_apfnVexGroup17_f3[] =
+{
+    /* /0 */ iemOp_InvalidWithRM,
+    /* /1 */ iemOp_VGrp17_blsr_By_Ey,
+    /* /2 */ iemOp_VGrp17_blsmsk_By_Ey,
+    /* /3 */ iemOp_VGrp17_blsi_By_Ey,
+    /* /4 */ iemOp_InvalidWithRM,
+    /* /5 */ iemOp_InvalidWithRM,
+    /* /6 */ iemOp_InvalidWithRM,
+    /* /7 */ iemOp_InvalidWithRM
+};
+AssertCompile(RT_ELEMENTS(g_apfnVexGroup17_f3) == 8);
+
+/** Opcode VEX.F3.0F38 0xf3 (vex only - group 17). */
+FNIEMOP_DEF(iemOp_VGrp17_f3)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    return FNIEMOP_CALL_1(g_apfnVexGroup17_f3[((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)], bRm);
+}
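+/*
+ * Note: iemOp_VGrp17_f3 above fetches the ModR/M byte and indexes
+ * g_apfnVexGroup17_f3 with bits 5:3 (the reg field).  For instance,
+ * bRm = 0xcb (11 001 011b) has reg field /1, so it dispatches to
+ * iemOp_VGrp17_blsr_By_Ey.
+ */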
+
+/*  Opcode VEX.F2.0F38 0xf3 - invalid (vex only - group 17). */
+
+
+/*  Opcode VEX.0F38 0xf4 - invalid. */
+/*  Opcode VEX.66.0F38 0xf4 - invalid. */
+/*  Opcode VEX.F3.0F38 0xf4 - invalid. */
+/*  Opcode VEX.F2.0F38 0xf4 - invalid. */
+
+/** Opcode VEX.0F38 0xf5 (vex only). */
+FNIEMOP_STUB(iemOp_bzhi_Gy_Ey_By);
+/*  Opcode VEX.66.0F38 0xf5 - invalid. */
+/** Opcode VEX.F3.0F38 0xf5 (vex only). */
+FNIEMOP_STUB(iemOp_pext_Gy_By_Ey);
+/** Opcode VEX.F2.0F38 0xf5 (vex only). */
+FNIEMOP_STUB(iemOp_pdep_Gy_By_Ey);
+
+/*  Opcode VEX.0F38 0xf6 - invalid. */
+/*  Opcode VEX.66.0F38 0xf6 - invalid (legacy only). */
+/*  Opcode VEX.F3.0F38 0xf6 - invalid (legacy only). */
+/** Opcode VEX.F2.0F38 0xf6 (vex only). */
+FNIEMOP_STUB(iemOp_mulx_By_Gy_rDX_Ey);
+
+/** Opcode VEX.0F38 0xf7 (vex only). */
+FNIEMOP_STUB(iemOp_bextr_Gy_Ey_By);
+/** Opcode VEX.66.0F38 0xf7 (vex only). */
+FNIEMOP_STUB(iemOp_shlx_Gy_Ey_By);
+/** Opcode VEX.F3.0F38 0xf7 (vex only). */
+FNIEMOP_STUB(iemOp_sarx_Gy_Ey_By);
+/** Opcode VEX.F2.0F38 0xf7 (vex only). */
+FNIEMOP_STUB(iemOp_shrx_Gy_Ey_By);
+
+/*  Opcode VEX.0F38 0xf8 - invalid. */
+/*  Opcode VEX.66.0F38 0xf8 - invalid. */
+/*  Opcode VEX.F3.0F38 0xf8 - invalid. */
+/*  Opcode VEX.F2.0F38 0xf8 - invalid. */
+
+/*  Opcode VEX.0F38 0xf9 - invalid. */
+/*  Opcode VEX.66.0F38 0xf9 - invalid. */
+/*  Opcode VEX.F3.0F38 0xf9 - invalid. */
+/*  Opcode VEX.F2.0F38 0xf9 - invalid. */
+
+/*  Opcode VEX.0F38 0xfa - invalid. */
+/*  Opcode VEX.66.0F38 0xfa - invalid. */
+/*  Opcode VEX.F3.0F38 0xfa - invalid. */
+/*  Opcode VEX.F2.0F38 0xfa - invalid. */
+
+/*  Opcode VEX.0F38 0xfb - invalid. */
+/*  Opcode VEX.66.0F38 0xfb - invalid. */
+/*  Opcode VEX.F3.0F38 0xfb - invalid. */
+/*  Opcode VEX.F2.0F38 0xfb - invalid. */
+
+/*  Opcode VEX.0F38 0xfc - invalid. */
+/*  Opcode VEX.66.0F38 0xfc - invalid. */
+/*  Opcode VEX.F3.0F38 0xfc - invalid. */
+/*  Opcode VEX.F2.0F38 0xfc - invalid. */
+
+/*  Opcode VEX.0F38 0xfd - invalid. */
+/*  Opcode VEX.66.0F38 0xfd - invalid. */
+/*  Opcode VEX.F3.0F38 0xfd - invalid. */
+/*  Opcode VEX.F2.0F38 0xfd - invalid. */
+
+/*  Opcode VEX.0F38 0xfe - invalid. */
+/*  Opcode VEX.66.0F38 0xfe - invalid. */
+/*  Opcode VEX.F3.0F38 0xfe - invalid. */
+/*  Opcode VEX.F2.0F38 0xfe - invalid. */
+
+/*  Opcode VEX.0F38 0xff - invalid. */
+/*  Opcode VEX.66.0F38 0xff - invalid. */
+/*  Opcode VEX.F3.0F38 0xff - invalid. */
+/*  Opcode VEX.F2.0F38 0xff - invalid. */
+
+
+/**
+ * VEX opcode map \#2.
+ *
+ * @sa      g_apfnThreeByte0f38
+ */
+IEM_STATIC const PFNIEMOP g_apfnVexMap2[] =
+{
+    /*          no prefix,                  066h prefix                 f3h prefix,                 f2h prefix */
+    /* 0x00 */  iemOp_InvalidNeedRM,        iemOp_vpshufb_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x01 */  iemOp_InvalidNeedRM,        iemOp_vphaddw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x02 */  iemOp_InvalidNeedRM,        iemOp_vphaddd_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x03 */  iemOp_InvalidNeedRM,        iemOp_vphaddsw_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x04 */  iemOp_InvalidNeedRM,        iemOp_vpmaddubsw_Vx_Hx_Wx,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x05 */  iemOp_InvalidNeedRM,        iemOp_vphsubw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x06 */  iemOp_InvalidNeedRM,        iemOp_vphsubd_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x07 */  iemOp_InvalidNeedRM,        iemOp_vphsubsw_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x08 */  iemOp_InvalidNeedRM,        iemOp_vpsignb_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x09 */  iemOp_InvalidNeedRM,        iemOp_vpsignw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x0a */  iemOp_InvalidNeedRM,        iemOp_vpsignd_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x0b */  iemOp_InvalidNeedRM,        iemOp_vpmulhrsw_Vx_Hx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x0c */  iemOp_InvalidNeedRM,        iemOp_vpermilps_Vx_Hx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x0d */  iemOp_InvalidNeedRM,        iemOp_vpermilpd_Vx_Hx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x0e */  iemOp_InvalidNeedRM,        iemOp_vtestps_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x0f */  iemOp_InvalidNeedRM,        iemOp_vtestpd_Vx_Wx,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0x10 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x11 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x12 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x13 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x14 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x15 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x16 */  iemOp_InvalidNeedRM,        iemOp_vpermps_Vqq_Hqq_Wqq,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x17 */  iemOp_InvalidNeedRM,        iemOp_vptest_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x18 */  iemOp_InvalidNeedRM,        iemOp_vbroadcastss_Vx_Wd,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x19 */  iemOp_InvalidNeedRM,        iemOp_vbroadcastsd_Vqq_Wq,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x1a */  iemOp_InvalidNeedRM,        iemOp_vbroadcastf128_Vqq_Mdq, iemOp_InvalidNeedRM,      iemOp_InvalidNeedRM,
+    /* 0x1b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x1c */  iemOp_InvalidNeedRM,        iemOp_vpabsb_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x1d */  iemOp_InvalidNeedRM,        iemOp_vpabsw_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x1e */  iemOp_InvalidNeedRM,        iemOp_vpabsd_Vx_Wx,         iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x1f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x20 */  iemOp_InvalidNeedRM,        iemOp_vpmovsxbw_Vx_UxMq,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x21 */  iemOp_InvalidNeedRM,        iemOp_vpmovsxbd_Vx_UxMd,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x22 */  iemOp_InvalidNeedRM,        iemOp_vpmovsxbq_Vx_UxMw,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x23 */  iemOp_InvalidNeedRM,        iemOp_vpmovsxwd_Vx_UxMq,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x24 */  iemOp_InvalidNeedRM,        iemOp_vpmovsxwq_Vx_UxMd,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x25 */  iemOp_InvalidNeedRM,        iemOp_vpmovsxdq_Vx_UxMq,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x26 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x27 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x28 */  iemOp_InvalidNeedRM,        iemOp_vpmuldq_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x29 */  iemOp_InvalidNeedRM,        iemOp_vpcmpeqq_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2a */  iemOp_InvalidNeedRM,        iemOp_vmovntdqa_Vx_Hx_Mx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2b */  iemOp_InvalidNeedRM,        iemOp_vpackusdw_Vx_Hx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2c */  iemOp_InvalidNeedRM,        iemOp_vmaskmovps_Vx_Hx_Mx,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2d */  iemOp_InvalidNeedRM,        iemOp_vmaskmovpd_Vx_Hx_Mx,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2e */  iemOp_InvalidNeedRM,        iemOp_vmaskmovps_Mx_Hx_Vx,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x2f */  iemOp_InvalidNeedRM,        iemOp_vmaskmovpd_Mx_Hx_Vx,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0x30 */  iemOp_InvalidNeedRM,        iemOp_vpmovzxbw_Vx_UxMq,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x31 */  iemOp_InvalidNeedRM,        iemOp_vpmovzxbd_Vx_UxMd,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x32 */  iemOp_InvalidNeedRM,        iemOp_vpmovzxbq_Vx_UxMw,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x33 */  iemOp_InvalidNeedRM,        iemOp_vpmovzxwd_Vx_UxMq,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x34 */  iemOp_InvalidNeedRM,        iemOp_vpmovzxwq_Vx_UxMd,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x35 */  iemOp_InvalidNeedRM,        iemOp_vpmovzxdq_Vx_UxMq,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x36 */  iemOp_InvalidNeedRM,        iemOp_vpermd_Vqq_Hqq_Wqq,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x37 */  iemOp_InvalidNeedRM,        iemOp_vpcmpgtq_Vx_Hx_Wx,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x38 */  iemOp_InvalidNeedRM,        iemOp_vpminsb_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x39 */  iemOp_InvalidNeedRM,        iemOp_vpminsd_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3a */  iemOp_InvalidNeedRM,        iemOp_vpminuw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3b */  iemOp_InvalidNeedRM,        iemOp_vpminud_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3c */  iemOp_InvalidNeedRM,        iemOp_vpmaxsb_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3d */  iemOp_InvalidNeedRM,        iemOp_vpmaxsd_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3e */  iemOp_InvalidNeedRM,        iemOp_vpmaxuw_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x3f */  iemOp_InvalidNeedRM,        iemOp_vpmaxud_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0x40 */  iemOp_InvalidNeedRM,        iemOp_vpmulld_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x41 */  iemOp_InvalidNeedRM,        iemOp_vphminposuw_Vdq_Wdq,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x42 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x43 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x44 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x45 */  iemOp_InvalidNeedRM,        iemOp_vpsrlvd_q_Vx_Hx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x46 */  iemOp_InvalidNeedRM,        iemOp_vpsravd_Vx_Hx_Wx,     iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x47 */  iemOp_InvalidNeedRM,        iemOp_vpsllvd_q_Vx_Hx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x48 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x49 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x4f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x50 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x51 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x52 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x53 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x54 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x55 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x56 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x57 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x58 */  iemOp_InvalidNeedRM,        iemOp_vpbroadcastd_Vx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x59 */  iemOp_InvalidNeedRM,        iemOp_vpbroadcastq_Vx_Wx,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0x5a */  iemOp_InvalidNeedRM,        iemOp_vbroadcasti128_Vqq_Mdq, iemOp_InvalidNeedRM,      iemOp_InvalidNeedRM,
+    /* 0x5b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x5c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x5d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x5e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x5f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x60 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x61 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x62 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x63 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x64 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x65 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x66 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x67 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x68 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x69 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x6f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x70 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x71 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x72 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x73 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x74 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x75 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x76 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x77 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x78 */  iemOp_InvalidNeedRM,        iemOp_vpbroadcastb_Vx_Wx,   iemOp_InvalidNeedRM,      iemOp_InvalidNeedRM,
+    /* 0x79 */  iemOp_InvalidNeedRM,        iemOp_vpbroadcastw_Vx_Wx,   iemOp_InvalidNeedRM,      iemOp_InvalidNeedRM,
+    /* 0x7a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7c */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7e */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x7f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x80 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x81 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x82 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x83 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x84 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x85 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x86 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x87 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x88 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x89 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8a */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8b */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8c */  iemOp_InvalidNeedRM,        iemOp_vpmaskmovd_q_Vx_Hx_Mx, iemOp_InvalidNeedRM,     iemOp_InvalidNeedRM,
+    /* 0x8d */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x8e */  iemOp_InvalidNeedRM,        iemOp_vpmaskmovd_q_Mx_Vx_Hx, iemOp_InvalidNeedRM,     iemOp_InvalidNeedRM,
+    /* 0x8f */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0x90 */  iemOp_InvalidNeedRM,        iemOp_vgatherdd_q_Vx_Hx_Wx, iemOp_InvalidNeedRM,      iemOp_InvalidNeedRM,
+    /* 0x91 */  iemOp_InvalidNeedRM,        iemOp_vgatherqd_q_Vx_Hx_Wx, iemOp_InvalidNeedRM,      iemOp_InvalidNeedRM,
+    /* 0x92 */  iemOp_InvalidNeedRM,        iemOp_vgatherdps_d_Vx_Hx_Wx, iemOp_InvalidNeedRM,     iemOp_InvalidNeedRM,
+    /* 0x93 */  iemOp_InvalidNeedRM,        iemOp_vgatherqps_d_Vx_Hx_Wx, iemOp_InvalidNeedRM,     iemOp_InvalidNeedRM,
+    /* 0x94 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x95 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0x96 */  iemOp_InvalidNeedRM,        iemOp_vfmaddsub132ps_q_Vx_Hx_Wx, iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0x97 */  iemOp_InvalidNeedRM,        iemOp_vfmsubadd132ps_d_Vx_Hx_Wx, iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0x98 */  iemOp_InvalidNeedRM,        iemOp_vfmadd132ps_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0x99 */  iemOp_InvalidNeedRM,        iemOp_vfmadd132ss_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0x9a */  iemOp_InvalidNeedRM,        iemOp_vfmsub132ps_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0x9b */  iemOp_InvalidNeedRM,        iemOp_vfmsub132ss_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0x9c */  iemOp_InvalidNeedRM,        iemOp_vfnmadd132ps_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0x9d */  iemOp_InvalidNeedRM,        iemOp_vfnmadd132ss_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0x9e */  iemOp_InvalidNeedRM,        iemOp_vfnmsub132ps_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0x9f */  iemOp_InvalidNeedRM,        iemOp_vfnmsub132ss_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+
+    /* 0xa0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xa6 */  iemOp_InvalidNeedRM,        iemOp_vfmaddsub213ps_d_Vx_Hx_Wx, iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xa7 */  iemOp_InvalidNeedRM,        iemOp_vfmsubadd213ps_d_Vx_Hx_Wx, iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xa8 */  iemOp_InvalidNeedRM,        iemOp_vfmadd213ps_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xa9 */  iemOp_InvalidNeedRM,        iemOp_vfmadd213ss_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xaa */  iemOp_InvalidNeedRM,        iemOp_vfmsub213ps_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xab */  iemOp_InvalidNeedRM,        iemOp_vfmsub213ss_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xac */  iemOp_InvalidNeedRM,        iemOp_vfnmadd213ps_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xad */  iemOp_InvalidNeedRM,        iemOp_vfnmadd213ss_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xae */  iemOp_InvalidNeedRM,        iemOp_vfnmsub213ps_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xaf */  iemOp_InvalidNeedRM,        iemOp_vfnmsub213ss_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+
+    /* 0xb0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xb6 */  iemOp_InvalidNeedRM,        iemOp_vfmaddsub231ps_d_Vx_Hx_Wx, iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xb7 */  iemOp_InvalidNeedRM,        iemOp_vfmsubadd231ps_d_Vx_Hx_Wx, iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xb8 */  iemOp_InvalidNeedRM,        iemOp_vfmadd231ps_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xb9 */  iemOp_InvalidNeedRM,        iemOp_vfmadd231ss_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xba */  iemOp_InvalidNeedRM,        iemOp_vfmsub231ps_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xbb */  iemOp_InvalidNeedRM,        iemOp_vfmsub231ss_d_Vx_Hx_Wx,    iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xbc */  iemOp_InvalidNeedRM,        iemOp_vfnmadd231ps_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xbd */  iemOp_InvalidNeedRM,        iemOp_vfnmadd231ss_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xbe */  iemOp_InvalidNeedRM,        iemOp_vfnmsub231ps_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+    /* 0xbf */  iemOp_InvalidNeedRM,        iemOp_vfnmsub231ss_d_Vx_Hx_Wx,   iemOp_InvalidNeedRM, iemOp_InvalidNeedRM,
+
+    /* 0xc0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xc8 */  iemOp_vsha1nexte_Vdq_Wdq,   iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xc9 */  iemOp_vsha1msg1_Vdq_Wdq,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xca */  iemOp_vsha1msg2_Vdq_Wdq,    iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xcb */  iemOp_vsha256rnds2_Vdq_Wdq, iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xcc */  iemOp_vsha256msg1_Vdq_Wdq,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xcd */  iemOp_vsha256msg2_Vdq_Wdq,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xce */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xcf */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xd0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xd9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xda */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xdb */  iemOp_InvalidNeedRM,        iemOp_vaesimc_Vdq_Wdq,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdc */  iemOp_InvalidNeedRM,        iemOp_vaesenc_Vdq_Wdq,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdd */  iemOp_InvalidNeedRM,        iemOp_vaesenclast_Vdq_Wdq,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xde */  iemOp_InvalidNeedRM,        iemOp_vaesdec_Vdq_Wdq,      iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xdf */  iemOp_InvalidNeedRM,        iemOp_vaesdeclast_Vdq_Wdq,  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+
+    /* 0xe0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe2 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe3 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe5 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe6 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe7 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xe9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xea */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xeb */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xec */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xed */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xee */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xef */  IEMOP_X4(iemOp_InvalidNeedRM),
+
+    /* 0xf0 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xf1 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xf2 */  iemOp_andn_Gy_By_Ey,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,
+    /* 0xf3 */  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_VGrp17_f3,            iemOp_InvalidNeedRM,
+    /* 0xf4 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xf5 */  iemOp_bzhi_Gy_Ey_By,        iemOp_InvalidNeedRM,        iemOp_pext_Gy_By_Ey,        iemOp_pdep_Gy_By_Ey,
+    /* 0xf6 */  iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_InvalidNeedRM,        iemOp_mulx_By_Gy_rDX_Ey,
+    /* 0xf7 */  iemOp_bextr_Gy_Ey_By,       iemOp_shlx_Gy_Ey_By,        iemOp_sarx_Gy_Ey_By,        iemOp_shrx_Gy_Ey_By,
+    /* 0xf8 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xf9 */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xfa */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xfb */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xfc */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xfd */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xfe */  IEMOP_X4(iemOp_InvalidNeedRM),
+    /* 0xff */  IEMOP_X4(iemOp_InvalidNeedRM),
+};
+AssertCompile(RT_ELEMENTS(g_apfnVexMap2) == 1024);
+
+/** @} */
+
diff --git a/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap3.cpp.h b/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap3.cpp.h
new file mode 100644
index 0000000..f0f69e8
--- /dev/null
+++ b/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap3.cpp.h
@@ -0,0 +1,557 @@
+/* $Id: IEMAllInstructionsVexMap3.cpp.h $ */
+/** @file
+ * IEM - Instruction Decoding and Emulation, 0x0f 0x3a map.
+ *
+ * @remarks IEMAllInstructionsThree0f3a.cpp.h is the legacy (non-VEX) mirror of this file.
+ *          Any update here is likely needed in that file too.
+ */
+
+/*
+ * Copyright (C) 2011-2017 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @name VEX Opcode Map 3
+ * @{
+ */
+
+/** Opcode VEX.66.0F3A 0x00. */
+FNIEMOP_STUB(iemOp_vpermq_Vqq_Wqq_Ib);
+/** Opcode VEX.66.0F3A 0x01. */
+FNIEMOP_STUB(iemOp_vpermpd_Vqq_Wqq_Ib);
+/** Opcode VEX.66.0F3A 0x02. */
+FNIEMOP_STUB(iemOp_vpblendd_Vx_Wx_Ib);
+/*  Opcode VEX.66.0F3A 0x03 - invalid */
+/** Opcode VEX.66.0F3A 0x04. */
+FNIEMOP_STUB(iemOp_vpermilps_Vx_Wx_Ib);
+/** Opcode VEX.66.0F3A 0x05. */
+FNIEMOP_STUB(iemOp_vpermilpd_Vx_Wx_Ib);
+/** Opcode VEX.66.0F3A 0x06 (vex only) */
+FNIEMOP_STUB(iemOp_vperm2f128_Vqq_Hqq_Wqq_Ib);
+/*  Opcode VEX.66.0F3A 0x07 - invalid */
+/** Opcode VEX.66.0F3A 0x08. */
+FNIEMOP_STUB(iemOp_vroundps_Vx_Wx_Ib);
+/** Opcode VEX.66.0F3A 0x09. */
+FNIEMOP_STUB(iemOp_vroundpd_Vx_Wx_Ib);
+/** Opcode VEX.66.0F3A 0x0a. */
+FNIEMOP_STUB(iemOp_vroundss_Vss_Wss_Ib);
+/** Opcode VEX.66.0F3A 0x0b. */
+FNIEMOP_STUB(iemOp_vroundsd_Vsd_Wsd_Ib);
+/** Opcode VEX.66.0F3A 0x0c. */
+FNIEMOP_STUB(iemOp_vblendps_Vx_Hx_Wx_Ib);
+/** Opcode VEX.66.0F3A 0x0d. */
+FNIEMOP_STUB(iemOp_vblendpd_Vx_Hx_Wx_Ib);
+/** Opcode VEX.66.0F3A 0x0e. */
+FNIEMOP_STUB(iemOp_vblendw_Vx_Hx_Wx_Ib);
+/*  Opcode VEX.0F3A 0x0f - invalid */
+/** Opcode VEX.66.0F3A 0x0f. */
+FNIEMOP_STUB(iemOp_vpalignr_Vx_Hx_Wx_Ib);
+
+
+/*  Opcode VEX.66.0F3A 0x10 - invalid */
+/*  Opcode VEX.66.0F3A 0x11 - invalid */
+/*  Opcode VEX.66.0F3A 0x12 - invalid */
+/*  Opcode VEX.66.0F3A 0x13 - invalid */
+/** Opcode VEX.66.0F3A 0x14. */
+FNIEMOP_STUB(iemOp_vpextrb_RdMb_Vdq_Ib);
+/** Opcode VEX.66.0F3A 0x15. */
+FNIEMOP_STUB(iemOp_vpextrw_RdMw_Vdq_Ib);
+/** Opcode VEX.66.0F3A 0x16. */
+FNIEMOP_STUB(iemOp_vpextrd_q_RdMw_Vdq_Ib);
+/** Opcode VEX.66.0F3A 0x17. */
+FNIEMOP_STUB(iemOp_vextractps_Ed_Vdq_Ib);
+/** Opcode VEX.66.0F3A 0x18 (vex only). */
+FNIEMOP_STUB(iemOp_vinsertf128_Vqq_Hqq_Wqq_Ib);
+/** Opcode VEX.66.0F3A 0x19 (vex only). */
+FNIEMOP_STUB(iemOp_vextractf128_Wdq_Vqq_Ib);
+/*  Opcode VEX.66.0F3A 0x1a - invalid */
+/*  Opcode VEX.66.0F3A 0x1b - invalid */
+/*  Opcode VEX.66.0F3A 0x1c - invalid */
+/** Opcode VEX.66.0F3A 0x1d (vex only). */
+FNIEMOP_STUB(iemOp_vcvtps2ph_Wx_Vx_Ib);
+/*  Opcode VEX.66.0F3A 0x1e - invalid */
+/*  Opcode VEX.66.0F3A 0x1f - invalid */
+
+
+/** Opcode VEX.66.0F3A 0x20. */
+FNIEMOP_STUB(iemOp_vpinsrb_Vdq_Hdq_RyMb_Ib);
+/** Opcode VEX.66.0F3A 0x21. */
+FNIEMOP_STUB(iemOp_vinsertps_Vdq_Hdq_UdqMd_Ib);
+/** Opcode VEX.66.0F3A 0x22. */
+FNIEMOP_STUB(iemOp_vpinsrd_q_Vdq_Hdq_Ey_Ib);
+/*  Opcode VEX.66.0F3A 0x23 - invalid */
+/*  Opcode VEX.66.0F3A 0x24 - invalid */
+/*  Opcode VEX.66.0F3A 0x25 - invalid */
+/*  Opcode VEX.66.0F3A 0x26 - invalid */
+/*  Opcode VEX.66.0F3A 0x27 - invalid */
+/*  Opcode VEX.66.0F3A 0x28 - invalid */
+/*  Opcode VEX.66.0F3A 0x29 - invalid */
+/*  Opcode VEX.66.0F3A 0x2a - invalid */
+/*  Opcode VEX.66.0F3A 0x2b - invalid */
+/*  Opcode VEX.66.0F3A 0x2c - invalid */
+/*  Opcode VEX.66.0F3A 0x2d - invalid */
+/*  Opcode VEX.66.0F3A 0x2e - invalid */
+/*  Opcode VEX.66.0F3A 0x2f - invalid */
+
+
+/*  Opcode VEX.66.0F3A 0x30 - invalid */
+/*  Opcode VEX.66.0F3A 0x31 - invalid */
+/*  Opcode VEX.66.0F3A 0x32 - invalid */
+/*  Opcode VEX.66.0F3A 0x33 - invalid */
+/*  Opcode VEX.66.0F3A 0x34 - invalid */
+/*  Opcode VEX.66.0F3A 0x35 - invalid */
+/*  Opcode VEX.66.0F3A 0x36 - invalid */
+/*  Opcode VEX.66.0F3A 0x37 - invalid */
+/** Opcode VEX.66.0F3A 0x38 (vex only). */
+FNIEMOP_STUB(iemOp_vinserti128_Vqq_Hqq_Wqq_Ib);
+/** Opcode VEX.66.0F3A 0x39 (vex only). */
+FNIEMOP_STUB(iemOp_vextracti128_Wdq_Vqq_Ib);
+/*  Opcode VEX.66.0F3A 0x3a - invalid */
+/*  Opcode VEX.66.0F3A 0x3b - invalid */
+/*  Opcode VEX.66.0F3A 0x3c - invalid */
+/*  Opcode VEX.66.0F3A 0x3d - invalid */
+/*  Opcode VEX.66.0F3A 0x3e - invalid */
+/*  Opcode VEX.66.0F3A 0x3f - invalid */
+
+
+/** Opcode VEX.66.0F3A 0x40. */
+FNIEMOP_STUB(iemOp_vdpps_Vx_Hx_Wx_Ib);
+/** Opcode VEX.66.0F3A 0x41. */
+FNIEMOP_STUB(iemOp_vdppd_Vdq_Hdq_Wdq_Ib);
+/** Opcode VEX.66.0F3A 0x42. */
+FNIEMOP_STUB(iemOp_vmpsadbw_Vx_Hx_Wx_Ib);
+/*  Opcode VEX.66.0F3A 0x43 - invalid */
+/** Opcode VEX.66.0F3A 0x44. */
+FNIEMOP_STUB(iemOp_vpclmulqdq_Vdq_Hdq_Wdq_Ib);
+/*  Opcode VEX.66.0F3A 0x45 - invalid */
+/** Opcode VEX.66.0F3A 0x46 (vex only). */
+FNIEMOP_STUB(iemOp_vperm2i128_Vqq_Hqq_Wqq_Ib);
+/*  Opcode VEX.66.0F3A 0x47 - invalid */
+/** Opcode VEX.66.0F3A 0x48 (AMD tables only). */
+FNIEMOP_STUB(iemOp_vpermil2ps_Vx_Hx_Wp_Lx);
+/** Opcode VEX.66.0F3A 0x49 (AMD tables only). */
+FNIEMOP_STUB(iemOp_vpermil2pd_Vx_Hx_Wp_Lx);
+/** Opcode VEX.66.0F3A 0x4a (vex only). */
+FNIEMOP_STUB(iemOp_vblendvps_Vx_Hx_Wx_Lx);
+/** Opcode VEX.66.0F3A 0x4b (vex only). */
+FNIEMOP_STUB(iemOp_vblendvpd_Vx_Hx_Wx_Lx);
+/** Opcode VEX.66.0F3A 0x4c (vex only). */
+FNIEMOP_STUB(iemOp_vpblendvb_Vx_Hx_Wx_Lx);
+/*  Opcode VEX.66.0F3A 0x4d - invalid */
+/*  Opcode VEX.66.0F3A 0x4e - invalid */
+/*  Opcode VEX.66.0F3A 0x4f - invalid */
+
+
+/*  Opcode VEX.66.0F3A 0x50 - invalid */
+/*  Opcode VEX.66.0F3A 0x51 - invalid */
+/*  Opcode VEX.66.0F3A 0x52 - invalid */
+/*  Opcode VEX.66.0F3A 0x53 - invalid */
+/*  Opcode VEX.66.0F3A 0x54 - invalid */
+/*  Opcode VEX.66.0F3A 0x55 - invalid */
+/*  Opcode VEX.66.0F3A 0x56 - invalid */
+/*  Opcode VEX.66.0F3A 0x57 - invalid */
+/*  Opcode VEX.66.0F3A 0x58 - invalid */
+/*  Opcode VEX.66.0F3A 0x59 - invalid */
+/*  Opcode VEX.66.0F3A 0x5a - invalid */
+/*  Opcode VEX.66.0F3A 0x5b - invalid */
+/** Opcode VEX.66.0F3A 0x5c (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmaddsubps_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x5d (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmaddsubpd_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x5e (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmsubaddps_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x5f (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmsubaddpd_Vx_Lx_Wx_Hx);
+
+
+/** Opcode VEX.66.0F3A 0x60. */
+FNIEMOP_STUB(iemOp_vpcmpestrm_Vdq_Wdq_Ib);
+/** Opcode VEX.66.0F3A 0x61. */
+FNIEMOP_STUB(iemOp_vpcmpestri_Vdq_Wdq_Ib);
+/** Opcode VEX.66.0F3A 0x62. */
+FNIEMOP_STUB(iemOp_vpcmpistrm_Vdq_Wdq_Ib);
+/** Opcode VEX.66.0F3A 0x63. */
+FNIEMOP_STUB(iemOp_vpcmpistri_Vdq_Wdq_Ib);
+/*  Opcode VEX.66.0F3A 0x64 - invalid */
+/*  Opcode VEX.66.0F3A 0x65 - invalid */
+/*  Opcode VEX.66.0F3A 0x66 - invalid */
+/*  Opcode VEX.66.0F3A 0x67 - invalid */
+/** Opcode VEX.66.0F3A 0x68 (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmaddps_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x69 (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmaddpd_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x6a (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmaddss_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x6b (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmaddsd_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x6c (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmsubps_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x6d (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmsubpd_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x6e (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmsubss_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x6f (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfmsubsd_Vx_Lx_Wx_Hx);
+
+/*  Opcode VEX.66.0F3A 0x70 - invalid */
+/*  Opcode VEX.66.0F3A 0x71 - invalid */
+/*  Opcode VEX.66.0F3A 0x72 - invalid */
+/*  Opcode VEX.66.0F3A 0x73 - invalid */
+/*  Opcode VEX.66.0F3A 0x74 - invalid */
+/*  Opcode VEX.66.0F3A 0x75 - invalid */
+/*  Opcode VEX.66.0F3A 0x76 - invalid */
+/*  Opcode VEX.66.0F3A 0x77 - invalid */
+/** Opcode VEX.66.0F3A 0x78 (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfnmaddps_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x79 (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfnmaddpd_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x7a (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfnmaddss_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x7b (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfnmaddsd_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x7c (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfnmsubps_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x7d (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfnmsubpd_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x7e (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfnmsubss_Vx_Lx_Wx_Hx);
+/** Opcode VEX.66.0F3A 0x7f (AMD tables only). */
+FNIEMOP_STUB(iemOp_vfnmsubsd_Vx_Lx_Wx_Hx);
+
+/*  Opcodes VEX.0F3A 0x80 thru 0xbf are unused.  */
+
+
+/*  Opcode VEX.0F3A 0xc0 - invalid */
+/*  Opcode VEX.0F3A 0xc1 - invalid */
+/*  Opcode VEX.0F3A 0xc2 - invalid */
+/*  Opcode VEX.0F3A 0xc3 - invalid */
+/*  Opcode VEX.0F3A 0xc4 - invalid */
+/*  Opcode VEX.0F3A 0xc5 - invalid */
+/*  Opcode VEX.0F3A 0xc6 - invalid */
+/*  Opcode VEX.0F3A 0xc7 - invalid */
+/*  Opcode VEX.0F3A 0xc8 - invalid */
+/*  Opcode VEX.0F3A 0xc9 - invalid */
+/*  Opcode VEX.0F3A 0xca - invalid */
+/*  Opcode VEX.0F3A 0xcb - invalid */
+/** Opcode VEX.0F3A 0xcc. */
+FNIEMOP_STUB(iemOp_vsha1rnds4_Vdq_Wdq_Ib);
+/*  Opcode VEX.0F3A 0xcd - invalid */
+/*  Opcode VEX.0F3A 0xce - invalid */
+/*  Opcode VEX.0F3A 0xcf - invalid */
+
+
+/*  Opcode VEX.66.0F3A 0xd0 - invalid */
+/*  Opcode VEX.66.0F3A 0xd1 - invalid */
+/*  Opcode VEX.66.0F3A 0xd2 - invalid */
+/*  Opcode VEX.66.0F3A 0xd3 - invalid */
+/*  Opcode VEX.66.0F3A 0xd4 - invalid */
+/*  Opcode VEX.66.0F3A 0xd5 - invalid */
+/*  Opcode VEX.66.0F3A 0xd6 - invalid */
+/*  Opcode VEX.66.0F3A 0xd7 - invalid */
+/*  Opcode VEX.66.0F3A 0xd8 - invalid */
+/*  Opcode VEX.66.0F3A 0xd9 - invalid */
+/*  Opcode VEX.66.0F3A 0xda - invalid */
+/*  Opcode VEX.66.0F3A 0xdb - invalid */
+/*  Opcode VEX.66.0F3A 0xdc - invalid */
+/*  Opcode VEX.66.0F3A 0xdd - invalid */
+/*  Opcode VEX.66.0F3A 0xde - invalid */
+/** Opcode VEX.66.0F3A 0xdf (vaeskeygenassist). */
+FNIEMOP_STUB(iemOp_vaeskeygen_Vdq_Wdq_Ib);
+
+
+/** Opcode VEX.F2.0F3A 0xf0 (vex only). */
+FNIEMOP_STUB(iemOp_rorx_Gy_Ey_Ib);
+
+
+/**
+ * VEX opcode map \#3.
+ *
+ * @sa      g_apfnThreeByte0f3a
+ */
+IEM_STATIC const PFNIEMOP g_apfnVexMap3[] =
+{
+    /*          no prefix,                  066h prefix,                f3h prefix,                 f2h prefix */
+    /* 0x00 */  iemOp_InvalidNeedRMImm8,    iemOp_vpermq_Vqq_Wqq_Ib,    iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x01 */  iemOp_InvalidNeedRMImm8,    iemOp_vpermpd_Vqq_Wqq_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x02 */  iemOp_InvalidNeedRMImm8,    iemOp_vpblendd_Vx_Wx_Ib,    iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x03 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x04 */  iemOp_InvalidNeedRMImm8,    iemOp_vpermilps_Vx_Wx_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x05 */  iemOp_InvalidNeedRMImm8,    iemOp_vpermilpd_Vx_Wx_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x06 */  iemOp_InvalidNeedRMImm8,    iemOp_vperm2f128_Vqq_Hqq_Wqq_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x07 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x08 */  iemOp_InvalidNeedRMImm8,    iemOp_vroundps_Vx_Wx_Ib,    iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x09 */  iemOp_InvalidNeedRMImm8,    iemOp_vroundpd_Vx_Wx_Ib,    iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0a */  iemOp_InvalidNeedRMImm8,    iemOp_vroundss_Vss_Wss_Ib,  iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0b */  iemOp_InvalidNeedRMImm8,    iemOp_vroundsd_Vsd_Wsd_Ib,  iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0c */  iemOp_InvalidNeedRMImm8,    iemOp_vblendps_Vx_Hx_Wx_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0d */  iemOp_InvalidNeedRMImm8,    iemOp_vblendpd_Vx_Hx_Wx_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0e */  iemOp_InvalidNeedRMImm8,    iemOp_vblendw_Vx_Hx_Wx_Ib,  iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x0f */  iemOp_InvalidNeedRMImm8,    iemOp_vpalignr_Vx_Hx_Wx_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+
+    /* 0x10 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x11 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x12 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x13 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x14 */  iemOp_InvalidNeedRMImm8,    iemOp_vpextrb_RdMb_Vdq_Ib,  iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x15 */  iemOp_InvalidNeedRMImm8,    iemOp_vpextrw_RdMw_Vdq_Ib,  iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x16 */  iemOp_InvalidNeedRMImm8,    iemOp_vpextrd_q_RdMw_Vdq_Ib, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x17 */  iemOp_InvalidNeedRMImm8,    iemOp_vextractps_Ed_Vdq_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x18 */  iemOp_InvalidNeedRMImm8,    iemOp_vinsertf128_Vqq_Hqq_Wqq_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x19 */  iemOp_InvalidNeedRMImm8,    iemOp_vextractf128_Wdq_Vqq_Ib, iemOp_InvalidNeedRMImm8,  iemOp_InvalidNeedRMImm8,
+    /* 0x1a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x1b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x1c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x1d */  iemOp_InvalidNeedRMImm8,    iemOp_vcvtps2ph_Wx_Vx_Ib,   iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x1e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x1f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x20 */  iemOp_InvalidNeedRMImm8,    iemOp_vpinsrb_Vdq_Hdq_RyMb_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x21 */  iemOp_InvalidNeedRMImm8,    iemOp_vinsertps_Vdq_Hdq_UdqMd_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x22 */  iemOp_InvalidNeedRMImm8,    iemOp_vpinsrd_q_Vdq_Hdq_Ey_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x23 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x24 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x25 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x26 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x27 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x28 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x29 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x2f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x30 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x31 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x32 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x33 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x34 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x35 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x36 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x37 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x38 */  iemOp_InvalidNeedRMImm8,    iemOp_vinserti128_Vqq_Hqq_Wqq_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x39 */  iemOp_InvalidNeedRMImm8,    iemOp_vextracti128_Wdq_Vqq_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x3a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x3f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x40 */  iemOp_InvalidNeedRMImm8,    iemOp_vdpps_Vx_Hx_Wx_Ib,    iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x41 */  iemOp_InvalidNeedRMImm8,    iemOp_vdppd_Vdq_Hdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x42 */  iemOp_InvalidNeedRMImm8,    iemOp_vmpsadbw_Vx_Hx_Wx_Ib, iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0x43 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x44 */  iemOp_InvalidNeedRMImm8,    iemOp_vpclmulqdq_Vdq_Hdq_Wdq_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x45 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x46 */  iemOp_InvalidNeedRMImm8,    iemOp_vperm2i128_Vqq_Hqq_Wqq_Ib, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x47 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x48 */  iemOp_InvalidNeedRMImm8,    iemOp_vpermil2ps_Vx_Hx_Wp_Lx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x49 */  iemOp_InvalidNeedRMImm8,    iemOp_vpermil2pd_Vx_Hx_Wp_Lx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x4a */  iemOp_InvalidNeedRMImm8,    iemOp_vblendvps_Vx_Hx_Wx_Lx, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x4b */  iemOp_InvalidNeedRMImm8,    iemOp_vblendvpd_Vx_Hx_Wx_Lx, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x4c */  iemOp_InvalidNeedRMImm8,    iemOp_vpblendvb_Vx_Hx_Wx_Lx, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x4d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x4e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x4f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x50 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x51 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x52 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x53 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x54 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x55 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x56 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x57 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x58 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x59 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x5a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x5b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x5c */  iemOp_InvalidNeedRMImm8,    iemOp_vfmaddsubps_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x5d */  iemOp_InvalidNeedRMImm8,    iemOp_vfmaddsubpd_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x5e */  iemOp_InvalidNeedRMImm8,    iemOp_vfmsubaddps_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x5f */  iemOp_InvalidNeedRMImm8,    iemOp_vfmsubaddpd_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+
+    /* 0x60 */  iemOp_InvalidNeedRMImm8,    iemOp_vpcmpestrm_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x61 */  iemOp_InvalidNeedRMImm8,    iemOp_vpcmpestri_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x62 */  iemOp_InvalidNeedRMImm8,    iemOp_vpcmpistrm_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x63 */  iemOp_InvalidNeedRMImm8,    iemOp_vpcmpistri_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+    /* 0x64 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x65 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x66 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x67 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x68 */  iemOp_InvalidNeedRMImm8,    iemOp_vfmaddps_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x69 */  iemOp_InvalidNeedRMImm8,    iemOp_vfmaddpd_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x6a */  iemOp_InvalidNeedRMImm8,    iemOp_vfmaddss_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x6b */  iemOp_InvalidNeedRMImm8,    iemOp_vfmaddsd_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x6c */  iemOp_InvalidNeedRMImm8,    iemOp_vfmsubps_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x6d */  iemOp_InvalidNeedRMImm8,    iemOp_vfmsubpd_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x6e */  iemOp_InvalidNeedRMImm8,    iemOp_vfmsubss_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x6f */  iemOp_InvalidNeedRMImm8,    iemOp_vfmsubsd_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+
+    /* 0x70 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x71 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x72 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x73 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x74 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x75 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x76 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x77 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x78 */  iemOp_InvalidNeedRMImm8,    iemOp_vfnmaddps_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x79 */  iemOp_InvalidNeedRMImm8,    iemOp_vfnmaddpd_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x7a */  iemOp_InvalidNeedRMImm8,    iemOp_vfnmaddss_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x7b */  iemOp_InvalidNeedRMImm8,    iemOp_vfnmaddsd_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x7c */  iemOp_InvalidNeedRMImm8,    iemOp_vfnmsubps_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x7d */  iemOp_InvalidNeedRMImm8,    iemOp_vfnmsubpd_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x7e */  iemOp_InvalidNeedRMImm8,    iemOp_vfnmsubss_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+    /* 0x7f */  iemOp_InvalidNeedRMImm8,    iemOp_vfnmsubsd_Vx_Lx_Wx_Hx, iemOp_InvalidNeedRMImm8, iemOp_InvalidNeedRMImm8,
+
+    /* 0x80 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x81 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x82 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x83 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x84 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x85 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x86 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x87 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x88 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x89 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x8f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0x90 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x91 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x92 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x93 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x94 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x95 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x96 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x97 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x98 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x99 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9a */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9b */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9c */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9d */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9e */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0x9f */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0xa0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xa9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xaa */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xab */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xac */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xad */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xae */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xaf */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0xb0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xb9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xba */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xbb */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xbc */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xbd */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xbe */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xbf */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0xc0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xc9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xca */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xcb */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xcc */  iemOp_vsha1rnds4_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,  iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,
+    /* 0xcd */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xce */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xcf */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0xd0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xd9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xda */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xdb */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xdc */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xdd */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xde */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xdf */  iemOp_InvalidNeedRMImm8,    iemOp_vaeskeygen_Vdq_Wdq_Ib, iemOp_InvalidNeedRMImm8,   iemOp_InvalidNeedRMImm8,
+
+    /* 0xe0 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xe9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xea */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xeb */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xec */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xed */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xee */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xef */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+
+    /* 0xf0 */  iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,    iemOp_InvalidNeedRMImm8,    iemOp_rorx_Gy_Ey_Ib,
+    /* 0xf1 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf2 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf3 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf4 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf5 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf6 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf7 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf8 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xf9 */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xfa */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xfb */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xfc */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xfd */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xfe */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+    /* 0xff */  IEMOP_X4(iemOp_InvalidNeedRMImm8),
+};
+AssertCompile(RT_ELEMENTS(g_apfnVexMap3) == 1024);
+
+/** @} */
+
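For context, these 1024-entry tables (g_apfnVexMap2 above and g_apfnVexMap3 here)
store four decoder entries per opcode byte, one per SIMD prefix column (none,
0x66, 0xF3, 0xF2), which is what the AssertCompile of 256 * 4 == 1024 checks.
A minimal sketch of how such a table is indexed; the helper name and the
idxPrefix encoding are illustrative, not upstream API:

    /* Sketch: selecting a handler from a 4-column VEX opcode map.
       idxPrefix: 0 = no prefix, 1 = 0x66, 2 = 0xF3, 3 = 0xF2. */
    static PFNIEMOP demoLookupVexMap3(unsigned bOpcode, unsigned idxPrefix)
    {
        return g_apfnVexMap3[bOpcode * 4u + idxPrefix];
    }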
diff --git a/src/VBox/VMM/VMMAll/IOMAllMMIO.cpp b/src/VBox/VMM/VMMAll/IOMAllMMIO.cpp
index fee3496..a32ee4a 100644
--- a/src/VBox/VMM/VMMAll/IOMAllMMIO.cpp
+++ b/src/VBox/VMM/VMMAll/IOMAllMMIO.cpp
@@ -1137,6 +1137,8 @@ VMMDECL(int) IOMMMIOMapMMIO2Page(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS GCPhysRemapp
 #  endif
     rc = PGMPrefetchPage(pVCpu, (RTGCPTR)GCPhys);
     Assert(rc == VINF_SUCCESS || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT);
+# else
+    RT_NOREF_PV(pVM); RT_NOREF(GCPhys); RT_NOREF(GCPhysRemapped); RT_NOREF(fPageFlags);
 # endif /* !IEM_VERIFICATION_MODE_FULL */
     return VINF_SUCCESS;
 }
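The RT_NOREF additions above keep the parameters referenced when the
IEM_VERIFICATION_MODE_FULL branch is compiled out, avoiding unused-parameter
warnings.  The IPRT macros boil down to a void cast; a minimal sketch of the
idiom (macro and function names here are illustrative):

    /* Reference a variable without generating code, so unused-parameter
       warnings stay quiet in configurations that compile the real use away. */
    #define DEMO_NOREF(var) ((void)(var))

    static int demoHandler(int iUsedOnlyInSomeBuilds)
    {
        DEMO_NOREF(iUsedOnlyInSomeBuilds);
        return 0;
    }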
diff --git a/src/VBox/VMM/VMMAll/MMAll.cpp b/src/VBox/VMM/VMMAll/MMAll.cpp
index b755a56..2f2675b 100644
--- a/src/VBox/VMM/VMMAll/MMAll.cpp
+++ b/src/VBox/VMM/VMMAll/MMAll.cpp
@@ -589,6 +589,8 @@ const char *mmGetTagName(MMTAG enmTag)
 
         TAG2STR(EM);
 
+        TAG2STR(IEM);
+
         TAG2STR(IOM);
         TAG2STR(IOM_STATS);
 
diff --git a/src/VBox/VMM/VMMAll/PGMAllBth.h b/src/VBox/VMM/VMMAll/PGMAllBth.h
index 6498462..5f8fb4d 100644
--- a/src/VBox/VMM/VMMAll/PGMAllBth.h
+++ b/src/VBox/VMM/VMMAll/PGMAllBth.h
@@ -120,7 +120,8 @@ PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPU pVCpu, PGSTPTWALK pGs
     uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
                      ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
                      : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
-    if (pGstWalk->Core.fBadPhysAddr)
+    if (   pGstWalk->Core.fRsvdError
+        || pGstWalk->Core.fBadPhysAddr)
     {
         uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
         Assert(!pGstWalk->Core.fNotPresent);
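For reference, the bits composed into uNewErr above follow the x86 page-fault
error code layout (Intel SDM Vol. 3 / AMD APM); a short sketch with
illustrative names:

    /* x86 #PF error code bits, as used via the X86_TRAP_PF_* constants. */
    #define DEMO_PF_P    (1u << 0)  /* 1 = protection violation, 0 = not present */
    #define DEMO_PF_RW   (1u << 1)  /* 1 = write access */
    #define DEMO_PF_US   (1u << 2)  /* 1 = user-mode access */
    #define DEMO_PF_RSVD (1u << 3)  /* 1 = reserved bit set in paging structures */
    #define DEMO_PF_ID   (1u << 4)  /* 1 = instruction fetch */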
diff --git a/src/VBox/VMM/VMMAll/PGMAllPhys.cpp b/src/VBox/VMM/VMMAll/PGMAllPhys.cpp
index 73e023c..e8da8f5 100644
--- a/src/VBox/VMM/VMMAll/PGMAllPhys.cpp
+++ b/src/VBox/VMM/VMMAll/PGMAllPhys.cpp
@@ -4432,6 +4432,7 @@ VMM_INT_DECL(int) PGMPhysIemGCPhys2PtrNoLock(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCP
                             break;
                         case PGM_PAGE_STATE_BALLOONED:
                             AssertFailed();
+                            /* fall thru */
                         case PGM_PAGE_STATE_ZERO:
                         case PGM_PAGE_STATE_SHARED:
                         case PGM_PAGE_STATE_WRITE_MONITORED:
@@ -4472,6 +4473,7 @@ VMM_INT_DECL(int) PGMPhysIemGCPhys2PtrNoLock(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCP
                                 break;
                             case PGM_PAGE_STATE_BALLOONED:
                                 AssertFailed();
+                                /* fall thru */
                             case PGM_PAGE_STATE_ZERO:
                             case PGM_PAGE_STATE_SHARED:
                             case PGM_PAGE_STATE_WRITE_MONITORED:
@@ -4587,6 +4589,7 @@ VMM_INT_DECL(int) PGMPhysIemGCPhys2Ptr(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, b
                         break;
                     case PGM_PAGE_STATE_BALLOONED:
                         AssertFailed();
+                        break;
                     case PGM_PAGE_STATE_ZERO:
                     case PGM_PAGE_STATE_SHARED:
                     case PGM_PAGE_STATE_WRITE_MONITORED:
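The "/* fall thru */" comments added in this commit (here and in PGMAllPool,
TRPMAll, HMVMXR0 and CSAM below) mark switch fall-through as deliberate; many
compilers and static analyzers that warn on implicit fall-through recognize
such comments.  A minimal sketch of the pattern (names illustrative):

    static const char *demoClassify(int enmState)
    {
        switch (enmState)
        {
            case 1: /* unexpected, but handled like the cases below */
                /* fall thru */
            case 2:
            case 3:
                return "handle";
            default:
                return "skip";
        }
    }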
diff --git a/src/VBox/VMM/VMMAll/PGMAllPool.cpp b/src/VBox/VMM/VMMAll/PGMAllPool.cpp
index 8e541e4..bc44e00 100644
--- a/src/VBox/VMM/VMMAll/PGMAllPool.cpp
+++ b/src/VBox/VMM/VMMAll/PGMAllPool.cpp
@@ -3950,8 +3950,8 @@ static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMP
              * non-present PDPT will continue to cause page faults.
              */
             ASMReloadCR3();
-            /* no break */
 #endif
+            /* fall thru */
         case PGMPOOLKIND_PAE_PD_PHYS:
         case PGMPOOLKIND_PAE_PDPT_PHYS:
         case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
diff --git a/src/VBox/VMM/VMMAll/TMAll.cpp b/src/VBox/VMM/VMMAll/TMAll.cpp
index 84dcfef..dc4995f 100644
--- a/src/VBox/VMM/VMMAll/TMAll.cpp
+++ b/src/VBox/VMM/VMMAll/TMAll.cpp
@@ -1903,6 +1903,7 @@ VMMDECL(int) TMTimerStop(PTMTIMER pTimer)
                     STAM_PROFILE_STOP(&pVM->tm.s.CTX_SUFF_Z(StatTimerStop), a);
                     return VINF_SUCCESS;
                 }
+                break;
 
             case TMTIMERSTATE_PENDING_RESCHEDULE:
                 if (tmTimerTry(pTimer, TMTIMERSTATE_PENDING_STOP, enmState))
diff --git a/src/VBox/VMM/VMMAll/TMAllCpu.cpp b/src/VBox/VMM/VMMAll/TMAllCpu.cpp
index f032f72..20688c3 100644
--- a/src/VBox/VMM/VMMAll/TMAllCpu.cpp
+++ b/src/VBox/VMM/VMMAll/TMAllCpu.cpp
@@ -434,6 +434,7 @@ DECLINLINE(uint64_t) tmCpuTickGetInternal(PVMCPU pVCpu, bool fCheckTimers)
     }
     else
         u64 = pVCpu->tm.s.u64TSC;
+    /** @todo @bugref{7243}: SVM TSC offset. */
     return u64;
 }
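The @todo above concerns AMD-V TSC offsetting: when RDTSC is not intercepted,
the guest reads the host TSC plus the offset stored in the VMCB, so TSC
bookkeeping has to fold that offset in.  A rough sketch of the relationship
per the AMD manual (the helper name is illustrative):

    #include <stdint.h>

    /* What a guest observes for RDTSC under SVM TSC offsetting.
       uTscOffset corresponds to the VMCB TSC_OFFSET control field. */
    static uint64_t demoGuestTscUnderSvm(uint64_t uHostTsc, uint64_t uTscOffset)
    {
        return uHostTsc + uTscOffset;
    }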
 
diff --git a/src/VBox/VMM/VMMAll/TRPMAll.cpp b/src/VBox/VMM/VMMAll/TRPMAll.cpp
index 90f2800..8900c4b 100644
--- a/src/VBox/VMM/VMMAll/TRPMAll.cpp
+++ b/src/VBox/VMM/VMMAll/TRPMAll.cpp
@@ -456,7 +456,7 @@ VMMDECL(void) TRPMRestoreTrap(PVMCPU pVCpu)
  * @returns VBox status code,
  *          or does not return at all (when the trap is actually forwarded).
  *
- * @param   pVM         The cross context VM structure.
+ * @param   pVCpu       The cross context virtual CPU structure.
  * @param   pRegFrame   Pointer to the register frame for the trap.
  * @param   iGate       Trap or interrupt gate number
  * @param   cbInstr     Instruction size (only relevant for software interrupts)
@@ -495,7 +495,7 @@ VMMDECL(int) TRPMForwardTrap(PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, uint32_t iGat
             if (RT_SUCCESS(rc))
                 Log(("TRPMForwardTrap: caller=%RGv\n", pCallerGC));
         }
-        /* no break */
+        /* fall thru */
     case X86_XCPT_DF:
     case X86_XCPT_TS:
     case X86_XCPT_NP:
diff --git a/src/VBox/VMM/VMMR0/HMSVMR0.cpp b/src/VBox/VMM/VMMR0/HMSVMR0.cpp
index ffed5d9..c09b126 100644
--- a/src/VBox/VMM/VMMR0/HMSVMR0.cpp
+++ b/src/VBox/VMM/VMMR0/HMSVMR0.cpp
@@ -681,8 +681,8 @@ VMMR0DECL(int) SVMR0SetupVM(PVM pVM)
     AssertReturn(pVM, VERR_INVALID_PARAMETER);
     Assert(pVM->hm.s.svm.fSupported);
 
-    bool const fPauseFilter          = RT_BOOL(pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER);
-    bool const fPauseFilterThreshold = RT_BOOL(pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD);
+    bool const fPauseFilter          = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER);
+    bool const fPauseFilterThreshold = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD);
     bool const fUsePauseFilter       = fPauseFilter && pVM->hm.s.svm.cPauseFilter && pVM->hm.s.svm.cPauseFilterThresholdTicks;
 
     for (VMCPUID i = 0; i < pVM->cCpus; i++)
@@ -947,7 +947,7 @@ static void hmR0SvmFlushTaggedTlb(PVMCPU pVCpu)
                 pCpu->cTlbFlushes++;                /* All VCPUs that run on this host CPU must use a new VPID. */
                 fHitASIDLimit             = true;
 
-                if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
+                if (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
                 {
                     pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
                     pCpu->fFlushAsidBeforeUse = true;
@@ -962,7 +962,7 @@ static void hmR0SvmFlushTaggedTlb(PVMCPU pVCpu)
             if (   !fHitASIDLimit
                 && pCpu->fFlushAsidBeforeUse)
             {
-                if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
+                if (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
                     pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
                 else
                 {
@@ -977,7 +977,7 @@ static void hmR0SvmFlushTaggedTlb(PVMCPU pVCpu)
         }
         else
         {
-            if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
+            if (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
                 pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
             else
                 pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
@@ -3194,7 +3194,7 @@ static void hmR0SvmPreRunGuestCommitted(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PS
     }
 
     /* If VMCB Clean bits aren't supported by the CPU, simply mark all state-bits as dirty, indicating (re)load-from-VMCB. */
-    if (!(pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN))
+    if (!(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN))
         pVmcb->ctrl.u64VmcbCleanBits = 0;
 }
 
@@ -4270,7 +4270,7 @@ static int hmR0SvmCheckExitDueToEventDelivery(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMT
  */
 DECLINLINE(void) hmR0SvmAdvanceRipHwAssist(PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t cb)
 {
-    if (pVCpu->CTX_SUFF(pVM)->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
+    if (pVCpu->CTX_SUFF(pVM)->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
     {
         PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
         Assert(pVmcb->ctrl.u64NextRIP);
@@ -4283,7 +4283,6 @@ DECLINLINE(void) hmR0SvmAdvanceRipHwAssist(PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t
     HMSVM_UPDATE_INTR_SHADOW(pVCpu, pCtx);
 }
 
-
 /**
  * Advances the guest RIP by the number of bytes specified in @a cb. This does
  * not make use of any hardware features to determine the instruction length.
@@ -4670,7 +4669,7 @@ HMSVM_EXIT_DECL hmR0SvmExitMsr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTr
             return rc;
         }
 
-        if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
+        if (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
         {
             rc = EMInterpretWrmsr(pVM, pVCpu, CPUMCTX2CORE(pCtx));
             if (RT_LIKELY(rc == VINF_SUCCESS))
@@ -4717,7 +4716,7 @@ HMSVM_EXIT_DECL hmR0SvmExitMsr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTr
         STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr);
         Assert(pVmcb->ctrl.u64ExitInfo1 == SVM_EXIT1_MSR_READ);
 
-        if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
+        if (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
         {
             rc = EMInterpretRdmsr(pVM, pVCpu, CPUMCTX2CORE(pCtx));
             if (RT_LIKELY(rc == VINF_SUCCESS))
@@ -4902,7 +4901,7 @@ HMSVM_EXIT_DECL hmR0SvmExitIOInstr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pS
                     /* Don't know exactly how to detect whether u3SEG is valid, currently
                        only enabling it for Bulldozer and later with NRIP.  OS/2 broke on
                        2384 Opterons when only checking NRIP. */
-                    if (   (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
+                    if (   (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
                         && pVM->cpum.ro.GuestFeatures.enmMicroarch >= kCpumMicroarch_AMD_15h_First)
                     {
                         AssertMsg(IoExitInfo.n.u3SEG == X86_SREG_DS || cbInstr > 1U + IoExitInfo.n.u1REP,
diff --git a/src/VBox/VMM/VMMR0/HMVMXR0.cpp b/src/VBox/VMM/VMMR0/HMVMXR0.cpp
index 2ea22f0..85cf0d4 100644
--- a/src/VBox/VMM/VMMR0/HMVMXR0.cpp
+++ b/src/VBox/VMM/VMMR0/HMVMXR0.cpp
@@ -7019,7 +7019,7 @@ static void hmR0VmxTrpmTrapToPendingEvent(PVMCPU pVCpu)
             case X86_XCPT_GP:
             case X86_XCPT_AC:
                 u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
-                /* no break! */
+                /* fall thru */
             default:
                 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
                 break;
diff --git a/src/VBox/VMM/VMMR3/CFGM.cpp b/src/VBox/VMM/VMMR3/CFGM.cpp
index 5276a82..641d198 100644
--- a/src/VBox/VMM/VMMR3/CFGM.cpp
+++ b/src/VBox/VMM/VMMR3/CFGM.cpp
@@ -581,7 +581,7 @@ VMMR3DECL(CFGMVALUETYPE) CFGMR3GetValueType(PCFGMLEAF pCur)
 /**
  * Validates that the values are within a set of valid names.
  *
- * @returns true if all names are found in pszzAllowed.
+ * @returns true if all names are found in pszzValid.
  * @returns false if not.
  * @param   pNode           The node which values should be examined.
  * @param   pszzValid       List of valid names separated by '\\0' and ending with
diff --git a/src/VBox/VMM/VMMR3/CPUM.cpp b/src/VBox/VMM/VMMR3/CPUM.cpp
index fb05fb0..bdabb8e 100644
--- a/src/VBox/VMM/VMMR3/CPUM.cpp
+++ b/src/VBox/VMM/VMMR3/CPUM.cpp
@@ -803,6 +803,8 @@ VMMR3DECL(int) CPUMR3Init(PVM pVM)
         return VERR_UNSUPPORTED_CPU;
     }
 
+    pVM->cpum.s.fHostMxCsrMask = CPUMR3DeterminHostMxCsrMask();
+
     PCPUMCPUIDLEAF  paLeaves;
     uint32_t        cLeaves;
     int rc = CPUMR3CpuIdCollectLeaves(&paLeaves, &cLeaves);
@@ -1104,8 +1106,8 @@ VMMR3DECL(void) CPUMR3ResetCpu(PVM pVM, PVMCPU pVCpu)
     /* Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3A, Table 8-1.
        IA-32 Processor States Following Power-up, Reset, or INIT */
     pFpuCtx->MXCSR                  = 0x1F80;
-    pFpuCtx->MXCSR_MASK             = 0xffff; /** @todo REM always changed this for us. Should probably check if the HW really
-                                                        supports all bits, since a zero value here should be read as 0xffbf. */
+    pFpuCtx->MXCSR_MASK             = pVM->cpum.s.GuestInfo.fMxCsrMask; /** @todo check if REM messes this up... */
+
     pCtx->aXcr[0]                   = XSAVE_C_X87;
     if (pVM->cpum.s.HostFeatures.cbMaxExtendedState >= RT_OFFSETOF(X86XSAVEAREA, Hdr))
     {
diff --git a/src/VBox/VMM/VMMR3/CPUMR3CpuId.cpp b/src/VBox/VMM/VMMR3/CPUMR3CpuId.cpp
index 8cd5157..4f25351 100644
--- a/src/VBox/VMM/VMMR3/CPUMR3CpuId.cpp
+++ b/src/VBox/VMM/VMMR3/CPUMR3CpuId.cpp
@@ -549,6 +549,28 @@ VMMR3DECL(const char *) CPUMR3MicroarchName(CPUMMICROARCH enmMicroarch)
 }
 
 
+/**
+ * Determines the host CPU MXCSR mask.
+ *
+ * @returns MXCSR mask.
+ */
+VMMR3DECL(uint32_t) CPUMR3DeterminHostMxCsrMask(void)
+{
+    if (   ASMHasCpuId()
+        && ASMIsValidStdRange(ASMCpuId_EAX(0))
+        && ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_FXSR)
+    {
+        uint8_t volatile abBuf[sizeof(X86FXSTATE) + 64];
+        PX86FXSTATE      pState = (PX86FXSTATE)&abBuf[64 - ((uintptr_t)&abBuf[0] & 63)];
+        RT_ZERO(*pState);
+        ASMFxSave(pState);
+        if (pState->MXCSR_MASK == 0)
+            return 0xffbf;
+        return pState->MXCSR_MASK;
+    }
+    return 0;
+}
+
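CPUMR3DeterminHostMxCsrMask() above reads MXCSR_MASK out of an FXSAVE image;
a stored mask of zero means the default mask 0xFFBF applies per the Intel SDM,
which the code handles explicitly.  A hedged sketch of how such a mask is
typically consumed: LDMXCSR raises #GP if any bit outside the mask is set, so
an emulator can validate a candidate MXCSR the same way (helper name
illustrative):

    #include <stdint.h>

    /* True if uMxCsr only uses bits the CPU supports per MXCSR_MASK. */
    static int demoIsMxCsrValid(uint32_t uMxCsr, uint32_t fMxCsrMask)
    {
        return (uMxCsr & ~fMxCsrMask) == 0;
    }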
 
 /**
  * Gets a matching leaf in the CPUID leaf array.
@@ -1634,6 +1656,7 @@ int cpumR3CpuIdExplodeFeatures(PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, PCPUM
         pFeatures->fHypervisorPresent   = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_HVP);
         pFeatures->fMonitorMWait        = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_MONITOR);
         pFeatures->fMovCmpXchg16b       = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_CX16);
+        pFeatures->fClFlush             = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_CLFSH);
 
         /* Structured extended features. */
         PCCPUMCPUIDLEAF const pSxfLeaf0 = cpumR3CpuIdFindLeafEx(paLeaves, cLeaves, 7, 0);
@@ -1641,6 +1664,7 @@ int cpumR3CpuIdExplodeFeatures(PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, PCPUM
         {
             pFeatures->fAvx2                = RT_BOOL(pSxfLeaf0->uEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
             pFeatures->fAvx512Foundation    = RT_BOOL(pSxfLeaf0->uEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX512F);
+            pFeatures->fClFlushOpt          = RT_BOOL(pSxfLeaf0->uEbx & X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT);
         }
 
         /* MWAIT/MONITOR leaf. */
@@ -1681,6 +1705,25 @@ int cpumR3CpuIdExplodeFeatures(PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, PCPUM
             pFeatures->fMmx            |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_MMX);
             pFeatures->fTsc            |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_TSC);
             pFeatures->fAmdMmxExts      = RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
+            pFeatures->fXop             = RT_BOOL(pExtLeaf->uEcx & X86_CPUID_AMD_FEATURE_ECX_XOP);
+            pFeatures->fSvm             = RT_BOOL(pExtLeaf->uEcx & X86_CPUID_AMD_FEATURE_ECX_SVM);
+            if (pFeatures->fSvm)
+            {
+                PCCPUMCPUIDLEAF pSvmLeaf = cpumR3CpuIdFindLeaf(paLeaves, cLeaves, 0x8000000a);
+                AssertLogRelReturn(pSvmLeaf, VERR_CPUM_IPE_1);
+                pFeatures->fSvmNestedPaging         = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING);
+                pFeatures->fSvmLbrVirt              = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_LBR_VIRT);
+                pFeatures->fSvmSvmLock              = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_SVM_LOCK);
+                pFeatures->fSvmNextRipSave          = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE);
+                pFeatures->fSvmTscRateMsr           = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_TSC_RATE_MSR);
+                pFeatures->fSvmVmcbClean            = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN);
+                pFeatures->fSvmFlusbByAsid          = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID);
+                pFeatures->fSvmDecodeAssist         = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSIST);
+                pFeatures->fSvmPauseFilter          = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER);
+                pFeatures->fSvmPauseFilterThreshold = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD);
+                pFeatures->fSvmAvic                 = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_AVIC);
+                pFeatures->uSvmMaxAsid              = pSvmLeaf->uEbx;
+            }
         }
 
         /*
@@ -1709,12 +1752,13 @@ int cpumR3CpuIdExplodeFeatures(PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, PCPUM
                 {
                     pFeatures->cbMaxExtendedState = pXStateLeaf0->uEcx;
 
+                    /* (paranoia:) */
                     PCCPUMCPUIDLEAF const pXStateLeaf1 = cpumR3CpuIdFindLeafEx(paLeaves, cLeaves, 13, 1);
                     if (   pXStateLeaf1
                         && pXStateLeaf1->uEbx > pFeatures->cbMaxExtendedState
                         && pXStateLeaf1->uEbx <= CPUM_MAX_XSAVE_AREA_SIZE
                         && (pXStateLeaf1->uEcx || pXStateLeaf1->uEdx) )
-                        pFeatures->cbMaxExtendedState = pXStateLeaf0->uEbx;
+                        pFeatures->cbMaxExtendedState = pXStateLeaf1->uEbx;
                 }
                 else
                     AssertLogRelMsgFailedStmt(("Unexpected max/cur XSAVE area sizes: %#x/%#x\n", pXStateLeaf0->uEcx, pXStateLeaf0->uEbx),
@@ -2214,6 +2258,7 @@ typedef struct CPUMCPUIDCONFIG
     CPUMISAEXTCFG   enmMisAlnSse;
     CPUMISAEXTCFG   enm3dNowPrf;
     CPUMISAEXTCFG   enmAmdExtMmx;
+    CPUMISAEXTCFG   enmSvm;
 
     uint32_t        uMaxStdLeaf;
     uint32_t        uMaxExtLeaf;
@@ -2698,7 +2743,7 @@ static int cpumR3CpuIdSanitize(PVM pVM, PCPUM pCpum, PCPUMCPUIDCONFIG pConfig)
                                ;
         pExtFeatureLeaf->uEcx &= X86_CPUID_EXT_FEATURE_ECX_LAHF_SAHF
                                //| X86_CPUID_AMD_FEATURE_ECX_CMPL   - set below if applicable.
-                               //| X86_CPUID_AMD_FEATURE_ECX_SVM    - not virtualized.
+                               | (pConfig->enmSvm       ? X86_CPUID_AMD_FEATURE_ECX_SVM : 0)
                                //| X86_CPUID_AMD_FEATURE_ECX_EXT_APIC
                                /* Note: This could prevent teleporting from AMD to Intel CPUs! */
                                | X86_CPUID_AMD_FEATURE_ECX_CR8L         /* expose lock mov cr0 = mov cr8 hack for guests that can use this feature to access the TPR. */
@@ -2739,6 +2784,7 @@ static int cpumR3CpuIdSanitize(PVM pVM, PCPUM pCpum, PCPUMCPUIDCONFIG pConfig)
         if (pCpum->u8PortableCpuIdLevel > 0)
         {
             PORTABLE_DISABLE_FEATURE_BIT(    1, pExtFeatureLeaf->uEcx, CR8L,       X86_CPUID_AMD_FEATURE_ECX_CR8L);
+            PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pExtFeatureLeaf->uEcx, SVM,        X86_CPUID_AMD_FEATURE_ECX_SVM,       pConfig->enmSvm);
             PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pExtFeatureLeaf->uEcx, ABM,        X86_CPUID_AMD_FEATURE_ECX_ABM,       pConfig->enmAbm);
             PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pExtFeatureLeaf->uEcx, SSE4A,      X86_CPUID_AMD_FEATURE_ECX_SSE4A,     pConfig->enmSse4A);
             PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pExtFeatureLeaf->uEcx, MISALNSSE,  X86_CPUID_AMD_FEATURE_ECX_MISALNSSE, pConfig->enmMisAlnSse);
@@ -2781,12 +2827,16 @@ static int cpumR3CpuIdSanitize(PVM pVM, PCPUM pCpum, PCPUMCPUIDCONFIG pConfig)
             pExtFeatureLeaf->uEcx |= X86_CPUID_AMD_FEATURE_ECX_ABM;
         if (pConfig->enmSse4A     == CPUMISAEXTCFG_ENABLED_ALWAYS)
             pExtFeatureLeaf->uEcx |= X86_CPUID_AMD_FEATURE_ECX_SSE4A;
+        if (pConfig->enmSvm       == CPUMISAEXTCFG_ENABLED_ALWAYS)
+            pExtFeatureLeaf->uEcx |= X86_CPUID_AMD_FEATURE_ECX_SVM;
         if (pConfig->enmMisAlnSse == CPUMISAEXTCFG_ENABLED_ALWAYS)
             pExtFeatureLeaf->uEcx |= X86_CPUID_AMD_FEATURE_ECX_MISALNSSE;
         if (pConfig->enm3dNowPrf  == CPUMISAEXTCFG_ENABLED_ALWAYS)
             pExtFeatureLeaf->uEcx |= X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF;
         if (pConfig->enmAmdExtMmx  == CPUMISAEXTCFG_ENABLED_ALWAYS)
             pExtFeatureLeaf->uEdx |= X86_CPUID_AMD_FEATURE_EDX_AXMMX;
     }
     pExtFeatureLeaf = NULL; /* Must refetch! */
 
@@ -3350,9 +3400,18 @@ static int cpumR3CpuIdSanitize(PVM pVM, PCPUM pCpum, PCPUMCPUIDCONFIG pConfig)
      *      EBX - Number of ASIDs.
      *      ECX - Reserved.
      *      EDX - SVM Feature identification.
-     * We clear all as we currently does not virtualize SVM.
      */
-    cpumR3CpuIdZeroLeaf(pCpum, UINT32_C(0x8000000a));
+    pExtFeatureLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x80000001), 0);
+    if (pExtFeatureLeaf->uEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
+    {
+        PCPUMCPUIDLEAF pSvmFeatureLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 0x8000000a, 0);
+        pSvmFeatureLeaf->uEax = 0x1;
+        pSvmFeatureLeaf->uEbx = 0x8000;     /** @todo figure out virtual NASID. */
+        pSvmFeatureLeaf->uEcx = 0;
+        pSvmFeatureLeaf->uEdx = 0;          /** @todo Support SVM features */
+    }
+    else
+        cpumR3CpuIdZeroLeaf(pCpum, UINT32_C(0x8000000a));
 
     /* Cpuid 0x8000000b thru 0x80000018: Reserved
      * We clear these as we don't know what purpose they might have. */
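The synthesized leaf above follows AMD's 0x8000000A layout: EAX holds the SVM
revision (1 here), EBX the number of ASIDs (0x8000 here), ECX is reserved and
EDX carries the SVM feature flags (left zero for now).  A short sketch of
reading the same leaf on a host with GCC/Clang's cpuid helper (illustrative):

    #include <cpuid.h>

    static unsigned demoHostSvmAsidCount(void)
    {
        unsigned uEax, uEbx, uEcx, uEdx;
        /* __get_cpuid() checks the max extended leaf before issuing CPUID. */
        if (__get_cpuid(0x8000000a, &uEax, &uEbx, &uEcx, &uEdx))
            return uEbx;    /* number of address space IDs (ASIDs) */
        return 0;
    }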
@@ -3724,6 +3783,7 @@ static int cpumR3CpuIdReadConfig(PVM pVM, PCPUMCPUIDCONFIG pConfig, PCFGMNODE pC
                                   "|MISALNSSE"
                                   "|3DNOWPRF"
                                   "|AXMMX"
+                                  "|SVM"
                                   , "" /*pszValidNodes*/, "CPUM" /*pszWho*/, 0 /*uInstance*/);
         if (RT_FAILURE(rc))
             return rc;
@@ -3898,6 +3958,16 @@ static int cpumR3CpuIdReadConfig(PVM pVM, PCPUMCPUIDCONFIG pConfig, PCFGMNODE pC
     rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "AXMMX", &pConfig->enmAmdExtMmx, fNestedPagingAndFullGuestExec);
     AssertLogRelRCReturn(rc, rc);
 
+#ifdef VBOX_WITH_NESTED_HWVIRT
+    /** @cfgm{/CPUM/IsaExts/SVM, isaextcfg, depends}
+     * Whether to expose AMD's hardware virtualization (SVM) instructions to the
+     * guest. For the time being, the default is to only do this for VMs with nested
+     * paging and AMD-V.
+     */
+    rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "SVM", &pConfig->enmSvm, fNestedPagingAndFullGuestExec);
+    AssertLogRelRCReturn(rc, rc);
+#endif
+
     return VINF_SUCCESS;
 }
 
@@ -3943,6 +4013,14 @@ int cpumR3InitCpuIdAndMsrs(PVM pVM)
                           "Info on guest CPU '%s' could not be found. Please, select a different CPU.", Config.szCpuName)
              : rc;
 
+    if (pCpum->GuestInfo.fMxCsrMask & ~pVM->cpum.s.fHostMxCsrMask)
+    {
+        LogRel(("Stripping unsupported MXCSR bits from guest mask: %#x -> %#x (host: %#x)\n", pCpum->GuestInfo.fMxCsrMask,
+                pCpum->GuestInfo.fMxCsrMask & pVM->cpum.s.fHostMxCsrMask, pVM->cpum.s.fHostMxCsrMask));
+        pCpum->GuestInfo.fMxCsrMask &= pVM->cpum.s.fHostMxCsrMask;
+    }
+    LogRel(("CPUM: MXCSR_MASK=%#x (host: %#x)\n", pCpum->GuestInfo.fMxCsrMask, pVM->cpum.s.fHostMxCsrMask));
+
     /** @cfgm{/CPUM/MSRs/[Name]/[First|Last|Type|Value|...],}
      * Overrides the guest MSRs.
      */
@@ -5834,7 +5912,7 @@ static DBGFREGSUBFIELD const g_aExtLeaf1EcxSubFields[] =
 {
     DBGFREGSUBFIELD_RO("LahfSahf\0"     "LAHF/SAHF support in 64-bit mode",              0, 1, 0),
     DBGFREGSUBFIELD_RO("CmpLegacy\0"    "Core multi-processing legacy mode",             1, 1, 0),
-    DBGFREGSUBFIELD_RO("SVM\0"          "AMD VM extensions",                             2, 1, 0),
+    DBGFREGSUBFIELD_RO("SVM\0"          "AMD Secure Virtual Machine extensions",         2, 1, 0),
     DBGFREGSUBFIELD_RO("EXTAPIC\0"      "AMD Extended APIC registers",                   3, 1, 0),
     DBGFREGSUBFIELD_RO("CR8L\0"         "AMD LOCK MOV CR0 means MOV CR8",                4, 1, 0),
     DBGFREGSUBFIELD_RO("ABM\0"          "AMD Advanced Bit Manipulation",                 5, 1, 0),
diff --git a/src/VBox/VMM/VMMR3/CPUMR3Db.cpp b/src/VBox/VMM/VMMR3/CPUMR3Db.cpp
index fa7626b..d3afa52 100644
--- a/src/VBox/VMM/VMMR3/CPUMR3Db.cpp
+++ b/src/VBox/VMM/VMMR3/CPUMR3Db.cpp
@@ -57,6 +57,8 @@ typedef struct CPUMDBENTRY
      /** The maximum physical address width of the CPU.  This should correspond to
      * the value in CPUID leaf 0x80000008 when present. */
     uint8_t         cMaxPhysAddrWidth;
+    /** The MXCSR mask. */
+    uint32_t        fMxCsrMask;
     /** Pointer to an array of CPUID leaves.  */
     PCCPUMCPUIDLEAF paCpuIdLeaves;
     /** The number of CPUID leaves in the array paCpuIdLeaves points to. */
@@ -66,7 +68,8 @@ typedef struct CPUMDBENTRY
     /** The default unknown CPUID value. */
     CPUMCPUID       DefUnknownCpuId;
 
-    /** MSR mask.  Several microarchitectures ignore higher bits of the    */
+    /** MSR mask.  Several microarchitectures ignore the higher bits of ECX in
+     *  the RDMSR and WRMSR instructions. */
     uint32_t        fMsrMask;
 
      /** The number of ranges in the table pointed to by paMsrRanges. */
@@ -446,6 +449,11 @@ int cpumR3MsrRangesInsert(PVM pVM, PCPUMMSRRANGE *ppaMsrRanges, uint32_t *pcMsrR
         ppaMsrRanges = &pVM->cpum.s.GuestInfo.paMsrRangesR3;
         pcMsrRanges  = &pVM->cpum.s.GuestInfo.cMsrRanges;
     }
+    else
+    {
+        AssertReturn(ppaMsrRanges, VERR_INVALID_POINTER);
+        AssertReturn(pcMsrRanges, VERR_INVALID_POINTER);
+    }
 
     uint32_t        cMsrRanges  = *pcMsrRanges;
     PCPUMMSRRANGE   paMsrRanges = *ppaMsrRanges;
@@ -794,6 +802,7 @@ int cpumR3DbGetCpuInfo(const char *pszName, PCPUMINFO pInfo)
         rc = CPUMR3CpuIdCollectLeaves(&pInfo->paCpuIdLeavesR3, &pInfo->cCpuIdLeaves);
         if (RT_FAILURE(rc))
             return rc;
+        pInfo->fMxCsrMask = CPUMR3DeterminHostMxCsrMask();
 
         /* Lookup database entry for MSRs. */
         CPUMCPUVENDOR const enmVendor    = CPUMR3CpuIdDetectVendorEx(pInfo->paCpuIdLeavesR3[0].uEax,
@@ -911,7 +920,8 @@ int cpumR3DbGetCpuInfo(const char *pszName, PCPUMINFO pInfo)
             pInfo->paCpuIdLeavesR3 = NULL;
 
         pInfo->enmUnknownCpuIdMethod = pEntry->enmUnknownCpuId;
-        pInfo->DefCpuId         = pEntry->DefUnknownCpuId;
+        pInfo->DefCpuId              = pEntry->DefUnknownCpuId;
+        pInfo->fMxCsrMask            = pEntry->fMxCsrMask;
 
         LogRel(("CPUM: Using CPU DB entry '%s' (%s %#x/%#x/%#x %s)\n",
                 pEntry->pszName, CPUMR3CpuVendorName((CPUMCPUVENDOR)pEntry->enmVendor),
@@ -920,7 +930,6 @@ int cpumR3DbGetCpuInfo(const char *pszName, PCPUMINFO pInfo)
 
     pInfo->fMsrMask             = pEntry->fMsrMask;
     pInfo->iFirstExtCpuIdLeaf   = 0; /* Set by caller. */
-    pInfo->uPadding             = 0;
     pInfo->uScalableBusFreq     = pEntry->uScalableBusFreq;
     pInfo->paCpuIdLeavesR0      = NIL_RTR0PTR;
     pInfo->paMsrRangesR0        = NIL_RTR0PTR;
diff --git a/src/VBox/VMM/VMMR3/CSAM.cpp b/src/VBox/VMM/VMMR3/CSAM.cpp
index 363b9da..f405b72 100644
--- a/src/VBox/VMM/VMMR3/CSAM.cpp
+++ b/src/VBox/VMM/VMMR3/CSAM.cpp
@@ -1067,7 +1067,7 @@ static DECLCALLBACK(int) CSAMR3AnalyseCallback(PVM pVM, DISCPUSTATE *pCpu, RCPTR
     case OP_IRET:
         if (EMIsRawRing1Enabled(pVM))
             break;
-        /* no break */
+        /* fall thru */
 
     case OP_ILLUD2:
         /* This appears to be some kind of kernel panic in Linux 2.4; no point to continue. */
@@ -1164,10 +1164,11 @@ static DECLCALLBACK(int) CSAMR3AnalyseCallback(PVM pVM, DISCPUSTATE *pCpu, RCPTR
         if (pCpu->pCurInstr->fParam1 != OP_PARM_REG_CS)
             break;
 
-        /* no break */
+        /* fall thru */
 #ifndef VBOX_WITH_SAFE_STR
     case OP_STR:
 #endif
+        /* fall thru */
     case OP_LSL:
     case OP_LAR:
     case OP_SGDT:
diff --git a/src/VBox/VMM/VMMR3/DBGF.cpp b/src/VBox/VMM/VMMR3/DBGF.cpp
index a10c984..9fb6583 100644
--- a/src/VBox/VMM/VMMR3/DBGF.cpp
+++ b/src/VBox/VMM/VMMR3/DBGF.cpp
@@ -93,11 +93,30 @@
 
 
 /*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+/**
+ * Instruction type returned by dbgfStepGetCurInstrType.
+ */
+typedef enum DBGFSTEPINSTRTYPE
+{
+    DBGFSTEPINSTRTYPE_INVALID = 0,
+    DBGFSTEPINSTRTYPE_OTHER,
+    DBGFSTEPINSTRTYPE_RET,
+    DBGFSTEPINSTRTYPE_CALL,
+    DBGFSTEPINSTRTYPE_END,
+    DBGFSTEPINSTRTYPE_32BIT_HACK = 0x7fffffff
+} DBGFSTEPINSTRTYPE;
+
+
+/*********************************************************************************************************************************
 *   Internal Functions                                                                                                           *
 *********************************************************************************************************************************/
 static int dbgfR3VMMWait(PVM pVM);
 static int dbgfR3VMMCmd(PVM pVM, DBGFCMD enmCmd, PDBGFCMDDATA pCmdData, bool *pfResumeExecution);
 static DECLCALLBACK(int) dbgfR3Attach(PVM pVM);
+static DBGFSTEPINSTRTYPE dbgfStepGetCurInstrType(PVM pVM, PVMCPU pVCpu);
+static bool dbgfStepAreWeThereYet(PVM pVM, PVMCPU pVCpu);
 
 
 /**
@@ -140,6 +159,8 @@ VMMR3_INT_DECL(int) DBGFR3Init(PVM pVM)
     AssertCompile(sizeof(pUVM->dbgf.s)          <= sizeof(pUVM->dbgf.padding));
     AssertCompile(sizeof(pUVM->aCpus[0].dbgf.s) <= sizeof(pUVM->aCpus[0].dbgf.padding));
 
+    pVM->dbgf.s.SteppingFilter.idCpu = NIL_VMCPUID;
+
     /*
      * The usual sideways mountain climbing style of init:
      */
@@ -549,6 +570,8 @@ static int dbgfR3EventPrologue(PVM pVM, DBGFEVENTTYPE enmEvent)
  */
 static int dbgfR3SendEvent(PVM pVM)
 {
+    pVM->dbgf.s.SteppingFilter.idCpu = NIL_VMCPUID;
+
     int rc = RTSemPing(&pVM->dbgf.s.PingPong);
     if (RT_SUCCESS(rc))
         rc = dbgfR3VMMWait(pVM);
@@ -612,6 +635,18 @@ VMMR3_INT_DECL(VBOXSTRICTRC) DBGFR3EventHandlePending(PVM pVM, PVMCPU pVCpu)
  */
 VMMR3DECL(int) DBGFR3Event(PVM pVM, DBGFEVENTTYPE enmEvent)
 {
+    /*
+     * Do stepping filtering.
+     */
+    /** @todo Would be better if we did some of this inside the execution
+     *        engines. */
+    if (   enmEvent == DBGFEVENT_STEPPED
+        || enmEvent == DBGFEVENT_STEPPED_HYPER)
+    {
+        if (!dbgfStepAreWeThereYet(pVM, VMMGetCpu(pVM)))
+            return VINF_EM_DBG_STEP;
+    }
+
     int rc = dbgfR3EventPrologue(pVM, enmEvent);
     if (RT_FAILURE(rc))
         return rc;
@@ -847,6 +882,7 @@ static int dbgfR3VMMWait(PVM pVM)
                     /* remember return code. */
                     default:
                         AssertReleaseMsgFailed(("rc=%Rrc is not in the switch!\n", rc));
+                        /* fall thru */
                     case VINF_EM_RESET:
                     case VINF_EM_SUSPEND:
                     case VINF_EM_HALT:
@@ -890,6 +926,7 @@ static int dbgfR3VMMWait(PVM pVM)
 
 /**
  * Executes command from debugger.
+ *
  * The caller is responsible for waiting or resuming execution based on the
  * value returned in the *pfResumeExecution indicator.
  *
@@ -946,6 +983,7 @@ static int dbgfR3VMMCmd(PVM pVM, DBGFCMD enmCmd, PDBGFCMDDATA pCmdData, bool *pf
             ASMAtomicWriteBool(&pVM->dbgf.s.fAttached, false);
             pVM->dbgf.s.DbgEvent.enmType = DBGFEVENT_DETACH_DONE;
             pVM->dbgf.s.DbgEvent.enmCtx  = DBGFEVENTCTX_OTHER;
+            pVM->dbgf.s.SteppingFilter.idCpu = NIL_VMCPUID;
             fSendEvent = true;
             fResume = true;
             break;
@@ -968,12 +1006,30 @@ static int dbgfR3VMMCmd(PVM pVM, DBGFCMD enmCmd, PDBGFCMDDATA pCmdData, bool *pf
         case DBGFCMD_SINGLE_STEP:
         {
             Log2(("Single step\n"));
-            rc = VINF_EM_DBG_STEP;
             /** @todo SMP */
             PVMCPU pVCpu = VMMGetCpu0(pVM);
-            pVCpu->dbgf.s.fSingleSteppingRaw = true;
-            fSendEvent = false;
-            fResume = true;
+            if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_OVER)
+            {
+                if (dbgfStepGetCurInstrType(pVM, pVCpu) == DBGFSTEPINSTRTYPE_CALL)
+                    pVM->dbgf.s.SteppingFilter.uCallDepth++;
+            }
+            if (pVM->dbgf.s.SteppingFilter.cMaxSteps > 0)
+            {
+                pVCpu->dbgf.s.fSingleSteppingRaw = true;
+                fSendEvent = false;
+                fResume = true;
+                rc = VINF_EM_DBG_STEP;
+            }
+            else
+            {
+                /* Stop after zero steps. Nonsense, but whatever. */
+                pVM->dbgf.s.SteppingFilter.idCpu = NIL_VMCPUID;
+                pVM->dbgf.s.DbgEvent.enmCtx  = dbgfR3FigureEventCtx(pVM);
+                pVM->dbgf.s.DbgEvent.enmType = pVM->dbgf.s.DbgEvent.enmCtx != DBGFEVENTCTX_HYPER
+                                             ? DBGFEVENT_STEPPED : DBGFEVENT_STEPPED_HYPER;
+                fSendEvent = false;
+                fResume = false;
+            }
             break;
         }
 
@@ -1278,6 +1334,207 @@ VMMR3DECL(int) DBGFR3Resume(PUVM pUVM)
 
 
 /**
+ * Classifies the current instruction.
+ *
+ * @returns Type of instruction.
+ * @param   pVM                 The cross context VM structure.
+ * @param   pVCpu               The current CPU.
+ * @thread  EMT(pVCpu)
+ */
+static DBGFSTEPINSTRTYPE dbgfStepGetCurInstrType(PVM pVM, PVMCPU pVCpu)
+{
+    /*
+     * Read the instruction.
+     */
+    bool     fIsHyper = dbgfR3FigureEventCtx(pVM) == DBGFEVENTCTX_HYPER;
+    size_t   cbRead   = 0;
+    uint8_t  abOpcode[16] = { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0 };
+    int rc = PGMR3DbgReadGCPtr(pVM, abOpcode, !fIsHyper ? CPUMGetGuestFlatPC(pVCpu) : CPUMGetHyperRIP(pVCpu),
+                               sizeof(abOpcode) - 1, 0 /*fFlags*/, &cbRead);
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * Do minimal parsing.  No real need to involve the disassembler here.
+         */
+        uint8_t *pb = abOpcode;
+        for (;;)
+        {
+            switch (*pb++)
+            {
+                default:
+                    return DBGFSTEPINSTRTYPE_OTHER;
+
+                case 0xe8: /* call rel16/32 */
+                case 0x9a: /* call farptr */
+                case 0xcc: /* int3 */
+                case 0xcd: /* int xx */
+                // case 0xce: /* into */
+                    return DBGFSTEPINSTRTYPE_CALL;
+
+                case 0xc2: /* ret xx */
+                case 0xc3: /* ret */
+                case 0xca: /* retf xx */
+                case 0xcb: /* retf */
+                case 0xcf: /* iret */
+                    return DBGFSTEPINSTRTYPE_RET;
+
+                case 0xff:
+                    if (   ((*pb >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) == 2  /* call indir */
+                        || ((*pb >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) == 3) /* call indir-farptr */
+                        return DBGFSTEPINSTRTYPE_CALL;
+                    return DBGFSTEPINSTRTYPE_OTHER;
+
+                case 0x0f:
+                    switch (*pb++)
+                    {
+                        case 0x05: /* syscall */
+                        case 0x34: /* sysenter */
+                            return DBGFSTEPINSTRTYPE_CALL;
+                        case 0x07: /* sysret */
+                        case 0x35: /* sysexit */
+                            return DBGFSTEPINSTRTYPE_RET;
+                    }
+                    break;
+
+                /* Must handle some REX prefixes. So we do all normal prefixes. */
+                case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
+                case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
+                    if (fIsHyper) /* ASSUMES 32-bit raw-mode! */
+                        return DBGFSTEPINSTRTYPE_OTHER;
+                    if (!CPUMIsGuestIn64BitCode(pVCpu))
+                        return DBGFSTEPINSTRTYPE_OTHER;
+                    break;
+
+                case 0x2e: /* CS */
+                case 0x36: /* SS */
+                case 0x3e: /* DS */
+                case 0x26: /* ES */
+                case 0x64: /* FS */
+                case 0x65: /* GS */
+                case 0x66: /* op size */
+                case 0x67: /* addr size */
+                case 0xf0: /* lock */
+                case 0xf2: /* REPNZ */
+                case 0xf3: /* REPZ */
+                    break;
+            }
+        }
+    }
+
+    return DBGFSTEPINSTRTYPE_INVALID;
+}
+
+
+/**
+ * Checks if the stepping has reached a stop point.
+ *
+ * Called when raising a stepped event.
+ *
+ * @returns true if the event should be raised, false if we should take one more
+ *          step first.
+ * @param   pVM         The cross context VM structure.
+ * @param   pVCpu       The cross context per CPU structure of the calling EMT.
+ * @thread  EMT(pVCpu)
+ */
+static bool dbgfStepAreWeThereYet(PVM pVM, PVMCPU pVCpu)
+{
+    /*
+     * Check that pVCpu is valid and that it matches the CPU being stepped.
+     */
+    if (pVCpu)
+    {
+        if (pVCpu->idCpu == pVM->dbgf.s.SteppingFilter.idCpu)
+        {
+            /*
+             * Increase the number of steps and see if we've reached the max.
+             */
+            pVM->dbgf.s.SteppingFilter.cSteps++;
+            if (pVM->dbgf.s.SteppingFilter.cSteps < pVM->dbgf.s.SteppingFilter.cMaxSteps)
+            {
+                /*
+                 * Check PC and SP address filtering.
+                 */
+                if (pVM->dbgf.s.SteppingFilter.fFlags & (DBGF_STEP_F_STOP_ON_ADDRESS | DBGF_STEP_F_STOP_ON_STACK_POP))
+                {
+                    bool fIsHyper = dbgfR3FigureEventCtx(pVM) == DBGFEVENTCTX_HYPER;
+                    if (   (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_ADDRESS)
+                        && pVM->dbgf.s.SteppingFilter.AddrPc == (!fIsHyper ? CPUMGetGuestFlatPC(pVCpu) : CPUMGetHyperRIP(pVCpu)))
+                        return true;
+                    if (   (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_STACK_POP)
+                        &&     (!fIsHyper ? CPUMGetGuestFlatSP(pVCpu) : (uint64_t)CPUMGetHyperESP(pVCpu))
+                             - pVM->dbgf.s.SteppingFilter.AddrStackPop
+                           < pVM->dbgf.s.SteppingFilter.cbStackPop)
+                        return true;
+                }
+
+                /*
+                 * Do step-over filtering separately from the step-into filtering.
+                 */
+                if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_OVER)
+                {
+                    DBGFSTEPINSTRTYPE enmType = dbgfStepGetCurInstrType(pVM, pVCpu);
+                    switch (enmType)
+                    {
+                        default:
+                            if (   pVM->dbgf.s.SteppingFilter.uCallDepth != 0
+                                || (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_FILTER_MASK))
+                                break;
+                            return true;
+                        case DBGFSTEPINSTRTYPE_CALL:
+                            if (   (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_CALL)
+                                && pVM->dbgf.s.SteppingFilter.uCallDepth == 0)
+                                return true;
+                            pVM->dbgf.s.SteppingFilter.uCallDepth++;
+                            break;
+                        case DBGFSTEPINSTRTYPE_RET:
+                            if (pVM->dbgf.s.SteppingFilter.uCallDepth == 0)
+                            {
+                                if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_RET)
+                                    return true;
+                                /* If stopping after the return, we use the cMaxSteps limit to stop the next time. */
+                                if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_AFTER_RET)
+                                    pVM->dbgf.s.SteppingFilter.cMaxSteps = pVM->dbgf.s.SteppingFilter.cSteps + 1;
+                            }
+                            else if (pVM->dbgf.s.SteppingFilter.uCallDepth > 0)
+                                pVM->dbgf.s.SteppingFilter.uCallDepth--;
+                            break;
+                    }
+                    return false;
+                }
+                /*
+                 * Filtered step-into.
+                 */
+                else if (  pVM->dbgf.s.SteppingFilter.fFlags
+                         & (DBGF_STEP_F_STOP_ON_CALL | DBGF_STEP_F_STOP_ON_RET | DBGF_STEP_F_STOP_AFTER_RET))
+                {
+                    DBGFSTEPINSTRTYPE enmType = dbgfStepGetCurInstrType(pVM, pVCpu);
+                    switch (enmType)
+                    {
+                        default:
+                            break;
+                        case DBGFSTEPINSTRTYPE_CALL:
+                            if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_CALL)
+                                return true;
+                            break;
+                        case DBGFSTEPINSTRTYPE_RET:
+                            if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_RET)
+                                return true;
+                            /* If stopping after the return, we use the cMaxSteps limit to stop the next time. */
+                            if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_AFTER_RET)
+                                pVM->dbgf.s.SteppingFilter.cMaxSteps = pVM->dbgf.s.SteppingFilter.cSteps + 1;
+                            break;
+                    }
+                    return false;
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+
+/**
  * Step Into.
  *
  * A single step event is generated from this command.
@@ -1289,6 +1546,52 @@ VMMR3DECL(int) DBGFR3Resume(PUVM pUVM)
  */
 VMMR3DECL(int) DBGFR3Step(PUVM pUVM, VMCPUID idCpu)
 {
+    return DBGFR3StepEx(pUVM, idCpu, DBGF_STEP_F_INTO, NULL, NULL, 0, 1);
+}
+
+
+/**
+ * Full-fledged step.
+ *
+ * This extended stepping API allows for doing multiple steps before raising an
+ * event, helping implement step-over, step-out and other more advanced
+ * features.
+ *
+ * Like the DBGFR3Step() API, this will normally generate a DBGFEVENT_STEPPED or
+ * DBGFEVENT_STEPPED_HYPER event.  However, the stepping may be interrupted by other
+ * events, which will abort the stepping.
+ *
+ * The stop on pop area feature is for safeguarding step out.
+ *
+ * Please note, though, that it will always use stepping and never breakpoints.
+ * While this allows for much greater flexibility, it can at times be rather
+ * slow.
+ *
+ * @returns VBox status code.
+ * @param   pUVM            The user mode VM handle.
+ * @param   idCpu           The ID of the CPU to single step on.
+ * @param   fFlags          Flags controlling the stepping, DBGF_STEP_F_XXX.
+ *                          Either DBGF_STEP_F_INTO or DBGF_STEP_F_OVER must
+ *                          always be specified.
+ * @param   pStopPcAddr     Address to stop executing at.  Completely ignored
+ *                          unless DBGF_STEP_F_STOP_ON_ADDRESS is specified.
+ * @param   pStopPopAddr    Stack address that SP must be lower than when
+ *                          performing DBGF_STEP_F_STOP_ON_STACK_POP filtering.
+ * @param   cbStopPop       The range starting at @a pStopPopAddr which is
+ *                          considered to be within the same thread stack. Note
+ *                          that the API allows @a pStopPopAddr and @a cbStopPop
+ *                          to form an area that wraps around and it will
+ *                          consider the part starting at 0 as included.
+ * @param   cMaxSteps       The maximum number of steps to take.  This is to
+ *                          prevent stepping forever, so passing UINT32_MAX is
+ *                          not recommended.
+ *
+ * @remarks The two address arguments must be guest context virtual addresses,
+ *          or HMA.  The code doesn't make much of a point out of HMA, though.
+ */
+VMMR3DECL(int) DBGFR3StepEx(PUVM pUVM, VMCPUID idCpu, uint32_t fFlags, PCDBGFADDRESS pStopPcAddr,
+                            PCDBGFADDRESS pStopPopAddr, RTGCUINTPTR cbStopPop, uint32_t cMaxSteps)
+{
     /*
      * Check state.
      */
@@ -1296,15 +1599,57 @@ VMMR3DECL(int) DBGFR3Step(PUVM pUVM, VMCPUID idCpu)
     PVM pVM = pUVM->pVM;
     VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
     AssertReturn(idCpu < pVM->cCpus, VERR_INVALID_PARAMETER);
+    AssertReturn(!(fFlags & ~DBGF_STEP_F_VALID_MASK), VERR_INVALID_FLAGS);
+    AssertReturn(RT_BOOL(fFlags & DBGF_STEP_F_INTO) != RT_BOOL(fFlags & DBGF_STEP_F_OVER), VERR_INVALID_FLAGS);
+    if (fFlags & DBGF_STEP_F_STOP_ON_ADDRESS)
+    {
+        AssertReturn(RT_VALID_PTR(pStopPcAddr), VERR_INVALID_POINTER);
+        AssertReturn(DBGFADDRESS_IS_VALID(pStopPcAddr), VERR_INVALID_PARAMETER);
+        AssertReturn(DBGFADDRESS_IS_VIRT_GC(pStopPcAddr), VERR_INVALID_PARAMETER);
+    }
+    AssertReturn(!(fFlags & DBGF_STEP_F_STOP_ON_STACK_POP) || RT_VALID_PTR(pStopPopAddr), VERR_INVALID_POINTER);
+    if (fFlags & DBGF_STEP_F_STOP_ON_STACK_POP)
+    {
+        AssertReturn(RT_VALID_PTR(pStopPopAddr), VERR_INVALID_POINTER);
+        AssertReturn(DBGFADDRESS_IS_VALID(pStopPopAddr), VERR_INVALID_PARAMETER);
+        AssertReturn(DBGFADDRESS_IS_VIRT_GC(pStopPopAddr), VERR_INVALID_PARAMETER);
+        AssertReturn(cbStopPop > 0, VERR_INVALID_PARAMETER);
+    }
+
     AssertReturn(pVM->dbgf.s.fAttached, VERR_DBGF_NOT_ATTACHED);
     if (RT_LIKELY(RTSemPongIsSpeaker(&pVM->dbgf.s.PingPong)))
     { /* likely */ }
     else
         return VERR_SEM_OUT_OF_TURN;
+    Assert(pVM->dbgf.s.SteppingFilter.idCpu == NIL_VMCPUID);
 
     /*
      * Send the ping back to the emulation thread telling it to run.
      */
+    if (fFlags == DBGF_STEP_F_INTO)
+        pVM->dbgf.s.SteppingFilter.idCpu = NIL_VMCPUID;
+    else
+        pVM->dbgf.s.SteppingFilter.idCpu = idCpu;
+    pVM->dbgf.s.SteppingFilter.fFlags = fFlags;
+    if (fFlags & DBGF_STEP_F_STOP_ON_ADDRESS)
+        pVM->dbgf.s.SteppingFilter.AddrPc = pStopPcAddr->FlatPtr;
+    else
+        pVM->dbgf.s.SteppingFilter.AddrPc = 0;
+    if (fFlags & DBGF_STEP_F_STOP_ON_STACK_POP)
+    {
+        pVM->dbgf.s.SteppingFilter.AddrStackPop = pStopPopAddr->FlatPtr;
+        pVM->dbgf.s.SteppingFilter.cbStackPop   = cbStopPop;
+    }
+    else
+    {
+        pVM->dbgf.s.SteppingFilter.AddrStackPop = 0;
+        pVM->dbgf.s.SteppingFilter.cbStackPop   = RTGCPTR_MAX;
+    }
+
+    pVM->dbgf.s.SteppingFilter.cMaxSteps    = cMaxSteps;
+    pVM->dbgf.s.SteppingFilter.cSteps       = 0;
+    pVM->dbgf.s.SteppingFilter.uCallDepth   = 0;
+
 /** @todo SMP (idCpu) */
     dbgfR3SetCmd(pVM, DBGFCMD_SINGLE_STEP);
     int rc = RTSemPong(&pVM->dbgf.s.PingPong);
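 
     To make the new API concrete, here is a sketch of how a debugger front end
     might drive it. The helper names are invented; only the DBGF types, flags
     and the DBGFR3StepEx signature come from this patch:
 
         #include <VBox/vmm/dbgf.h>
 
         /* Invented helper: step over a call by tracking the call depth and
            stopping when it is back at zero, capped by cMaxSteps. */
         static int myDbgStepOver(PUVM pUVM, VMCPUID idCpu)
         {
             return DBGFR3StepEx(pUVM, idCpu, DBGF_STEP_F_OVER, NULL /*pStopPcAddr*/,
                                 NULL /*pStopPopAddr*/, 0 /*cbStopPop*/, 100000 /*cMaxSteps*/);
         }
 
         /* Invented helper: step out by stopping right after returning, with the
            stop-on-stack-pop range over the caller's frame as the safeguard the
            doc comment above describes. */
         static int myDbgStepOut(PUVM pUVM, VMCPUID idCpu, PCDBGFADDRESS pStackFrame,
                                 RTGCUINTPTR cbFrame)
         {
             return DBGFR3StepEx(pUVM, idCpu,
                                 DBGF_STEP_F_OVER | DBGF_STEP_F_STOP_AFTER_RET
                                 | DBGF_STEP_F_STOP_ON_STACK_POP,
                                 NULL /*pStopPcAddr*/, pStackFrame, cbFrame,
                                 100000 /*cMaxSteps*/);
         }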
diff --git a/src/VBox/VMM/VMMR3/DBGFCpu.cpp b/src/VBox/VMM/VMMR3/DBGFCpu.cpp
index 39d3ef3..763f2d2 100644
--- a/src/VBox/VMM/VMMR3/DBGFCpu.cpp
+++ b/src/VBox/VMM/VMMR3/DBGFCpu.cpp
@@ -108,6 +108,44 @@ VMMR3DECL(bool) DBGFR3CpuIsIn64BitCode(PUVM pUVM, VMCPUID idCpu)
 
 
 /**
+ * Wrapper around CPUMIsGuestInV86ModeEx.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pVM             The cross context VM structure.
+ * @param   idCpu           The current CPU ID.
+ * @param   pfInV86Code     Where to return the result.
+ */
+static DECLCALLBACK(int) dbgfR3CpuInV86Code(PVM pVM, VMCPUID idCpu, bool *pfInV86Code)
+{
+    Assert(idCpu == VMMGetCpuId(pVM));
+    PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu);
+    *pfInV86Code = CPUMIsGuestInV86ModeEx(CPUMQueryGuestCtxPtr(pVCpu));
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Checks if the given CPU is executing V8086 code or not.
+ *
+ * @returns true / false accordingly.
+ * @param   pUVM        The user mode VM handle.
+ * @param   idCpu       The target CPU ID.
+ */
+VMMR3DECL(bool) DBGFR3CpuIsInV86Code(PUVM pUVM, VMCPUID idCpu)
+{
+    UVM_ASSERT_VALID_EXT_RETURN(pUVM, false);
+    VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, false);
+    AssertReturn(idCpu < pUVM->pVM->cCpus, false);
+
+    bool fInV86Code;
+    int rc = VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3CpuInV86Code, 3, pUVM->pVM, idCpu, &fInV86Code);
+    if (RT_FAILURE(rc))
+        return false;
+    return fInV86Code;
+}
+
+
+/**
  * Get the number of CPUs (or threads if you insist).
  *
  * @returns The number of CPUs
diff --git a/src/VBox/VMM/VMMR3/DBGFDisas.cpp b/src/VBox/VMM/VMMR3/DBGFDisas.cpp
index 630a582..ef11c1e 100644
--- a/src/VBox/VMM/VMMR3/DBGFDisas.cpp
+++ b/src/VBox/VMM/VMMR3/DBGFDisas.cpp
@@ -132,6 +132,7 @@ static int dbgfR3DisasInstrFirst(PVM pVM, PVMCPU pVCpu, PDBGFSELINFO pSelInfo, P
     {
         default:
             AssertFailed();
+            /* fall thru */
         case DBGF_DISAS_FLAGS_DEFAULT_MODE:
             enmCpuMode   = pState->f64Bits
                          ? DISCPUMODE_64BIT
diff --git a/src/VBox/VMM/VMMR3/EM.cpp b/src/VBox/VMM/VMMR3/EM.cpp
index c1e651d..ca6ffb0 100644
--- a/src/VBox/VMM/VMMR3/EM.cpp
+++ b/src/VBox/VMM/VMMR3/EM.cpp
@@ -2280,6 +2280,7 @@ VMMR3_INT_DECL(int) EMR3ExecuteVM(PVM pVM, PVMCPU pVCpu)
                         break;
                     }
                     /* fall through and get scheduled. */
+                    /* fall thru */
 
                 /*
                  * Reschedule.
@@ -2442,6 +2443,8 @@ VMMR3_INT_DECL(int) EMR3ExecuteVM(PVM pVM, PVMCPU pVCpu)
                         continue;
                     }
                     /* Else fall through and trigger a guru. */
+                    /* fall thru */
+
                 case VERR_VMM_RING0_ASSERTION:
                     Log(("EMR3ExecuteVM: %Rrc: %d -> %d (EMSTATE_GURU_MEDITATION)\n", rc, enmOldState, EMSTATE_GURU_MEDITATION));
                     pVCpu->em.s.enmState = EMSTATE_GURU_MEDITATION;
diff --git a/src/VBox/VMM/VMMR3/EMRaw.cpp b/src/VBox/VMM/VMMR3/EMRaw.cpp
index 6ddd29c..3cd6081 100644
--- a/src/VBox/VMM/VMMR3/EMRaw.cpp
+++ b/src/VBox/VMM/VMMR3/EMRaw.cpp
@@ -1030,6 +1030,7 @@ static int emR3RawPrivileged(PVM pVM, PVMCPU pVCpu)
                             pCtx->eip = pOrgInstrGC;
                     }
                     /* no break; we could just return VINF_EM_HALT here */
+                    /* fall thru */
 
                 case OP_MOV_CR:
                 case OP_MOV_DR:
diff --git a/src/VBox/VMM/VMMR3/HM.cpp b/src/VBox/VMM/VMMR3/HM.cpp
index 6e26cf8..7e50118 100644
--- a/src/VBox/VMM/VMMR3/HM.cpp
+++ b/src/VBox/VMM/VMMR3/HM.cpp
@@ -1619,17 +1619,17 @@ static int hmR3InitFinalizeR0Amd(PVM pVM)
     static const struct { uint32_t fFlag; const char *pszName; } s_aSvmFeatures[] =
     {
 #define HMSVM_REPORT_FEATURE(a_StrDesc, a_Define) { a_Define, a_StrDesc }
-        HMSVM_REPORT_FEATURE("NESTED_PAGING",          AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING),
-        HMSVM_REPORT_FEATURE("LBR_VIRT",               AMD_CPUID_SVM_FEATURE_EDX_LBR_VIRT),
-        HMSVM_REPORT_FEATURE("SVM_LOCK",               AMD_CPUID_SVM_FEATURE_EDX_SVM_LOCK),
-        HMSVM_REPORT_FEATURE("NRIP_SAVE",              AMD_CPUID_SVM_FEATURE_EDX_NRIP_SAVE),
-        HMSVM_REPORT_FEATURE("TSC_RATE_MSR",           AMD_CPUID_SVM_FEATURE_EDX_TSC_RATE_MSR),
-        HMSVM_REPORT_FEATURE("VMCB_CLEAN",             AMD_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN),
-        HMSVM_REPORT_FEATURE("FLUSH_BY_ASID",          AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID),
-        HMSVM_REPORT_FEATURE("DECODE_ASSIST",          AMD_CPUID_SVM_FEATURE_EDX_DECODE_ASSIST),
-        HMSVM_REPORT_FEATURE("PAUSE_FILTER",           AMD_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER),
-        HMSVM_REPORT_FEATURE("PAUSE_FILTER_THRESHOLD", AMD_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD),
-        HMSVM_REPORT_FEATURE("AVIC",                   AMD_CPUID_SVM_FEATURE_EDX_AVIC),
+        HMSVM_REPORT_FEATURE("NESTED_PAGING",          X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING),
+        HMSVM_REPORT_FEATURE("LBR_VIRT",               X86_CPUID_SVM_FEATURE_EDX_LBR_VIRT),
+        HMSVM_REPORT_FEATURE("SVM_LOCK",               X86_CPUID_SVM_FEATURE_EDX_SVM_LOCK),
+        HMSVM_REPORT_FEATURE("NRIP_SAVE",              X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE),
+        HMSVM_REPORT_FEATURE("TSC_RATE_MSR",           X86_CPUID_SVM_FEATURE_EDX_TSC_RATE_MSR),
+        HMSVM_REPORT_FEATURE("VMCB_CLEAN",             X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN),
+        HMSVM_REPORT_FEATURE("FLUSH_BY_ASID",          X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID),
+        HMSVM_REPORT_FEATURE("DECODE_ASSIST",          X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSIST),
+        HMSVM_REPORT_FEATURE("PAUSE_FILTER",           X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER),
+        HMSVM_REPORT_FEATURE("PAUSE_FILTER_THRESHOLD", X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD),
+        HMSVM_REPORT_FEATURE("AVIC",                   X86_CPUID_SVM_FEATURE_EDX_AVIC),
 #undef HMSVM_REPORT_FEATURE
     };
 
@@ -1649,7 +1649,7 @@ static int hmR3InitFinalizeR0Amd(PVM pVM)
      * Nested paging is determined in HMR3Init, verify the sanity of that.
      */
     AssertLogRelReturn(   !pVM->hm.s.fNestedPaging
-                       || (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING),
+                       || (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING),
                        VERR_HM_IPE_1);
 
 #if 0
diff --git a/src/VBox/VMM/VMMR3/IEMR3.cpp b/src/VBox/VMM/VMMR3/IEMR3.cpp
index 0f3e507..554e5f9 100644
--- a/src/VBox/VMM/VMMR3/IEMR3.cpp
+++ b/src/VBox/VMM/VMMR3/IEMR3.cpp
@@ -22,6 +22,7 @@
 #define LOG_GROUP LOG_GROUP_EM
 #include <VBox/vmm/iem.h>
 #include <VBox/vmm/cpum.h>
+#include <VBox/vmm/mm.h>
 #include "IEMInternal.h"
 #include <VBox/vmm/vm.h>
 #include <VBox/err.h>
@@ -113,6 +114,22 @@ VMMR3DECL(int)      IEMR3Init(PVM pVM)
         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.DataTlb.uTlbPhysRev, STAMTYPE_X64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
                         "Data TLB physical revision",               "/IEM/CPU%u/DataTlb-PhysRev", idCpu);
 
+#if defined(VBOX_WITH_STATISTICS) && !defined(DOXYGEN_RUNNING)
+        /* Allocate instruction statistics and register them. */
+        pVCpu->iem.s.pStatsR3 = (PIEMINSTRSTATS)MMR3HeapAllocZ(pVM, MM_TAG_IEM, sizeof(IEMINSTRSTATS));
+        AssertLogRelReturn(pVCpu->iem.s.pStatsR3, VERR_NO_MEMORY);
+        int rc = MMHyperAlloc(pVM, sizeof(IEMINSTRSTATS), sizeof(uint64_t), MM_TAG_IEM, (void **)&pVCpu->iem.s.pStatsCCR3);
+        AssertLogRelRCReturn(rc, rc);
+        pVCpu->iem.s.pStatsR0 = MMHyperR3ToR0(pVM, pVCpu->iem.s.pStatsCCR3);
+        pVCpu->iem.s.pStatsRC = MMHyperR3ToRC(pVM, pVCpu->iem.s.pStatsCCR3);
+# define IEM_DO_INSTR_STAT(a_Name, a_szDesc) \
+            STAMR3RegisterF(pVM, &pVCpu->iem.s.pStatsCCR3->a_Name, STAMTYPE_U32_RESET, STAMVISIBILITY_USED, \
+                            STAMUNIT_COUNT, a_szDesc, "/IEM/CPU%u/instr-RZ/" #a_Name, idCpu); \
+            STAMR3RegisterF(pVM, &pVCpu->iem.s.pStatsR3->a_Name, STAMTYPE_U32_RESET, STAMVISIBILITY_USED, \
+                            STAMUNIT_COUNT, a_szDesc, "/IEM/CPU%u/instr-R3/" #a_Name, idCpu);
+# include "IEMInstructionStatisticsTmpl.h"
+# undef IEM_DO_INSTR_STAT
+#endif
 
         /*
          * Host and guest CPU information.
@@ -161,6 +178,14 @@ VMMR3DECL(int)      IEMR3Init(PVM pVM)
 VMMR3DECL(int)      IEMR3Term(PVM pVM)
 {
     NOREF(pVM);
+#if defined(VBOX_WITH_STATISTICS) && !defined(DOXYGEN_RUNNING)
+    for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+    {
+        PVMCPU pVCpu = &pVM->aCpus[idCpu];
+        MMR3HeapFree(pVCpu->iem.s.pStatsR3);
+        pVCpu->iem.s.pStatsR3 = NULL;
+    }
+#endif
     return VINF_SUCCESS;
 }
 
@@ -168,6 +193,10 @@ VMMR3DECL(int)      IEMR3Term(PVM pVM)
 VMMR3DECL(void)     IEMR3Relocate(PVM pVM)
 {
     for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+    {
         pVM->aCpus[idCpu].iem.s.pCtxRC = VM_RC_ADDR(pVM, pVM->aCpus[idCpu].iem.s.pCtxR3);
+        if (pVM->aCpus[idCpu].iem.s.pStatsRC)
+            pVM->aCpus[idCpu].iem.s.pStatsRC = MMHyperR3ToRC(pVM, pVM->aCpus[idCpu].iem.s.pStatsCCR3);
+    }
 }
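 
 The statistics registration in the IEMR3Init hunk above is the classic X-macro
 technique: IEM_DO_INSTR_STAT is defined, IEMInstructionStatisticsTmpl.h is
 included so the macro expands once per instruction, and the macro is undefined
 again. The same list therefore drives both the IEMINSTRSTATS fields and the
 STAM registrations. A generic, self-contained illustration of the pattern
 (placeholder instruction names, not the real template):
 
     #include <stdio.h>
 
     /* One list, many expansions; the real code includes a template header
        instead of using a list macro, but the mechanics are the same. */
     #define MY_INSTR_LIST(X) \
         X(Add,  "add instruction")  \
         X(Mov,  "mov instruction")  \
         X(Call, "call instruction")
 
     /* First expansion: declare one counter field per instruction. */
     typedef struct MYSTATS
     {
     #define MY_DO_STAT(a_Name, a_szDesc) unsigned a_Name;
         MY_INSTR_LIST(MY_DO_STAT)
     #undef MY_DO_STAT
     } MYSTATS;
 
     int main(void)
     {
         MYSTATS Stats = { 0 };
         Stats.Mov = 42;
 
     /* Second expansion: report each counter with its description. */
     #define MY_DO_STAT(a_Name, a_szDesc) \
             printf("%-6s %-18s %u\n", #a_Name, a_szDesc, Stats.a_Name);
         MY_INSTR_LIST(MY_DO_STAT)
     #undef MY_DO_STAT
         return 0;
     }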
 
diff --git a/src/VBox/VMM/VMMR3/PATM.cpp b/src/VBox/VMM/VMMR3/PATM.cpp
index 2b3d797..a4d4752 100644
--- a/src/VBox/VMM/VMMR3/PATM.cpp
+++ b/src/VBox/VMM/VMMR3/PATM.cpp
@@ -1555,7 +1555,7 @@ static int patmAnalyseBlockCallback(PVM pVM, DISCPUSTATE *pCpu, RCPTRTYPE(uint8_
             }
             break;  /* sti doesn't mark the end of a pushf block; only popf does. */
         }
-        /* else: fall through. */
+        /* fall thru */
     case OP_RETN: /* exit point for function replacement */
         return VINF_SUCCESS;
 
@@ -5204,10 +5204,10 @@ VMMR3_INT_DECL(int) PATMR3ReadOrgInstr(PVM pVM, RTGCPTR32 GCPtrInstr, uint8_t *p
             cbToRead = cbMax;
         switch (cbToRead)
         {
-            case 5: pbDst[4] = pbSrc[4];
-            case 4: pbDst[3] = pbSrc[3];
-            case 3: pbDst[2] = pbSrc[2];
-            case 2: pbDst[1] = pbSrc[1];
+            case 5: pbDst[4] = pbSrc[4]; /* fall thru */
+            case 4: pbDst[3] = pbSrc[3]; /* fall thru */
+            case 3: pbDst[2] = pbSrc[2]; /* fall thru */
+            case 2: pbDst[1] = pbSrc[1]; /* fall thru */
             case 1: pbDst[0] = pbSrc[0];
                 break;
             default:
diff --git a/src/VBox/VMM/VMMR3/PATMA.asm b/src/VBox/VMM/VMMR3/PATMA.asm
index bc47148..64029f9 100644
--- a/src/VBox/VMM/VMMR3/PATMA.asm
+++ b/src/VBox/VMM/VMMR3/PATMA.asm
@@ -58,7 +58,7 @@ istruc PATCHASMRECORD
     at PATCHASMRECORD.offSizeOverride,DD          0
     at PATCHASMRECORD.cbFunction,     DD          NAME(%1 %+ _EndProc) - NAME(%1)
     at PATCHASMRECORD.cRelocs,        DD          %2
-iend                                                         
+iend
 %endmacro
 
 ;;
@@ -75,7 +75,7 @@ istruc PATCHASMRECORD
     at PATCHASMRECORD.offSizeOverride,DD          0
     at PATCHASMRECORD.cbFunction,     DD          NAME(%1 %+ _EndProc) - NAME(%1)
     at PATCHASMRECORD.cRelocs,        DD          %3
-iend                                                         
+iend
 %endmacro
 
 ;;
@@ -106,7 +106,7 @@ istruc PATCHASMRECORD
 %endif
     at PATCHASMRECORD.cbFunction,     DD          NAME(%1 %+ _EndProc) - NAME(%1)
     at PATCHASMRECORD.cRelocs,        DD          %5
-iend                                                         
+iend
 %endmacro
 
 ;;
@@ -123,7 +123,7 @@ BEGINCODE
 %endmacro
 
 ;;
-; Switches to the data section for the read-only patch descriptor data and 
+; Switches to the data section for the read-only patch descriptor data and
 ; aligns it appropriately.
 ;
 ; @remarks This section must be different from the patch code section!
@@ -176,7 +176,7 @@ BEGIN_PATCH_CODE_SECTION_NO_ALIGN
 
 ;;
 ; Ends a patch.
-; 
+;
 ; This terminates the function and fixup array.
 ;
 ; @param %1     The patch record name (externally visible).
@@ -197,7 +197,7 @@ BEGIN_PATCH_CODE_SECTION_NO_ALIGN
 ; Switch to 32-bit mode (x86).
 ;
 %ifdef RT_ARCH_AMD64
- BITS 32 
+ BITS 32
 %endif
 
 
@@ -356,7 +356,7 @@ ENDPROC     PATMClearInhibitIRQContIF0
 BEGIN_PATCH_RODATA_SECTION
 GLOBALNAME g_patmClearInhibitIRQContIF0Record
     PATCHASMRECORD_INIT PATMClearInhibitIRQContIF0, 11
-    DD      PATM_ASMFIX_INTERRUPTFLAG,      0   
+    DD      PATM_ASMFIX_INTERRUPTFLAG,      0
     DD      PATM_ASMFIX_INHIBITIRQADDR,     0
     DD      PATM_ASMFIX_VMFLAGS,            0
     DD      PATM_ASMFIX_VM_FORCEDACTIONS,   0
@@ -1603,9 +1603,9 @@ GLOBALNAME g_patmIretRing1Record
     PATCHASMRECORD_INIT PATMIretRing1Replacement, 25
 %endif
     DD      PATM_ASMFIX_INTERRUPTFLAG,      0
-%ifdef PATM_LOG_PATCHIRET            
+%ifdef PATM_LOG_PATCHIRET
     DD      PATM_ASMFIX_PENDINGACTION,      0
-%endif                               
+%endif
     DD      PATM_ASMFIX_VM_FORCEDACTIONS,   0
     DD      PATM_ASMFIX_TEMP_EAX,           0
     DD      PATM_ASMFIX_TEMP_ECX,           0
@@ -1770,7 +1770,7 @@ ENDPROC PATMJEcxReplacement
 
 ; Patch record for 'JEcx'
 BEGIN_PATCH_RODATA_SECTION
-GLOBALNAME g_patmJEcxRecord 
+GLOBALNAME g_patmJEcxRecord
     PATCHASMRECORD_INIT_EX PATMJEcxReplacement, , PATMJEcxJump, PATMJEcxSizeOverride, 3
     DD      PATM_ASMFIX_INTERRUPTFLAG, 0
     DD      PATM_ASMFIX_INTERRUPTFLAG, 0
@@ -2010,9 +2010,9 @@ GLOBALNAME g_patmLookupAndCallRecord
     DD      PATM_ASMFIX_STACKBASE,              0
     DD      PATM_ASMFIX_STACKBASE_GUEST,        0
     DD      PATM_ASMFIX_CALL_PATCH_TARGET_ADDR, 0
-%ifdef PATM_LOG_PATCHINSTR               
+%ifdef PATM_LOG_PATCHINSTR
     DD      PATM_ASMFIX_PENDINGACTION,          0
-%endif                                   
+%endif
     DD      PATM_ASMFIX_CALL_RETURN_ADDR,       0
     DD      PATM_ASMFIX_CALL_PATCH_TARGET_ADDR, 0
     DD      0ffffffffh, 0ffffffffh
@@ -2462,12 +2462,12 @@ GLOBALNAME g_patmRetFunctionRecord
     DD      PATM_ASMFIX_STACKBASE_GUEST, 0
     DD      PATM_ASMFIX_STACKBASE,       0
     DD      PATM_ASMFIX_PATCHBASE,       0
-%ifdef PATM_LOG_PATCHINSTR        
+%ifdef PATM_LOG_PATCHINSTR
     DD      PATM_ASMFIX_PENDINGACTION,   0
-%endif                            
+%endif
     DD      PATM_ASMFIX_PENDINGACTION,   0
     DD      PATM_ASMFIX_PATCHBASE,       0
-%ifdef PATM_LOG_PATCHINSTR        
+%ifdef PATM_LOG_PATCHINSTR
     DD      PATM_ASMFIX_PENDINGACTION,   0
 %endif
     DD      0ffffffffh, 0ffffffffh
@@ -2523,9 +2523,9 @@ GLOBALNAME g_patmCheckIFRecord
     DD      PATM_ASMFIX_VMFLAGS,       0
     DD      PATM_ASMFIX_INTERRUPTFLAG, 0
     DD      PATM_ASMFIX_STACKPTR,      0
-%ifdef PATM_LOG_PATCHINSTR      
+%ifdef PATM_LOG_PATCHINSTR
     DD      PATM_ASMFIX_PENDINGACTION, 0
-%endif                          
+%endif
     DD      PATM_ASMFIX_INTERRUPTFLAG, 0
     DD      0ffffffffh, 0ffffffffh
 
diff --git a/src/VBox/VMM/VMMR3/PATMA.mac b/src/VBox/VMM/VMMR3/PATMA.mac
index 490c0ad..7b29948 100644
--- a/src/VBox/VMM/VMMR3/PATMA.mac
+++ b/src/VBox/VMM/VMMR3/PATMA.mac
@@ -75,7 +75,7 @@
 ;; @}
 
 
-;; Everything except IOPL, NT, IF, VM, VIF, VIP and RF 
+;; Everything except IOPL, NT, IF, VM, VIF, VIP and RF
 %define PATM_FLAGS_MASK      (X86_EFL_CF|X86_EFL_PF|X86_EFL_AF|X86_EFL_ZF|X86_EFL_SF|X86_EFL_TF|X86_EFL_DF|X86_EFL_OF|X86_EFL_AC|X86_EFL_ID)
 
 ; currently only IF & IOPL
@@ -133,10 +133,10 @@ struc PATCHASMRECORD
     .pbFunction         RTCCPTR_RES 1
     ;; Offset of the jump table?
     .offJump            resd 1
-    ;; Used only by loop/loopz/loopnz. 
-    .offRelJump         resd 1        
+    ;; Used only by loop/loopz/loopnz.
+    .offRelJump         resd 1
     ;; Size override byte position.
-    .offSizeOverride    resd 1        
+    .offSizeOverride    resd 1
     ;; The size of the patch function.
     .cbFunction         resd 1
     ;; The number of relocations in aRelocs.
diff --git a/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp b/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp
index dbab87a..921358a 100644
--- a/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp
+++ b/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp
@@ -1464,9 +1464,11 @@ static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAI
              * but to get the cause of the error (disk full, file too big, I/O error, ...)
              * the transfer needs to be continued.
              */
-            if (RT_UNLIKELY(   cbTransfered < pTask->DataSeg.cbSeg
+            pTask->cbTransfered += cbTransfered;
+
+            if (RT_UNLIKELY(   pTask->cbTransfered < pTask->DataSeg.cbSeg
                             || (   pTask->cbBounceBuffer
-                                && cbTransfered < pTask->cbBounceBuffer)))
+                                && pTask->cbTransfered < pTask->cbBounceBuffer)))
             {
                 RTFOFF offStart;
                 size_t cbToTransfer;
@@ -1479,16 +1481,16 @@ static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAI
                 if (pTask->cbBounceBuffer)
                 {
                     AssertPtr(pTask->pvBounceBuffer);
-                    offStart     = (pTask->Off & ~((RTFOFF)512-1)) + cbTransfered;
-                    cbToTransfer = pTask->cbBounceBuffer - cbTransfered;
-                    pbBuf        = (uint8_t *)pTask->pvBounceBuffer + cbTransfered;
+                    offStart     = (pTask->Off & ~((RTFOFF)512-1)) + pTask->cbTransfered;
+                    cbToTransfer = pTask->cbBounceBuffer - pTask->cbTransfered;
+                    pbBuf        = (uint8_t *)pTask->pvBounceBuffer + pTask->cbTransfered;
                 }
                 else
                 {
                     Assert(!pTask->pvBounceBuffer);
-                    offStart     = pTask->Off + cbTransfered;
-                    cbToTransfer = pTask->DataSeg.cbSeg - cbTransfered;
-                    pbBuf        = (uint8_t *)pTask->DataSeg.pvSeg + cbTransfered;
+                    offStart     = pTask->Off + pTask->cbTransfered;
+                    cbToTransfer = pTask->DataSeg.cbSeg - pTask->cbTransfered;
+                    pbBuf        = (uint8_t *)pTask->DataSeg.pvSeg + pTask->cbTransfered;
                 }
 
                 if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
@@ -1524,6 +1526,8 @@ static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAI
                 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
                 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
 
+                pTask->cbTransfered = 0;
+
                 /* Grow the file if needed. */
                 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
                 {
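 
 The fix above addresses requests that complete partially more than once: the
 cbTransfered value of the latest completion alone understates overall
 progress, so the patch accumulates it in pTask->cbTransfered, computes the
 resume offset and remaining size from the running total, and resets the total
 when a new round starts. A toy version of the same accumulate-and-resume loop
 (invented names, an ordinary synchronous copy standing in for async I/O):
 
     #include <assert.h>
     #include <stddef.h>
     #include <string.h>
 
     /* Stands in for an I/O operation that may complete only partially;
        here it moves at most 4 bytes per round. */
     static size_t fakePartialIo(unsigned char *pbDst, const unsigned char *pbSrc, size_t cb)
     {
         size_t cbDone = cb > 4 ? 4 : cb;
         memcpy(pbDst, pbSrc, cbDone);
         return cbDone;
     }
 
     int main(void)
     {
         const unsigned char abSrc[10] = "123456789";
         unsigned char       abDst[10] = { 0 };
         size_t              cbTransfered = 0;   /* running total, like pTask->cbTransfered */
         while (cbTransfered < sizeof(abSrc))
             cbTransfered += fakePartialIo(&abDst[cbTransfered], &abSrc[cbTransfered],
                                           sizeof(abSrc) - cbTransfered);
         assert(memcmp(abDst, abSrc, sizeof(abSrc)) == 0);
         return 0;
     }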
diff --git a/src/VBox/VMM/VMMR3/PDMDriver.cpp b/src/VBox/VMM/VMMR3/PDMDriver.cpp
index 1de9e1f..68f73a6 100644
--- a/src/VBox/VMM/VMMR3/PDMDriver.cpp
+++ b/src/VBox/VMM/VMMR3/PDMDriver.cpp
@@ -424,16 +424,20 @@ static int pdmR3DrvMaybeTransformChain(PVM pVM, PPDMDRVINS pDrvAbove, PPDMLUN pL
         rc = CFGMR3GetName(pCurTrans, szCurTransNm, sizeof(szCurTransNm));
         AssertLogRelRCReturn(rc, rc);
 
-        /* Match against the driver multi pattern. */
+        /** @cfgm{/PDM/DriverTransformations/\<name\>/Device,string,*}
+         * One or more simple wildcard patterns separated by '|' for matching
+         * the devices this transformation rule applies to. */
         char *pszMultiPat;
-        rc = CFGMR3QueryStringAllocDef(pCurTrans, "Driver", &pszMultiPat, "*");
+        rc = CFGMR3QueryStringAllocDef(pCurTrans, "Device", &pszMultiPat, "*");
         AssertLogRelRCReturn(rc, rc);
         bool fMatch = RTStrSimplePatternMultiMatch(pszMultiPat, RTSTR_MAX, pszDevice, RTSTR_MAX, NULL);
         MMR3HeapFree(pszMultiPat);
         if (!fMatch)
             continue;
 
-        /* Match against the lun multi pattern. */
+        /** @cfgm{/PDM/DriverTransformations/\<name\>/LUN,string,*}
+         * One or more simple wildcard patterns separated by '|' for matching
+         * the LUNs this transformation rule applies to. */
         rc = CFGMR3QueryStringAllocDef(pCurTrans, "LUN", &pszMultiPat, "*");
         AssertLogRelRCReturn(rc, rc);
         fMatch = RTStrSimplePatternMultiMatch(pszMultiPat, RTSTR_MAX, szLun, RTSTR_MAX, NULL);
@@ -441,7 +445,12 @@ static int pdmR3DrvMaybeTransformChain(PVM pVM, PPDMDRVINS pDrvAbove, PPDMLUN pL
         if (!fMatch)
             continue;
 
-        /* Match against the below-driver multi pattern. */
+        /** @cfgm{/PDM/DriverTransformations/\<name\>/BelowDriver,string,*}
+         * One or more simple wildcard patterns separated by '|' for matching the
+         * drivers the transformation should be applied below.  This means that
+         * when a driver matched here attaches another driver below it, the
+         * transformation will be applied.  To represent the device, '\<top\>' is
+         * used. */
         rc = CFGMR3QueryStringAllocDef(pCurTrans, "BelowDriver", &pszMultiPat, "*");
         AssertLogRelRCReturn(rc, rc);
         fMatch = RTStrSimplePatternMultiMatch(pszMultiPat, RTSTR_MAX, pszAbove, RTSTR_MAX, NULL);
@@ -449,7 +458,12 @@ static int pdmR3DrvMaybeTransformChain(PVM pVM, PPDMDRVINS pDrvAbove, PPDMLUN pL
         if (!fMatch)
             continue;
 
-        /* Match against the above-driver multi pattern. */
+        /** @cfgm{/PDM/DriverTransformations/\<name\>/AboveDriver,string,*}
+         * One or more simple wildcard patterns separated by '|' for matching the
+         * drivers the transformation should be applied above or at (depending on
+         * the action).  The value being matched against here is the driver that
+         * is in the process of being attached, so for mergeconfig actions this is
+         * usually what you need to match on. */
         rc = CFGMR3QueryStringAlloc(pCurTrans, "AboveDriver", &pszMultiPat);
         if (rc == VERR_CFGM_VALUE_NOT_FOUND)
             rc = VINF_SUCCESS;
@@ -468,7 +482,7 @@ static int pdmR3DrvMaybeTransformChain(PVM pVM, PPDMDRVINS pDrvAbove, PPDMLUN pL
          * We've got a match! Now, what are we supposed to do?
          */
         /** @cfgm{/PDM/DriverTransformations/<name>/Action,string,inject}
-         * The action that the transformation takes.  Possible values are:
+         * The action that the transformation takes.  Possible values are:
          *      - inject
          *      - mergeconfig: This merges the content of the 'Config' key under the
          *        transformation into the driver's own 'Config' key, replacing any
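 
 Putting the keys documented in this hunk together, a hypothetical
 transformation rule in the CFGM tree might look like this (every value is a
 made-up example; AboveDriver is optional and omitted here):
 
     /PDM/DriverTransformations/MyRule/
         Device      = "e1000"          ; wildcard pattern for matching devices
         LUN         = "0"              ; wildcard pattern for matching LUNs
         BelowDriver = "NAT"            ; apply when these drivers attach another below
         Action      = "mergeconfig"    ; or "inject", the default
         Config/                        ; subtree merged into the driver's own Config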
diff --git a/src/VBox/VMM/VMMR3/PDMNetShaper.cpp b/src/VBox/VMM/VMMR3/PDMNetShaper.cpp
index e021948..2633608 100644
--- a/src/VBox/VMM/VMMR3/PDMNetShaper.cpp
+++ b/src/VBox/VMM/VMMR3/PDMNetShaper.cpp
@@ -475,6 +475,8 @@ int pdmR3NetShaperTerm(PVM pVM)
     }
 
     RTCritSectDelete(&pShaper->Lock);
+    MMR3HeapFree(pShaper);
+    pUVM->pdm.s.pNetShaper = NULL;
     return VINF_SUCCESS;
 }
 
diff --git a/src/VBox/VMM/VMMR3/PGM.cpp b/src/VBox/VMM/VMMR3/PGM.cpp
index 1ecbd72..65c3680 100644
--- a/src/VBox/VMM/VMMR3/PGM.cpp
+++ b/src/VBox/VMM/VMMR3/PGM.cpp
@@ -3593,6 +3593,7 @@ VMMR3DECL(int) PGMR3ChangeMode(PVM pVM, PVMCPU pVCpu, PGMMODE enmGuestMode)
                 case PGMMODE_AMD64:
                 case PGMMODE_AMD64_NX:
                     AssertMsgFailed(("Should use PAE shadow mode!\n"));
+                    /* fall thru */
                 default: AssertFailed(); break;
             }
             break;
@@ -3617,6 +3618,7 @@ VMMR3DECL(int) PGMR3ChangeMode(PVM pVM, PVMCPU pVCpu, PGMMODE enmGuestMode)
                 case PGMMODE_AMD64:
                 case PGMMODE_AMD64_NX:
                     AssertMsgFailed(("Should use PAE shadow mode!\n"));
+                    /* fall thru */
                 default: AssertFailed(); break;
             }
             break;
@@ -3642,6 +3644,7 @@ VMMR3DECL(int) PGMR3ChangeMode(PVM pVM, PVMCPU pVCpu, PGMMODE enmGuestMode)
                 case PGMMODE_AMD64:
                 case PGMMODE_AMD64_NX:
                     AssertMsgFailed(("Should use PAE shadow mode!\n"));
+                    /* fall thru */
                 default: AssertFailed(); break;
             }
             break;
@@ -3674,6 +3677,7 @@ VMMR3DECL(int) PGMR3ChangeMode(PVM pVM, PVMCPU pVCpu, PGMMODE enmGuestMode)
                 case PGMMODE_AMD64:
                 case PGMMODE_AMD64_NX:
                     AssertMsgFailed(("Should use PAE shadow mode!\n"));
+                    /* fall thru */
                 default: AssertFailed(); break;
             }
             break;
@@ -3700,6 +3704,7 @@ VMMR3DECL(int) PGMR3ChangeMode(PVM pVM, PVMCPU pVCpu, PGMMODE enmGuestMode)
                 case PGMMODE_PAE:
                 case PGMMODE_PAE_NX:
                     AssertMsgFailed(("Should use AMD64 shadow mode!\n"));
+                    /* fall thru */
                 default: AssertFailed(); break;
             }
             break;
@@ -4029,6 +4034,7 @@ static DECLCALLBACK(int) pgmR3CmdPhysToFile(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp,
 
                     default:
                         AssertFailed();
+                        /* fall thru */
                     case PGMPAGETYPE_MMIO:
                     case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
                     case PGMPAGETYPE_SPECIAL_ALIAS_MMIO:
diff --git a/src/VBox/VMM/VMMR3/PGMPhys.cpp b/src/VBox/VMM/VMMR3/PGMPhys.cpp
index d81b78d..6944430 100644
--- a/src/VBox/VMM/VMMR3/PGMPhys.cpp
+++ b/src/VBox/VMM/VMMR3/PGMPhys.cpp
@@ -2016,7 +2016,7 @@ int pgmR3PhysRamZeroAll(PVM pVM)
                             case PGM_PAGE_STATE_WRITE_MONITORED:
                                 rc = pgmPhysPageMakeWritable(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT));
                                 AssertLogRelRCReturn(rc, rc);
-                                /* no break */
+                                /* fall thru */
 
                             case PGM_PAGE_STATE_ALLOCATED:
                                 if (pVM->pgm.s.fZeroRamPagesOnReset)
@@ -4779,8 +4779,8 @@ int pgmR3PhysChunkMap(PVM pVM, uint32_t idChunk, PPPGMCHUNKR3MAP ppChunk)
                         pgmR3PhysUnmapChunkRendezvous(pVM, pVCpu, NULL);
                         break;
                     }
-                    /* fall thru */
                 }
+                /* fall thru */
                 default:
                     rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysUnmapChunk, 1, pVM);
                     AssertRC(rc);
@@ -5338,6 +5338,7 @@ VMMR3DECL(int) PGMR3PhysTlbGCPhys2Ptr(PVM pVM, RTGCPHYS GCPhys, bool fWritable,
                     case PGM_PAGE_STATE_SHARED:
                         if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE)
                             break;
+                        /* fall thru */
                     case PGM_PAGE_STATE_WRITE_MONITORED:
                         rc2 = pgmPhysPageMakeWritable(pVM, pPage, GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
                         AssertLogRelRCReturn(rc2, rc2);
diff --git a/src/VBox/VMM/VMMR3/PGMPool.cpp b/src/VBox/VMM/VMMR3/PGMPool.cpp
index c0c5fd6..b6becf8 100644
--- a/src/VBox/VMM/VMMR3/PGMPool.cpp
+++ b/src/VBox/VMM/VMMR3/PGMPool.cpp
@@ -631,8 +631,8 @@ DECLCALLBACK(VBOXSTRICTRC) pgmR3PoolClearAllRendezvous(PVM pVM, PVMCPU pVCpu, vo
                     }
                 }
                 /* fall thru */
-
 #ifdef PGM_WITH_LARGE_PAGES
+                /* fall thru */
                 default_case:
 #endif
                 default:
diff --git a/src/VBox/VMM/VMMR3/PGMSavedState.cpp b/src/VBox/VMM/VMMR3/PGMSavedState.cpp
index 3931370..3cfebbe 100644
--- a/src/VBox/VMM/VMMR3/PGMSavedState.cpp
+++ b/src/VBox/VMM/VMMR3/PGMSavedState.cpp
@@ -1178,6 +1178,7 @@ static int pgmR3PrepRamPages(PVM pVM)
 
                         default:
                             AssertMsgFailed(("%R[pgmpage]", pPage));
+                            /* fall thru */
                         case PGMPAGETYPE_MMIO2:
                         case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
                             paLSPages[iPage].fZero   = 0;
diff --git a/src/VBox/VMM/VMMR3/SSM.cpp b/src/VBox/VMM/VMMR3/SSM.cpp
index 9747f99..9213fc9 100644
--- a/src/VBox/VMM/VMMR3/SSM.cpp
+++ b/src/VBox/VMM/VMMR3/SSM.cpp
@@ -6219,12 +6219,16 @@ static int ssmR3DataReadRecHdrV2(PSSMHANDLE pSSM)
         {
             case 6:
                 AssertLogRelMsgReturn((abHdr[6] & 0xc0) == 0x80, ("6/%u: %.*Rhxs\n", cb, cb + 1, &abHdr[0]), VERR_SSM_INTEGRITY_REC_HDR);
+                /* fall thru */
             case 5:
                 AssertLogRelMsgReturn((abHdr[5] & 0xc0) == 0x80, ("5/%u: %.*Rhxs\n", cb, cb + 1, &abHdr[0]), VERR_SSM_INTEGRITY_REC_HDR);
+                /* fall thru */
             case 4:
                 AssertLogRelMsgReturn((abHdr[4] & 0xc0) == 0x80, ("4/%u: %.*Rhxs\n", cb, cb + 1, &abHdr[0]), VERR_SSM_INTEGRITY_REC_HDR);
+                /* fall thru */
             case 3:
                 AssertLogRelMsgReturn((abHdr[3] & 0xc0) == 0x80, ("3/%u: %.*Rhxs\n", cb, cb + 1, &abHdr[0]), VERR_SSM_INTEGRITY_REC_HDR);
+                /* fall thru */
             case 2:
                 AssertLogRelMsgReturn((abHdr[2] & 0xc0) == 0x80, ("2/%u: %.*Rhxs\n", cb, cb + 1, &abHdr[0]), VERR_SSM_INTEGRITY_REC_HDR);
                 break;
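 
 The descending switch above validates a UTF-8 style variable length field:
 every continuation byte of the version 2 record header must match 10xxxxxx,
 that is (b & 0xc0) == 0x80, and the fall-throughs let a single switch check
 bytes 6 down to 2. A small self-contained check of the same invariant (a
 sketch; the authoritative layout lives in SSM.cpp):
 
     #include <assert.h>
     #include <stdint.h>
 
     /* Byte 0 is the lead byte encoding the length; every following byte
        must have its two top bits set to 10. */
     static int areContinuationBytesValid(const uint8_t *pbHdr, unsigned cb)
     {
         for (unsigned i = 1; i < cb; i++)
             if ((pbHdr[i] & 0xc0) != 0x80)
                 return 0;
         return 1;
     }
 
     int main(void)
     {
         const uint8_t abGood[3] = { 0xe0, 0x80, 0xbf };
         const uint8_t abBad[3]  = { 0xe0, 0xc0, 0x80 };
         assert( areContinuationBytesValid(abGood, 3));
         assert(!areContinuationBytesValid(abBad, 3));
         return 0;
     }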
diff --git a/src/VBox/VMM/VMMR3/STAM.cpp b/src/VBox/VMM/VMMR3/STAM.cpp
index 5fa6166..8663bd0 100644
--- a/src/VBox/VMM/VMMR3/STAM.cpp
+++ b/src/VBox/VMM/VMMR3/STAM.cpp
@@ -2043,8 +2043,11 @@ static DECLCALLBACK(size_t) stamR3SnapshotOutput(void *pvArg, const char *pach,
     /*
      * Copy the chars to the buffer and terminate it.
      */
-    memcpy(pThis->psz, pach, cch);
-    pThis->psz += cch;
+    if (cch)
+    {
+        memcpy(pThis->psz, pach, cch);
+        pThis->psz += cch;
+    }
     *pThis->psz = '\0';
     return cch;
 }
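 
 The added guard matters because memcpy requires valid pointers even for a
 zero length, and a formatter callback may legitimately be handed an empty
 chunk whose source pointer is not usable. A minimal sketch of the same
 defensive pattern (invented names):
 
     #include <string.h>
 
     /* Skip the copy entirely for empty chunks, but always keep the
        destination buffer terminated. */
     static void safeAppend(char **ppszDst, const char *pach, size_t cch)
     {
         if (cch)
         {
             memcpy(*ppszDst, pach, cch);
             *ppszDst += cch;
         }
         **ppszDst = '\0';
     }
 
     int main(void)
     {
         char szBuf[16];
         char *psz = szBuf;
         safeAppend(&psz, NULL, 0);   /* must not reach memcpy with a null source */
         safeAppend(&psz, "ok", 2);
         return szBuf[0] == 'o' ? 0 : 1;
     }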
diff --git a/src/VBox/VMM/VMMR3/TM.cpp b/src/VBox/VMM/VMMR3/TM.cpp
index f60c40b..4857e17 100644
--- a/src/VBox/VMM/VMMR3/TM.cpp
+++ b/src/VBox/VMM/VMMR3/TM.cpp
@@ -2929,6 +2929,7 @@ VMMR3DECL(uint64_t) TMR3TimeVirtGet(PUVM pUVM)
     return TMVirtualGetNoCheck(pVM);
 }
 
+
 /**
  * Gets the current TMCLOCK_VIRTUAL time in milliseconds without checking
  * timers or anything.
@@ -2946,6 +2947,7 @@ VMMR3DECL(uint64_t) TMR3TimeVirtGetMilli(PUVM pUVM)
     return TMVirtualToMilli(pVM, TMVirtualGetNoCheck(pVM));
 }
 
+
 /**
  * Gets the current TMCLOCK_VIRTUAL time in microseconds without checking
  * timers or anything.
@@ -2963,6 +2965,7 @@ VMMR3DECL(uint64_t) TMR3TimeVirtGetMicro(PUVM pUVM)
     return TMVirtualToMicro(pVM, TMVirtualGetNoCheck(pVM));
 }
 
+
 /**
  * Gets the current TMCLOCK_VIRTUAL time in nanoseconds without checking
  * timers or anything.
diff --git a/src/VBox/VMM/VMMR3/VM.cpp b/src/VBox/VMM/VMMR3/VM.cpp
index 2db75e7..72837ba 100644
--- a/src/VBox/VMM/VMMR3/VM.cpp
+++ b/src/VBox/VMM/VMMR3/VM.cpp
@@ -2551,45 +2551,57 @@ static void vmR3DestroyUVM(PUVM pUVM, uint32_t cMilliesEMTWait)
      * Signal termination of each of the emulation threads and
      * wait for them to complete.
      */
-    /* Signal them. */
+    /* Signal them - in reverse order since EMT(0) waits for the others. */
     ASMAtomicUoWriteBool(&pUVM->vm.s.fTerminateEMT, true);
     if (pUVM->pVM)
         VM_FF_SET(pUVM->pVM, VM_FF_CHECK_VM_STATE); /* Can't hurt... */
-    for (VMCPUID i = 0; i < pUVM->cCpus; i++)
+    VMCPUID iCpu = pUVM->cCpus;
+    while (iCpu-- > 0)
     {
         VMR3NotifyGlobalFFU(pUVM, VMNOTIFYFF_FLAGS_DONE_REM);
-        RTSemEventSignal(pUVM->aCpus[i].vm.s.EventSemWait);
+        RTSemEventSignal(pUVM->aCpus[iCpu].vm.s.EventSemWait);
     }
 
-    /* Wait for them. */
-    uint64_t    NanoTS = RTTimeNanoTS();
-    RTTHREAD    hSelf  = RTThreadSelf();
+    /* Wait for EMT(0); it in turn waits for the rest. */
     ASMAtomicUoWriteBool(&pUVM->vm.s.fTerminateEMT, true);
-    for (VMCPUID i = 0; i < pUVM->cCpus; i++)
+
+    RTTHREAD const hSelf = RTThreadSelf();
+    RTTHREAD hThread = pUVM->aCpus[0].vm.s.ThreadEMT;
+    if (   hThread != NIL_RTTHREAD
+        && hThread != hSelf)
+    {
+        int rc2 = RTThreadWait(hThread, RT_MAX(cMilliesEMTWait, 2000), NULL);
+        if (rc2 == VERR_TIMEOUT) /* avoid the assertion when debugging. */
+            rc2 = RTThreadWait(hThread, 1000, NULL);
+        AssertLogRelMsgRC(rc2, ("iCpu=0 rc=%Rrc\n", rc2));
+        if (RT_SUCCESS(rc2))
+            pUVM->aCpus[0].vm.s.ThreadEMT = NIL_RTTHREAD;
+    }
+
+    /* Just in case we're in a weird failure situation w/o EMT(0) to do the
+       waiting, wait for the other EMTs too. */
+    for (iCpu = 1; iCpu < pUVM->cCpus; iCpu++)
     {
-        RTTHREAD hThread = pUVM->aCpus[i].vm.s.ThreadEMT;
-        if (    hThread != NIL_RTTHREAD
-            &&  hThread != hSelf)
+        ASMAtomicXchgHandle(&pUVM->aCpus[iCpu].vm.s.ThreadEMT, NIL_RTTHREAD, &hThread);
+        if (hThread != NIL_RTTHREAD)
         {
-            uint64_t cMilliesElapsed = (RTTimeNanoTS() - NanoTS) / 1000000;
-            int rc2 = RTThreadWait(hThread,
-                                   cMilliesElapsed < cMilliesEMTWait
-                                   ? RT_MAX(cMilliesEMTWait - cMilliesElapsed, 2000)
-                                   : 2000,
-                                   NULL);
-            if (rc2 == VERR_TIMEOUT) /* avoid the assertion when debugging. */
-                rc2 = RTThreadWait(hThread, 1000, NULL);
-            AssertLogRelMsgRC(rc2, ("i=%u rc=%Rrc\n", i, rc2));
-            if (RT_SUCCESS(rc2))
-                pUVM->aCpus[0].vm.s.ThreadEMT = NIL_RTTHREAD;
+            if (hThread != hSelf)
+            {
+                int rc2 = RTThreadWait(hThread, 250 /*ms*/, NULL);
+                AssertLogRelMsgRC(rc2, ("iCpu=%u rc=%Rrc\n", iCpu, rc2));
+                if (RT_SUCCESS(rc2))
+                    continue;
+            }
+            pUVM->aCpus[iCpu].vm.s.ThreadEMT = hThread;
         }
     }
 
     /* Cleanup the semaphores. */
-    for (VMCPUID i = 0; i < pUVM->cCpus; i++)
+    iCpu = pUVM->cCpus;
+    while (iCpu-- > 0)
     {
-        RTSemEventDestroy(pUVM->aCpus[i].vm.s.EventSemWait);
-        pUVM->aCpus[i].vm.s.EventSemWait = NIL_RTSEMEVENT;
+        RTSemEventDestroy(pUVM->aCpus[iCpu].vm.s.EventSemWait);
+        pUVM->aCpus[iCpu].vm.s.EventSemWait = NIL_RTSEMEVENT;
     }
 
     /*
@@ -2670,6 +2682,9 @@ static void vmR3DestroyUVM(PUVM pUVM, uint32_t cMilliesEMTWait)
      */
     PDMR3TermUVM(pUVM);
 
+    RTCritSectDelete(&pUVM->vm.s.AtErrorCritSect);
+    RTCritSectDelete(&pUVM->vm.s.AtStateCritSect);
+
     /*
      * Terminate the support library if initialized.
      */
@@ -3096,6 +3111,9 @@ static void vmR3DoReleaseUVM(PUVM pUVM)
      */
     Assert(!pUVM->pVM);
 
+    MMR3HeapFree(pUVM->vm.s.pszName);
+    pUVM->vm.s.pszName = NULL;
+
     MMR3TermUVM(pUVM);
     STAMR3TermUVM(pUVM);
 
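
The vmR3DestroyUVM rewrite above changes the shutdown protocol: EMT(0) now joins the other EMTs itself (see the VMEmt.cpp hunk below), so the destroyer signals the wait semaphores in reverse order, gives EMT(0) the full timeout, and treats the 250 ms waits on the remaining threads purely as a failure-path safety net. A minimal sketch of the reverse-order signal/join idiom, with pthreads standing in for the IPRT primitives (RTSemEventSignal/RTThreadWait); all names here are illustrative, not the real UVM layout:

    #include <pthread.h>
    #include <stdbool.h>

    /* Illustrative stand-in for the per-EMT state in pUVM->aCpus[]. */
    typedef struct WORKER {
        pthread_t       hThread;
        pthread_mutex_t Mtx;
        pthread_cond_t  CondWait;
    } WORKER;

    static volatile bool g_fTerminate;

    static void teardown(WORKER *paWorkers, unsigned cWorkers)
    {
        g_fTerminate = true;                 /* publish the stop flag first */
        unsigned i = cWorkers;
        while (i-- > 0)                      /* reverse order: worker 0 last */
        {
            pthread_mutex_lock(&paWorkers[i].Mtx);
            pthread_cond_signal(&paWorkers[i].CondWait);
            pthread_mutex_unlock(&paWorkers[i].Mtx);
        }
        /* Worker 0 joins workers 1..N-1 itself, so joining it suffices. */
        pthread_join(paWorkers[0].hThread, NULL);
    }
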
diff --git a/src/VBox/VMM/VMMR3/VMEmt.cpp b/src/VBox/VMM/VMMR3/VMEmt.cpp
index 424b0ca..37fcbca 100644
--- a/src/VBox/VMM/VMMR3/VMEmt.cpp
+++ b/src/VBox/VMM/VMMR3/VMEmt.cpp
@@ -250,15 +250,29 @@ int vmR3EmulationThreadWithId(RTTHREAD hThreadSelf, PUVMCPU pUVCpu, VMCPUID idCp
      */
     Log(("vmR3EmulationThread: Terminating emulation thread! Thread=%#x pUVM=%p rc=%Rrc enmBefore=%d enmVMState=%d\n",
          hThreadSelf, pUVM, rc, enmBefore, pUVM->pVM ? pUVM->pVM->enmVMState : VMSTATE_TERMINATED));
+    PVM pVM;
     if (   idCpu == 0
-        && pUVM->pVM)
+        && (pVM = pUVM->pVM) != NULL)
     {
-        PVM pVM = pUVM->pVM;
+        /* Wait for any other EMTs to terminate before we destroy the VM (see vmR3DestroyVM). */
+        for (VMCPUID iCpu = 1; iCpu < pUVM->cCpus; iCpu++)
+        {
+            RTTHREAD hThread;
+            ASMAtomicXchgHandle(&pUVM->aCpus[iCpu].vm.s.ThreadEMT, NIL_RTTHREAD, &hThread);
+            if (hThread != NIL_RTTHREAD)
+            {
+                int rc2 = RTThreadWait(hThread, 5 * RT_MS_1SEC, NULL);
+                AssertLogRelMsgRC(rc2, ("iCpu=%u rc=%Rrc\n", iCpu, rc2));
+                if (RT_FAILURE(rc2))
+                    pUVM->aCpus[iCpu].vm.s.ThreadEMT = hThread;
+            }
+        }
+
+        /* Switch to the terminated state, clear the VM pointer and finally destroy the VM. */
         vmR3SetTerminated(pVM);
+
         pUVM->pVM = NULL;
 
-        /** @todo SMP: This isn't 100% safe. We should wait for the other
-         *        threads to finish before destroy the VM. */
         int rc2 = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /*idCpu*/, VMMR0_DO_GVMM_DESTROY_VM, 0, NULL);
         AssertLogRelRC(rc2);
     }
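
This resolves the removed @todo: EMT(0) now waits for EMTs 1..N-1 before vmR3SetTerminated and the GVMM destroy call. The ASMAtomicXchgHandle dance means each ThreadEMT slot is claimed exactly once, so vmR3DestroyUVM and EMT(0) can both run the pattern without double-joining a thread. A hedged sketch of the claim-then-wait idiom, using C11 atomics in place of ASMAtomicXchgHandle (assumes pthread_t is a scalar type, as on Linux/glibc):

    #include <stdatomic.h>
    #include <pthread.h>

    /* One slot per worker thread; 0 plays the role of NIL_RTTHREAD here. */
    static _Atomic(pthread_t) g_ahWorkers[8];

    /* Returns nonzero if this caller claimed (and joined) the worker. */
    static int claimAndJoin(unsigned iWorker)
    {
        pthread_t hThread = atomic_exchange(&g_ahWorkers[iWorker], (pthread_t)0);
        if (hThread == (pthread_t)0)
            return 0;                  /* another path already claimed it */
        pthread_join(hThread, NULL);   /* exactly one joiner per thread */
        return 1;
    }
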
diff --git a/src/VBox/VMM/VMMR3/VMMGuruMeditation.cpp b/src/VBox/VMM/VMMR3/VMMGuruMeditation.cpp
index e524575..423349b 100644
--- a/src/VBox/VMM/VMMR3/VMMGuruMeditation.cpp
+++ b/src/VBox/VMM/VMMR3/VMMGuruMeditation.cpp
@@ -282,8 +282,8 @@ VMMR3DECL(void) VMMR3FatalDump(PVM pVM, PVMCPU pVCpu, int rcErr)
                 ||  !*pszMsg2
                 ||  strchr(pszMsg2, '\0')[-1] != '\n')
                 pHlp->pfnPrintf(pHlp, "\n");
-            /* fall thru */
         }
+        /* fall thru */
         case VERR_TRPM_DONT_PANIC:
         case VERR_TRPM_PANIC:
         case VINF_EM_RAW_STALE_SELECTOR:
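
Moving the /* fall thru */ marker outside the closing brace is more than cosmetic: fall-through checkers (compiler warnings, static analysers) generally only honour such a comment when it sits directly in front of the next case label, so a brace between the comment and the label can defeat the heuristic. An illustration of the placement that is recognized (exact comment matching varies by tool):

    static void doStuff(void) { /* ... */ }
    static void doMore(void)  { /* ... */ }

    static void handle(int rc)
    {
        switch (rc)
        {
            case 1:
            {
                doStuff();
            }
            /* fall thru */    /* marker directly before the next label */
            case 2:
                doMore();
                break;
        }
    }
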
diff --git a/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_3200.h b/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_3200.h
index 0f95e86..fefe3e1 100644
--- a/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_3200.h
+++ b/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_3200.h
@@ -207,6 +207,7 @@ static CPUMDBENTRY const g_Entry_AMD_Athlon_64_3200 =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 40,
+    /*.fMxCsrMask       = */ 0xffff, ///< @todo check.
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_AMD_Athlon_64_3200),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_AMD_Athlon_64_3200)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_DEFAULTS,
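
The new fMxCsrMask member records which MXCSR bits the profiled CPU implements; loading a reserved bit via LDMXCSR or FXRSTOR raises #GP on real hardware. In the entries below, 0xffff covers the sixteen architectural bits, 0x2ffff additionally admits bit 17 (AMD's misaligned-SSE MM bit), and the guessed 0xffbf on the Pentium M entry further down clears bit 6 (DAZ), which the earliest SSE2 parts lacked. A one-line validation sketch under those assumptions:

    #include <stdint.h>

    /* Sketch: would a guest LDMXCSR value be accepted for this CPU profile? */
    static int isValidGuestMxCsr(uint32_t uNewMxCsr, uint32_t fMxCsrMask)
    {
        return (uNewMxCsr & ~fMxCsrMask) == 0;   /* reserved bit set => #GP */
    }
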
diff --git a/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_X2_Dual_Core_4200.h b/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_X2_Dual_Core_4200.h
index 3d504f8..315f614 100644
--- a/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_X2_Dual_Core_4200.h
+++ b/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_X2_Dual_Core_4200.h
@@ -215,6 +215,7 @@ static CPUMDBENTRY const g_Entry_AMD_Athlon_64_X2_Dual_Core_4200 =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 40,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_AMD_Athlon_64_X2_Dual_Core_4200),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_AMD_Athlon_64_X2_Dual_Core_4200)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_DEFAULTS,
diff --git a/src/VBox/VMM/VMMR3/cpus/AMD_FX_8150_Eight_Core.h b/src/VBox/VMM/VMMR3/cpus/AMD_FX_8150_Eight_Core.h
index 430d57a..c261a79 100644
--- a/src/VBox/VMM/VMMR3/cpus/AMD_FX_8150_Eight_Core.h
+++ b/src/VBox/VMM/VMMR3/cpus/AMD_FX_8150_Eight_Core.h
@@ -366,6 +366,7 @@ static CPUMDBENTRY const g_Entry_AMD_FX_8150_Eight_Core =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 48,
+    /*.fMxCsrMask       = */ 0x2ffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_AMD_FX_8150_Eight_Core),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_AMD_FX_8150_Eight_Core)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_DEFAULTS,
diff --git a/src/VBox/VMM/VMMR3/cpus/AMD_Phenom_II_X6_1100T.h b/src/VBox/VMM/VMMR3/cpus/AMD_Phenom_II_X6_1100T.h
index bb74c29..41d1f2c 100644
--- a/src/VBox/VMM/VMMR3/cpus/AMD_Phenom_II_X6_1100T.h
+++ b/src/VBox/VMM/VMMR3/cpus/AMD_Phenom_II_X6_1100T.h
@@ -255,6 +255,7 @@ static CPUMDBENTRY const g_Entry_AMD_Phenom_II_X6_1100T =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 48,
+    /*.fMxCsrMask       = */ 0x2ffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_AMD_Phenom_II_X6_1100T),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_AMD_Phenom_II_X6_1100T)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_DEFAULTS,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_80186.h b/src/VBox/VMM/VMMR3/cpus/Intel_80186.h
index bb23901..c7e6847 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_80186.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_80186.h
@@ -58,6 +58,7 @@ static CPUMDBENTRY const g_Entry_Intel_80186 =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ CPUDB_F_EXECUTE_ALL_IN_IEM,
     /*.cMaxPhysAddrWidth= */ 20,
+    /*.fMxCsrMask       = */ 0,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_80186),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_80186)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_DEFAULTS,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_80286.h b/src/VBox/VMM/VMMR3/cpus/Intel_80286.h
index 6118283..1a1e3db 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_80286.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_80286.h
@@ -58,6 +58,7 @@ static CPUMDBENTRY const g_Entry_Intel_80286 =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ CPUDB_F_EXECUTE_ALL_IN_IEM,
     /*.cMaxPhysAddrWidth= */ 24,
+    /*.fMxCsrMask       = */ 0,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_80286),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_80286)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_DEFAULTS,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_80386.h b/src/VBox/VMM/VMMR3/cpus/Intel_80386.h
index 4a53b91..f0e1323 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_80386.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_80386.h
@@ -58,6 +58,7 @@ static CPUMDBENTRY const g_Entry_Intel_80386 =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ CPUDB_F_EXECUTE_ALL_IN_IEM,
     /*.cMaxPhysAddrWidth= */ 24,
+    /*.fMxCsrMask       = */ 0,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_80386),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_80386)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_DEFAULTS,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_8086.h b/src/VBox/VMM/VMMR3/cpus/Intel_8086.h
index 736fa95..1d1a99a 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_8086.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_8086.h
@@ -58,6 +58,7 @@ static CPUMDBENTRY const g_Entry_Intel_8086 =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ CPUDB_F_EXECUTE_ALL_IN_IEM,
     /*.cMaxPhysAddrWidth= */ 20,
+    /*.fMxCsrMask       = */ 0,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_8086),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_8086)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_DEFAULTS,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Atom_330_1_60GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Atom_330_1_60GHz.h
index 8d7637e..6f0af1a 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_Atom_330_1_60GHz.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Atom_330_1_60GHz.h
@@ -193,6 +193,7 @@ static CPUMDBENTRY const g_Entry_Intel_Atom_330_1_60GHz =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_133MHZ,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 32,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Atom_330_1_60GHz),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Atom_330_1_60GHz)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i5_3570.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i5_3570.h
index cbd03ac..fe84a96 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i5_3570.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i5_3570.h
@@ -322,6 +322,7 @@ static CPUMDBENTRY const g_Entry_Intel_Core_i5_3570 =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_100MHZ,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 36,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i5_3570),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i5_3570)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_2635QM.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_2635QM.h
index b4e7638..3310d92 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_2635QM.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_2635QM.h
@@ -315,6 +315,7 @@ static CPUMDBENTRY const g_Entry_Intel_Core_i7_2635QM =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_100MHZ,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 36,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i7_2635QM),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i7_2635QM)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3960X.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3960X.h
index c70ba3d..dc65770 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3960X.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3960X.h
@@ -352,6 +352,7 @@ static CPUMDBENTRY const g_Entry_Intel_Core_i7_3960X =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_100MHZ,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 46,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i7_3960X),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i7_3960X)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_5600U.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_5600U.h
index 78e0055..adabe6e 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_5600U.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_5600U.h
@@ -351,6 +351,7 @@ static CPUMDBENTRY const g_Entry_Intel_Core_i7_5600U =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_100MHZ,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 39,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i7_5600U),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i7_5600U)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_6700K.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_6700K.h
index 09356d5..be2be38 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_6700K.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_6700K.h
@@ -493,6 +493,7 @@ static CPUMDBENTRY const g_Entry_Intel_Core_i7_6700K =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_100MHZ,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 39,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i7_6700K),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i7_6700K)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_4_3_00GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_4_3_00GHz.h
index eb6ab23..2c75866 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_4_3_00GHz.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_4_3_00GHz.h
@@ -260,6 +260,7 @@ static CPUMDBENTRY const g_Entry_Intel_Pentium_4_3_00GHz =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 36,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Pentium_4_3_00GHz),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Pentium_4_3_00GHz)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_M_processor_2_00GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_M_processor_2_00GHz.h
index 2f1d0af..64cb1bb 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_M_processor_2_00GHz.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_M_processor_2_00GHz.h
@@ -199,6 +199,7 @@ static CPUMDBENTRY const g_Entry_Intel_Pentium_M_processor_2_00GHz =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 32,
+    /*.fMxCsrMask       = */ 0xffbf, ///< @todo check this
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Pentium_M_processor_2_00GHz),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Pentium_M_processor_2_00GHz)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_N3530_2_16GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_N3530_2_16GHz.h
index 8c2abfd..7cc261f 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_N3530_2_16GHz.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_N3530_2_16GHz.h
@@ -248,6 +248,7 @@ static CPUMDBENTRY const g_Entry_Intel_Pentium_N3530_2_16GHz =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_267MHZ,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 36,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Pentium_N3530_2_16GHz),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Pentium_N3530_2_16GHz)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX,
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Xeon_X5482_3_20GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Xeon_X5482_3_20GHz.h
index 7c6b329..a42463b 100644
--- a/src/VBox/VMM/VMMR3/cpus/Intel_Xeon_X5482_3_20GHz.h
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Xeon_X5482_3_20GHz.h
@@ -228,6 +228,7 @@ static CPUMDBENTRY const g_Entry_Intel_Xeon_X5482_3_20GHz =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_400MHZ,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 38,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Xeon_X5482_3_20GHz),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Xeon_X5482_3_20GHz)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF,
diff --git a/src/VBox/VMM/VMMR3/cpus/Quad_Core_AMD_Opteron_2384.h b/src/VBox/VMM/VMMR3/cpus/Quad_Core_AMD_Opteron_2384.h
index 194d0c4..dc29a4f 100644
--- a/src/VBox/VMM/VMMR3/cpus/Quad_Core_AMD_Opteron_2384.h
+++ b/src/VBox/VMM/VMMR3/cpus/Quad_Core_AMD_Opteron_2384.h
@@ -253,6 +253,7 @@ static CPUMDBENTRY const g_Entry_Quad_Core_AMD_Opteron_2384 =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 48,
+    /*.fMxCsrMask       = */ 0x2ffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_Quad_Core_AMD_Opteron_2384),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Quad_Core_AMD_Opteron_2384)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_DEFAULTS,
diff --git a/src/VBox/VMM/VMMR3/cpus/VIA_QuadCore_L4700_1_2_GHz.h b/src/VBox/VMM/VMMR3/cpus/VIA_QuadCore_L4700_1_2_GHz.h
index 90a19a7..da2b645 100644
--- a/src/VBox/VMM/VMMR3/cpus/VIA_QuadCore_L4700_1_2_GHz.h
+++ b/src/VBox/VMM/VMMR3/cpus/VIA_QuadCore_L4700_1_2_GHz.h
@@ -387,6 +387,7 @@ static CPUMDBENTRY const g_Entry_VIA_QuadCore_L4700_1_2_GHz =
     /*.uScalableBusFreq = */ CPUM_SBUSFREQ_267MHZ, /*??*/
     /*.fFlags           = */ 0,
     /*.cMaxPhysAddrWidth= */ 36,
+    /*.fMxCsrMask       = */ 0xffff,
     /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_VIA_QuadCore_L4700_1_2_GHz),
     /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_VIA_QuadCore_L4700_1_2_GHz)),
     /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_DEFAULTS,
diff --git a/src/VBox/VMM/VMMRC/CPUMRCPatchHlp.asm b/src/VBox/VMM/VMMRC/CPUMRCPatchHlp.asm
index c76549d..7e98deb 100644
--- a/src/VBox/VMM/VMMRC/CPUMRCPatchHlp.asm
+++ b/src/VBox/VMM/VMMRC/CPUMRCPatchHlp.asm
@@ -35,8 +35,8 @@ extern IMPNAME(g_VM)
 BEGIN_PATCH_HLP_SEG
 
 ;;
-; Helper for PATMCpuidReplacement. 
-; 
+; Helper for PATMCpuidReplacement.
+;
 ; We have at most 32 bytes of stack to play with.
 ;
 ; @input    eax, ecx(, edx, ebx)
@@ -58,8 +58,8 @@ BEGINPROC_EXPORTED CPUMPatchHlpCpuId
 
     ;
     ; Perform a binary search looking for leaf with the EAX value.
-    ;             
-    mov     edx, [ss:edi + VM.cpum + CPUM.GuestInfo + CPUMINFO.cCpuIdLeaves]   
+    ;
+    mov     edx, [ss:edi + VM.cpum + CPUM.GuestInfo + CPUMINFO.cCpuIdLeaves]
     mov     ecx, [ss:edi + VM.cpum + CPUM.GuestInfo + CPUMINFO.paCpuIdLeavesRC]
     test    edx, edx
     jz      cpuid_unknown
@@ -97,7 +97,7 @@ cpuid_lookup_split_up:
     ;
     ; We've got a matching leaf, does the sub-leaf match too?
     ;
-cpuid_match_eax:    
+cpuid_match_eax:
     mov     ecx, [esp + 4]
     and     ecx, [ss:ebx + CPUMCPUIDLEAF.fSubLeafMask]
     cmp     ecx, [ss:ebx + CPUMCPUIDLEAF.uSubLeaf]
@@ -111,7 +111,7 @@ cpuid_lookup_subleaf_backwards:
     mov     edx, [ss:edi + VM.cpum + CPUM.GuestInfo + CPUMINFO.paCpuIdLeavesRC] ; edx = first leaf
 
 cpuid_lookup_subleaf_backwards_loop:
-    cmp     ebx, edx                    ; Is there a leaf before the current? 
+    cmp     ebx, edx                    ; Is there a leaf before the current?
     jbe     cpuid_subleaf_not_found     ; If not we're out of luck.
     cmp     eax, [ss:ebx - CPUMCPUIDLEAF_size + CPUMCPUIDLEAF.uLeaf]
     jne     cpuid_subleaf_not_found     ; If the leaf before us does not have the same leaf number, we failed.
@@ -122,11 +122,11 @@ cpuid_lookup_subleaf_backwards_loop:
     jmp     cpuid_subleaf_not_found     ; Too bad.
 
     ;
-    ; Search forward until we've got a matching sub-leaf (or not).  
+    ; Search forward until we've got a matching sub-leaf (or not).
     ;
 cpuid_lookup_subleaf_forwards:
     ; Calculate the last leaf address.
-    mov     edx, [ss:edi + VM.cpum + CPUM.GuestInfo + CPUMINFO.cCpuIdLeaves]   
+    mov     edx, [ss:edi + VM.cpum + CPUM.GuestInfo + CPUMINFO.cCpuIdLeaves]
     dec     edx
     shl     edx, CPUMCPUIDLEAF_SIZE_LOG2
     add     edx, [ss:edi + VM.cpum + CPUM.GuestInfo + CPUMINFO.paCpuIdLeavesRC] ; edx = last leaf (inclusive)
@@ -137,14 +137,14 @@ cpuid_subleaf_lookup:
     cmp     eax, [ss:ebx + CPUMCPUIDLEAF_size + CPUMCPUIDLEAF.uLeaf]
     jne     cpuid_subleaf_not_found
     add     ebx, CPUMCPUIDLEAF_size
-    cmp     ecx, [ss:ebx + CPUMCPUIDLEAF.uSubLeaf]    
+    cmp     ecx, [ss:ebx + CPUMCPUIDLEAF.uSubLeaf]
     ja      cpuid_subleaf_lookup
     je      cpuid_fetch
-    
+
     ;
     ; Out-of-range sub-leaves aren't quite as easy and pretty to emulate as
     ; in-range ones, but we do an adequate job here.
-    ;    
+    ;
 cpuid_subleaf_not_found:
     xor     ecx, ecx
     test    dword [ss:ebx + CPUMCPUIDLEAF.fFlags], CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES
@@ -188,7 +188,7 @@ cpuid_fetch:
     mov     ecx, [ss:ebx + CPUMCPUIDLEAF.uEcx]
     mov     eax, [ss:ebx + CPUMCPUIDLEAF.uEax]
     mov     ebx, [ss:ebx + CPUMCPUIDLEAF.uEbx]
-             
+
 cpuid_done:
     pop     edi
     add     esp, 12
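
The hunks above are whitespace-only, but the routine is worth a gloss: it binary-searches the sorted CPUID leaf array for the requested EAX value, then walks neighbouring entries to match the masked sub-leaf in ECX. The lookup in C terms (field names follow CPUMCPUIDLEAF; the rest is a sketch):

    #include <stddef.h>
    #include <stdint.h>

    typedef struct LEAF { uint32_t uLeaf, uSubLeaf, fSubLeafMask; } LEAF;

    /* Lower-bound binary search for the first entry with leaf == uLeaf;
       sub-leaf matching would then scan forward/backward from the hit. */
    static const LEAF *findLeaf(const LEAF *paLeaves, size_t cLeaves, uint32_t uLeaf)
    {
        size_t iLow = 0, iHigh = cLeaves;
        while (iLow < iHigh)
        {
            size_t i = iLow + (iHigh - iLow) / 2;
            if (paLeaves[i].uLeaf < uLeaf)
                iLow = i + 1;
            else
                iHigh = i;
        }
        if (iLow < cLeaves && paLeaves[iLow].uLeaf == uLeaf)
            return &paLeaves[iLow];
        return NULL;    /* caller falls back to the unknown-leaf handling */
    }
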
diff --git a/src/VBox/VMM/VMMRC/TRPMRCHandlers.cpp b/src/VBox/VMM/VMMRC/TRPMRCHandlers.cpp
index 79e5a0e..345df77 100644
--- a/src/VBox/VMM/VMMRC/TRPMRCHandlers.cpp
+++ b/src/VBox/VMM/VMMRC/TRPMRCHandlers.cpp
@@ -901,6 +901,7 @@ static int trpmGCTrap0dHandlerRing0(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFram
             if (    !PATMIsPatchGCAddr(pVM, PC)
                 &&  !CSAMIsKnownDangerousInstr(pVM, PC))
                 break;
+            /* fall thru */
         case OP_INVLPG:
         case OP_LLDT:
         case OP_STI:
diff --git a/src/VBox/VMM/include/CPUMInternal.h b/src/VBox/VMM/include/CPUMInternal.h
index ba0593c..e3b22e2 100644
--- a/src/VBox/VMM/include/CPUMInternal.h
+++ b/src/VBox/VMM/include/CPUMInternal.h
@@ -156,13 +156,14 @@ typedef struct CPUMINFO
      * instruction.  Older hardware has been observed to ignore higher bits. */
     uint32_t                    fMsrMask;
 
+    /** MXCSR mask. */
+    uint32_t                    fMxCsrMask;
+
     /** The number of CPUID leaves (CPUMCPUIDLEAF) in the array pointed to below. */
     uint32_t                    cCpuIdLeaves;
     /** The index of the first extended CPUID leaf in the array.
      *  Set to cCpuIdLeaves if none present. */
     uint32_t                    iFirstExtCpuIdLeaf;
-    /** Alignment padding. */
-    uint32_t                    uPadding;
     /** How to handle unknown CPUID leaves. */
     CPUMUNKNOWNCPUID            enmUnknownCpuIdMethod;
     /** For use with CPUMUNKNOWNCPUID_DEFAULTS (DB & VM),
@@ -402,7 +403,10 @@ typedef struct CPUM
     /** XSAVE/XRSTOR host mask.  Only state components in this mask can be exposed
      * to the guest.  This is 0 if no XSAVE/XRSTOR bits can be exposed. */
     uint64_t                fXStateHostMask;
-    uint8_t                 abPadding1[24];
+
+    /** The host MXCSR mask (determined at init). */
+    uint32_t                fHostMxCsrMask;
+    uint8_t                 abPadding1[20];
 
     /** Host CPU feature information.
      * Externally visible via the VM structure, aligned on a 64-byte boundary. */
@@ -498,7 +502,7 @@ typedef struct CPUMCPU
      *  when loading state, so we won't save it.) */
     bool                    fCpuIdApicFeatureVisible;
 
-    /** Align the next member on a 64-bit boundrary. */
+    /** Align the next member on a 64-byte boundary. */
     uint8_t                 abPadding2[64 - 16 - (HC_ARCH_BITS == 64 ? 8 : 4) - 4 - 1 - 3];
 
     /** Saved host context.  Only valid while inside RC or HM contexts.
diff --git a/src/VBox/VMM/include/CPUMInternal.mac b/src/VBox/VMM/include/CPUMInternal.mac
index 8980115..0a95008 100644
--- a/src/VBox/VMM/include/CPUMInternal.mac
+++ b/src/VBox/VMM/include/CPUMInternal.mac
@@ -38,9 +38,9 @@
 struc CPUMINFO
     .cMsrRanges             resd 1                  ; uint32_t
     .fMsrMask               resd 1                  ; uint32_t
+    .fMxCsrMask             resd 1                  ; uint32_t
     .cCpuIdLeaves           resd 1                  ; uint32_t
     .iFirstExtCpuIdLeaf     resd 1                  ; uint32_t
-    .uPadding               resd 1                  ; uint32_t
     .enmUnknownCpuIdMethod  resd 1                  ; CPUMUNKNOWNCPUID
     .DefCpuId               resb CPUMCPUID_size     ; CPUMCPUID
     .uScalableBusFreq       resq 1                  ; uint64_t
diff --git a/src/VBox/VMM/include/DBGFInternal.h b/src/VBox/VMM/include/DBGFInternal.h
index 5deabe6..dfaa1b4 100644
--- a/src/VBox/VMM/include/DBGFInternal.h
+++ b/src/VBox/VMM/include/DBGFInternal.h
@@ -244,6 +244,34 @@ typedef struct DBGF
      * Not all commands take data. */
     DBGFCMDDATA                 VMMCmdData;
 
+    /** Stepping filtering. */
+    struct
+    {
+        /** The CPU doing the stepping.
+         * Set to NIL_VMCPUID when filtering is inactive. */
+        VMCPUID                 idCpu;
+        /** The specified flags. */
+        uint32_t                fFlags;
+        /** The effective PC address to stop at, if given. */
+        RTGCPTR                 AddrPc;
+        /** The lowest effective stack address to stop at.
+         * Together with cbStackPop, this forms a range of effective stack pointer
+         * addresses that we stop for. */
+        RTGCPTR                 AddrStackPop;
+        /** The size of the stack stop area starting at AddrStackPop. */
+        RTGCPTR                 cbStackPop;
+        /** Maximum number of steps. */
+        uint32_t                cMaxSteps;
+
+        /** Number of steps made thus far. */
+        uint32_t                cSteps;
+        /** Current call counting balance for step-over handling. */
+        uint32_t                uCallDepth;
+
+        uint32_t                u32Padding; /**< Alignment padding. */
+
+    } SteppingFilter;
+
     uint32_t                    u32Padding; /**< Alignment padding. */
 
     /** The number of enabled hardware breakpoints. */
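
The new SteppingFilter block gives DBGF enough state to keep single-stepping until the PC reaches AddrPc or the stack pointer lands in the [AddrStackPop, AddrStackPop + cbStackPop) window (a step-out/step-over completion test), with cMaxSteps as a hard budget. A sketch of the stop predicate; the fStopOn* flags below are hypothetical stand-ins for whatever bits fFlags actually carries:

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct STEPFILTER {
        uint64_t AddrPc, AddrStackPop, cbStackPop;
        uint32_t cMaxSteps, cSteps;
        bool     fStopOnPc, fStopOnStackPop;    /* hypothetical flag bits */
    } STEPFILTER;

    static bool stepFilterShouldStop(STEPFILTER *pFilter, uint64_t Pc, uint64_t Sp)
    {
        if (++pFilter->cSteps >= pFilter->cMaxSteps)
            return true;                                 /* budget exhausted */
        if (pFilter->fStopOnPc && Pc == pFilter->AddrPc)
            return true;                                 /* reached target PC */
        if (   pFilter->fStopOnStackPop
            && Sp - pFilter->AddrStackPop < pFilter->cbStackPop)
            return true;                   /* SP popped into the stop window */
        return false;
    }
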
diff --git a/src/VBox/VMM/include/EMHandleRCTmpl.h b/src/VBox/VMM/include/EMHandleRCTmpl.h
index 89493e0..62124ed 100644
--- a/src/VBox/VMM/include/EMHandleRCTmpl.h
+++ b/src/VBox/VMM/include/EMHandleRCTmpl.h
@@ -236,6 +236,12 @@ int emR3HmHandleRC(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, int rc)
          */
         case VINF_GIM_R3_HYPERCALL:
         {
+            /* Currently, hypercall instruction (vmcall/vmmcall) emulation is only
+               compiled when the nested hw. virt feature is enabled in IEM (for easier IEM backports). */
+#ifdef VBOX_WITH_NESTED_HWVIRT
+            rc = emR3ExecuteInstruction(pVM, pVCpu, "Hypercall");
+            break;
+#else
             /** @todo IEM/REM need to handle VMCALL/VMMCALL, see
              *        @bugref{7270#c168}. */
             uint8_t cbInstr = 0;
@@ -258,6 +264,7 @@ int emR3HmHandleRC(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, int rc)
                 rc = VBOXSTRICTRC_VAL(rcStrict);
             }
             break;
+#endif
         }
 
 #ifdef EMHANDLERC_WITH_HM
diff --git a/src/VBox/VMM/include/IEMInternal.h b/src/VBox/VMM/include/IEMInternal.h
index 8d69ef8..af0115c 100644
--- a/src/VBox/VMM/include/IEMInternal.h
+++ b/src/VBox/VMM/include/IEMInternal.h
@@ -41,6 +41,23 @@ RT_C_DECLS_BEGIN
 # define IEM_STATIC static
 #endif
 
+/** @def IEM_WITH_3DNOW
+ * Includes the 3DNow decoding.  */
+#define IEM_WITH_3DNOW
+
+/** @def IEM_WITH_THREE_0F_38
+ * Includes the three byte opcode map for instrs starting with 0x0f 0x38. */
+#define IEM_WITH_THREE_0F_38
+
+/** @def IEM_WITH_THREE_0F_3A
+ * Includes the three byte opcode map for instrs starting with 0x0f 0x3a. */
+#define IEM_WITH_THREE_0F_3A
+
+/** @def IEM_WITH_VEX
+ * Includes the VEX decoding. */
+#define IEM_WITH_VEX
+
+
 /** @def IEM_VERIFICATION_MODE_FULL
  * Shorthand for:
  *    defined(IEM_VERIFICATION_MODE) && !defined(IEM_VERIFICATION_MODE_MINIMAL)
@@ -67,6 +84,21 @@ RT_C_DECLS_BEGIN
 //#define IEM_WITH_CODE_TLB// - work in progress
 
 
+#if !defined(IN_TSTVMSTRUCT) && !defined(DOXYGEN_RUNNING)
+/** Instruction statistics.   */
+typedef struct IEMINSTRSTATS
+{
+# define IEM_DO_INSTR_STAT(a_Name, a_szDesc) uint32_t a_Name;
+# include "IEMInstructionStatisticsTmpl.h"
+# undef IEM_DO_INSTR_STAT
+} IEMINSTRSTATS;
+#else
+struct IEMINSTRSTATS;
+typedef struct IEMINSTRSTATS IEMINSTRSTATS;
+#endif
+/** Pointer to IEM instruction statistics. */
+typedef IEMINSTRSTATS *PIEMINSTRSTATS;
+
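
IEMINSTRSTATS is built with the X-macro pattern: IEMInstructionStatisticsTmpl.h invokes IEM_DO_INSTR_STAT once per instruction, so the same list can generate struct members here and, with a different expansion, registration or reporting code elsewhere. A self-contained illustration (the instruction list itself is invented):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for IEMInstructionStatisticsTmpl.h: one entry per instruction. */
    #define INSTR_LIST \
        DO_STAT(Add, "add") \
        DO_STAT(Mov, "mov")

    typedef struct STATS
    {
    #define DO_STAT(a_Name, a_szDesc) uint32_t a_Name;
        INSTR_LIST
    #undef DO_STAT
    } STATS;

    static void printStats(const STATS *pStats)
    {
    #define DO_STAT(a_Name, a_szDesc) printf("%-4s %u\n", a_szDesc, pStats->a_Name);
        INSTR_LIST
    #undef DO_STAT
    }
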
 /** Finish and move to types.h */
 typedef union
 {
@@ -413,20 +445,22 @@ typedef struct IEMCPU
     /** The current CPU execution mode (CS). */
     IEMMODE                 enmCpuMode;                                                                     /* 0x04 */
     /** The CPL. */
-    uint8_t                 uCpl;                                                                           /* 0x08 */
+    uint8_t                 uCpl;                                                                           /* 0x05 */
 
     /** Whether to bypass access handlers or not. */
-    bool                    fBypassHandlers;                                                                /* 0x09 */
+    bool                    fBypassHandlers;                                                                /* 0x06 */
     /** Indicates that we're interpreting patch code - RC only! */
-    bool                    fInPatchCode;                                                                   /* 0x0a */
+    bool                    fInPatchCode;                                                                   /* 0x07 */
 
     /** @name Decoder state.
      * @{ */
 #ifdef IEM_WITH_CODE_TLB
-    /** Unused. */
-    uint8_t                 bUnused0;                                                                       /* 0x0b */
     /** The offset of the next instruction byte. */
-    uint32_t                offInstrNextByte;                                                               /* 0x0c */
+    uint32_t                offInstrNextByte;                                                               /* 0x08 */
+    /** The number of bytes available at pbInstrBuf for the current instruction.
+     * This takes the max opcode length into account so that it doesn't need to be
+     * checked separately. */
+    uint32_t                cbInstrBuf;                                                                     /* 0x0c */
     /** Pointer to the page containing RIP, user specified buffer or abOpcode.
      * This can be NULL if the page isn't mappable for some reason, in which
      * case we'll do fallback stuff.
@@ -440,86 +474,92 @@ typedef struct IEMCPU
      * therefore precludes stuff like <tt>pbInstrBuf[offInstrNextByte + cbInstrBuf - cbCurInstr]</tt>
      */
     uint8_t const          *pbInstrBuf;                                                                     /* 0x10 */
-# if defined(IN_RC) && HC_ARCH_BITS != 32
+# if ARCH_BITS == 32
     uint32_t                uInstrBufHigh; /**< The high dword of the host context pbInstrBuf member. */
 # endif
     /** The program counter corresponding to pbInstrBuf.
      * This is set to a non-canonical address when we need to invalidate it. */
     uint64_t                uInstrBufPc;                                                                    /* 0x18 */
-    /** The number of bytes available at pbInstrBuf for the current instruction.
-     * This takes the max opcode length into account so that doesn't need to be
-     * checked separately. */
-    uint32_t                cbInstrBuf;                                                                     /* 0x20 */
     /** The number of bytes available at pbInstrBuf in total (for IEMExecLots).
      * This takes the CS segment limit into account. */
-    uint16_t                cbInstrBufTotal;                                                                /* 0x24 */
+    uint16_t                cbInstrBufTotal;                                                                /* 0x20 */
     /** Offset into pbInstrBuf of the first byte of the current instruction.
      * Can be negative to efficiently handle cross page instructions. */
-    int16_t                 offCurInstrStart;                                                               /* 0x26 */
+    int16_t                 offCurInstrStart;                                                               /* 0x22 */
 
     /** The prefix mask (IEM_OP_PRF_XXX). */
-    uint32_t                fPrefixes;                                                                      /* 0x28 */
+    uint32_t                fPrefixes;                                                                      /* 0x24 */
     /** The extra REX ModR/M register field bit (REX.R << 3). */
-    uint8_t                 uRexReg;                                                                        /* 0x2c */
+    uint8_t                 uRexReg;                                                                        /* 0x28 */
     /** The extra REX ModR/M r/m field, SIB base and opcode reg bit
      * (REX.B << 3). */
-    uint8_t                 uRexB;                                                                          /* 0x2d */
+    uint8_t                 uRexB;                                                                          /* 0x29 */
     /** The extra REX SIB index field bit (REX.X << 3). */
-    uint8_t                 uRexIndex;                                                                      /* 0x2e */
+    uint8_t                 uRexIndex;                                                                      /* 0x2a */
 
     /** The effective segment register (X86_SREG_XXX). */
-    uint8_t                 iEffSeg;                                                                        /* 0x2f */
+    uint8_t                 iEffSeg;                                                                        /* 0x2b */
 
 #else
-    /** The current offset into abOpcodes. */
-    uint8_t                 offOpcode;                                                                      /*       0x0b */
-    /** The size of what has currently been fetched into abOpcodes. */
-    uint8_t                 cbOpcode;                                                                       /*       0x0c */
+    /** The size of what has currently been fetched into abOpcode. */
+    uint8_t                 cbOpcode;                                                                       /*       0x08 */
+    /** The current offset into abOpcode. */
+    uint8_t                 offOpcode;                                                                      /*       0x09 */
 
     /** The effective segment register (X86_SREG_XXX). */
-    uint8_t                 iEffSeg;                                                                        /*       0x0d */
+    uint8_t                 iEffSeg;                                                                        /*       0x0a */
 
     /** The extra REX ModR/M register field bit (REX.R << 3). */
-    uint8_t                 uRexReg;                                                                        /*       0x0e */
+    uint8_t                 uRexReg;                                                                        /*       0x0b */
+    /** The prefix mask (IEM_OP_PRF_XXX). */
+    uint32_t                fPrefixes;                                                                      /*       0x0c */
     /** The extra REX ModR/M r/m field, SIB base and opcode reg bit
      * (REX.B << 3). */
-    uint8_t                 uRexB;                                                                          /*       0x0f */
-    /** The prefix mask (IEM_OP_PRF_XXX). */
-    uint32_t                fPrefixes;                                                                      /*       0x10 */
+    uint8_t                 uRexB;                                                                          /*       0x10 */
     /** The extra REX SIB index field bit (REX.X << 3). */
-    uint8_t                 uRexIndex;                                                                      /*       0x14 */
+    uint8_t                 uRexIndex;                                                                      /*       0x11 */
 
-    /** Explicit alignment padding. */
-    uint8_t                 abAlignment1[3];                                                                /*       0x15 */
 #endif
 
-    /** The effective operand mode . */
-    IEMMODE                 enmEffOpSize;                                                                   /* 0x30, 0x18 */
-    /** The default addressing mode . */
-    IEMMODE                 enmDefAddrMode;                                                                 /* 0x34, 0x1c */
-    /** The effective addressing mode . */
-    IEMMODE                 enmEffAddrMode;                                                                 /* 0x38, 0x20 */
-    /** The default operand mode . */
-    IEMMODE                 enmDefOpSize;                                                                   /* 0x3c, 0x24 */
+    /** The effective operand mode. */
+    IEMMODE                 enmEffOpSize;                                                                   /* 0x2c, 0x12 */
+    /** The default addressing mode. */
+    IEMMODE                 enmDefAddrMode;                                                                 /* 0x2d, 0x13 */
+    /** The effective addressing mode. */
+    IEMMODE                 enmEffAddrMode;                                                                 /* 0x2e, 0x14 */
+    /** The default operand mode. */
+    IEMMODE                 enmDefOpSize;                                                                   /* 0x2f, 0x15 */
+
+    /** Prefix index (VEX.pp) for two byte and three byte tables. */
+    uint8_t                 idxPrefix;                                                                      /* 0x30, 0x16 */
+    /** 3rd VEX/EVEX/XOP register. */
+    uint8_t                 uVex3rdReg;                                                                     /* 0x31, 0x17 */
+    /** The VEX/EVEX/XOP length field. */
+    uint8_t                 uVexLength;                                                                     /* 0x32, 0x18 */
+    /** Additional EVEX stuff. */
+    uint8_t                 fEvexStuff;                                                                     /* 0x33, 0x19 */
 
     /** The FPU opcode (FOP). */
-    uint16_t                uFpuOpcode;                                                                     /* 0x40, 0x28 */
-    /** Align the opcode buffer on a dword boundrary. */
-    uint8_t                 abAlignment2a[2];                                                               /* 0x42, 0x2a */
+    uint16_t                uFpuOpcode;                                                                     /* 0x34, 0x1a */
+
+    /** Explicit alignment padding. */
+#ifdef IEM_WITH_CODE_TLB
+    uint8_t                 abAlignment2a[2];                                                               /* 0x36       */
+#endif
 
     /** The opcode bytes. */
-    uint8_t                 abOpcode[15];                                                                   /* 0x44, 0x2c */
+    uint8_t                 abOpcode[15];                                                                   /* 0x38, 0x1c */
     /** Explicit alignment padding. */
 #ifdef IEM_WITH_CODE_TLB
-    uint8_t                 abAlignment2b[1+4];                                                             /* 0x53 */
+    uint8_t                 abAlignment2c[0x48 - 0x47];                                                     /* 0x47 */
 #else
-    uint8_t                 abAlignment2b[1+28];                                                            /*       0x3b */
+    uint8_t                 abAlignment2c[0x48 - 0x2b];                                                     /*       0x2b */
 #endif
     /** @} */
 
 
     /** The flags of the current exception / interrupt. */
-    uint32_t                fCurXcpt;                                                                       /* 0x58, 0x58 */
+    uint32_t                fCurXcpt;                                                                       /* 0x48, 0x48 */
     /** The current exception / interrupt. */
     uint8_t                 uCurXcpt;
     /** Exception / interrupt recursion depth. */
@@ -585,6 +625,12 @@ typedef struct IEMCPU
     /** Pointer set jump buffer - raw-mode context. */
     RCPTRTYPE(jmp_buf *)    pJmpBufRC;
 
+    /** @todo Should move this near @a fCurXcpt later. */
+    /** The error code for the current exception / interrupt. */
+    uint32_t                uCurXcptErr;
+    /** The CR2 for the current exception / interrupt. */
+    uint64_t                uCurXcptCr2;
+
     /** @name Statistics
      * @{  */
     /** The number of instructions we've executed. */
@@ -664,7 +710,7 @@ typedef struct IEMCPU
     CPUMCPUVENDOR           enmHostCpuVendor;
     /** @} */
 
-    uint32_t                au32Alignment8[HC_ARCH_BITS == 64 ? 1 + 2 + 8 : 1 + 2]; /**< Alignment padding. */
+    uint32_t                au32Alignment8[HC_ARCH_BITS == 64 ? 4 + 8 : 4]; /**< Alignment padding. */
 
     /** Data TLB.
      * @remarks Must be 64-byte aligned. */
@@ -680,8 +726,15 @@ typedef struct IEMCPU
     R0PTRTYPE(PCPUMCTX)     pCtxR0;
     /** Pointer to the CPU context - raw-mode context. */
     RCPTRTYPE(PCPUMCTX)     pCtxRC;
-    /** Alignment padding. */
-    RTRCPTR                 uAlignment9;
+
+    /** Pointer to instruction statistics for raw-mode context (same as R0). */
+    RCPTRTYPE(PIEMINSTRSTATS) pStatsRC;
+    /** Pointer to instruction statistics for ring-0 context (same as RC). */
+    R0PTRTYPE(PIEMINSTRSTATS) pStatsR0;
+    /** Pointer to instruction statistics for non-ring-3 code. */
+    R3PTRTYPE(PIEMINSTRSTATS) pStatsCCR3;
+    /** Pointer to instruction statistics for ring-3 context. */
+    R3PTRTYPE(PIEMINSTRSTATS) pStatsR3;
 
 #ifdef IEM_VERIFICATION_MODE_FULL
     /** The event verification records for what IEM did (LIFO). */
@@ -696,6 +749,7 @@ typedef struct IEMCPU
     R3PTRTYPE(PIEMVERIFYEVTREC)     pFreeEvtRec;
 #endif
 } IEMCPU;
+AssertCompileMemberOffset(IEMCPU, fCurXcpt, 0x48);
 AssertCompileMemberAlignment(IEMCPU, DataTlb, 64);
 AssertCompileMemberAlignment(IEMCPU, CodeTlb, 64);
 /** Pointer to the per-CPU IEM state. */
@@ -805,9 +859,14 @@ typedef IEMCPU const *PCIEMCPU;
  * the first opcode byte.
  * For testing whether any REX prefix is present, use  IEM_OP_PRF_REX instead. */
 #define IEM_OP_PRF_REX_MASK  (IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_B | IEM_OP_PRF_REX_X | IEM_OP_PRF_SIZE_REX_W )
+
+#define IEM_OP_PRF_VEX                  RT_BIT_32(28) /**< Indicates VEX prefix. */
+#define IEM_OP_PRF_EVEX                 RT_BIT_32(29) /**< Indicates EVEX prefix. */
+#define IEM_OP_PRF_XOP                  RT_BIT_32(30) /**< Indicates XOP prefix. */
 /** @} */
 
-/** @name Opcode forms
+/** @name IEMOPFORM_XXX - Opcode forms
+ * @note These are ORed together with IEMOPHINT_XXX.
  * @{ */
 /** ModR/M: reg, r/m */
 #define IEMOPFORM_RM            0
@@ -830,8 +889,33 @@ typedef IEMCPU const *PCIEMCPU;
 /** ModR/M: reg only */
 #define IEMOPFORM_R             3
 
+/** VEX+ModR/M: reg, r/m */
+#define IEMOPFORM_VEX_RM        4
+/** VEX+ModR/M: reg, r/m (register) */
+#define IEMOPFORM_VEX_RM_REG        (IEMOPFORM_VEX_RM | IEMOPFORM_MOD3)
+/** VEX+ModR/M: reg, r/m (memory)   */
+#define IEMOPFORM_VEX_RM_MEM        (IEMOPFORM_VEX_RM | IEMOPFORM_NOT_MOD3)
+/** VEX+ModR/M: r/m, reg */
+#define IEMOPFORM_VEX_MR        5
+/** VEX+ModR/M: r/m (register), reg */
+#define IEMOPFORM_VEX_MR_REG        (IEMOPFORM_VEX_MR | IEMOPFORM_MOD3)
+/** VEX+ModR/M: r/m (memory), reg */
+#define IEMOPFORM_VEX_MR_MEM        (IEMOPFORM_VEX_MR | IEMOPFORM_NOT_MOD3)
+/** VEX+ModR/M: r/m only */
+#define IEMOPFORM_VEX_M         6
+/** VEX+ModR/M: r/m only (register). */
+#define IEMOPFORM_VEX_M_REG         (IEMOPFORM_VEX_M | IEMOPFORM_MOD3)
+/** VEX+ModR/M: r/m only (memory). */
+#define IEMOPFORM_VEX_M_MEM         (IEMOPFORM_VEX_M | IEMOPFORM_NOT_MOD3)
+/** VEX+ModR/M: reg only */
+#define IEMOPFORM_VEX_R         7
+/** VEX+ModR/M: reg, vvvv, r/m */
+#define IEMOPFORM_VEX_RVM       8
+/** VEX+ModR/M: r/m, vvvv, reg */
+#define IEMOPFORM_VEX_MVR       9
+
 /** Fixed register instruction, no R/M. */
-#define IEMOPFORM_FIXED         4
+#define IEMOPFORM_FIXED         16
 
 /** The r/m is a register. */
 #define IEMOPFORM_MOD3          RT_BIT_32(8)
@@ -839,6 +923,17 @@ typedef IEMCPU const *PCIEMCPU;
 #define IEMOPFORM_NOT_MOD3      RT_BIT_32(9)
 /** @} */
 
+/** @name IEMOPHINT_XXX - Additional Opcode Hints
+ * @note These are ORed together with IEMOPFORM_XXX.
+ * @{ */
+/** Both the operand size prefixes are ignored. */
+#define IEMOPHINT_IGNORES_OP_SIZE   RT_BIT_32(10)
+/** Allowed with the lock prefix. */
+#define IEMOPHINT_LOCK_ALLOWED      RT_BIT_32(11)
+/** Hint to IEMAllInstructionPython.py that this macro should be skipped.  */
+#define IEMOPHINT_SKIP_PYTHON       RT_BIT_32(31)
+/** @} */
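
Since the IEMOPFORM_XXX values occupy the low bits and each IEMOPHINT_XXX is a distinct bit, a decoder table entry simply ORs one form with whatever hints apply; an illustrative combination (not taken from a real table):

    /* A VEX reg,vvvv,r/m instruction that ignores operand-size prefixes: */
    uint32_t const fDecoderInfo = IEMOPFORM_VEX_RVM | IEMOPHINT_IGNORES_OP_SIZE;
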
+
 /**
  * Possible hardware task switch sources.
  */
@@ -855,6 +950,20 @@ typedef enum IEMTASKSWITCH
 } IEMTASKSWITCH;
 AssertCompileSize(IEMTASKSWITCH, 4);
 
+/**
+ * Possible CrX load (write) sources.
+ */
+typedef enum IEMACCESSCRX
+{
+    /** CrX access caused by 'mov crX' instruction. */
+    IEMACCESSCRX_MOV_CRX,
+    /** CrX (CR0) write caused by 'lmsw' instruction. */
+    IEMACCESSCRX_LMSW,
+    /** CrX (CR0) write caused by 'clts' instruction. */
+    IEMACCESSCRX_CLTS,
+    /** CrX (CR0) read caused by 'smsw' instruction. */
+    IEMACCESSCRX_SMSW
+} IEMACCESSCRX;
 
 /**
  * Tests if verification mode is enabled.
@@ -1079,10 +1188,12 @@ IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64Ea
                                             uint32_t *pEFlags));
 IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
                                                    uint32_t *pEFlags));
-IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U *pu128Dst, PRTUINT128U pu64RaxRdx, PRTUINT128U pu64RbxRcx,
+IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                              uint32_t *pEFlags));
-IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U *pu128Dst, PRTUINT128U pu64RaxRdx, PRTUINT128U pu64RbxRcx,
+IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                                     uint32_t *pEFlags));
+IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
+                                                      PRTUINT128U pu128RbxRcx, uint32_t *pEFlags));
 /** @} */
 
 /** @name Memory ordering
@@ -1317,6 +1428,7 @@ FNIEMAIMPLFPUR80            iemAImpl_fprem1_r80_by_r80;
 FNIEMAIMPLFPUR80            iemAImpl_fscale_r80_by_r80;
 
 FNIEMAIMPLFPUR80            iemAImpl_fpatan_r80_by_r80;
+FNIEMAIMPLFPUR80            iemAImpl_fyl2x_r80_by_r80;
 FNIEMAIMPLFPUR80            iemAImpl_fyl2xp1_r80_by_r80;
 
 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUR80FSW,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
@@ -1336,7 +1448,6 @@ typedef FNIEMAIMPLFPUR80UNARY *PFNIEMAIMPLFPUR80UNARY;
 FNIEMAIMPLFPUR80UNARY       iemAImpl_fabs_r80;
 FNIEMAIMPLFPUR80UNARY       iemAImpl_fchs_r80;
 FNIEMAIMPLFPUR80UNARY       iemAImpl_f2xm1_r80;
-FNIEMAIMPLFPUR80UNARY       iemAImpl_fyl2x_r80;
 FNIEMAIMPLFPUR80UNARY       iemAImpl_fsqrt_r80;
 FNIEMAIMPLFPUR80UNARY       iemAImpl_frndint_r80;
 FNIEMAIMPLFPUR80UNARY       iemAImpl_fsin_r80;
@@ -1452,7 +1563,7 @@ typedef IEMVMM256 *PCIEMVMM256;
  * @{ */
 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF2U64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src));
 typedef FNIEMAIMPLMEDIAF2U64   *PFNIEMAIMPLMEDIAF2U64;
-typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF2U128,(PCX86FXSTATE pFpuState, uint128_t *pu128Dst, uint128_t const *pu128Src));
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF2U128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src));
 typedef FNIEMAIMPLMEDIAF2U128  *PFNIEMAIMPLMEDIAF2U128;
 FNIEMAIMPLMEDIAF2U64  iemAImpl_pxor_u64,  iemAImpl_pcmpeqb_u64,  iemAImpl_pcmpeqw_u64,  iemAImpl_pcmpeqd_u64;
 FNIEMAIMPLMEDIAF2U128 iemAImpl_pxor_u128, iemAImpl_pcmpeqb_u128, iemAImpl_pcmpeqw_u128, iemAImpl_pcmpeqd_u128;
@@ -1462,7 +1573,7 @@ FNIEMAIMPLMEDIAF2U128 iemAImpl_pxor_u128, iemAImpl_pcmpeqb_u128, iemAImpl_pcmpeq
  * @{ */
 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF1L1U64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src));
 typedef FNIEMAIMPLMEDIAF1L1U64   *PFNIEMAIMPLMEDIAF1L1U64;
-typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF1L1U128,(PCX86FXSTATE pFpuState, uint128_t *pu128Dst, uint64_t const *pu64Src));
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF1L1U128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src));
 typedef FNIEMAIMPLMEDIAF1L1U128  *PFNIEMAIMPLMEDIAF1L1U128;
 FNIEMAIMPLMEDIAF1L1U64  iemAImpl_punpcklbw_u64,  iemAImpl_punpcklwd_u64,  iemAImpl_punpckldq_u64;
 FNIEMAIMPLMEDIAF1L1U128 iemAImpl_punpcklbw_u128, iemAImpl_punpcklwd_u128, iemAImpl_punpckldq_u128, iemAImpl_punpcklqdq_u128;
@@ -1472,7 +1583,7 @@ FNIEMAIMPLMEDIAF1L1U128 iemAImpl_punpcklbw_u128, iemAImpl_punpcklwd_u128, iemAIm
  * @{ */
 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF1H1U64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src));
 typedef FNIEMAIMPLMEDIAF2U64   *PFNIEMAIMPLMEDIAF1H1U64;
-typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF1H1U128,(PCX86FXSTATE pFpuState, uint128_t *pu128Dst, uint128_t const *pu128Src));
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF1H1U128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src));
 typedef FNIEMAIMPLMEDIAF2U128  *PFNIEMAIMPLMEDIAF1H1U128;
 FNIEMAIMPLMEDIAF1H1U64  iemAImpl_punpckhbw_u64,  iemAImpl_punpckhwd_u64,  iemAImpl_punpckhdq_u64;
 FNIEMAIMPLMEDIAF1H1U128 iemAImpl_punpckhbw_u128, iemAImpl_punpckhwd_u128, iemAImpl_punpckhdq_u128, iemAImpl_punpckhqdq_u128;
@@ -1480,8 +1591,8 @@ FNIEMAIMPLMEDIAF1H1U128 iemAImpl_punpckhbw_u128, iemAImpl_punpckhwd_u128, iemAIm
 
 /** @name Media (SSE/MMX/AVX) operation: Packed Shuffle Stuff (evil)
  * @{ */
-typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAPSHUF,(PCX86FXSTATE pFpuState, uint128_t *pu128Dst,
-                                                       uint128_t const *pu128Src, uint8_t bEvil));
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAPSHUF,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst,
+                                                       PCRTUINT128U pu128Src, uint8_t bEvil));
 typedef FNIEMAIMPLMEDIAPSHUF *PFNIEMAIMPLMEDIAPSHUF;
 FNIEMAIMPLMEDIAPSHUF iemAImpl_pshufhw, iemAImpl_pshuflw, iemAImpl_pshufd;
 IEM_DECL_IMPL_DEF(void, iemAImpl_pshufw,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src, uint8_t bEvil));
@@ -1490,9 +1601,14 @@ IEM_DECL_IMPL_DEF(void, iemAImpl_pshufw,(PCX86FXSTATE pFpuState, uint64_t *pu64D
 /** @name Media (SSE/MMX/AVX) operation: Move Byte Mask
  * @{ */
 IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src));
-IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u128,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint128_t const *pu128Src));
+IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u128,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, PCRTUINT128U pu128Src));
 /** @} */
 
+/** @name Media (SSE/MMX/AVX) operation: Sort this later
+ * @{ */
+IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc));
+IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc));
+/** @} */
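
For reference, the semantics behind these two helpers: movsldup duplicates the even 32-bit lanes of the source (lane 0 into lanes 0/1, lane 2 into lanes 2/3), while movddup broadcasts a 64-bit source into both halves of the destination. A sketch with a simplified RTUINT128U-style union (the real helpers also take a PCX86FXSTATE parameter):

    #include <stdint.h>

    typedef union U128 { uint64_t au64[2]; uint32_t au32[4]; } U128;

    static void movsldup(U128 *puDst, const U128 *puSrc)
    {
        uint32_t const uLane0 = puSrc->au32[0];  /* read first: dst may alias src */
        uint32_t const uLane2 = puSrc->au32[2];
        puDst->au32[0] = uLane0;
        puDst->au32[1] = uLane0;
        puDst->au32[2] = uLane2;
        puDst->au32[3] = uLane2;
    }

    static void movddup(U128 *puDst, uint64_t uSrc)
    {
        puDst->au64[0] = uSrc;
        puDst->au64[1] = uSrc;    /* broadcast */
    }
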
 
 
 /** @name Function tables.
diff --git a/src/VBox/VMM/include/PDMAsyncCompletionFileInternal.h b/src/VBox/VMM/include/PDMAsyncCompletionFileInternal.h
index d4c2467..d19d22f 100644
--- a/src/VBox/VMM/include/PDMAsyncCompletionFileInternal.h
+++ b/src/VBox/VMM/include/PDMAsyncCompletionFileInternal.h
@@ -483,6 +483,8 @@ typedef struct PDMACTASKFILE
     PDMACTASKFILETRANSFER                enmTransferType;
     /** Start offset */
     RTFOFF                               Off;
+    /** Amount of data transferred so far. */
+    size_t                               cbTransfered;
     /** Data segment. */
     RTSGSEG                              DataSeg;
     /** When non-zero the segment uses a bounce buffer because the provided buffer
diff --git a/src/VBox/VMM/include/PGMInline.h b/src/VBox/VMM/include/PGMInline.h
index fbc428a..fb8d9b6 100644
--- a/src/VBox/VMM/include/PGMInline.h
+++ b/src/VBox/VMM/include/PGMInline.h
@@ -720,7 +720,7 @@ DECLINLINE(PX86PDPE) pgmGstGetPaePDPEPtr(PVMCPU pVCpu, RTGCPTR GCPtr)
             return NULL;
     }
 #endif
-    return &pGuestPDPT->a[(GCPtr >> X86_PDPT_SHIFT) & X86_PDPT_MASK_PAE];
+    return &pGuestPDPT->a[(uint32_t)GCPtr >> X86_PDPT_SHIFT];
 }
 
 
@@ -738,7 +738,7 @@ DECLINLINE(X86PDEPAE) pgmGstGetPaePDE(PVMCPU pVCpu, RTGCPTR GCPtr)
     PX86PDPT    pGuestPDPT = pgmGstGetPaePDPTPtr(pVCpu);
     if (RT_LIKELY(pGuestPDPT))
     {
-        const unsigned iPdpt = (GCPtr >> X86_PDPT_SHIFT) & X86_PDPT_MASK_PAE;
+        const unsigned iPdpt = (uint32_t)GCPtr >> X86_PDPT_SHIFT;
         if (    pGuestPDPT->a[iPdpt].n.u1Present
             &&  !(pGuestPDPT->a[iPdpt].u & pVCpu->pgm.s.fGstPaeMbzPdpeMask) )
         {
@@ -787,7 +787,7 @@ DECLINLINE(PX86PDPAE) pgmGstGetPaePDPtr(PVMCPU pVCpu, RTGCPTR GCPtr, unsigned *p
     PX86PDPT        pGuestPDPT = pgmGstGetPaePDPTPtr(pVCpu);
     if (RT_UNLIKELY(!pGuestPDPT))
         return NULL;
-    const unsigned  iPdpt = (GCPtr >> X86_PDPT_SHIFT) & X86_PDPT_MASK_PAE;
+    const unsigned  iPdpt = (uint32_t)GCPtr >> X86_PDPT_SHIFT;
     if (pPdpe)
         *pPdpe = pGuestPDPT->a[iPdpt];
     if (!pGuestPDPT->a[iPdpt].n.u1Present)
@@ -1007,15 +1007,13 @@ DECLINLINE(PX86PD) pgmShwGet32BitPDPtr(PVMCPU pVCpu)
  */
 DECLINLINE(X86PDE) pgmShwGet32BitPDE(PVMCPU pVCpu, RTGCPTR GCPtr)
 {
-    const unsigned iPd = (GCPtr >> X86_PD_SHIFT) & X86_PD_MASK;
-
     PX86PD pShwPde = pgmShwGet32BitPDPtr(pVCpu);
     if (!pShwPde)
     {
         X86PDE ZeroPde = {0};
         return ZeroPde;
     }
-    return pShwPde->a[iPd];
+    return pShwPde->a[(uint32_t)GCPtr >> X86_PD_SHIFT];
 }
 
 
@@ -1029,11 +1027,9 @@ DECLINLINE(X86PDE) pgmShwGet32BitPDE(PVMCPU pVCpu, RTGCPTR GCPtr)
  */
 DECLINLINE(PX86PDE) pgmShwGet32BitPDEPtr(PVMCPU pVCpu, RTGCPTR GCPtr)
 {
-    const unsigned iPd = (GCPtr >> X86_PD_SHIFT) & X86_PD_MASK;
-
     PX86PD pPde = pgmShwGet32BitPDPtr(pVCpu);
     AssertReturn(pPde, NULL);
-    return &pPde->a[iPd];
+    return &pPde->a[(uint32_t)GCPtr >> X86_PD_SHIFT];
 }
 
 
@@ -1058,7 +1054,7 @@ DECLINLINE(PX86PDPT) pgmShwGetPaePDPTPtr(PVMCPU pVCpu)
  */
 DECLINLINE(PX86PDPAE) pgmShwGetPaePDPtr(PVMCPU pVCpu, RTGCPTR GCPtr)
 {
-    const unsigned  iPdpt = (GCPtr >> X86_PDPT_SHIFT) & X86_PDPT_MASK_PAE;
+    const unsigned  iPdpt = (uint32_t)GCPtr >> X86_PDPT_SHIFT;
     PX86PDPT        pPdpt = pgmShwGetPaePDPTPtr(pVCpu);
 
     if (!pPdpt->a[iPdpt].n.u1Present)
@@ -1083,7 +1079,7 @@ DECLINLINE(PX86PDPAE) pgmShwGetPaePDPtr(PVMCPU pVCpu, RTGCPTR GCPtr)
  */
 DECLINLINE(PX86PDPAE) pgmShwGetPaePDPtr(PVMCPU pVCpu, PX86PDPT pPdpt, RTGCPTR GCPtr)
 {
-    const unsigned  iPdpt = (GCPtr >> X86_PDPT_SHIFT) & X86_PDPT_MASK_PAE;
+    const unsigned  iPdpt = (uint32_t)GCPtr >> X86_PDPT_SHIFT;
 
     if (!pPdpt->a[iPdpt].n.u1Present)
         return NULL;
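
Dropping the '& X86_PDPT_MASK_PAE' in these PAE lookups is sound because guest-linear addresses are 32-bit in PAE mode: after the uint32_t cast, shifting by X86_PDPT_SHIFT (30) can only yield 0..3, exactly the four PDPT entries, so the mask was redundant. As a quick check:

    #include <assert.h>
    #include <stdint.h>

    enum { X86_PDPT_SHIFT = 30, X86_PDPT_MASK_PAE = 3 };

    /* PAE guest-linear addresses are 32-bit, so the shift alone bounds the index. */
    static unsigned paePdptIndex(uint32_t GCPtr)
    {
        unsigned iPdpt = GCPtr >> X86_PDPT_SHIFT;      /* 0..3 by construction */
        assert(iPdpt == ((GCPtr >> X86_PDPT_SHIFT) & X86_PDPT_MASK_PAE));
        return iPdpt;
    }
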
diff --git a/src/VBox/VMM/include/PGMInternal.h b/src/VBox/VMM/include/PGMInternal.h
index 1d2e46a..9d5ce06 100644
--- a/src/VBox/VMM/include/PGMInternal.h
+++ b/src/VBox/VMM/include/PGMInternal.h
@@ -2781,11 +2781,11 @@ typedef struct PGMPTWALKCORE
     bool            fBigPage;
     /** Set if it involves a gigantic page (1 GB). */
     bool            fGigantPage;
-    /** The effect X86_PTE_US flag for the address. */
+    /** The effective X86_PTE_US flag for the address. */
     bool            fEffectiveUS;
-    /** The effect X86_PTE_RW flag for the address. */
+    /** The effective X86_PTE_RW flag for the address. */
     bool            fEffectiveRW;
-    /** The effect X86_PTE_NX flag for the address. */
+    /** The effective X86_PTE_NX flag for the address. */
     bool            fEffectiveNX;
 } PGMPTWALKCORE;
 
@@ -3880,7 +3880,7 @@ typedef struct PGMCPU
     /** The guest paging mode. */
     PGMMODE                         enmGuestMode;
 
-    /** The current physical address representing in the guest CR3 register. */
+    /** The current physical address represented in the guest CR3 register. */
     RTGCPHYS                        GCPhysCR3;
 
     /** @name 32-bit Guest Paging.
@@ -4105,6 +4105,7 @@ typedef PGMCPU *PPGMCPU;
 
 
 /** @name PGM::fSyncFlags Flags
+ * @note Was part of saved state a long time ago.
  * @{
  */
 /** Updates the virtual access handler state bit in PGMPAGE. */
diff --git a/src/VBox/VMM/testcase/Instructions/InstructionTestGen.py b/src/VBox/VMM/testcase/Instructions/InstructionTestGen.py
index 44a61d1..ce27467 100755
--- a/src/VBox/VMM/testcase/Instructions/InstructionTestGen.py
+++ b/src/VBox/VMM/testcase/Instructions/InstructionTestGen.py
@@ -20,7 +20,7 @@ Foundation, in version 2 as it comes in the "COPYING" file of the
 VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 """
-__version__ = "$Revision: 100868 $";
+__version__ = "$Revision: 114778 $";
 
 
 # pylint: disable=C0103,R0913
@@ -88,8 +88,8 @@ X86_OP_PRF_GS       = 0x65;
 X86_OP_PRF_SIZE_OP  = 0x66;
 X86_OP_PRF_SIZE_ADDR = 0x67;
 X86_OP_PRF_LOCK     = 0xf0;
-X86_OP_PRF_REPZ     = 0xf2;
-X86_OP_PRF_REPNZ    = 0xf3;
+X86_OP_PRF_REPNZ    = 0xf2;
+X86_OP_PRF_REPZ     = 0xf3;
 X86_OP_REX_B        = 0x41;
 X86_OP_REX_X        = 0x42;
 X86_OP_REX_R        = 0x44;
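
For reference, the swap above corrects a transposition: in the x86 instruction encoding, 0xF2 is REPNE/REPNZ and 0xF3 is REP/REPE/REPZ. A tiny standalone C sketch (not part of the patch) tabulating the prefix bytes as the manuals define them:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        static const struct { uint8_t bPrf; const char *pszName; } s_aPrf[] =
        {
            { 0xf0, "LOCK"          },
            { 0xf2, "REPNE/REPNZ"   },
            { 0xf3, "REP/REPE/REPZ" },
        };
        unsigned i;
        for (i = 0; i < sizeof(s_aPrf) / sizeof(s_aPrf[0]); i++)
            printf("%#04x  %s\n", s_aPrf[i].bPrf, s_aPrf[i].pszName);
        return 0;
    }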
diff --git a/src/VBox/VMM/testcase/tstIEMCheckMc.cpp b/src/VBox/VMM/testcase/tstIEMCheckMc.cpp
index e317af0..56f8e37 100644
--- a/src/VBox/VMM/testcase/tstIEMCheckMc.cpp
+++ b/src/VBox/VMM/testcase/tstIEMCheckMc.cpp
@@ -26,6 +26,7 @@
 #include <VBox/types.h>
 #include <VBox/err.h>
 #include <VBox/log.h>
+#define IN_TSTVMSTRUCT 1
 #include "../include/IEMInternal.h"
 #include <VBox/vmm/vm.h>
 
@@ -35,7 +36,7 @@
 *********************************************************************************************************************************/
 bool volatile       g_fRandom;
 uint8_t volatile    g_bRandom;
-uint128_t           g_u128Zero;
+RTUINT128U          g_u128Zero;
 
 
 /** For hacks.  */
@@ -117,16 +118,31 @@ typedef VBOXSTRICTRC (* PFNIEMOPRM)(PVMCPU pVCpu, uint8_t bRm);
 #define IEMOP_HLP_64BIT_OP_SIZE()                           do { } while (0)
 #define IEMOP_HLP_DEFAULT_64BIT_OP_SIZE()                   do { } while (0)
 #define IEMOP_HLP_CLEAR_REX_NOT_BEFORE_OPCODE(a_szPrf)      do { } while (0)
-#define IEMOP_HLP_DONE_DECODING()                           do { } while (0)
+#define IEMOP_HLP_DONE_VEX_DECODING_L_ZERO_NO_VVV()         do { } while (0)
 #define IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX()            do { } while (0)
 #define IEMOP_HLP_DONE_DECODING_NO_LOCK_REPZ_OR_REPNZ_PREFIXES()                                    do { } while (0)
+#define IEMOP_HLP_DONE_DECODING()                           do { } while (0)
+#define IEMOP_HLP_DONE_VEX_DECODING()                       do { } while (0)
+
+#define IEMOP_HLP_SVM_CTRL_INTERCEPT(a_pVCpu, a_Intercept, a_uExitCode, a_uExitInfo1, a_uExitInfo2) do { } while (0)
+#define IEMOP_HLP_SVM_READ_CR_INTERCEPT(a_pVCpu, a_uCr, a_uExitInfo1, a_uExitInfo2)                 do { } while (0)
+
 #define IEMOP_HLP_DECODED_NL_1(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_fDisOpType)               do { } while (0)
 #define IEMOP_HLP_DECODED_NL_2(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_uDisParam1, a_fDisOpType) do { } while (0)
 #define IEMOP_RAISE_DIVIDE_ERROR()                          VERR_TRPM_ACTIVE_TRAP
 #define IEMOP_RAISE_INVALID_OPCODE()                        VERR_TRPM_ACTIVE_TRAP
 #define IEMOP_RAISE_INVALID_LOCK_PREFIX()                   VERR_TRPM_ACTIVE_TRAP
-#define IEMOP_MNEMONIC(a_szMnemonic)                        do { } while (0)
-#define IEMOP_MNEMONIC2(a_szMnemonic, a_szOps)              do { } while (0)
+#define IEMOP_MNEMONIC(a_Stats, a_szMnemonic)               do { } while (0)
+#define IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints) do { } while (0)
+#define IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints) do { } while (0)
+#define IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints) do { } while (0)
+#define IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints) do { } while (0)
+#define IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints) do { } while (0)
+#define IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)                         do { } while (0)
+#define IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)                  do { } while (0)
+#define IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)           do { } while (0)
+#define IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)    do { } while (0)
+#define IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)    do { } while (0)
 #define IEMOP_BITCH_ABOUT_STUB()                            do { } while (0)
 #define FNIEMOP_STUB(a_Name) \
     FNIEMOP_DEF(a_Name) { return VERR_NOT_IMPLEMENTED; } \
@@ -239,6 +255,7 @@ IEMOPMEDIAF2 g_iemAImpl_pcmpeqd;
 #define iemAImpl_fscale_r80_by_r80      NULL
 
 #define iemAImpl_fpatan_r80_by_r80      NULL
+#define iemAImpl_fyl2x_r80_by_r80       NULL
 #define iemAImpl_fyl2xp1_r80_by_r80     NULL
 
 #define iemAImpl_fcom_r80_by_r80        NULL
@@ -248,7 +265,6 @@ IEMOPMEDIAF2 g_iemAImpl_pcmpeqd;
 #define iemAImpl_ftst_r80               NULL
 #define iemAImpl_fxam_r80               NULL
 #define iemAImpl_f2xm1_r80              NULL
-#define iemAImpl_fyl2x_r80              NULL
 #define iemAImpl_fsqrt_r80              NULL
 #define iemAImpl_frndint_r80            NULL
 #define iemAImpl_fsin_r80               NULL
@@ -334,7 +350,10 @@ IEMOPMEDIAF2 g_iemAImpl_pcmpeqd;
 #define IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT() do {} while (0)
 #define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT()           do {} while (0)
 #define IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT()          do {} while (0)
+#define IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT()          do {} while (0)
 #define IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO()              do {} while (0)
+#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
+    do { AssertCompile(RT_IS_POWER_OF_TWO(a_cbAlign)); CHK_TYPE(RTGCPTR,  a_EffAddr); } while (0)
 
 #define IEM_MC_LOCAL(a_Type, a_Name) \
     a_Type a_Name; NOREF(a_Name)
@@ -482,16 +501,17 @@ IEMOPMEDIAF2 g_iemAImpl_pcmpeqd;
 #define IEM_MC_REF_MREG_U64_CONST(a_pu64Dst, a_iMReg)       do { (a_pu64Dst) = (uint64_t const *)((uintptr_t)0); CHK_PTYPE(uint64_t const *, a_pu64Dst); (void)fFpuWrite; } while (0)
 #define IEM_MC_REF_MREG_U32_CONST(a_pu32Dst, a_iMReg)       do { (a_pu32Dst) = (uint32_t const *)((uintptr_t)0); CHK_PTYPE(uint32_t const *, a_pu32Dst); (void)fFpuWrite; } while (0)
 
-#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg)        do { (a_u128Value) = g_u128Zero; CHK_TYPE(uint128_t, a_u128Value); (void)fSseRead;  } while (0)
+#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg)        do { (a_u128Value) = g_u128Zero; CHK_TYPE(RTUINT128U, a_u128Value); (void)fSseRead;  } while (0)
 #define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg)          do { (a_u64Value) = 0; CHK_TYPE(uint64_t, a_u64Value); (void)fSseRead; } while (0)
 #define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg)          do { (a_u32Value) = 0; CHK_TYPE(uint32_t, a_u32Value); (void)fSseRead; } while (0)
-#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value)        do { CHK_TYPE(uint128_t, a_u128Value); (void)fSseWrite; } while (0)
+#define IEM_MC_FETCH_XREG_HI_U64(a_u64Value, a_iXReg)       do { (a_u64Value) = 0; CHK_TYPE(uint64_t, a_u64Value); (void)fSseRead; } while (0)
+#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value)        do { CHK_TYPE(RTUINT128U, a_u128Value); (void)fSseWrite; } while (0)
 #define IEM_MC_STORE_XREG_U64(a_iXReg, a_u64Value)          do { CHK_TYPE(uint64_t,  a_u64Value);  (void)fSseWrite; } while (0)
 #define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value)  do { CHK_TYPE(uint64_t,  a_u64Value);  (void)fSseWrite; } while (0)
 #define IEM_MC_STORE_XREG_U32(a_iXReg, a_u32Value)          do { CHK_TYPE(uint32_t,  a_u32Value);  (void)fSseWrite; } while (0)
 #define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value)  do { CHK_TYPE(uint32_t,  a_u32Value);  (void)fSseWrite; } while (0)
-#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg)           do { (a_pu128Dst) = (uint128_t *)((uintptr_t)0);        CHK_PTYPE(uint128_t *, a_pu128Dst);       (void)fSseWrite; } while (0)
-#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg)     do { (a_pu128Dst) = (uint128_t const *)((uintptr_t)0);  CHK_PTYPE(uint128_t const *, a_pu128Dst); (void)fSseWrite; } while (0)
+#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg)           do { (a_pu128Dst) = (PRTUINT128U)((uintptr_t)0);        CHK_PTYPE(PRTUINT128U, a_pu128Dst);       (void)fSseWrite; } while (0)
+#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg)     do { (a_pu128Dst) = (PCRTUINT128U)((uintptr_t)0);  CHK_PTYPE(PCRTUINT128U, a_pu128Dst); (void)fSseWrite; } while (0)
 #define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg)       do { (a_pu64Dst)  = (uint64_t const *)((uintptr_t)0);   CHK_PTYPE(uint64_t const *, a_pu64Dst);   (void)fSseWrite; } while (0)
 #define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc)       do { (void)fSseWrite; } while (0)
 
@@ -531,8 +551,8 @@ IEMOPMEDIAF2 g_iemAImpl_pcmpeqd;
 #define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem)              do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTFLOAT32U, a_r32Dst);} while (0)
 #define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem)              do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTFLOAT64U, a_r64Dst);} while (0)
 #define IEM_MC_FETCH_MEM_R80(a_r80Dst, a_iSeg, a_GCPtrMem)              do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTFLOAT80U, a_r80Dst);} while (0)
-#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem)            do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(uint128_t, a_u128Dst);} while (0)
-#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem)  do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(uint128_t, a_u128Dst);} while (0)
+#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem)            do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTUINT128U, a_u128Dst);} while (0)
+#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem)  do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTUINT128U, a_u128Dst);} while (0)
 
 #define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value)              do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(uint8_t,  a_u8Value); CHK_SEG_IDX(a_iSeg); } while (0)
 #define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value)            do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(uint16_t, a_u16Value);      } while (0)
@@ -549,8 +569,8 @@ IEMOPMEDIAF2 g_iemAImpl_pcmpeqd;
 #define IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF(a_pr32Dst)                 do { CHK_TYPE(PRTFLOAT32U, a_pr32Dst); } while (0)
 #define IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF(a_pr64Dst)                 do { CHK_TYPE(PRTFLOAT64U, a_pr64Dst); } while (0)
 #define IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF(a_pr80Dst)                 do { CHK_TYPE(PRTFLOAT80U, a_pr80Dst); } while (0)
-#define IEM_MC_STORE_MEM_U128(a_iSeg, a_GCPtrMem, a_u128Dst)            do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(uint128_t, a_u128Dst); CHK_SEG_IDX(a_iSeg);} while (0)
-#define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Dst)  do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(uint128_t, a_u128Dst); CHK_SEG_IDX(a_iSeg);} while (0)
+#define IEM_MC_STORE_MEM_U128(a_iSeg, a_GCPtrMem, a_u128Dst)            do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTUINT128U, a_u128Dst); CHK_SEG_IDX(a_iSeg);} while (0)
+#define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Dst)  do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTUINT128U, a_u128Dst); CHK_SEG_IDX(a_iSeg);} while (0)
 
 #define IEM_MC_PUSH_U16(a_u16Value)                                     do {} while (0)
 #define IEM_MC_PUSH_U32(a_u32Value)                                     do {} while (0)
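
The new IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED stub above only checks types at compile time, but the runtime test it stands in for is the standard power-of-two alignment check. A small standalone sketch (hypothetical helper, not IEM code) of that check:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool IsEffAddrAligned(uint64_t uEffAddr, uint32_t cbAlign)
    {
        assert(cbAlign && !(cbAlign & (cbAlign - 1))); /* must be a power of two */
        return (uEffAddr & (cbAlign - 1)) == 0;        /* low bits must be clear */
    }

    int main(void)
    {
        assert( IsEffAddrAligned(0x1000, 16));
        assert(!IsEffAddrAligned(0x1008, 16));
        assert( IsEffAddrAligned(0x1008,  8));
        return 0;
    }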
diff --git a/src/VBox/VMM/testcase/tstVMStructDTrace.cpp b/src/VBox/VMM/testcase/tstVMStructDTrace.cpp
index 3b4d152..b2f2635 100644
--- a/src/VBox/VMM/testcase/tstVMStructDTrace.cpp
+++ b/src/VBox/VMM/testcase/tstVMStructDTrace.cpp
@@ -21,6 +21,7 @@
 /*********************************************************************************************************************************
 *   Header Files                                                                                                                 *
 *********************************************************************************************************************************/
+#define IN_TSTVMSTRUCT 1
 #define IN_TSTVMSTRUCTGC 1
 #include <VBox/vmm/cfgm.h>
 #include <VBox/vmm/cpum.h>
diff --git a/src/VBox/VMM/testcase/tstVMStructRC.cpp b/src/VBox/VMM/testcase/tstVMStructRC.cpp
index 642518f..4707f08 100644
--- a/src/VBox/VMM/testcase/tstVMStructRC.cpp
+++ b/src/VBox/VMM/testcase/tstVMStructRC.cpp
@@ -49,6 +49,7 @@ AssertCompileSize(RTHCPHYS, 8);
 /*********************************************************************************************************************************
 *   Header Files                                                                                                                 *
 *********************************************************************************************************************************/
+#define IN_TSTVMSTRUCT 1
 #define IN_TSTVMSTRUCTGC 1
 #include <VBox/vmm/cfgm.h>
 #include <VBox/vmm/cpum.h>
diff --git a/src/VBox/VMM/testcase/tstVMStructSize.cpp b/src/VBox/VMM/testcase/tstVMStructSize.cpp
index 13ec0f2..52477fb 100644
--- a/src/VBox/VMM/testcase/tstVMStructSize.cpp
+++ b/src/VBox/VMM/testcase/tstVMStructSize.cpp
@@ -21,6 +21,7 @@
 /*********************************************************************************************************************************
 *   Header Files                                                                                                                 *
 *********************************************************************************************************************************/
+#define IN_TSTVMSTRUCT 1
 #include <VBox/vmm/cfgm.h>
 #include <VBox/vmm/cpum.h>
 #include <VBox/vmm/mm.h>
diff --git a/src/VBox/VMM/tools/VBoxCpuReport.cpp b/src/VBox/VMM/tools/VBoxCpuReport.cpp
index b3cc4c8..83bede5 100644
--- a/src/VBox/VMM/tools/VBoxCpuReport.cpp
+++ b/src/VBox/VMM/tools/VBoxCpuReport.cpp
@@ -70,6 +70,8 @@ static bool             g_fIntelNetBurst = false;
 static PRTSTREAM        g_pReportOut;
 /** The alternative debug stream. */
 static PRTSTREAM        g_pDebugOut;
+/** Whether to skip MSR collection. */

+static bool             g_fNoMsrs = false;
 
 /** Snooping info storage for vbCpuRepGuessScalableBusFrequencyName. */
 static uint64_t         g_uMsrIntelP6FsbFrequency = UINT64_MAX;
@@ -162,16 +164,19 @@ static int vbCpuRepMsrsAddOne(VBCPUREPMSR **ppaMsrs, uint32_t *pcMsrs,
 static uint8_t vbCpuRepGetPhysAddrWidth(void)
 {
     uint8_t  cMaxWidth;
-    uint32_t cMaxExt = ASMCpuId_EAX(0x80000000);
     if (!ASMHasCpuId())
         cMaxWidth = 32;
-    else if (ASMIsValidExtRange(cMaxExt)&& cMaxExt >= 0x80000008)
-        cMaxWidth = ASMCpuId_EAX(0x80000008) & 0xff;
-    else if (   ASMIsValidStdRange(ASMCpuId_EAX(0))
-             && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_PSE36))
-        cMaxWidth = 36;
     else
-        cMaxWidth = 32;
+    {
+        uint32_t cMaxExt = ASMCpuId_EAX(0x80000000);
+        if (ASMIsValidExtRange(cMaxExt) && cMaxExt >= 0x80000008)
+            cMaxWidth = ASMCpuId_EAX(0x80000008) & 0xff;
+        else if (   ASMIsValidStdRange(ASMCpuId_EAX(0))
+                 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_PSE36))
+            cMaxWidth = 36;
+        else
+            cMaxWidth = 32;
+    }
     return cMaxWidth;
 }
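
The restructured function keeps the same decision chain, only deferring the extended-leaf CPUID query until CPUID support is confirmed: leaf 0x80000008 EAX[7:0] when available, else 36 bits if PSE-36 is reported, else 32. A hedged standalone sketch of the same logic using GCC/Clang's <cpuid.h> intrinsics instead of the IPRT ASMCpuId helpers:

    #include <cpuid.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint8_t GetPhysAddrWidth(void)
    {
        unsigned uEax, uEbx, uEcx, uEdx;
        if (   __get_cpuid_max(0x80000000, NULL) >= 0x80000008
            && __get_cpuid(0x80000008, &uEax, &uEbx, &uEcx, &uEdx))
            return (uint8_t)(uEax & 0xff);      /* leaf 8000_0008h: EAX[7:0] = phys width */
        if (__get_cpuid(1, &uEax, &uEbx, &uEcx, &uEdx) && (uEdx & (1 << 17)))
            return 36;                          /* PSE-36 capable */
        return 32;                              /* plain 32-bit paging */
    }

    int main(void)
    {
        printf("physical address width: %u bits\n", GetPhysAddrWidth());
        return 0;
    }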
 
@@ -518,7 +523,7 @@ static int findMsrs(VBCPUREPMSR **ppaMsrs, uint32_t *pcMsrs, uint32_t fMsrMask)
                     && g_enmVendor == CPUMCPUVENDOR_INTEL
                     && g_enmMicroarch == kCpumMicroarch_Intel_Atom_Bonnell)
                 || (   (uMsr == 0x280)
-                    && g_enmMicroarch == kCpumMicroarch_Intel_P6_III)) 
+                    && g_enmMicroarch == kCpumMicroarch_Intel_P6_III))
                 vbCpuRepDebug("Skipping %#x\n", uMsr);
             else
             {
@@ -4330,6 +4335,11 @@ static int probeMsrs(bool fHacking, const char *pszNameC, const char *pszCpuDesc
         vbCpuRepDebug("Skipping MSR probing, CPUID indicates there isn't any MSR support.\n");
         return VINF_SUCCESS;
     }
+    if (g_fNoMsrs)
+    {
+        vbCpuRepDebug("Skipping MSR probing (--no-msr).\n");
+        return VINF_SUCCESS;
+    }
 
     /*
      * Initialize the support library and check if we can read MSRs.
@@ -4717,6 +4727,7 @@ static int produceCpuReport(void)
                    "    /*.uScalableBusFreq = */ CPUM_SBUSFREQ_%s,\n"
                    "    /*.fFlags           = */ 0,\n"
                    "    /*.cMaxPhysAddrWidth= */ %u,\n"
+                   "    /*.fMxCsrMask       = */ %#010x,\n"
                    "    /*.paCpuIdLeaves    = */ NULL_ALONE(g_aCpuIdLeaves_%s),\n"
                    "    /*.cCpuIdLeaves     = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_%s)),\n"
                    "    /*.enmUnknownCpuId  = */ CPUMUNKNOWNCPUID_%s,\n"
@@ -4739,6 +4750,7 @@ static int produceCpuReport(void)
                    CPUMR3MicroarchName(enmMicroarch),
                    vbCpuRepGuessScalableBusFrequencyName(),
                    vbCpuRepGetPhysAddrWidth(),
+                   CPUMR3DeterminHostMxCsrMask(),
                    szNameC,
                    szNameC,
                    CPUMR3CpuIdUnknownLeafMethodName(enmUnknownMethod),
@@ -4769,6 +4781,7 @@ int main(int argc, char **argv)
     {
         { "--msrs-only", 'm', RTGETOPT_REQ_NOTHING },
         { "--msrs-dev",  'd', RTGETOPT_REQ_NOTHING },
+        { "--no-msrs",   'n', RTGETOPT_REQ_NOTHING },
         { "--output",    'o', RTGETOPT_REQ_STRING  },
         { "--log",       'l', RTGETOPT_REQ_STRING  },
     };
@@ -4800,6 +4813,10 @@ int main(int argc, char **argv)
                 enmOp = kCpuReportOp_MsrsHacking;
                 break;
 
+            case 'n':
+                g_fNoMsrs = true;
+                break;
+
             case 'o':
                 pszOutput = ValueUnion.psz;
                 break;
@@ -4809,7 +4826,7 @@ int main(int argc, char **argv)
                 break;
 
             case 'h':
-                RTPrintf("Usage: VBoxCpuReport [-m|--msrs-only] [-d|--msrs-dev] [-h|--help] [-V|--version] [-o filename.h] [-l debug.log]\n");
+                RTPrintf("Usage: VBoxCpuReport [-m|--msrs-only] [-d|--msrs-dev] [-n|--no-msrs] [-h|--help] [-V|--version] [-o filename.h] [-l debug.log]\n");
                 RTPrintf("Internal tool for gathering information to the VMM CPU database.\n");
                 return RTEXITCODE_SUCCESS;
             case 'V':
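
The new option wires up like the existing ones: a table entry mapping --no-msrs to 'n' and a case that sets the global later checked in probeMsrs(). A hedged sketch of the same pattern using POSIX getopt_long rather than IPRT's RTGetOpt:

    #include <getopt.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool g_fNoMsrs = false;

    int main(int argc, char **argv)
    {
        static const struct option s_aOptions[] =
        {
            { "msrs-only", no_argument, NULL, 'm' },
            { "no-msrs",   no_argument, NULL, 'n' },
            { NULL, 0, NULL, 0 }
        };
        int ch;
        while ((ch = getopt_long(argc, argv, "mn", s_aOptions, NULL)) != -1)
            if (ch == 'n')
                g_fNoMsrs = true; /* short-circuits the MSR probing later on */
        printf("no-msrs=%d\n", g_fNoMsrs);
        return 0;
    }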
diff --git a/src/VBox/ValidationKit/bootsectors/Config.kmk b/src/VBox/ValidationKit/bootsectors/Config.kmk
index 9248d3a..50c8ef3 100644
--- a/src/VBox/ValidationKit/bootsectors/Config.kmk
+++ b/src/VBox/ValidationKit/bootsectors/Config.kmk
@@ -404,7 +404,6 @@ TOOL_Bs3Ow32_COMPILE_C_OUTPUT_MAYBE   = $(TOOL_OPENWATCOM_COMPILE_C_OUTPUT_MAYBE
 define TOOL_Bs3Ow32_COMPILE_C_CMDS
 $(TOOL_OPENWATCOM_COMPILE_C_CMDS)
 	$(QUIET)$(VBoxBs3ObjConverter_1_TARGET) "$(obj)"
-	$(QUIET)$(REDIRECT) -wo /dev/null -- $(KBUILD_BIN_PATH)/kDepObj$(HOSTSUFF_EXE) -s -f -q -o "$(dep)" -t "$(obj)" "$(obj)"
 endef
 
 TOOL_Bs3Ow32_C32OBJSUFF                 = $(TOOL_Bs3Ow32_COBJSUFF)
@@ -438,7 +437,6 @@ TOOL_Bs3Ow32_COMPILE_CXX_OUTPUT_MAYBE = $(TOOL_OPENWATCOM_COMPILE_CXX_OUTPUT_MAY
 define TOOL_Bs3Ow32_COMPILE_CXX_CMDS
 $(TOOL_OPENWATCOM_COMPILE_CXX_CMDS)
 	$(QUIET)$(VBoxBs3ObjConverter_1_TARGET) "$(obj)"
-	$(QUIET)$(REDIRECT) -wo /dev/null -- $(KBUILD_BIN_PATH)/kDepObj$(HOSTSUFF_EXE) -s -f -q -o "$(dep)" -t "$(obj)" "$(obj)"
 endef
 
 
@@ -464,7 +462,6 @@ TOOL_Bs3Ow16_COMPILE_C_OUTPUT_MAYBE   = $(TOOL_OPENWATCOM-16_COMPILE_C_OUTPUT_MA
 define TOOL_Bs3Ow16_COMPILE_C_CMDS
 $(TOOL_OPENWATCOM-16_COMPILE_C_CMDS)
 	$(QUIET)$(VBoxBs3ObjConverter_1_TARGET) "$(obj)"
-	$(QUIET)$(REDIRECT) -wo /dev/null -- $(KBUILD_BIN_PATH)/kDepObj$(HOSTSUFF_EXE) -s -f -q -o "$(dep)" -t "$(obj)" "$(obj)"
 endef
 
 TOOL_Bs3Ow16_C16OBJSUFF                 = $(TOOL_Bs3Ow16_COBJSUFF)
@@ -498,7 +495,6 @@ TOOL_Bs3Ow16_COMPILE_CXX_OUTPUT_MAYBE = $(TOOL_OPENWATCOM-16_COMPILE_CXX_OUTPUT_
 define TOOL_Bs3Ow16_COMPILE_CXX_CMDS
 $(TOOL_OPENWATCOM-16_COMPILE_CXX_CMDS)
 	$(QUIET)$(VBoxBs3ObjConverter_1_TARGET) "$(obj)"
-	$(QUIET)$(REDIRECT) -wo /dev/null -- $(KBUILD_BIN_PATH)/kDepObj$(HOSTSUFF_EXE) -s -f -q -o "$(dep)" -t "$(obj)" "$(obj)"
 endef
 
 # Debug info format depends on what we use for 64-bit.
@@ -573,6 +569,7 @@ endef
 #       -of                    Generate stack frames when needed.
 #       -oi                    Inline intrinsics functions.
 #       -ol                    Loop optimizations.
+#       -oh                    Expensive optimizations. (saves a byte or two)
 #       -or                    Reorder for best pipeline.
 #       -os                    Favor size over speed.
 #
@@ -600,9 +597,9 @@ TEMPLATE_VBoxBS3KitImg_ARTOOL       = OPENWATCOM-16
 TEMPLATE_VBoxBS3KitImg_CTOOL        = Bs3Ow16
 TEMPLATE_VBoxBS3KitImg_CXXTOOL      = Bs3Ow16
 TEMPLATE_VBoxBS3KitImg_CFLAGS       = $(if $(BS3KIT_SEGNM_DATA16),-nd=$(BS3KIT_SEGNM_DATA16),) \
-	-nt=BS3TEXT16 -nc=$(BS3KIT_CLASS_CODE16) -ecc -q -0 -wx -zl -zdp -zu -ml $(BS3_OW_DBG_OPT) -s -oa -ob -of -oi -ol -or -os -d+
+	-nt=BS3TEXT16 -nc=$(BS3KIT_CLASS_CODE16) -ecc -q -0 -wx -zl -zdp -zu -ml $(BS3_OW_DBG_OPT) -s -oa -ob -of -oi -ol -or -os -oh -d+
 TEMPLATE_VBoxBS3KitImg_CXXFLAGS     = $(if $(BS3KIT_SEGNM_DATA16),-nd=$(BS3KIT_SEGNM_DATA16),) \
-	-nt=BS3TEXT16 -nc=$(BS3KIT_CLASS_CODE16) -ecc -q -0 -wx -zl -zdp -zu -ml $(BS3_OW_DBG_OPT) -s -oa -ob -of -oi -ol -or -os -d+
+	-nt=BS3TEXT16 -nc=$(BS3KIT_CLASS_CODE16) -ecc -q -0 -wx -zl -zdp -zu -ml $(BS3_OW_DBG_OPT) -s -oa -ob -of -oi -ol -or -os -oh -d+
 TEMPLATE_VBoxBS3KitImg_CDEFS        = ARCH_BITS=16 RT_ARCH_X86
 
 TEMPLATE_VBoxBS3KitImg_TOOL         = $(NO_SUCH_VARIABLE)
diff --git a/src/VBox/ValidationKit/bootsectors/Makefile.kmk b/src/VBox/ValidationKit/bootsectors/Makefile.kmk
index 2b1e434..c8e404f 100644
--- a/src/VBox/ValidationKit/bootsectors/Makefile.kmk
+++ b/src/VBox/ValidationKit/bootsectors/Makefile.kmk
@@ -243,6 +243,8 @@ bs3-cpu-basic-2_SOURCES = \
 	bs3kit/bs3-first-rm.asm \
 	bs3-cpu-basic-2.c \
 	bs3-cpu-basic-2-x0.c \
+	bs3-cpu-basic-2-32.c32 \
+	bs3-cpu-basic-2-pf.c32 \
 	bs3-cpu-basic-2-asm.asm \
        bs3kit/bs3-cmn-instantiate-x0.c16 \
        bs3kit/bs3-cmn-instantiate.c32 \
@@ -285,11 +287,11 @@ bs3-cpu-decoding-1_SOURCES = \
 #       bs3kit/bs3-cmn-instantiate.c16 \
 #       bs3kit/bs3-cmn-instantiate.c32 \
 #       bs3kit/bs3-cmn-instantiate.c64
-#bs3-cpu-decoding-1-template.o:: \
+bs3-cpu-decoding-1-template.o:: \
+	$$(bs3-cpu-decoding-1_0_OUTDIR)/bs3-cpu-decoding-1-asm.o16
 #	$$(bs3-cpu-decoding-1_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o16 \
 #	$$(bs3-cpu-decoding-1_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o32 \
 #	$$(bs3-cpu-decoding-1_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o64 \
-#	$$(bs3-cpu-decoding-1_0_OUTDIR)/bs3-cpu-decoding-1-asm.o16
 
 
 # CPU instructions #2 (first being bootsector2-cpu-instr-1).
@@ -311,8 +313,42 @@ bs3-cpu-instr-2-template.o:: \
 	$$(bs3-cpu-instr-2_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o64 \
 	$$(bs3-cpu-instr-2_0_OUTDIR)/bs3-cpu-instr-2-asm.o16
 
+# CPU generated instruction tests #1
+MISCBINS += bs3-cpu-generated-1
+bs3-cpu-generated-1_TEMPLATE = VBoxBS3KitImg
+bs3-cpu-generated-1_INCS = .
+bs3-cpu-generated-1_DEFS = BS3_CMN_INSTANTIATE_FILE1=bs3-cpu-generated-1-template.c
+bs3-cpu-generated-1_SOURCES = \
+	bs3kit/bs3-first-rm.asm \
+	bs3-cpu-generated-1.c \
+	bs3-cpu-generated-1-asm.asm \
+       bs3kit/bs3-cmn-instantiate-x0.c16 \
+       bs3kit/bs3-cmn-instantiate.c32 \
+       bs3kit/bs3-cmn-instantiate.c64 \
+	$(bs3-cpu-generated-1_0_OUTDIR)/bs3-cpu-generated-1-data.c16
+bs3-cpu-generated-1_CLEAN = $(bs3-cpu-generated-1_0_OUTDIR)/bs3-cpu-generated-1-data.c16
+
+bs3-cpu-generated-1-template.o:: \
+	$$(bs3-cpu-generated-1_0_OUTDIR)/bs3kit/bs3-cmn-instantiate-x0.o16 \
+	$$(bs3-cpu-generated-1_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o32 \
+	$$(bs3-cpu-generated-1_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o64 \
+	$$(bs3-cpu-generated-1_0_OUTDIR)/bs3-cpu-generated-1-asm.o16
+
+$$(bs3-cpu-generated-1_0_OUTDIR)/bs3-cpu-generated-1-data.c16: \
+		$(PATH_SUB_CURRENT)/bs3-cpu-generated-1-data.py \
+		$(PATH_SUB_CURRENT)/../../VMM/VMMAll/IEMAllInstructionsPython.py \
+		$(PATH_SUB_CURRENT)/../../VMM/VMMAll/IEMAllInstructions*.cpp.h \
+               | $$(dir $$@)
+	$(VBOX_BLD_PYTHON) $< $@
+
 endif # VBOX_WITH_BS3KIT
 
 
+#
+# pylint
+#
+VBOX_VALIDATIONKIT_PYTHON_SOURCES += $(wildcard $(PATH_SUB_CURRENT)/*.py)
+$(evalcall def_vbox_validationkit_process_python_sources)
+
 include $(FILE_KBUILD_SUB_FOOTER)
 
diff --git a/src/VBox/ValidationKit/bootsectors/bootsector2-common-routines-template-1.mac b/src/VBox/ValidationKit/bootsectors/bootsector2-common-routines-template-1.mac
index 17f3640..c8bfdaa 100644
--- a/src/VBox/ValidationKit/bootsectors/bootsector2-common-routines-template-1.mac
+++ b/src/VBox/ValidationKit/bootsectors/bootsector2-common-routines-template-1.mac
@@ -1127,7 +1127,7 @@ BEGINPROC TMPL_NM_CMN(TestInstallTrapRecs)
         cmp     dword [sDI + BS2TRAPREC.offResumeAddend], 0xff
         je      .nok
 
-        cmp     dword [sDI + BS2TRAPREC.u8TrapNo], X86_XCPT_MAX
+        cmp     dword [sDI + BS2TRAPREC.u8TrapNo], X86_XCPT_LAST
         ja      .nok
 
         ; next.
diff --git a/src/VBox/ValidationKit/bootsectors/bootsector2-cpu-instr-1-template.mac b/src/VBox/ValidationKit/bootsectors/bootsector2-cpu-instr-1-template.mac
index e9c28f6..26a9c4a 100644
--- a/src/VBox/ValidationKit/bootsectors/bootsector2-cpu-instr-1-template.mac
+++ b/src/VBox/ValidationKit/bootsectors/bootsector2-cpu-instr-1-template.mac
@@ -94,8 +94,8 @@ BEGINPROC TMPL_NM(TestMemFences)
         db                                 X86_OP_PRF_SIZE_ADDR, 0fh, 0aeh, MY_RM
         BS2_TRAP_INSTR X86_XCPT_UD, 0,  db X86_OP_PRF_SIZE_OP,   0fh, 0aeh, MY_RM ; (used in group)
         BS2_TRAP_INSTR X86_XCPT_UD, 0,  db X86_OP_PRF_LOCK,      0fh, 0aeh, MY_RM ; (used in group)
-        BS2_TRAP_INSTR X86_XCPT_UD, 0,  db X86_OP_PRF_REPZ,      0fh, 0aeh, MY_RM ; (used in group)
         BS2_TRAP_INSTR X86_XCPT_UD, 0,  db X86_OP_PRF_REPNZ,     0fh, 0aeh, MY_RM ; (used in group)
+        BS2_TRAP_INSTR X86_XCPT_UD, 0,  db X86_OP_PRF_REPZ,      0fh, 0aeh, MY_RM ; (used in group)
 %ifdef TMPL_64BIT
  %assign MY_REX 0x40
  %rep 10h
@@ -110,8 +110,8 @@ BEGINPROC TMPL_NM(TestMemFences)
         db                                 X86_OP_PRF_SIZE_ADDR, MY_REX, 0fh, 0aeh, MY_RM
         BS2_TRAP_INSTR X86_XCPT_UD, 0,  db X86_OP_PRF_SIZE_OP,   MY_REX, 0fh, 0aeh, MY_RM ; (used in group)
         BS2_TRAP_INSTR X86_XCPT_UD, 0,  db X86_OP_PRF_LOCK,      MY_REX, 0fh, 0aeh, MY_RM ; (used in group)
-        BS2_TRAP_INSTR X86_XCPT_UD, 0,  db X86_OP_PRF_REPZ,      MY_REX, 0fh, 0aeh, MY_RM ; (used in group)
         BS2_TRAP_INSTR X86_XCPT_UD, 0,  db X86_OP_PRF_REPNZ,     MY_REX, 0fh, 0aeh, MY_RM ; (used in group)
+        BS2_TRAP_INSTR X86_XCPT_UD, 0,  db X86_OP_PRF_REPZ,      MY_REX, 0fh, 0aeh, MY_RM ; (used in group)
   %assign MY_REX (MY_REX + 1)
  %endrep
 %endif
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2.c b/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-32.c32
similarity index 76%
copy from src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2.c
copy to src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-32.c32
index 7e322d0..a15b0b9 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-32.c32
@@ -1,6 +1,6 @@
-/* $Id: bs3-cpu-instr-2.c $ */
+/* $Id: bs3-cpu-basic-2-32.c32 $ */
 /** @file
- * BS3Kit - bs3-cpu-instr-2, 16-bit C code.
+ * BS3Kit - bs3-cpu-basic-2, 32-bit C code.
  */
 
 /*
@@ -29,36 +29,28 @@
 *   Header Files                                                                                                                 *
 *********************************************************************************************************************************/
 #include <bs3kit.h>
+#include <iprt/asm-amd64-x86.h>
 
 
 /*********************************************************************************************************************************
 *   Internal Functions                                                                                                           *
 *********************************************************************************************************************************/
-BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_mul);
-BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_imul);
-BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_div);
-BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_idiv);
+FNBS3TESTDOMODE             bs3CpuBasic2_RaiseXcpt0e_c32;
 
 
 /*********************************************************************************************************************************
 *   Global Variables                                                                                                             *
 *********************************************************************************************************************************/
-static const BS3TESTMODEENTRY g_aModeTests[] =
+static const BS3TESTMODEBYONEENTRY g_aModeByOne32Tests[] =
 {
-    BS3TESTMODEENTRY_CMN("mul", bs3CpuInstr2_mul),
-    BS3TESTMODEENTRY_CMN("imul", bs3CpuInstr2_imul),
-    BS3TESTMODEENTRY_CMN("div", bs3CpuInstr2_div),
-    BS3TESTMODEENTRY_CMN("idiv", bs3CpuInstr2_idiv),
+    { "#PF",  bs3CpuBasic2_RaiseXcpt0e_c32, BS3TESTMODEBYONEENTRY_F_ONLY_PAGING },
 };
 
 
-BS3_DECL(void) Main_rm()
+BS3_DECL(void) bs3CpuBasic2_Do32BitTests_pe32(void)
 {
-    Bs3InitAll_rm();
-    Bs3TestInit("bs3-cpu-instr-2");
+    Bs3TestPrintf("bs3CpuBasic2_Do32BitTests=%#x\n", g_uBs3CpuDetected);
 
-    Bs3TestDoModes_rm(g_aModeTests, RT_ELEMENTS(g_aModeTests));
-
-    Bs3TestTerm();
+    Bs3TestDoModesByOne_pe32(g_aModeByOne32Tests, RT_ELEMENTS(g_aModeByOne32Tests), 0);
 }
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-asm.asm b/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-asm.asm
index 766e313..cd201c3 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-asm.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-asm.asm
@@ -101,6 +101,57 @@ AssertCompile(_bs3CpuBasic2_iret_rexw_EndProc - _bs3CpuBasic2_iret_rexw == 2)
 
 
 ;
+; CPU mode agnostic test code snippets.
+;
+BS3_BEGIN_TEXT32
+
+;;
+; @param    [xBP + xCB*2]   puDst
+; @param    [xBP + xCB*3]   uNewValue
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_Store_mov, BS3_PBC_NEAR
+        push    xBP
+        mov     xBP, xSP
+        mov     xCX, [xBP + xCB*2]
+        mov     xAX, [xBP + xCB*3]
+        mov     [xCX], xAX
+        leave
+        ret
+BS3_PROC_END_CMN   bs3CpuBasic2_Store_mov
+
+;;
+; @param    [xBP + xCB*2]   puDst
+; @param    [xBP + xCB*3]   uNewValue
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_Store_xchg, BS3_PBC_NEAR
+        push    xBP
+        mov     xBP, xSP
+        mov     xCX, [xBP + xCB*2]
+        mov     xAX, [xBP + xCB*3]
+        xchg    [xCX], xAX
+        leave
+        ret
+BS3_PROC_END_CMN   bs3CpuBasic2_Store_xchg
+
+;;
+; @param    [xBP + xCB*2]   puDst
+; @param    [xBP + xCB*3]   uNewValue
+; @param    [xBP + xCB*4]   uOldValue
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_Store_cmpxchg, BS3_PBC_NEAR
+        push    xBP
+        mov     xBP, xSP
+        mov     xCX, [xBP + xCB*2]
+        mov     xDX, [xBP + xCB*3]
+        mov     xAX, [xBP + xCB*4]
+.again:
+        cmpxchg [xCX], xDX
+        jnz     .again
+        leave
+        ret
+BS3_PROC_END_CMN   bs3CpuBasic2_Store_cmpxchg
+
+
+BS3_BEGIN_TEXT16
+
+;
 ; Instantiate code templates.
 ;
 BS3_INSTANTIATE_COMMON_TEMPLATE          "bs3-cpu-basic-2-template.mac"
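
The three snippets added above give the page-table test a choice of store instruction, since plain writes, xchg and cmpxchg can interact differently with locked accessed/dirty-bit updates. A hedged C rendition (GCC/Clang __atomic builtins, not the mode-agnostic BS3Kit assembly) of the same three primitives:

    #include <stdint.h>

    static void StoreMov(uint32_t volatile *puDst, uint32_t uNew, uint32_t uOld)
    {
        (void)uOld;
        *puDst = uNew;                                        /* plain mov */
    }

    static void StoreXchg(uint32_t volatile *puDst, uint32_t uNew, uint32_t uOld)
    {
        (void)uOld;
        __atomic_exchange_n(puDst, uNew, __ATOMIC_SEQ_CST);   /* xchg */
    }

    static void StoreCmpXchg(uint32_t volatile *puDst, uint32_t uNew, uint32_t uOld)
    {
        /* cmpxchg in a retry loop, like the .again loop in the assembly;
           on failure uOld is reloaded with the current value. */
        while (!__atomic_compare_exchange_n(puDst, &uOld, uNew, false,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
        { /* retry */ }
    }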
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-pf.c32 b/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-pf.c32
new file mode 100644
index 0000000..dc9f1d7
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-pf.c32
@@ -0,0 +1,1880 @@
+/* $Id: bs3-cpu-basic-2-pf.c32 $ */
+/** @file
+ * BS3Kit - bs3-cpu-basic-2, 32-bit C code.
+ */
+
+/*
+ * Copyright (C) 2007-2016 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <bs3kit.h>
+#include <iprt/asm-amd64-x86.h>
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+#define CHECK_MEMBER(a_pszMode, a_szName, a_szFmt, a_Actual, a_Expected) \
+    do { \
+        if ((a_Actual) == (a_Expected)) { /* likely */ } \
+        else Bs3TestFailedF("%u - %s: " a_szName "=" a_szFmt " expected " a_szFmt, \
+                            g_usBs3TestStep, (a_pszMode), (a_Actual), (a_Expected)); \
+    } while (0)
+
+#define BS3CPUBASIC2PF_HALT(pThis) \
+    do { \
+        Bs3TestPrintf("Halting: pteworker=%s store=%s accessor=%s\n", \
+                      pThis->pszPteWorker, pThis->pszStore, pThis->pszAccessor); \
+        ASMHalt(); \
+    } while (0)
+
+
+/** @def BS3CPUBASIC2PF_FASTER
+ * This is useful for IEM execution. */
+#define BS3CPUBASIC2PF_FASTER
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+typedef void BS3_CALL FNBS3CPUBASIC2PFSNIPPET(void);
+
+typedef struct FNBS3CPUBASIC2PFTSTCODE
+{
+    FNBS3CPUBASIC2PFSNIPPET    *pfn;
+    uint8_t                     offUd2;
+
+} FNBS3CPUBASIC2PFTSTCODE;
+typedef FNBS3CPUBASIC2PFTSTCODE const *PCFNBS3CPUBASIC2PFTSTCODE;
+
+typedef struct BS3CPUBASIC2PFTTSTCMNMODE
+{
+    uint8_t                     bMode;
+    FNBS3CPUBASIC2PFTSTCODE     MovLoad;
+    FNBS3CPUBASIC2PFTSTCODE     MovStore;
+    FNBS3CPUBASIC2PFTSTCODE     Xchg;
+    FNBS3CPUBASIC2PFTSTCODE     CmpXchg;
+    FNBS3CPUBASIC2PFTSTCODE     DivMem;
+} BS3CPUBASIC2PFTTSTCMNMODE;
+typedef BS3CPUBASIC2PFTTSTCMNMODE const *PCBS3CPUBASIC2PFTTSTCMNMODE;
+
+
+typedef struct BS3CPUBASIC2PFSTATE
+{
+    /** The mode we're currently testing. */
+    uint8_t                     bMode;
+    /** The size of a natural access. */
+    uint8_t                     cbAccess;
+    /** The common mode functions. */
+    PCBS3CPUBASIC2PFTTSTCMNMODE pCmnMode;
+    /** Address of the test area (alias). */
+    union
+    {
+        uint64_t                u;
+        uint32_t                u32;
+        uint16_t                u16;
+    }                           uTestAddr;
+    /** Pointer to the original test area mapping. */
+    uint8_t                    *pbOrgTest;
+    /** The size of the test area (at least two pages). */
+    uint32_t                    cbTest;
+    /** cbTest expressed as a page count. */
+    uint16_t                    cTestPages;
+    /** The number of PTEs in the first page table, i.e. what we can
+     *  safely access via PgInfo.u.Pae.pPte/PgInfo.u.Legacy.pPte. */
+    uint16_t                    cTest1stPtes;
+    /** The number of PDEs for cTestPages. */
+    uint16_t                    cTestPdes;
+    /** 16-bit data selector for uTestAddr.u32. */
+    uint16_t                    uSel16TestData;
+    /** 16-bit code selector for uTestAddr.u32. */
+    uint16_t                    uSel16TestCode;
+    /** The size of the PDE backup. */
+    uint16_t                    cbPdeBackup;
+    /** The size of the PTE backup. */
+    uint16_t                    cbPteBackup;
+    /** Test paging information for uTestAddr.u. */
+    BS3PAGINGINFO4ADDR          PgInfo;
+
+    /** Set if we can use the INVLPG instruction. */
+    bool                        fUseInvlPg;
+    /** Physical addressing width. */
+    uint8_t                     cBitsPhysWidth;
+
+    /** Reflects CR0.WP. */
+    bool                        fWp;
+    /** Reflects EFER.NXE & CR4.PAE. */
+    bool                        fNxe;
+
+    /** Name of the current accessor (used in failure/halt messages). */
+    const char                 *pszAccessor;
+    /** Name of the current page table modification worker (ditto). */
+    const char                 *pszPteWorker;
+    /** Name of the current store method (ditto). */
+    const char                 *pszStore;
+
+    /** Trap context frame. */
+    BS3TRAPFRAME                TrapCtx;
+    /** Expected result context. */
+    BS3REGCTX                   ExpectCtx;
+
+    /** The PML4E backup. */
+    uint64_t                    u64Pml4eBackup;
+    /** The PDPTE backup. */
+    uint64_t                    u64PdpteBackup;
+    /** The PDE backup. */
+    uint64_t                    au64PdeBackup[16];
+    /** The PTE backup. */
+    union
+    {
+        uint32_t                Legacy[X86_PG_ENTRIES];
+        uint64_t                Pae[X86_PG_PAE_ENTRIES];
+    } PteBackup;
+
+} BS3CPUBASIC2PFSTATE;
+/** Pointer to state for the \#PF test. */
+typedef BS3CPUBASIC2PFSTATE *PBS3CPUBASIC2PFSTATE;
+
+
+/**
+ * Paging modification worker.
+ */
+typedef struct BS3CPUBASIC2PFMODPT
+{
+    const char *pszName;
+    uint32_t fPresent    : 1;
+    uint32_t fUser       : 1;
+    uint32_t fWriteable  : 1;
+    uint32_t fNoExecute  : 1;
+    uint32_t fReserved   : 1;
+    uint32_t uModifyArg  : 24;
+    void   (*pfnModify)(PBS3CPUBASIC2PFSTATE pThis, unsigned iStore, struct BS3CPUBASIC2PFMODPT const *pEntry,
+                        uint32_t fClearMask, uint32_t fSetMask);
+    bool   (*pfnApplicable)(PBS3CPUBASIC2PFSTATE pThis, struct BS3CPUBASIC2PFMODPT const *pEntry);
+} BS3CPUBASIC2PFMODPT;
+typedef BS3CPUBASIC2PFMODPT const *PCBS3CPUBASIC2PFMODPT;
+
+/** Page level protection.  Alternative is page directory or higher level. */
+#define BS3CB2PFACC_F_PAGE_LEVEL    RT_BIT(0)
+/** Directly access the boobytrapped page, no edging on or off it. */
+#define BS3CB2PFACC_F_DIRECT        RT_BIT(1)
+
+/**
+ * Memory accessor.
+ */
+typedef struct BS3CPUBASIC2PFACCESSOR
+{
+    /** Accessor name.   */
+    const char *pszName;
+    /** The accessor. */
+    void      (*pfnAccessor)(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pCtx, uint32_t fFlags, uint8_t bXcpt, uint8_t uPfErrCd);
+    /** The X86_TRAP_PF_XXX access flags this access sets.   */
+    uint32_t    fAccess;
+    /** The exception when things are fine. */
+    uint8_t     bOkayXcpt;
+} BS3CPUBASIC2PFACCESSOR;
+typedef const BS3CPUBASIC2PFACCESSOR *PCBS3CPUBASIC2PFACCESSOR;
+
+
+/*********************************************************************************************************************************
+*   Internal Functions                                                                                                           *
+*********************************************************************************************************************************/
+FNBS3TESTDOMODE bs3CpuBasic2_RaiseXcpt0e_c32;
+
+/* bs3-cpu-basic-2-asm.asm: */
+void BS3_CALL bs3CpuBasic2_Store_mov_c32(void *pvDst, uint32_t uValue, uint32_t uOld);
+void BS3_CALL bs3CpuBasic2_Store_xchg_c32(void *pvDst, uint32_t uValue, uint32_t uOld);
+void BS3_CALL bs3CpuBasic2_Store_cmpxchg_c32(void *pvDst, uint32_t uValue, uint32_t uOld);
+
+
+/* bs3-cpu-basic-2-template.mac: */
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_mov_ax_ds_bx__ud2_c16;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_mov_ds_bx_ax__ud2_c16;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_xchg_ds_bx_ax__ud2_c16;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_cmpxchg_ds_bx_cx__ud2_c16;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_div_ds_bx__ud2_c16;
+
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_mov_ax_ds_bx__ud2_c32;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_mov_ds_bx_ax__ud2_c32;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_xchg_ds_bx_ax__ud2_c32;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_cmpxchg_ds_bx_cx__ud2_c32;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_div_ds_bx__ud2_c32;
+
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_mov_ax_ds_bx__ud2_c64;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_mov_ds_bx_ax__ud2_c64;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_xchg_ds_bx_ax__ud2_c64;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_cmpxchg_ds_bx_cx__ud2_c64;
+FNBS3CPUBASIC2PFSNIPPET bs3CpuBasic2_div_ds_bx__ud2_c64;
+
+
+/*********************************************************************************************************************************
+*   Global Variables                                                                                                             *
+*********************************************************************************************************************************/
+/** Page table access functions. */
+static const struct
+{
+    const char     *pszName;
+    void (BS3_CALL *pfnStore)(void *pvDst, uint32_t uValue, uint32_t uOld);
+} g_aStoreMethods[] =
+{
+    { "mov",        bs3CpuBasic2_Store_mov_c32 },
+    { "xchg",       bs3CpuBasic2_Store_xchg_c32 },
+    { "cmpxchg",    bs3CpuBasic2_Store_cmpxchg_c32 },
+};
+
+
+static const BS3CPUBASIC2PFTTSTCMNMODE g_aCmnModes[] =
+{
+    {
+        BS3_MODE_CODE_16,
+        {   bs3CpuBasic2_mov_ax_ds_bx__ud2_c16,     2 },
+        {   bs3CpuBasic2_mov_ds_bx_ax__ud2_c16,     2 },
+        {   bs3CpuBasic2_xchg_ds_bx_ax__ud2_c16,    2 },
+        {   bs3CpuBasic2_cmpxchg_ds_bx_cx__ud2_c16, 3 },
+        {   bs3CpuBasic2_div_ds_bx__ud2_c16,        2 },
+    },
+    {
+        BS3_MODE_CODE_32,
+        {   bs3CpuBasic2_mov_ax_ds_bx__ud2_c32,     2 },
+        {   bs3CpuBasic2_mov_ds_bx_ax__ud2_c32,     2 },
+        {   bs3CpuBasic2_xchg_ds_bx_ax__ud2_c32,    2 },
+        {   bs3CpuBasic2_cmpxchg_ds_bx_cx__ud2_c32, 3 },
+        {   bs3CpuBasic2_div_ds_bx__ud2_c32,        2 },
+    },
+    {
+        BS3_MODE_CODE_64,
+        {   bs3CpuBasic2_mov_ax_ds_bx__ud2_c64,     2 + 1 },
+        {   bs3CpuBasic2_mov_ds_bx_ax__ud2_c64,     2 + 1 },
+        {   bs3CpuBasic2_xchg_ds_bx_ax__ud2_c64,    2 + 1 },
+        {   bs3CpuBasic2_cmpxchg_ds_bx_cx__ud2_c64, 3 + 1 },
+        {   bs3CpuBasic2_div_ds_bx__ud2_c64,        2 + 1 },
+    },
+    {
+        BS3_MODE_CODE_V86,
+        {   bs3CpuBasic2_mov_ax_ds_bx__ud2_c16,     2 },
+        {   bs3CpuBasic2_mov_ds_bx_ax__ud2_c16,     2 },
+        {   bs3CpuBasic2_xchg_ds_bx_ax__ud2_c16,    2 },
+        {   bs3CpuBasic2_cmpxchg_ds_bx_cx__ud2_c16, 3 },
+        {   bs3CpuBasic2_div_ds_bx__ud2_c16,        2 },
+    },
+};
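+
+/* Note: the 64-bit offUd2 values above are one byte larger, presumably
+ * because the 64-bit instantiation of these snippets carries a REX prefix. */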
+
+
+/**
+ * Compares a CPU trap.
+ */
+static void bs3CpuBasic2Pf_CompareCtx(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pExpectCtx, int cbPcAdjust,
+                                      uint8_t bXcpt, unsigned uErrCd)
+{
+    const char     *pszHint = "xxxx";
+    uint16_t const  cErrorsBefore = Bs3TestSubErrorCount();
+    uint32_t        fExtraEfl;
+
+    CHECK_MEMBER(pszHint, "bXcpt",   "%#04x",    pThis->TrapCtx.bXcpt,             bXcpt);
+    CHECK_MEMBER(pszHint, "uErrCd",  "%#06RX16", (uint16_t)pThis->TrapCtx.uErrCd, (uint16_t)uErrCd); /* 486 only writes a word */
+
+    if (BS3_MODE_IS_16BIT_SYS(g_bBs3CurrentMode))
+        fExtraEfl = 0;
+    else
+        fExtraEfl = X86_EFL_RF;
+    Bs3TestCheckRegCtxEx(&pThis->TrapCtx.Ctx, pExpectCtx, cbPcAdjust, 0 /*cbSpAdjust*/, fExtraEfl, pszHint, g_usBs3TestStep);
+    if (Bs3TestSubErrorCount() != cErrorsBefore)
+    {
+        Bs3TrapPrintFrame(&pThis->TrapCtx);
+#if 1
+        Bs3TestPrintf("Halting: g_uBs3CpuDetected=%#x\n", g_uBs3CpuDetected);
+        Bs3TestPrintf("Halting: bXcpt=%#x uErrCd=%#x\n", bXcpt, uErrCd);
+        BS3CPUBASIC2PF_HALT(pThis);
+#endif
+    }
+}
+
+
+/**
+ * Compares a CPU trap.
+ */
+static void bs3CpuBasic2Pf_CompareSimpleCtx(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pStartCtx, int offAddPC,
+                                            uint8_t bXcpt, unsigned uErrCd, uint64_t uCr2)
+{
+    const char     *pszHint = "xxxx";
+    uint16_t const  cErrorsBefore = Bs3TestSubErrorCount();
+    uint64_t const  uSavedCr2 = pStartCtx->cr2.u;
+    uint32_t        fExtraEfl;
+
+    CHECK_MEMBER(pszHint, "bXcpt",   "%#04x",    pThis->TrapCtx.bXcpt,             bXcpt);
+    CHECK_MEMBER(pszHint, "uErrCd",  "%#06RX16", (uint16_t)pThis->TrapCtx.uErrCd, (uint16_t)uErrCd); /* 486 only writes a word */
+
+    if (BS3_MODE_IS_16BIT_SYS(g_bBs3CurrentMode))
+        fExtraEfl = 0;
+    else
+        fExtraEfl = X86_EFL_RF;
+    pStartCtx->cr2.u = uCr2;
+    Bs3TestCheckRegCtxEx(&pThis->TrapCtx.Ctx, pStartCtx, offAddPC, 0 /*cbSpAdjust*/, fExtraEfl, pszHint, g_usBs3TestStep);
+    pStartCtx->cr2.u = uSavedCr2;
+    if (Bs3TestSubErrorCount() != cErrorsBefore)
+    {
+        Bs3TrapPrintFrame(&pThis->TrapCtx);
+#if 1
+        Bs3TestPrintf("Halting: g_uBs3CpuDetected=%#x\n", g_uBs3CpuDetected);
+        Bs3TestPrintf("Halting: bXcpt=%#x uErrCd=%#x\n", bXcpt, uErrCd);
+        BS3CPUBASIC2PF_HALT(pThis);
+#endif
+    }
+}
+
+
+/**
+ * Checks the trap context for a simple \#PF trap.
+ */
+static void bs3CpuBasic2Pf_CompareSimplePf(PBS3CPUBASIC2PFSTATE pThis, PCBS3REGCTX pStartCtx, int offAddPC,
+                                           unsigned uErrCd, uint64_t uCr2)
+{
+    bs3CpuBasic2Pf_CompareSimpleCtx(pThis, (PBS3REGCTX)pStartCtx, offAddPC, X86_XCPT_PF, uErrCd, uCr2);
+}
+
+/**
+ * Checks the trap context for a simple \#UD trap.
+ */
+static void bs3CpuBasic2Pf_CompareSimpleUd(PBS3CPUBASIC2PFSTATE pThis, PCBS3REGCTX pStartCtx, int offAddPC)
+{
+    bs3CpuBasic2Pf_CompareSimpleCtx(pThis, (PBS3REGCTX)pStartCtx, offAddPC, X86_XCPT_UD, 0, pStartCtx->cr2.u);
+}
+
+
+/**
+ * Restores all the paging entries from backup and flushes everything.
+ */
+static void bs3CpuBasic2Pf_FlushAll(void)
+{
+    if ((g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80486)
+    {
+        uint32_t uCr4 = ASMGetCR4();
+        if (uCr4 & (X86_CR4_PGE | X86_CR4_PCIDE))
+        {
+            ASMSetCR4(uCr4 & ~(X86_CR4_PGE | X86_CR4_PCIDE));
+            ASMSetCR4(uCr4);
+            return;
+        }
+    }
+
+    ASMReloadCR3();
+}
+
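+/* Hedged sketch, illustration only (assumes the ASMInvalidatePage helper
+ * from <iprt/asm-amd64-x86.h> taking a flat address): when a single page
+ * changed and INVLPG is available (486+), a targeted flush would suffice;
+ * the CR4.PGE toggle above is the big hammer needed for global pages. */
+#if 0
+static void bs3CpuBasic2Pf_FlushOnePage(PBS3CPUBASIC2PFSTATE pThis, uint32_t offPage)
+{
+    if (pThis->fUseInvlPg)
+        ASMInvalidatePage((uintptr_t)(pThis->uTestAddr.u + offPage));
+    else
+        bs3CpuBasic2Pf_FlushAll();
+}
+#endif
+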
+
+/**
+ * Restores all the paging entries from backup and flushes everything.
+ *
+ * @param   pThis       Test state data.
+ */
+static void bs3CpuBasic2Pf_RestoreFromBackups(PBS3CPUBASIC2PFSTATE pThis)
+{
+    Bs3MemCpy(pThis->PgInfo.u.Legacy.pPte, &pThis->PteBackup, pThis->cbPteBackup);
+    Bs3MemCpy(pThis->PgInfo.u.Legacy.pPde, pThis->au64PdeBackup, pThis->cbPdeBackup);
+    if (pThis->PgInfo.cEntries > 2)
+        pThis->PgInfo.u.Pae.pPdpe->u  = pThis->u64PdpteBackup;
+    if (pThis->PgInfo.cEntries > 3)
+        pThis->PgInfo.u.Pae.pPml4e->u = pThis->u64Pml4eBackup;
+    bs3CpuBasic2Pf_FlushAll();
+}
+
+
+/** @name BS3CPUBASIC2PFACCESSOR::pfnAccessor Implementations
+ * @{ */
+
+static void bs3CpuBasic2Pf_DoExec(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pCtx, uint32_t fFlags, uint8_t bXcpt, uint8_t uPfErrCd)
+{
+    uint8_t *pbOrgTest = pThis->pbOrgTest;
+    unsigned offEnd = fFlags & BS3CB2PFACC_F_DIRECT ? X86_PAGE_SIZE + 1 : X86_PAGE_SIZE + 2;
+    unsigned off    = fFlags & BS3CB2PFACC_F_DIRECT ? offEnd - 1        : X86_PAGE_SIZE - 5;
+
+    for (; off < offEnd; off++)
+    {
+        /* Emit a little bit of code (using the original allocation mapping) and point pCtx to it. */
+        pbOrgTest[off + 0] = X86_OP_PRF_SIZE_ADDR;
+        pbOrgTest[off + 1] = X86_OP_PRF_SIZE_OP;
+        pbOrgTest[off + 2] = 0x90; /* NOP */
+        pbOrgTest[off + 3] = 0x0f; /* UD2 */
+        pbOrgTest[off + 4] = 0x0b;
+        pbOrgTest[off + 5] = 0xeb; /* JMP $-4 */
+        pbOrgTest[off + 6] = 0xfc;
+        switch (pThis->bMode & BS3_MODE_CODE_MASK)
+        {
+            default:
+                pCtx->rip.u = pThis->uTestAddr.u + off;
+                break;
+            case BS3_MODE_CODE_16:
+                Bs3SelSetup16BitCode(&Bs3GdteSpare01, pThis->uTestAddr.u32, pCtx->bCpl);
+                pCtx->rip.u = off;
+                pCtx->cs    = BS3_SEL_SPARE_01 | pCtx->bCpl;
+                break;
+            case BS3_MODE_CODE_V86:
+                /** @todo fix me.   */
+                return;
+        }
+        //Bs3TestPrintf("cs:rip=%04x:%010RX64 iRing=%d\n", pCtx->cs, pCtx->rip.u, pCtx->bCpl);
+
+        Bs3TrapSetJmpAndRestore(pCtx, &pThis->TrapCtx);
+        //Bs3TestPrintf("off=%#06x bXcpt=%#x uErrCd=%#RX64\n", off, pThis->TrapCtx.bXcpt, pThis->TrapCtx.uErrCd);
+        if (   bXcpt != X86_XCPT_PF
+            || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off < X86_PAGE_SIZE - 4))
+            bs3CpuBasic2Pf_CompareSimpleUd(pThis, pCtx, 3);
+        else if (!(fFlags & BS3CB2PFACC_F_PAGE_LEVEL) || off >= X86_PAGE_SIZE)
+            bs3CpuBasic2Pf_CompareSimplePf(pThis, pCtx, 0, uPfErrCd, pThis->uTestAddr.u + off);
+        else
+            bs3CpuBasic2Pf_CompareSimplePf(pThis, pCtx,
+                                             off + 3 == X86_PAGE_SIZE || off + 4 == X86_PAGE_SIZE
+                                           ? RT_MIN(X86_PAGE_SIZE, off + 3) - off : 0,
+                                           uPfErrCd, pThis->uTestAddr.u + RT_MIN(X86_PAGE_SIZE, off + 4));
+    }
+}
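+
+/* The seven bytes emitted above decode as follows (standard x86 encodings):
+ *   67          address-size override prefix
+ *   66          operand-size override prefix
+ *   90          nop
+ *   0f 0b       ud2
+ *   eb fc       short jmp with rel8 = -4, i.e. back onto the ud2
+ * The leading prefixes pad the instruction so the sweep can walk the
+ * faulting fetch across the page boundary one byte at a time. */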
+
+
+static void bs3CpuBasic2Pf_SetCsEip(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pCtx, PCFNBS3CPUBASIC2PFTSTCODE pCode)
+{
+    switch (pThis->bMode & BS3_MODE_CODE_MASK)
+    {
+        default:
+            pCtx->rip.u = (uintptr_t)pCode->pfn;
+            break;
+
+        case BS3_MODE_CODE_16:
+        {
+            uint32_t uFar16 = Bs3SelFlatCodeToProtFar16((uintptr_t)pCode->pfn);
+            pCtx->rip.u = (uint16_t)uFar16;
+            pCtx->cs    = (uint16_t)(uFar16 >> 16) | pCtx->bCpl;
+            pCtx->cs   += (uint16_t)pCtx->bCpl << BS3_SEL_RING_SHIFT;
+            break;
+        }
+
+        case BS3_MODE_CODE_V86:
+        {
+            uint32_t uFar16 = Bs3SelFlatCodeToRealMode((uintptr_t)pCode->pfn);
+            pCtx->rip.u = (uint16_t)uFar16;
+            pCtx->cs    = (uint16_t)(uFar16 >> 16);
+            break;
+        }
+    }
+}
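+
+/* Hedged sketch, illustration only (hypothetical helper): the 16:16 far
+ * pointer decomposition used above -- offset in the low word, selector or
+ * real-mode segment in the high word. */
+#if 0
+static void bs3CpuBasic2Pf_SplitFar16(uint32_t uFar16, uint16_t *puSel, uint16_t *puOff)
+{
+    *puOff = (uint16_t)uFar16;          /* low word:  IP / offset */
+    *puSel = (uint16_t)(uFar16 >> 16);  /* high word: CS selector / segment */
+}
+#endif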
+
+
+/**
+ * Test a simple load instruction around the edges of page two.
+ *
+ * @param   pThis           The test state data.
+ * @param   pCtx            The test context.
+ * @param   fFlags          BS3CB2PFACC_F_XXX.
+ * @param   bXcpt           X86_XCPT_PF if this can cause \#PFs, otherwise
+ *                          X86_XCPT_UD.
+ * @param   uPfErrCd        The error code for \#PFs.
+ */
+static void bs3CpuBasic2Pf_DoMovLoad(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pCtx, uint32_t fFlags, uint8_t bXcpt, uint8_t uPfErrCd)
+{
+    static uint64_t const s_uValue = UINT64_C(0x7c4d0114428d);
+    uint64_t uExpectRax;
+    unsigned i;
+
+    /*
+     * Adjust the incoming context and calculate our expectations.
+     */
+    bs3CpuBasic2Pf_SetCsEip(pThis, pCtx, &pThis->pCmnMode->MovLoad);
+    Bs3MemCpy(&pThis->ExpectCtx, pCtx, sizeof(pThis->ExpectCtx));
+    switch (pThis->bMode & BS3_MODE_CODE_MASK)
+    {
+        case BS3_MODE_CODE_16:
+        case BS3_MODE_CODE_V86:
+            uExpectRax = (uint16_t)s_uValue | (pCtx->rax.u & UINT64_C(0xffffffffffff0000));
+            break;
+        case BS3_MODE_CODE_32:
+            uExpectRax = (uint32_t)s_uValue | (pCtx->rax.u & UINT64_C(0xffffffff00000000));
+            break;
+        case BS3_MODE_CODE_64:
+            uExpectRax = s_uValue;
+            break;
+    }
+    if (uExpectRax == pCtx->rax.u)
+        pCtx->rax.u = ~pCtx->rax.u;
+
+    /*
+     * Make two approaches to the test page (the 2nd one):
+     *  - i=0: Start on the 1st page and edge into the 2nd.
+     *  - i=1: Start at the end of the 2nd page and edge off it and into the 3rd.
+     */
+    for (i = 0; i < 2; i++)
+    {
+        unsigned off    = fFlags & BS3CB2PFACC_F_DIRECT ? X86_PAGE_SIZE : X86_PAGE_SIZE * (i + 1) - pThis->cbAccess;
+        unsigned offEnd = fFlags & BS3CB2PFACC_F_DIRECT ? off + 1       : X86_PAGE_SIZE * (i + 1) + (i == 0 ? 8 : 7);
+
+        for (; off < offEnd; off++)
+        {
+            *(uint64_t *)&pThis->pbOrgTest[off] = s_uValue;
+            if (BS3_MODE_IS_16BIT_CODE(pThis->bMode))
+                pThis->ExpectCtx.rbx.u = pCtx->rbx.u = off;
+            else
+                pThis->ExpectCtx.rbx.u = pCtx->rbx.u = pThis->uTestAddr.u + off;
+
+            Bs3TrapSetJmpAndRestore(pCtx, &pThis->TrapCtx);
+            //Bs3TestPrintf("off=%#06x bXcpt=%#x uErrCd=%#RX64\n", off, pThis->TrapCtx.bXcpt, pThis->TrapCtx.uErrCd);
+
+            if (   bXcpt != X86_XCPT_PF
+                || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off >= X86_PAGE_SIZE * 2)
+                || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off <= X86_PAGE_SIZE - pThis->cbAccess) )
+            {
+                pThis->ExpectCtx.rax.u = uExpectRax;
+                bs3CpuBasic2Pf_CompareCtx(pThis, &pThis->ExpectCtx, pThis->pCmnMode->MovLoad.offUd2, X86_XCPT_UD, 0 /*uErrCd*/);
+                pThis->ExpectCtx.rax = pCtx->rax;
+            }
+            else
+            {
+                if (off < X86_PAGE_SIZE)
+                    pThis->ExpectCtx.cr2.u = pThis->uTestAddr.u + X86_PAGE_SIZE;
+                else
+                    pThis->ExpectCtx.cr2.u = pThis->uTestAddr.u + off;
+                bs3CpuBasic2Pf_CompareCtx(pThis, &pThis->ExpectCtx, 0 /*cbPcAdjust*/, bXcpt, uPfErrCd);
+                pThis->ExpectCtx.cr2 = pCtx->cr2;
+            }
+        }
+
+        if (fFlags & BS3CB2PFACC_F_DIRECT)
+            break;
+    }
+}
+
+
+/**
+ * Test a simple store instruction around the edges of page two.
+ *
+ * @param   pThis           The test state data.
+ * @param   pCtx            The test context.
+ * @param   fFlags          BS3CB2PFACC_F_XXX.
+ * @param   bXcpt           X86_XCPT_PF if this can cause \#PFs, otherwise
+ *                          X86_XCPT_UD.
+ * @param   uPfErrCd        The error code for \#PFs.
+ */
+static void bs3CpuBasic2Pf_DoMovStore(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pCtx, uint32_t fFlags,
+                                      uint8_t bXcpt, uint8_t uPfErrCd)
+{
+    static uint64_t const s_uValue        = UINT64_C(0x3af45ead86a34a26);
+    static uint64_t const s_uValueFlipped = UINT64_C(0xc50ba152795cb5d9);
+    uint64_t const uRaxSaved = pCtx->rax.u;
+    uint64_t uExpectStored;
+    unsigned i;
+
+    /*
+     * Adjust the incoming context and calculate our expectations.
+     */
+    bs3CpuBasic2Pf_SetCsEip(pThis, pCtx, &pThis->pCmnMode->MovStore);
+    if ((pThis->bMode & BS3_MODE_CODE_MASK) != BS3_MODE_CODE_64)
+        pCtx->rax.u = (uint32_t)s_uValue; /* leave the upper part zero */
+    else
+        pCtx->rax.u = s_uValue;
+
+    Bs3MemCpy(&pThis->ExpectCtx, pCtx, sizeof(pThis->ExpectCtx));
+    switch (pThis->bMode & BS3_MODE_CODE_MASK)
+    {
+        case BS3_MODE_CODE_16:
+        case BS3_MODE_CODE_V86:
+            uExpectStored = (uint16_t)s_uValue | (s_uValueFlipped & UINT64_C(0xffffffffffff0000));
+            break;
+        case BS3_MODE_CODE_32:
+            uExpectStored = (uint32_t)s_uValue | (s_uValueFlipped & UINT64_C(0xffffffff00000000));
+            break;
+        case BS3_MODE_CODE_64:
+            uExpectStored = s_uValue;
+            break;
+    }
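+    /* Note that s_uValueFlipped == ~s_uValue; e.g. the 16-bit case expects
+       0xc50ba152795c4a26, i.e. the stored word 0x4a26 under the filler's upper bits. */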
+
+    /*
+     * Make two approaches to the test page (the 2nd one):
+     *  - i=0: Start on the 1st page and edge into the 2nd.
+     *  - i=1: Start at the end of the 2nd page and edge off it and into the 3rd.
+     */
+    for (i = 0; i < 2; i++)
+    {
+        unsigned off    = fFlags & BS3CB2PFACC_F_DIRECT ? X86_PAGE_SIZE : X86_PAGE_SIZE * (i + 1) - pThis->cbAccess;
+        unsigned offEnd = fFlags & BS3CB2PFACC_F_DIRECT ? off + 1       : X86_PAGE_SIZE * (i + 1) + (i == 0 ? 8 : 7);
+        for (; off < offEnd; off++)
+        {
+            *(uint64_t *)&pThis->pbOrgTest[off] = s_uValueFlipped;
+            if (BS3_MODE_IS_16BIT_CODE(pThis->bMode))
+                pThis->ExpectCtx.rbx.u = pCtx->rbx.u = off;
+            else
+                pThis->ExpectCtx.rbx.u = pCtx->rbx.u = pThis->uTestAddr.u + off;
+
+            Bs3TrapSetJmpAndRestore(pCtx, &pThis->TrapCtx);
+            //Bs3TestPrintf("off=%#06x bXcpt=%#x uErrCd=%#RX64\n", off, pThis->TrapCtx.bXcpt, pThis->TrapCtx.uErrCd);
+
+            if (   bXcpt != X86_XCPT_PF
+                || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off >= X86_PAGE_SIZE * 2)
+                || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off <= X86_PAGE_SIZE - pThis->cbAccess) )
+            {
+                bs3CpuBasic2Pf_CompareCtx(pThis, &pThis->ExpectCtx, pThis->pCmnMode->MovStore.offUd2, X86_XCPT_UD, 0 /*uErrCd*/);
+                if (*(uint64_t *)&pThis->pbOrgTest[off] != uExpectStored)
+                    Bs3TestFailedF("%u - %s: Stored %#RX64, expected %#RX64",
+                                   g_usBs3TestStep, "xxxx", *(uint64_t *)&pThis->pbOrgTest[off], uExpectStored);
+            }
+            else
+            {
+                if (off < X86_PAGE_SIZE)
+                    pThis->ExpectCtx.cr2.u = pThis->uTestAddr.u + X86_PAGE_SIZE;
+                else
+                    pThis->ExpectCtx.cr2.u = pThis->uTestAddr.u + off;
+                bs3CpuBasic2Pf_CompareCtx(pThis, &pThis->ExpectCtx, 0 /*cbPcAdjust*/, bXcpt, uPfErrCd);
+                pThis->ExpectCtx.cr2 = pCtx->cr2;
+                if (*(uint64_t *)&pThis->pbOrgTest[off] != s_uValueFlipped)
+                    Bs3TestFailedF("%u - %s: #PF'ed store modified memory: %#RX64, expected %#RX64",
+                                   g_usBs3TestStep, "xxxx", *(uint64_t *)&pThis->pbOrgTest[off], s_uValueFlipped);
+
+            }
+        }
+
+        if (fFlags & BS3CB2PFACC_F_DIRECT)
+            break;
+    }
+
+    pCtx->rax.u = uRaxSaved;
+}
+
+
+/**
+ * Test a xchg instruction around the edges of page two.
+ *
+ * @param   pThis           The test state data.
+ * @param   pCtx            The test context.
+ * @param   fFlags          BS3CB2PFACC_F_XXX.
+ * @param   bXcpt           X86_XCPT_PF if this can cause \#PFs, otherwise
+ *                          X86_XCPT_UD.
+ * @param   uPfErrCd        The error code for \#PFs.
+ */
+static void bs3CpuBasic2Pf_DoXchg(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pCtx, uint32_t fFlags, uint8_t bXcpt, uint8_t uPfErrCd)
+{
+    static uint64_t const s_uValue        = UINT64_C(0xea58699648e2f32c);
+    static uint64_t const s_uValueFlipped = UINT64_C(0x15a79669b71d0cd3);
+    uint64_t const uRaxSaved = pCtx->rax.u;
+    uint64_t uRaxIn;
+    uint64_t uExpectedRax;
+    uint64_t uExpectStored;
+    unsigned i;
+
+    /*
+     * Adjust the incoming context and calculate our expectations.
+     */
+    bs3CpuBasic2Pf_SetCsEip(pThis, pCtx, &pThis->pCmnMode->Xchg);
+    if ((pThis->bMode & BS3_MODE_CODE_MASK) != BS3_MODE_CODE_64)
+        uRaxIn = (uint32_t)s_uValue; /* leave the upper part zero */
+    else
+        uRaxIn = s_uValue;
+
+    Bs3MemCpy(&pThis->ExpectCtx, pCtx, sizeof(pThis->ExpectCtx));
+    switch (pThis->bMode & BS3_MODE_CODE_MASK)
+    {
+        case BS3_MODE_CODE_16:
+        case BS3_MODE_CODE_V86:
+            uExpectedRax  = (uint16_t)s_uValueFlipped | (uRaxIn          & UINT64_C(0xffffffffffff0000));
+            uExpectStored = (uint16_t)s_uValue        | (s_uValueFlipped & UINT64_C(0xffffffffffff0000));
+            break;
+        case BS3_MODE_CODE_32:
+            uExpectedRax  = (uint32_t)s_uValueFlipped | (uRaxIn          & UINT64_C(0xffffffff00000000));
+            uExpectStored = (uint32_t)s_uValue        | (s_uValueFlipped & UINT64_C(0xffffffff00000000));
+            break;
+        case BS3_MODE_CODE_64:
+            uExpectedRax  = s_uValueFlipped;
+            uExpectStored = s_uValue;
+            break;
+    }
+
+    /*
+     * Make two approaches to the test page (the 2nd one):
+     *  - i=0: Start on the 1st page and edge into the 2nd.
+     *  - i=1: Start at the end of the 2nd page and edge off it and into the 3rd.
+     */
+    for (i = 0; i < 2; i++)
+    {
+        unsigned off    = fFlags & BS3CB2PFACC_F_DIRECT ? X86_PAGE_SIZE : X86_PAGE_SIZE * (i + 1) - pThis->cbAccess;
+        unsigned offEnd = fFlags & BS3CB2PFACC_F_DIRECT ? off + 1       : X86_PAGE_SIZE * (i + 1) + (i == 0 ? 8 : 7);
+        for (; off < offEnd; off++)
+        {
+            *(uint64_t *)&pThis->pbOrgTest[off] = s_uValueFlipped;
+            pCtx->rax.u = uRaxIn;
+            if (BS3_MODE_IS_16BIT_CODE(pThis->bMode))
+                pThis->ExpectCtx.rbx.u = pCtx->rbx.u = off;
+            else
+                pThis->ExpectCtx.rbx.u = pCtx->rbx.u = pThis->uTestAddr.u + off;
+
+            Bs3TrapSetJmpAndRestore(pCtx, &pThis->TrapCtx);
+            //Bs3TestPrintf("off=%#06x bXcpt=%#x uErrCd=%#RX64\n", off, pThis->TrapCtx.bXcpt, pThis->TrapCtx.uErrCd);
+
+            if (   bXcpt != X86_XCPT_PF
+                || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off >= X86_PAGE_SIZE * 2)
+                || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off <= X86_PAGE_SIZE - pThis->cbAccess) )
+            {
+                pThis->ExpectCtx.rax.u = uExpectedRax;
+                bs3CpuBasic2Pf_CompareCtx(pThis, &pThis->ExpectCtx, pThis->pCmnMode->Xchg.offUd2, X86_XCPT_UD, 0 /*uErrCd*/);
+                if (*(uint64_t *)&pThis->pbOrgTest[off] != uExpectStored)
+                    Bs3TestFailedF("%u - %s: Stored %#RX64, expected %#RX64",
+                                   g_usBs3TestStep, "xxxx", *(uint64_t *)&pThis->pbOrgTest[off], uExpectStored);
+            }
+            else
+            {
+                pThis->ExpectCtx.rax.u = uRaxIn;
+                if (off < X86_PAGE_SIZE)
+                    pThis->ExpectCtx.cr2.u = pThis->uTestAddr.u + X86_PAGE_SIZE;
+                else
+                    pThis->ExpectCtx.cr2.u = pThis->uTestAddr.u + off;
+                bs3CpuBasic2Pf_CompareCtx(pThis, &pThis->ExpectCtx, 0 /*cbPcAdjust*/, bXcpt, uPfErrCd);
+                pThis->ExpectCtx.cr2 = pCtx->cr2;
+                if (*(uint64_t *)&pThis->pbOrgTest[off] != s_uValueFlipped)
+                    Bs3TestFailedF("%u - %s: #PF'ed store modified memory: %#RX64, expected %#RX64",
+                                   g_usBs3TestStep, "xxxx", *(uint64_t *)&pThis->pbOrgTest[off], s_uValueFlipped);
+            }
+        }
+
+        if (fFlags & BS3CB2PFACC_F_DIRECT)
+            break;
+    }
+
+    pCtx->rax.u = uRaxSaved;
+}
+
+
+/**
+ * Test a cmpxchg instruction around the edges of page two.
+ *
+ * @param   pThis           The test state data.
+ * @param   pCtx            The test context.
+ * @param   fFlags          BS3CB2PFACC_F_XXX.
+ * @param   bXcpt           X86_XCPT_PF if this can cause \#PFs, otherwise
+ *                          X86_XCPT_UD.
+ * @param   uPfErrCd        The error code for \#PFs.
+ * @param   fMissmatch      Whether the compare should fail so nothing is stored
+ *                          (@c true), or match so the store is performed.
+ */
+static void bs3CpuBasic2Pf_DoCmpXchg(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pCtx, uint32_t fFlags,
+                                     uint8_t bXcpt, uint8_t uPfErrCd, bool fMissmatch)
+{
+    static uint64_t const s_uValue        = UINT64_C(0xea58699648e2f32c);
+    static uint64_t const s_uValueFlipped = UINT64_C(0x15a79669b71d0cd3);
+    static uint64_t const s_uValueOther   = UINT64_C(0x2171239bcb044c81);
+    uint64_t const uRaxSaved = pCtx->rax.u;
+    uint64_t const uRcxSaved = pCtx->rcx.u;
+    uint64_t uRaxIn;
+    uint64_t uExpectedRax;
+    uint32_t uExpectedFlags;
+    uint64_t uExpectStored;
+    unsigned i;
+
+    /*
+     * Adjust the incoming context and calculate our expectations.
+     * Hint: CMPXCHG  [xBX],xCX     ; xAX compare and update implicit, ZF set to !fMissmatch.
+     */
+    bs3CpuBasic2Pf_SetCsEip(pThis, pCtx, &pThis->pCmnMode->CmpXchg);
+    if ((pThis->bMode & BS3_MODE_CODE_MASK) != BS3_MODE_CODE_64)
+    {
+        uRaxIn      = (uint32_t)(fMissmatch ? s_uValueOther : s_uValueFlipped); /* leave the upper part zero */
+        pCtx->rcx.u = (uint32_t)s_uValue;                                        /* ditto */
+    }
+    else
+    {
+        uRaxIn      = fMissmatch ? s_uValueOther : s_uValueFlipped;
+        pCtx->rcx.u = s_uValue;
+    }
+    if (fMissmatch)
+        pCtx->rflags.u32 |= X86_EFL_ZF;
+    else
+        pCtx->rflags.u32 &= ~X86_EFL_ZF;
+
+    Bs3MemCpy(&pThis->ExpectCtx, pCtx, sizeof(pThis->ExpectCtx));
+    uExpectedFlags = pCtx->rflags.u32 & ~(X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF | X86_EFL_ZF);
+    switch (pThis->bMode & BS3_MODE_CODE_MASK)
+    {
+        case BS3_MODE_CODE_16:
+        case BS3_MODE_CODE_V86:
+            uExpectedRax  = (uint16_t)s_uValueFlipped | (uRaxIn          & UINT64_C(0xffffffffffff0000));
+            uExpectStored = (uint16_t)s_uValue        | (s_uValueFlipped & UINT64_C(0xffffffffffff0000));
+            uExpectedFlags |= !fMissmatch ? X86_EFL_ZF | X86_EFL_PF : X86_EFL_AF;
+            break;
+        case BS3_MODE_CODE_32:
+            uExpectedRax  = (uint32_t)s_uValueFlipped | (uRaxIn          & UINT64_C(0xffffffff00000000));
+            uExpectStored = (uint32_t)s_uValue        | (s_uValueFlipped & UINT64_C(0xffffffff00000000));
+            uExpectedFlags |= !fMissmatch ? X86_EFL_ZF | X86_EFL_PF : X86_EFL_AF;
+            break;
+        case BS3_MODE_CODE_64:
+            uExpectedRax  = s_uValueFlipped;
+            uExpectStored = s_uValue;
+            uExpectedFlags |= !fMissmatch ? X86_EFL_ZF | X86_EFL_PF : X86_EFL_AF;
+            break;
+    }
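+    /* The flags follow from the CMP (xAX - [xBX]) that CMPXCHG performs: a match
+       subtracts equal values, so ZF and PF (zero result) are set; with the
+       constants above the mismatch case (16-bit: 0x4c81 - 0x0cd3 = 0x3fae)
+       happens to set only AF. */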
+    if (fMissmatch)
+        uExpectStored = s_uValueFlipped;
+
+    /*
+     * Make two approaches to the test page (the 2nd one):
+     *  - i=0: Start on the 1st page and edge into the 2nd.
+     *  - i=1: Start at the end of the 2nd page and edge off it and into the 3rd.
+     */
+    for (i = 0; i < 2; i++)
+    {
+        unsigned off    = fFlags & BS3CB2PFACC_F_DIRECT ? X86_PAGE_SIZE : X86_PAGE_SIZE * (i + 1) - pThis->cbAccess;
+        unsigned offEnd = fFlags & BS3CB2PFACC_F_DIRECT ? off + 1       : X86_PAGE_SIZE * (i + 1) + (i == 0 ? 8 : 7);
+        for (; off < offEnd; off++)
+        {
+            *(uint64_t *)&pThis->pbOrgTest[off] = s_uValueFlipped;
+            pCtx->rax.u = uRaxIn;
+            if (BS3_MODE_IS_16BIT_CODE(pThis->bMode))
+                pThis->ExpectCtx.rbx.u = pCtx->rbx.u = off;
+            else
+                pThis->ExpectCtx.rbx.u = pCtx->rbx.u = pThis->uTestAddr.u + off;
+
+            Bs3TrapSetJmpAndRestore(pCtx, &pThis->TrapCtx);
+            //Bs3TestPrintf("off=%#06x bXcpt=%#x uErrCd=%#RX64\n", off, pThis->TrapCtx.bXcpt, pThis->TrapCtx.uErrCd);
+
+            if (   bXcpt != X86_XCPT_PF
+                || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off >= X86_PAGE_SIZE * 2)
+                || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off <= X86_PAGE_SIZE - pThis->cbAccess) )
+            {
+                pThis->ExpectCtx.rax.u = uExpectedRax;
+                pThis->ExpectCtx.rflags.u32 = uExpectedFlags;
+                bs3CpuBasic2Pf_CompareCtx(pThis, &pThis->ExpectCtx, pThis->pCmnMode->CmpXchg.offUd2, X86_XCPT_UD, 0 /*uErrCd*/);
+                if (*(uint64_t *)&pThis->pbOrgTest[off] != uExpectStored)
+                    Bs3TestFailedF("%u - %s: Stored %#RX64, expected %#RX64",
+                                   g_usBs3TestStep, "xxxx", *(uint64_t *)&pThis->pbOrgTest[off], uExpectStored);
+            }
+            else
+            {
+                pThis->ExpectCtx.rax.u = uRaxIn;
+                pThis->ExpectCtx.rflags = pCtx->rflags;
+                if (off < X86_PAGE_SIZE)
+                    pThis->ExpectCtx.cr2.u = pThis->uTestAddr.u + X86_PAGE_SIZE;
+                else
+                    pThis->ExpectCtx.cr2.u = pThis->uTestAddr.u + off;
+                bs3CpuBasic2Pf_CompareCtx(pThis, &pThis->ExpectCtx, 0 /*cbPcAdjust*/, bXcpt, uPfErrCd);
+                pThis->ExpectCtx.cr2 = pCtx->cr2;
+                if (*(uint64_t *)&pThis->pbOrgTest[off] != s_uValueFlipped)
+                    Bs3TestFailedF("%u - %s: #PF'ed store modified memory: %#RX64, expected %#RX64",
+                                   g_usBs3TestStep, "xxxx", *(uint64_t *)&pThis->pbOrgTest[off], s_uValueFlipped);
+            }
+        }
+
+        if (fFlags & BS3CB2PFACC_F_DIRECT)
+            break;
+    }
+
+    pCtx->rax.u = uRaxSaved;
+    pCtx->rcx.u = uRcxSaved;
+}
+
+
+static void bs3CpuBasic2Pf_DoCmpXchgMiss(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pCtx, uint32_t fFlags,
+                                         uint8_t bXcpt, uint8_t uPfErrCd)
+{
+    bs3CpuBasic2Pf_DoCmpXchg(pThis, pCtx, fFlags, bXcpt, uPfErrCd, true /*fMissmatch*/ );
+}
+
+
+static void bs3CpuBasic2Pf_DoCmpXchgMatch(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pCtx, uint32_t fFlags,
+                                          uint8_t bXcpt, uint8_t uPfErrCd)
+{
+    bs3CpuBasic2Pf_DoCmpXchg(pThis, pCtx, fFlags, bXcpt, uPfErrCd, false /*fMissmatch*/ );
+}
+
+
+/**
+ * @interface_method_impl{BS3CPUBASIC2PFACCESSOR,pfnAccessor,
+ *      DIV [MEM=0] for checking the accessed bit}
+ */
+static void bs3CpuBasic2Pf_DoDivByZero(PBS3CPUBASIC2PFSTATE pThis, PBS3REGCTX pCtx, uint32_t fFlags,
+                                       uint8_t bXcpt, uint8_t uPfErrCd)
+{
+    static uint64_t const   s_uFiller = UINT64_C(0x9856703711f4069e);
+    uint64_t                uZeroAndFill;
+    unsigned i;
+
+    /*
+     * Adjust the incoming context and calculate our expectations.
+     */
+    bs3CpuBasic2Pf_SetCsEip(pThis, pCtx, &pThis->pCmnMode->DivMem);
+
+    Bs3MemCpy(&pThis->ExpectCtx, pCtx, sizeof(pThis->ExpectCtx));
+    switch (pThis->bMode & BS3_MODE_CODE_MASK)
+    {
+        case BS3_MODE_CODE_16:
+        case BS3_MODE_CODE_V86:
+            uZeroAndFill = s_uFiller & UINT64_C(0xffffffffffff0000);
+            break;
+        case BS3_MODE_CODE_32:
+            uZeroAndFill = s_uFiller & UINT64_C(0xffffffff00000000);
+            break;
+        case BS3_MODE_CODE_64:
+            uZeroAndFill = 0;
+            break;
+    }
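+    /* The operand-sized low bits of the divisor are zero, so DIV raises #DE;
+       the filler in the remaining bits lets us check that the faulting
+       instruction leaves its source operand unmodified. */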
+
+    /*
+     * Make two approaches to the test page (the 2nd one):
+     *  - i=0: Start on the 1st page and edge into the 2nd.
+     *  - i=1: Start at the end of the 2nd page and edge off it and into the 3rd.
+     */
+    for (i = 0; i < 2; i++)
+    {
+        unsigned off    = fFlags & BS3CB2PFACC_F_DIRECT ? X86_PAGE_SIZE : X86_PAGE_SIZE * (i + 1) - pThis->cbAccess;
+        unsigned offEnd = fFlags & BS3CB2PFACC_F_DIRECT ? off + 1       : X86_PAGE_SIZE * (i + 1) + (i == 0 ? 8 : 7);
+        for (; off < offEnd; off++)
+        {
+            *(uint64_t *)&pThis->pbOrgTest[off] = uZeroAndFill;
+            if (BS3_MODE_IS_16BIT_CODE(pThis->bMode))
+                pThis->ExpectCtx.rbx.u = pCtx->rbx.u = off;
+            else
+                pThis->ExpectCtx.rbx.u = pCtx->rbx.u = pThis->uTestAddr.u + off;
+
+            Bs3TrapSetJmpAndRestore(pCtx, &pThis->TrapCtx);
+            //if (pThis->bMode == BS3_MODE_PP16_32) Bs3TestPrintf("off=%#06x bXcpt=%#x uErrCd=%#RX64\n", off, pThis->TrapCtx.bXcpt, pThis->TrapCtx.uErrCd);
+
+            if (   bXcpt != X86_XCPT_PF
+                || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off >= X86_PAGE_SIZE * 2)
+                || ((fFlags & BS3CB2PFACC_F_PAGE_LEVEL) && off <= X86_PAGE_SIZE - pThis->cbAccess) )
+            {
+                bs3CpuBasic2Pf_CompareCtx(pThis, &pThis->ExpectCtx, 0 /*cbPcAdjust*/, X86_XCPT_DE, 0 /*uErrCd*/);
+                if (*(uint64_t *)&pThis->pbOrgTest[off] != uZeroAndFill)
+                    Bs3TestFailedF("%u - %s: Modified source op: %#RX64, expected %#RX64",
+                                   g_usBs3TestStep, "xxxx", *(uint64_t *)&pThis->pbOrgTest[off], uZeroAndFill);
+            }
+            else
+            {
+                if (off < X86_PAGE_SIZE)
+                    pThis->ExpectCtx.cr2.u = pThis->uTestAddr.u + X86_PAGE_SIZE;
+                else
+                    pThis->ExpectCtx.cr2.u = pThis->uTestAddr.u + off;
+                bs3CpuBasic2Pf_CompareCtx(pThis, &pThis->ExpectCtx, 0 /*cbPcAdjust*/, bXcpt, uPfErrCd);
+                pThis->ExpectCtx.cr2 = pCtx->cr2;
+                if (*(uint64_t *)&pThis->pbOrgTest[off] != uZeroAndFill)
+                    Bs3TestFailedF("%u - %s: Modified source op: %#RX64, expected %#RX64",
+                                   g_usBs3TestStep, "xxxx", *(uint64_t *)&pThis->pbOrgTest[off], uZeroAndFill);
+            }
+        }
+
+        if (fFlags & BS3CB2PFACC_F_DIRECT)
+            break;
+    }
+}
+
+
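+/* Each accessor pairs its routine with the #PF error-code bits the access
+   contributes (ID for instruction fetches, RW for writes) and the exception
+   expected when no page fault occurs. */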
+static BS3CPUBASIC2PFACCESSOR const g_aAccessors[] =
+{
+    {   "DoExec",           bs3CpuBasic2Pf_DoExec,                          X86_TRAP_PF_ID,     X86_XCPT_UD },
+    {   "DoMovLoad",        bs3CpuBasic2Pf_DoMovLoad,                       0,                  X86_XCPT_UD },
+    {   "DoMovStore",       bs3CpuBasic2Pf_DoMovStore,                      X86_TRAP_PF_RW,     X86_XCPT_UD },
+    {   "DoXchg",           bs3CpuBasic2Pf_DoXchg,                          X86_TRAP_PF_RW,     X86_XCPT_UD },
+    {   "DoCmpXchgMiss",    bs3CpuBasic2Pf_DoCmpXchgMiss,                   X86_TRAP_PF_RW,     X86_XCPT_UD },
+    {   "DoCmpXhcgMatch",   bs3CpuBasic2Pf_DoCmpXchgMatch,                  X86_TRAP_PF_RW,     X86_XCPT_UD },
+    {   "DoDivByZero",      bs3CpuBasic2Pf_DoDivByZero,                     0,                  X86_XCPT_DE },
+};
+
+/** @} */
+
+
+/** @name BS3CPUBASIC2PFMODPT::pfnModify implementations.
+ * @{ */
+
+
+static void bs3CpuBasic2Pf_ClearMask(PBS3CPUBASIC2PFSTATE pThis, unsigned iStore, PCBS3CPUBASIC2PFMODPT pEntry,
+                                     uint32_t fClearMask, uint32_t fSetMask)
+{
+    if (pThis->PgInfo.cbEntry == 4)
+    {
+        uint32_t const uOrg = pThis->PteBackup.Legacy[1];
+        uint32_t       uNew = ((uOrg & ~fClearMask) | fSetMask) & ~(uint32_t)pEntry->uModifyArg;
+        uint32_t const uOld = pThis->PgInfo.u.Legacy.pPte[1].u;
+        g_aStoreMethods[iStore].pfnStore(pThis->PgInfo.u.Legacy.pPte + 1, uNew, uOld);
+    }
+    else
+    {
+        uint64_t const uOrg = pThis->PteBackup.Pae[1];
+        uint64_t       uNew = ((uOrg & ~(uint64_t)fClearMask) | fSetMask) & ~(uint64_t)pEntry->uModifyArg;
+        uint64_t const uOld = pThis->PgInfo.u.Pae.pPte[1].u;
+
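+        /* The 64-bit PAE entry is written as two 32-bit halves (the store
+           methods operate on 32-bit words); the high half is skipped when
+           it would not change. */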
+        g_aStoreMethods[iStore].pfnStore(&pThis->PgInfo.u.Pae.pPte[1].au32[0], (uint32_t)uNew, (uint32_t)uOld);
+        if ((uint32_t)(uNew >> 32) != (uint32_t)(uOld >> 32))
+            g_aStoreMethods[iStore].pfnStore(&pThis->PgInfo.u.Pae.pPte[1].au32[1],
+                                             (uint32_t)(uNew >> 32), (uint32_t)(uOld >> 32));
+    }
+}
+
+static void bs3CpuBasic2Pf_SetBit(PBS3CPUBASIC2PFSTATE pThis, unsigned iStore, PCBS3CPUBASIC2PFMODPT pEntry,
+                                  uint32_t fClearMask, uint32_t fSetMask)
+{
+    if (pThis->PgInfo.cbEntry == 4)
+    {
+        uint32_t const uOrg = pThis->PteBackup.Legacy[1];
+        uint32_t       uNew = (uOrg & ~fClearMask) | fSetMask | RT_BIT_32(pEntry->uModifyArg);
+        uint32_t const uOld = pThis->PgInfo.u.Legacy.pPte[1].u;
+        g_aStoreMethods[iStore].pfnStore(pThis->PgInfo.u.Legacy.pPte + 1, uNew, uOld);
+    }
+    else
+    {
+        uint64_t const uOrg = pThis->PteBackup.Pae[1];
+        uint64_t       uNew = ((uOrg & ~(uint64_t)fClearMask) | fSetMask) | RT_BIT_64(pEntry->uModifyArg);
+        uint64_t const uOld = pThis->PgInfo.u.Pae.pPte[1].u;
+
+        if (pEntry->uModifyArg < 32 || (uint32_t)uNew != (uint32_t)uOld)
+            g_aStoreMethods[iStore].pfnStore(&pThis->PgInfo.u.Pae.pPte[1].au32[0], (uint32_t)uNew, (uint32_t)uOld);
+        if (pEntry->uModifyArg >= 32 || (uint32_t)(uNew >> 32) != (uint32_t)(uOld >> 32))
+            g_aStoreMethods[iStore].pfnStore(&pThis->PgInfo.u.Pae.pPte[1].au32[1],
+                                             (uint32_t)(uNew >> 32), (uint32_t)(uOld >> 32));
+    }
+}
+
+static void bs3CpuBasic2Pf_NoChange(PBS3CPUBASIC2PFSTATE pThis, unsigned iStore, PCBS3CPUBASIC2PFMODPT pEntry,
+                                    uint32_t fClearMask, uint32_t fSetMask)
+{
+    if (pThis->PgInfo.cbEntry == 4)
+    {
+        uint32_t const uOrg = pThis->PteBackup.Legacy[1];
+        uint32_t       uNew = (uOrg & ~fClearMask) | fSetMask;
+        uint32_t const uOld = pThis->PgInfo.u.Legacy.pPte[1].u;
+        if (uNew != uOld)
+            g_aStoreMethods[iStore].pfnStore(&pThis->PgInfo.u.Legacy.pPte[1], uNew, uOld);
+    }
+    else
+    {
+        uint64_t const uOrg = pThis->PteBackup.Pae[1];
+        uint64_t       uNew = (uOrg & ~(uint64_t)fClearMask) | fSetMask;
+        uint64_t const uOld = pThis->PgInfo.u.Pae.pPte[1].u;
+        if (uNew != uOld)
+        {
+            if ((uint32_t)uNew != (uint32_t)uOld)
+                g_aStoreMethods[iStore].pfnStore(&pThis->PgInfo.u.Pae.pPte[1].au32[0], (uint32_t)uNew, (uint32_t)uOld);
+            if ((uint32_t)(uNew >> 32) != (uint32_t)(uOld >> 32))
+                g_aStoreMethods[iStore].pfnStore(&pThis->PgInfo.u.Pae.pPte[1].au32[1],
+                                                 (uint32_t)(uNew >> 32), (uint32_t)(uOld >> 32));
+        }
+    }
+}
+
+/** @} */
+
+
+/** @name BS3CPUBASIC2PFMODPT::pfnApplicable implementations.
+ * @{ */
+
+static bool bs3CpuBasic2Pf_IsPteBitReserved(PBS3CPUBASIC2PFSTATE pThis, PCBS3CPUBASIC2PFMODPT pEntry)
+{
+    if (pThis->PgInfo.cbEntry == 8)
+    {
+        /* Bits 52..63 are reserved, or only 52..62 when NXE=1 (bit 63 is NX). */
+        if (pThis->PgInfo.cEntries == 3)
+        {
+            if ((uint32_t)(pEntry->uModifyArg - 52U) < (uint32_t)(12 - pThis->fNxe))
+                return true;
+        }
+        else if (pEntry->uModifyArg == 63 && !pThis->fNxe)
+            return true;
+
+        /* Reserved physical address bits. */
+        if (pEntry->uModifyArg < 52)
+        {
+            if ((uint32_t)pEntry->uModifyArg >= (uint32_t)pThis->cBitsPhysWidth)
+                return true;
+        }
+    }
+    return false;
+}
+
+static bool bs3CpuBasic2Pf_IsPteBitSoftwareUsable(PBS3CPUBASIC2PFSTATE pThis, PCBS3CPUBASIC2PFMODPT pEntry)
+{
+    if (pThis->PgInfo.cbEntry == 8)
+    {
+        if (pThis->PgInfo.cEntries != 3)
+        {
+            if ((uint32_t)(pEntry->uModifyArg - 52U) < (uint32_t)11)
+                return true;
+        }
+    }
+    return false;
+}
+
+
+static bool bs3CpuBasic2Pf_IsNxe(PBS3CPUBASIC2PFSTATE pThis, PCBS3CPUBASIC2PFMODPT pEntry)
+{
+    return pThis->fNxe && pThis->PgInfo.cbEntry == 8;
+}
+
+/** @} */
+
+
+static const BS3CPUBASIC2PFMODPT g_aPteWorkers[] =
+{
+/*  { pszName,     P  U  W  NX RSV   ModifyArg  pfnModify,                   pfnApplicable }, */
+    { "org",       1, 1, 1, 0, 0,   0,          bs3CpuBasic2Pf_NoChange,     NULL },
+    { "!US",       1, 0, 1, 0, 0,   X86_PTE_US, bs3CpuBasic2Pf_ClearMask,    NULL },
+    { "!RW",       1, 1, 0, 0, 0,   X86_PTE_RW, bs3CpuBasic2Pf_ClearMask,    NULL },
+    { "!RW+!US",   1, 0, 0, 0, 0,   X86_PTE_RW | X86_PTE_US, bs3CpuBasic2Pf_ClearMask,    NULL },
+    { "!P",        0, 0, 0, 0, 0,   X86_PTE_P,  bs3CpuBasic2Pf_ClearMask,    NULL },
+    { "NX",        1, 1, 1, 1, 0,   63,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsNxe },
+    { "RSVPH[32]", 0, 0, 0, 0, 1,   32,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[33]", 0, 0, 0, 0, 1,   33,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[34]", 0, 0, 0, 0, 1,   34,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[35]", 0, 0, 0, 0, 1,   35,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[36]", 0, 0, 0, 0, 1,   36,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[37]", 0, 0, 0, 0, 1,   37,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[38]", 0, 0, 0, 0, 1,   38,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[39]", 0, 0, 0, 0, 1,   39,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[40]", 0, 0, 0, 0, 1,   40,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[41]", 0, 0, 0, 0, 1,   41,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[42]", 0, 0, 0, 0, 1,   42,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[43]", 0, 0, 0, 0, 1,   43,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[44]", 0, 0, 0, 0, 1,   44,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[45]", 0, 0, 0, 0, 1,   45,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[46]", 0, 0, 0, 0, 1,   46,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[47]", 0, 0, 0, 0, 1,   47,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[48]", 0, 0, 0, 0, 1,   48,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[49]", 0, 0, 0, 0, 1,   49,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[50]", 0, 0, 0, 0, 1,   50,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSVPH[51]", 0, 0, 0, 0, 1,   51,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[52]",   0, 0, 0, 0, 1,   52,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[53]",   0, 0, 0, 0, 1,   53,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[54]",   0, 0, 0, 0, 1,   54,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[55]",   0, 0, 0, 0, 1,   55,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[56]",   0, 0, 0, 0, 1,   56,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[57]",   0, 0, 0, 0, 1,   57,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[58]",   0, 0, 0, 0, 1,   58,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[59]",   0, 0, 0, 0, 1,   59,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[60]",   0, 0, 0, 0, 1,   60,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[61]",   0, 0, 0, 0, 1,   61,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[62]",   0, 0, 0, 0, 1,   62,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[62]",   0, 0, 0, 0, 1,   62,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "RSV[63]",   0, 0, 0, 0, 1,   63,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitReserved },
+    { "!RSV[52]",  1, 1, 1, 0, 0,   52,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+    { "!RSV[53]",  1, 1, 1, 0, 0,   53,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+    { "!RSV[54]",  1, 1, 1, 0, 0,   54,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+    { "!RSV[55]",  1, 1, 1, 0, 0,   55,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+    { "!RSV[56]",  1, 1, 1, 0, 0,   56,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+    { "!RSV[57]",  1, 1, 1, 0, 0,   57,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+    { "!RSV[58]",  1, 1, 1, 0, 0,   58,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+    { "!RSV[59]",  1, 1, 1, 0, 0,   59,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+    { "!RSV[60]",  1, 1, 1, 0, 0,   60,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+    { "!RSV[61]",  1, 1, 1, 0, 0,   61,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+    { "!RSV[62]",  1, 1, 1, 0, 0,   62,         bs3CpuBasic2Pf_SetBit,       bs3CpuBasic2Pf_IsPteBitSoftwareUsable },
+
+};
+
+
+/**
+ * Worker for bs3CpuBasic2_RaiseXcpt0e_c32 that does the actual testing.
+ *
+ * Caller does all the cleaning up.
+ *
+ * @returns Error count.
+ * @param   pThis       Test state data.
+ * @param   fWp         Whether CR0.WP (write protect) is set.
+ * @param   fNxe        Whether NX is enabled.
+ */
+static uint8_t bs3CpuBasic2_RaiseXcpt0eWorker(PBS3CPUBASIC2PFSTATE register pThis, bool const fWp, bool const fNxe)
+{
+    unsigned            iLevel;
+    unsigned            iRing;
+    unsigned            iStore;
+    unsigned            iAccessor;
+    unsigned            iOuter;
+    unsigned            cPml4Tests;
+    unsigned            cPdPtrTests;
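+    /* The instruction-fetch (ID) bit only shows up in the #PF error code when
+       NX is enabled, so mask it out of the expected error codes otherwise. */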
+    uint32_t const      fPfIdMask = fNxe ? UINT32_MAX : ~X86_TRAP_PF_ID;
+    BS3REGCTX           aCtxts[4];
+
+    pThis->fWp  = fWp;
+    pThis->fNxe = fNxe;
+
+    /** @todo figure out V8086 testing. */
+    if ((pThis->bMode & BS3_MODE_CODE_MASK) == BS3_MODE_CODE_V86)
+        return BS3TESTDOMODE_SKIPPED;
+
+
+    /* paranoia: Touch the various big stack structures to ensure the compiler has allocated stack for them. */
+    for (iRing = 0; iRing < RT_ELEMENTS(aCtxts); iRing++)
+        Bs3MemZero(&aCtxts[iRing], sizeof(aCtxts[iRing]));
+
+    /*
+     * Set up a few contexts for testing this stuff.
+     */
+    Bs3RegCtxSaveEx(&aCtxts[0], pThis->bMode, 2048);
+    for (iRing = 1; iRing < 4; iRing++)
+    {
+        aCtxts[iRing] = aCtxts[0];
+        Bs3RegCtxConvertToRingX(&aCtxts[iRing], iRing);
+    }
+
+    if (!BS3_MODE_IS_16BIT_CODE(pThis->bMode))
+    {
+        for (iRing = 0; iRing < 4; iRing++)
+            aCtxts[iRing].rbx.u = pThis->uTestAddr.u;
+    }
+    else
+    {
+        for (iRing = 0; iRing < 4; iRing++)
+        {
+            aCtxts[iRing].ds    = pThis->uSel16TestData;
+            aCtxts[iRing].rbx.u = 0;
+        }
+    }
+
+    /*
+     * Check basic operation:
+     */
+    for (iRing = 0; iRing < 4; iRing++)
+        for (iAccessor = 0; iAccessor < RT_ELEMENTS(g_aAccessors); iAccessor++)
+            g_aAccessors[iAccessor].pfnAccessor(pThis, &aCtxts[iRing], BS3CB2PFACC_F_PAGE_LEVEL, X86_XCPT_UD, UINT8_MAX);
+
+    /*
+     * Some PTE checks. We only mess with the 2nd page.
+     */
+    for (iOuter = 0; iOuter < 2; iOuter++)
+    {
+        uint32_t const  fAccessor = (iOuter == 0 ? BS3CB2PFACC_F_DIRECT : 0) | BS3CB2PFACC_F_PAGE_LEVEL;
+        unsigned        iPteWrk;
+
+        bs3CpuBasic2Pf_FlushAll();
+        for (iPteWrk = 0; iPteWrk < RT_ELEMENTS(g_aPteWorkers); iPteWrk++)
+        {
+            BS3CPUBASIC2PFMODPT         EffWrk;
+            const BS3CPUBASIC2PFMODPT  *pPteWrk = &g_aPteWorkers[iPteWrk];
+            if (pPteWrk->pfnApplicable && !pPteWrk->pfnApplicable(pThis, pPteWrk))
+                continue;
+
+            pThis->pszPteWorker = pPteWrk->pszName;
+
+            EffWrk = *pPteWrk;
+
+#if 1
+            /*
+             * Do the modification once, then test all different accesses
+             * without flushing the TLB or anything in-between.
+             */
+            for (iStore = 0; iStore < RT_ELEMENTS(g_aStoreMethods); iStore++)
+            {
+                pThis->pszStore = g_aStoreMethods[iStore].pszName;
+                pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, 0);
+
+                for (iRing = 0; iRing < 4; iRing++)
+                {
+                    PBS3REGCTX const pCtx = &aCtxts[iRing];
+                    if (   EffWrk.fReserved
+                        || !EffWrk.fPresent
+                        || (!EffWrk.fUser && iRing == 3))
+                    {
+                        uint32_t const fPfBase = ( EffWrk.fReserved ? X86_TRAP_PF_P | X86_TRAP_PF_RSVD
+                                                  : EffWrk.fPresent ? X86_TRAP_PF_P : 0)
+                                               | (iRing == 3 ? X86_TRAP_PF_US : 0);
+                        for (iAccessor = 0; iAccessor < RT_ELEMENTS(g_aAccessors); iAccessor++)
+                        {
+                            pThis->pszAccessor = g_aAccessors[iAccessor].pszName;
+                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF,
+                                                                fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask));
+                        }
+                    }
+                    else
+                    {
+                        uint32_t const fPfBase = X86_TRAP_PF_P | (iRing == 3 ? X86_TRAP_PF_US : 0);
+                        for (iAccessor = 0; iAccessor < RT_ELEMENTS(g_aAccessors); iAccessor++)
+                        {
+                            pThis->pszAccessor = g_aAccessors[iAccessor].pszName;
+                            if (   (   (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_ID)
+                                    && EffWrk.fNoExecute)
+                                || (   (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_RW)
+                                    && !EffWrk.fWriteable
+                                    && (fWp || iRing == 3)) )
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF,
+                                                                    fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask));
+                            else
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                        }
+                    }
+                }
+
+                /* Reset the paging + full flush. */
+                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+            }
+#endif
+
+#define CHECK_AD_BITS(a_fExpectedAD) \
+    do { \
+        uint32_t fActualAD = (   pThis->PgInfo.cbEntry == 8 \
+                              ? pThis->PgInfo.u.Pae.pPte[1].au32[0] : pThis->PgInfo.u.Legacy.pPte[1].au32[0]) \
+                           & (X86_PTE_A | X86_PTE_D); \
+        if (fActualAD != (a_fExpectedAD)) \
+        { \
+            Bs3TestFailedF("%u - %s/%u: unexpected A/D bits: %#x, expected %#x\n", \
+                           g_usBs3TestStep, "xxxx", __LINE__, fActualAD, a_fExpectedAD); \
+            BS3CPUBASIC2PF_HALT(pThis); \
+        } \
+    } while (0)
+
+            /*
+             * Again, but redoing everything for each accessor.
+             */
+            for (iStore = 0; iStore < RT_ELEMENTS(g_aStoreMethods); iStore++)
+            {
+                pThis->pszStore = g_aStoreMethods[iStore].pszName;
+
+                for (iRing = 0; iRing < 4; iRing++)
+                {
+                    PBS3REGCTX const pCtx = &aCtxts[iRing];
+
+                    if (   EffWrk.fReserved
+                        || !EffWrk.fPresent
+                        || (!EffWrk.fUser && iRing == 3))
+                    {
+                        uint32_t const fPfBase = ( EffWrk.fReserved ? X86_TRAP_PF_P | X86_TRAP_PF_RSVD
+                                                  : EffWrk.fPresent ? X86_TRAP_PF_P : 0)
+                                               | (iRing == 3 ? X86_TRAP_PF_US : 0);
+                        for (iAccessor = 0; iAccessor < RT_ELEMENTS(g_aAccessors); iAccessor++)
+                        {
+                            pThis->pszAccessor = g_aAccessors[iAccessor].pszName;
+
+                            pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, 0);
+                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF,
+                                                                fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask));
+                            CHECK_AD_BITS(0);
+                            bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+
+                            pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A | X86_PTE_D, 0);
+                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF,
+                                                                fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask));
+                            CHECK_AD_BITS(0);
+                            bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+                        }
+                    }
+                    else
+                    {
+                        uint32_t const fPfBase = X86_TRAP_PF_P | (iRing == 3 ? X86_TRAP_PF_US : 0);
+                        for (iAccessor = 0; iAccessor < RT_ELEMENTS(g_aAccessors); iAccessor++)
+                        {
+                            pThis->pszAccessor = g_aAccessors[iAccessor].pszName;
+                            if (   (   (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_ID)
+                                    && EffWrk.fNoExecute)
+                                || (   (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_RW)
+                                    && !EffWrk.fWriteable
+                                    && (fWp || iRing == 3)) )
+                            {
+                                uint32_t const fErrCd = fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask);
+
+                                pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A | X86_PTE_D, 0);
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                CHECK_AD_BITS(0);
+                                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+
+                                pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, X86_PTE_A | X86_PTE_D);
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                CHECK_AD_BITS(X86_PTE_A | X86_PTE_D);
+                                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+
+                                pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A, X86_PTE_D);
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                CHECK_AD_BITS(X86_PTE_D);
+                                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+
+                                pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_D, X86_PTE_A);
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                CHECK_AD_BITS(X86_PTE_A);
+                                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+                            }
+                            else
+                            {
+                                uint32_t const fExpectedAD = (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_RW)
+                                                           ? X86_PTE_A | X86_PTE_D : X86_PTE_A;
+
+                                pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A | X86_PTE_D, 0);
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                CHECK_AD_BITS(fExpectedAD);
+                                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+
+                                pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, X86_PTE_A | X86_PTE_D);
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                CHECK_AD_BITS(X86_PTE_A | X86_PTE_D);
+                                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+
+                                pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A, X86_PTE_D);
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                CHECK_AD_BITS(fExpectedAD | X86_PTE_D);
+                                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+
+                                pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_D, X86_PTE_A);
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                CHECK_AD_BITS(fExpectedAD | X86_PTE_A);
+                                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+                            }
+                        }
+                    }
+                }
+            }
+
+            /*
+             * Again, but using invalidate page.
+             */
+            if (pThis->fUseInvlPg)
+            {
+                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+
+                for (iStore = 0; iStore < RT_ELEMENTS(g_aStoreMethods); iStore++)
+                {
+                    pThis->pszStore = g_aStoreMethods[iStore].pszName;
+
+                    for (iRing = 0; iRing < 4; iRing++)
+                    {
+                        PBS3REGCTX const pCtx = &aCtxts[iRing];
+
+                        if (   EffWrk.fReserved
+                            || !EffWrk.fPresent
+                            || (!EffWrk.fUser && iRing == 3))
+                        {
+                            uint32_t const fPfBase = ( EffWrk.fReserved ? X86_TRAP_PF_P | X86_TRAP_PF_RSVD
+                                                      : EffWrk.fPresent ? X86_TRAP_PF_P : 0)
+                                                   | (iRing == 3 ? X86_TRAP_PF_US : 0);
+                            for (iAccessor = 0; iAccessor < RT_ELEMENTS(g_aAccessors); iAccessor++)
+                            {
+                                pThis->pszAccessor = g_aAccessors[iAccessor].pszName;
+
+                                pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, 0);
+                                ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF,
+                                                                    fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask));
+                                CHECK_AD_BITS(0);
+
+                                pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A | X86_PTE_D, 0);
+                                ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF,
+                                                                    fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask));
+                                CHECK_AD_BITS(0);
+                            }
+                        }
+                        else
+                        {
+                            uint32_t const fPfBase = X86_TRAP_PF_P | (iRing == 3 ? X86_TRAP_PF_US : 0);
+                            for (iAccessor = 0; iAccessor < RT_ELEMENTS(g_aAccessors); iAccessor++)
+                            {
+                                pThis->pszAccessor = g_aAccessors[iAccessor].pszName;
+                                if (   (   (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_ID)
+                                        && EffWrk.fNoExecute)
+                                    || (   (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_RW)
+                                        && !EffWrk.fWriteable
+                                        && (fWp || iRing == 3)) )
+                                {
+                                    uint32_t const fErrCd = fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask);
+
+                                    pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A | X86_PTE_D, 0);
+                                    ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                    g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                    CHECK_AD_BITS(0);
+
+                                    pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, X86_PTE_A | X86_PTE_D);
+                                    ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                    g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                    CHECK_AD_BITS(X86_PTE_A | X86_PTE_D);
+
+                                    pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A, X86_PTE_D);
+                                    ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                    g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                    CHECK_AD_BITS(X86_PTE_D);
+
+                                    pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_D, X86_PTE_A);
+                                    ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                    g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                    CHECK_AD_BITS(X86_PTE_A);
+                                }
+                                else
+                                {
+                                    uint32_t const fExpectedAD = (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_RW)
+                                                               ? X86_PTE_A | X86_PTE_D : X86_PTE_A;
+
+                                    pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A | X86_PTE_D, 0);
+                                    ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                    g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                    CHECK_AD_BITS(fExpectedAD);
+
+                                    pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, X86_PTE_A | X86_PTE_D);
+                                    ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                    g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                    CHECK_AD_BITS(X86_PTE_A | X86_PTE_D);
+
+                                    pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A, X86_PTE_D);
+                                    ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                    g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                    CHECK_AD_BITS(fExpectedAD | X86_PTE_D);
+
+                                    pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_D, X86_PTE_A);
+                                    ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                    g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                    CHECK_AD_BITS(fExpectedAD | X86_PTE_A);
+                                }
+                            }
+                        }
+                    }
+                }
+
+                bs3CpuBasic2Pf_RestoreFromBackups(pThis);
+            }
+        }
+    }
+
+
+    /*
+     * Do all 4 paging levels.  We start out with full access to the page and
+     * restrict it in various ways.
+     *
+     * (On the final level we only mess with the 2nd page for now.)
+     */
+    cPdPtrTests = 1;
+    cPml4Tests  = 1;
+    if (pThis->uTestAddr.u >= UINT64_C(0x8000000000))
+    {
+        cPml4Tests   = 2;
+        cPdPtrTests  = 2;
+    }
+    else if (pThis->PgInfo.cEntries == 3)
+        cPdPtrTests  = 2;
+
+#if 0
+    /* Loop 1: Accessor flags. */
+    for (iOuter = 0; iOuter < 2; iOuter++)
+    {
+        uint32_t const fAccessor = (iOuter == 0 ? BS3CB2PFACC_F_DIRECT : 0) | BS3CB2PFACC_F_PAGE_LEVEL;
+
+        /* Loop 2: Paging store method. */
+        for (iStore = 0; iStore < RT_ELEMENTS(g_aStoreMethods); iStore++)
+        {
+            unsigned iPml4Test;
+            int8_t   cReserved   = 0;
+            int8_t   cNotPresent = 0;
+            int8_t   cNotWrite   = 0;
+            int8_t   cNotUser    = 0;
+            int8_t   cExecute    = 0;
+
+            /* Loop 3: Page map level 4 */
+            for (iPml4Test = 0; iPml4Test < cPml4Tests; iPml4Test++)
+            {
+                unsigned iPdPtrTest;
+
+                /* Loop 4: Page directory pointer table. */
+                for (iPdPtrTest = 0; iPdPtrTest < cPdPtrTests; iPdPtrTest++)
+                {
+                    unsigned iPdTest;
+
+                    /* Loop 5: Page directory. */
+                    for (iPdTest = 0; iPdTest < 2; iPdTest++)
+                    {
+                        unsigned iPtTest;
+
+                        /* Loop 6: Page table. */
+                        for (iPtTest = 0; iPtTest < 2; iPtTest++)
+                        {
+                            /* Loop 7: Accessor ring. */
+                            for (iRing = 0; iRing < 4; iRing++)
+                            {
+                                PBS3REGCTX const pCtx = &aCtxts[iRing];
+
+                                if (   EffWrk.fReserved
+                                    || !EffWrk.fPresent
+                                    || (!EffWrk.fUser && iRing == 3))
+                                {
+                                    uint32_t const fPfBase = ( EffWrk.fReserved ? X86_TRAP_PF_P | X86_TRAP_PF_RSVD
+                                                              : EffWrk.fPresent ? X86_TRAP_PF_P : 0)
+                                                           | (iRing == 3 ? X86_TRAP_PF_US : 0);
+                                    for (iAccessor = 0; iAccessor < RT_ELEMENTS(g_aAccessors); iAccessor++)
+                                    {
+                                        pThis->pszAccessor = g_aAccessors[iAccessor].pszName;
+
+                                        pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, 0);
+                                        ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                        g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF,
+                                                                            fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask));
+                                        CHECK_AD_BITS(0);
+
+                                        pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A | X86_PTE_D, 0);
+                                        ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                        g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF,
+                                                                            fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask));
+                                        CHECK_AD_BITS(0);
+                                    }
+                                }
+                                else
+                                {
+                                    uint32_t const fPfBase = X86_TRAP_PF_P | (iRing == 3 ? X86_TRAP_PF_US : 0);
+                                    for (iAccessor = 0; iAccessor < RT_ELEMENTS(g_aAccessors); iAccessor++)
+                                    {
+                                        pThis->pszAccessor = g_aAccessors[iAccessor].pszName;
+                                        if (   (   (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_ID)
+                                                && EffWrk.fNoExecute)
+                                            || (   (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_RW)
+                                                && !EffWrk.fWriteable
+                                                && (fWp || iRing == 3)) )
+                                        {
+                                            uint32_t const fErrCd = fPfBase | (g_aAccessors[iAccessor].fAccess & fPfIdMask);
+
+                                            pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A | X86_PTE_D, 0);
+                                            ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                            CHECK_AD_BITS(0);
+
+                                            pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, X86_PTE_A | X86_PTE_D);
+                                            ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                            CHECK_AD_BITS(X86_PTE_A | X86_PTE_D);
+
+                                            pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A, X86_PTE_D);
+                                            ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                            CHECK_AD_BITS(X86_PTE_D);
+
+                                            pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_D, X86_PTE_A);
+                                            ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF, fErrCd);
+                                            CHECK_AD_BITS(X86_PTE_A);
+                                        }
+                                        else
+                                        {
+                                            uint32_t const fExpectedAD = (g_aAccessors[iAccessor].fAccess & X86_TRAP_PF_RW)
+                                                                       ? X86_PTE_A | X86_PTE_D : X86_PTE_A;
+
+                                            pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A | X86_PTE_D, 0);
+                                            ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                            CHECK_AD_BITS(fExpectedAD);
+
+                                            pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, X86_PTE_A | X86_PTE_D);
+                                            ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                            CHECK_AD_BITS(X86_PTE_A | X86_PTE_D);
+
+                                            pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_A, X86_PTE_D);
+                                            ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                            CHECK_AD_BITS(fExpectedAD | X86_PTE_D);
+
+                                            pPteWrk->pfnModify(pThis, iStore, pPteWrk, X86_PTE_D, X86_PTE_A);
+                                            ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                                            g_aAccessors[iAccessor].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_UD, UINT8_MAX);
+                                            CHECK_AD_BITS(fExpectedAD | X86_PTE_A);
+                                        }
+                                    }
+                                }
+                            }
+
+                        }
+                    }
+                }
+            }
+
+        }
+    }
+#endif
+
+    /*
+     * Check reserved bits on each paging level.
+     */
+
+    /* Loop 1: Accessor flags (only direct for now). */
+    for (iOuter = 0; iOuter < 1; iOuter++)
+    {
+        uint32_t const fAccessor = BS3CB2PFACC_F_DIRECT;
+
+        /* Loop 2: Paging store method. */
+        for (iStore = 0; iStore < RT_ELEMENTS(g_aStoreMethods); iStore++)
+        {
+            /* Loop 3: Accessor ring. */
+            for (iRing = 0; iRing < 4; iRing++)
+            {
+                /* Loop 4: Which level we mess up. */
+                for (iLevel = 0; iLevel < pThis->PgInfo.cEntries; iLevel++)
+                {
+#if 0
+                    const BS3CPUBASIC2PFMODPT *pPteWrk = &g_aPteWorkers[iPteWrk];
+                    if (pThis->PgInfo.)
+                    {
+                    }
+#endif
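+                    /*
+                     * Hedged sketch of what a reserved-bit probe here could look like
+                     * once this is implemented (illustration only; assumes the
+                     * pfnModify/accessor machinery from the A/D-bit tests above is
+                     * reachable from this scope):
+                     *
+                     *   // Set the lowest reserved physical-address bit in the entry
+                     *   // for this level, flush the TLB entry, and expect #PF with
+                     *   // the RSVD flag set in the error code:
+                     *   pPteWrk->pfnModify(pThis, iStore, pPteWrk, 0, RT_BIT_64(pThis->cBitsPhysWidth));
+                     *   ASMInvalidatePage(pThis->uTestAddr.u + X86_PAGE_SIZE);
+                     *   g_aAccessors[0].pfnAccessor(pThis, pCtx, fAccessor, X86_XCPT_PF,
+                     *                               fPfBase | X86_TRAP_PF_RSVD);
+                     *   pPteWrk->pfnModify(pThis, iStore, pPteWrk, RT_BIT_64(pThis->cBitsPhysWidth), 0);
+                     */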
+
+
+                }
+            }
+        }
+    }
+
+
+
+    return 0;
+}
+
+
+BS3_DECL_CALLBACK(uint8_t)  bs3CpuBasic2_RaiseXcpt0e_c32(uint8_t bMode)
+{
+    void               *pvTestUnaligned;
+    uint32_t            cbTestUnaligned = _8M;
+    uint8_t             bRet = 1;
+    int                 rc;
+    BS3CPUBASIC2PFSTATE State;
+
+    /*
+     * Initialize the state data.
+     */
+    Bs3MemZero(&State, sizeof(State));
+    State.bMode = bMode;
+    switch (bMode & BS3_MODE_CODE_MASK)
+    {
+        case BS3_MODE_CODE_16:  State.cbAccess = sizeof(uint16_t); break;
+        case BS3_MODE_CODE_V86: State.cbAccess = sizeof(uint16_t); break;
+        case BS3_MODE_CODE_32:  State.cbAccess = sizeof(uint32_t); break;
+        case BS3_MODE_CODE_64:  State.cbAccess = sizeof(uint64_t); break;
+    }
+    State.pCmnMode = &g_aCmnModes[0];
+    while (State.pCmnMode->bMode != (bMode & BS3_MODE_CODE_MASK))
+        State.pCmnMode++;
+    State.fUseInvlPg = (g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80486;
+
+    /* Figure physical addressing width. */
+    State.cBitsPhysWidth = 32;
+    if (   (g_uBs3CpuDetected & BS3CPU_F_CPUID)
+        && (ASMCpuId_EDX(1) & (X86_CPUID_FEATURE_EDX_PSE36 | X86_CPUID_FEATURE_EDX_PAE)) )
+        State.cBitsPhysWidth = 36;
+
+    if (   (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
+        && ASMCpuId_EAX(0x80000000) >= 0x80000008)
+    {
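+        /* CPUID leaf 0x80000008: EAX[7:0] gives the physical address width in bits. */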
+        uint8_t cBits = (uint8_t)ASMCpuId_EAX(0x80000008);
+        if (cBits >= 32 && cBits <= 52)
+            State.cBitsPhysWidth = cBits;
+        else
+            Bs3TestPrintf("CPUID 0x80000008: Physical bitcount out of range: %u\n", cBits);
+    }
+    //Bs3TestPrintf("Physical bitcount: %u\n", State.cBitsPhysWidth);
+
+    /*
+     * Allocate some memory we can play around with, then carve a size-aligned
+     * chunk out of it so we may also get to play with 2/4MB pages.
+     */
+    cbTestUnaligned = _8M * 2;
+    while ((pvTestUnaligned = Bs3MemAlloc(BS3MEMKIND_FLAT32, cbTestUnaligned)) == NULL)
+    {
+        cbTestUnaligned >>= 1;
+        if (cbTestUnaligned <= _16K)
+        {
+            Bs3TestFailed("Failed to allocate memory to play around with\n");
+            return 1;
+        }
+    }
+
+    /* Align: if the block is not aligned to its own size, halve the usable size
+       and round the start up to that alignment (e.g. 16M at 0x01234000 becomes
+       8M at 0x01800000). */
+    if ((uintptr_t)pvTestUnaligned & (cbTestUnaligned - 1))
+    {
+        State.cbTest    = cbTestUnaligned >> 1;
+        State.pbOrgTest = (uint8_t *)(((uintptr_t)pvTestUnaligned + State.cbTest - 1) & ~(State.cbTest - 1));
+    }
+    else
+    {
+        State.pbOrgTest = pvTestUnaligned;
+        State.cbTest    = cbTestUnaligned;
+    }
+    State.cTestPages = State.cbTest >> X86_PAGE_SHIFT;
+
+    /*
+     * Alias this memory far away from where our code and data live.
+     */
+    if (bMode & BS3_MODE_CODE_64)
+        State.uTestAddr.u = UINT64_C(0x0000648680000000);
+    else
+        State.uTestAddr.u = UINT32_C(0x80000000);
+    rc = Bs3PagingAlias(State.uTestAddr.u, (uintptr_t)State.pbOrgTest, State.cbTest, X86_PTE_P | X86_PTE_RW | X86_PTE_US);
+    if (RT_SUCCESS(rc))
+    {
+        rc = Bs3PagingQueryAddressInfo(State.uTestAddr.u, &State.PgInfo);
+        if (RT_SUCCESS(rc))
+        {
+if (bMode & BS3_MODE_CODE_64) ASMHalt();
+            /* Set values that derive from the test memory size and paging info. */
+            if (State.PgInfo.cEntries == 2)
+            {
+                State.cTestPdes    = (State.cTestPages + X86_PG_ENTRIES - 1) / X86_PG_ENTRIES;
+                State.cTest1stPtes = RT_MIN(State.cTestPages, X86_PG_ENTRIES);
+                State.cbPdeBackup = State.cTestPdes    * (X86_PAGE_SIZE / X86_PG_ENTRIES);
+                State.cbPteBackup = State.cTest1stPtes * (X86_PAGE_SIZE / X86_PG_ENTRIES);
+            }
+            else
+            {
+                State.cTestPdes    = (State.cTestPages + X86_PG_PAE_ENTRIES - 1) / X86_PG_PAE_ENTRIES;
+                State.cTest1stPtes = RT_MIN(State.cTestPages, X86_PG_PAE_ENTRIES);
+                State.cbPdeBackup  = State.cTestPdes    * (X86_PAGE_SIZE / X86_PG_PAE_ENTRIES);
+                State.cbPteBackup  = State.cTest1stPtes * (X86_PAGE_SIZE / X86_PG_PAE_ENTRIES);
+            }
+#ifdef BS3CPUBASIC2PF_FASTER
+            State.cbPteBackup = State.PgInfo.cbEntry * 4;
+#endif
+            if (State.cTestPdes <= RT_ELEMENTS(State.au64PdeBackup))
+            {
+                uint32_t cr0 = ASMGetCR0();
+
+                /* Back up the structures. */
+                Bs3MemCpy(&State.PteBackup, State.PgInfo.u.Legacy.pPte, State.cbPteBackup);
+                Bs3MemCpy(State.au64PdeBackup, State.PgInfo.u.Legacy.pPde, State.cbPdeBackup);
+                if (State.PgInfo.cEntries > 2)
+                    State.u64PdpteBackup = State.PgInfo.u.Pae.pPdpe->u;
+                if (State.PgInfo.cEntries > 3)
+                    State.u64Pml4eBackup = State.PgInfo.u.Pae.pPml4e->u;
+
+                /*
+                 * Set up a 16-bit selector for accessing the alias.
+                 */
+                Bs3SelSetup16BitData(&Bs3GdteSpare00, State.uTestAddr.u32);
+                State.uSel16TestData = BS3_SEL_SPARE_00 | 3;
+
+                /*
+                 * Do the testing.
+                 */
+                ASMSetCR0(ASMGetCR0() & ~X86_CR0_WP);
+                bRet = bs3CpuBasic2_RaiseXcpt0eWorker(&State, false /*fWp*/, false /*fNxe*/);
+                if (bRet == 0 && (g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80486)
+                {
+                    ASMSetCR0(ASMGetCR0() | X86_CR0_WP);
+                    bRet = bs3CpuBasic2_RaiseXcpt0eWorker(&State, true /*fWp*/, false /*fNxe*/);
+                }
+
+                /* Do again with NX enabled. */
+                if (bRet == 0 && (g_uBs3CpuDetected & BS3CPU_F_NX))
+                {
+                    ASMWrMsr(MSR_K6_EFER, ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_NXE);
+                    ASMSetCR0(ASMGetCR0() & ~X86_CR0_WP);
+                    bRet = bs3CpuBasic2_RaiseXcpt0eWorker(&State, false /*fWp*/, State.PgInfo.cbEntry == 8 /*fNxe*/);
+                    ASMSetCR0(ASMGetCR0() | X86_CR0_WP);
+                    bRet = bs3CpuBasic2_RaiseXcpt0eWorker(&State, true /*fWp*/, State.PgInfo.cbEntry == 8 /*fNxe*/);
+                    ASMWrMsr(MSR_K6_EFER, ASMRdMsr(MSR_K6_EFER) & ~MSR_K6_EFER_NXE);
+                }
+                bs3CpuBasic2Pf_RestoreFromBackups(&State);
+                ASMSetCR0((ASMGetCR0() & ~X86_CR0_WP) | (cr0 & X86_CR0_WP));
+            }
+            else
+                Bs3TestFailedF("cTestPdes=%u!\n", State.cTestPdes);
+        }
+        else
+            Bs3TestFailedF("Bs3PagingQueryAddressInfo failed: %d\n", rc);
+        Bs3PagingUnalias(State.uTestAddr.u, State.cbTest);
+    }
+    else
+        Bs3TestFailedF("Bs3PagingAlias failed! rc=%d\n", rc);
+    Bs3MemFree(pvTestUnaligned, cbTestUnaligned);
+    return bRet;
+}
+
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-template.mac b/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-template.mac
index a859bdb..3be5fa5 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-template.mac
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-template.mac
@@ -316,6 +316,88 @@ AssertCompile(.again - BS3_CMN_NM(bs3CpuBasic2_lgdt_opsize_ss_bx__sgdt_es_di__lg
 BS3_PROC_END_CMN   bs3CpuBasic2_lgdt_opsize_ss_bx__sgdt_es_di__lgdt_es_si__ud2
  %endif
 
+;
+; #PF
+;
+
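+; Each probe routine executes a single accessor instruction of known length
+; (pinned by the AssertCompile), followed by an UD2 in an endless loop.  The
+; harness can thus distinguish a completed access (#UD at .again) from a
+; faulting one (#PF with CS:xIP at the accessor itself).
+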
+; For testing read access.
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_mov_ax_ds_bx__ud2, BS3_PBC_NEAR
+        mov     xAX, [xBX]
+.again: ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 2 + (TMPL_BITS == 64))
+BS3_PROC_END_CMN   bs3CpuBasic2_mov_ax_ds_bx__ud2
+
+
+; For testing write access.
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_mov_ds_bx_ax__ud2, BS3_PBC_NEAR
+        mov     [xBX], xAX
+.again: ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 2 + (TMPL_BITS == 64))
+BS3_PROC_END_CMN   bs3CpuBasic2_mov_ds_bx_ax__ud2
+
+
+; For testing read+write access.
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_xchg_ds_bx_ax__ud2, BS3_PBC_NEAR
+        xchg    [xBX], xAX
+.again: ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 2 + (TMPL_BITS == 64))
+BS3_PROC_END_CMN   bs3CpuBasic2_xchg_ds_bx_ax__ud2
+
+
+; Another read+write access test.
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_cmpxchg_ds_bx_cx__ud2, BS3_PBC_NEAR
+        cmpxchg  [xBX], xCX
+.again: ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 3 + (TMPL_BITS == 64))
+BS3_PROC_END_CMN   bs3CpuBasic2_cmpxchg_ds_bx_cx__ud2
+
+
+; For testing read access from an aborted instruction: DIV by zero
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_div_ds_bx__ud2, BS3_PBC_NEAR
+        div     xPRE [xBX]
+.again: ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 2 + (TMPL_BITS == 64))
+BS3_PROC_END_CMN   bs3CpuBasic2_div_ds_bx__ud2
+
+
+; Two memory operands: push [mem]
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_push_ds_bx__ud2, BS3_PBC_NEAR
+        push    xPRE [xBX]
+.again: ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 2)
+BS3_PROC_END_CMN   bs3CpuBasic2_push_ds_bx__ud2
+
+; Two memory operands: pop  [mem]
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_push_ax__pop_ds_bx__ud2, BS3_PBC_NEAR
+        push    xAX
+        pop     xPRE [xBX]
+.again: ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 3)
+BS3_PROC_END_CMN   bs3CpuBasic2_push_ax__pop_ds_bx__ud2
+
+; Two memory operands: call [mem]
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_call_ds_bx__ud2, BS3_PBC_NEAR
+        call    xPRE [xBX]
+.again: ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 2)
+BS3_PROC_END_CMN   bs3CpuBasic2_call_ds_bx__ud2
+
+; For testing #GP vs #PF write
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_insb__ud2, BS3_PBC_NEAR
+        insb
+.again: ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 1)
+BS3_PROC_END_CMN   bs3CpuBasic2_insb__ud2
+
 
 %endif ; BS3_INSTANTIATING_CMN
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2.c b/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2.c
index 2733772..0540ae8 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2.c
@@ -44,6 +44,8 @@ FNBS3TESTDOMODE             bs3CpuBasic2_lidt_f16;
 FNBS3TESTDOMODE             bs3CpuBasic2_lgdt_f16;
 FNBS3TESTDOMODE             bs3CpuBasic2_iret_f16;
 
+BS3_DECL_CALLBACK(void)     bs3CpuBasic2_Do32BitTests_pe32();
+
 
 /*********************************************************************************************************************************
 *   Global Variables                                                                                                             *
@@ -51,18 +53,16 @@ FNBS3TESTDOMODE             bs3CpuBasic2_iret_f16;
 static const BS3TESTMODEENTRY g_aModeTest[] =
 {
     BS3TESTMODEENTRY_MODE("tss / gate / esp", bs3CpuBasic2_TssGateEsp),
-    BS3TESTMODEENTRY_MODE("raise xcpt #1", bs3CpuBasic2_RaiseXcpt1),
+    //BS3TESTMODEENTRY_MODE("raise xcpt #1", bs3CpuBasic2_RaiseXcpt1), // !long mode is broken!
 };
 
 static const BS3TESTMODEBYONEENTRY g_aModeByOneTests[] =
 {
     { "iret", bs3CpuBasic2_iret_f16, 0 },
-#if 0
     { "sidt", bs3CpuBasic2_sidt_f16, 0 },
     { "sgdt", bs3CpuBasic2_sgdt_f16, 0 },
     { "lidt", bs3CpuBasic2_lidt_f16, 0 },
     { "lgdt", bs3CpuBasic2_lgdt_f16, 0 },
-#endif
 };
 
 
@@ -72,11 +72,21 @@ BS3_DECL(void) Main_rm()
     Bs3TestInit("bs3-cpu-basic-2");
     Bs3TestPrintf("g_uBs3CpuDetected=%#x\n", g_uBs3CpuDetected);
 
+    /*
+     * Do tests driven from 16-bit code.
+     */
     NOREF(g_aModeTest); NOREF(g_aModeByOneTests); /* for when commenting out bits */
-    //Bs3TestDoModes_rm(g_aModeTest, RT_ELEMENTS(g_aModeTest));
+#if 0
+    Bs3TestDoModes_rm(g_aModeTest, RT_ELEMENTS(g_aModeTest));
     Bs3TestDoModesByOne_rm(g_aModeByOneTests, RT_ELEMENTS(g_aModeByOneTests), 0);
+#endif
+
+    /*
+     * Do tests driven from 32-bit code (bs3-cpu-basic-2-32.c32 via assembly).
+     */
+    Bs3SwitchTo32BitAndCallC_rm(bs3CpuBasic2_Do32BitTests_pe32, 0);
 
     Bs3TestTerm();
-for (;;) { ASMHalt(); }
+//for (;;) { ASMHalt(); }
 }
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm b/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
index d9ea987..a641808 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
@@ -35,4 +35,5 @@
 ; Instantiate code templates.
 ;
 BS3_INSTANTIATE_TEMPLATE_ESSENTIALS      "bs3-cpu-decoding-1-template.mac"
+BS3_INSTANTIATE_COMMON_TEMPLATE          "bs3-cpu-decoding-1-template.mac"
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-template.mac b/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-template.mac
index f29a5e4..1173e8f 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-template.mac
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-template.mac
@@ -4,7 +4,7 @@
 ;
 
 ;
-; Copyright (C) 2007-2016 Oracle Corporation
+; Copyright (C) 2007-2017 Oracle Corporation
 ;
 ; This file is part of VirtualBox Open Source Edition (OSE), as
 ; available from http://www.virtualbox.org. This file is free software;
@@ -37,5 +37,78 @@
 TMPL_BEGIN_TEXT
 
 
+%ifdef BS3_INSTANTIATING_CMN
+
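+; Helpers for the C code: load and store an unaligned 128-bit value via
+; XMM0/XMM1.  The single parameter is a far pointer in 16-bit code (hence
+; the LES), otherwise a flat pointer taken from the first stack argument.
+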
+BS3_PROC_BEGIN_CMN bs3CpuDecoding1_LoadXmm0, BS3_PBC_NEAR
+        BS3_CALL_CONV_PROLOG 1
+        push    xBP
+        mov     xBP, xSP
+
+%if TMPL_BITS == 16
+        push    es
+        push    bx
+        les     bx, [xBP + xCB + cbCurRetAddr]
+        movupd  xmm0, [es:bx]
+        pop     bx
+        pop     es
+%else
+        mov     xAX, [xBP + xCB + cbCurRetAddr]
+        movupd  xmm0, [xAX]
+%endif
+
+        leave
+        BS3_CALL_CONV_EPILOG 1
+        BS3_HYBRID_RET
+BS3_PROC_END_CMN   bs3CpuDecoding1_LoadXmm0
+
+
+BS3_PROC_BEGIN_CMN bs3CpuDecoding1_LoadXmm1, BS3_PBC_NEAR
+        BS3_CALL_CONV_PROLOG 1
+        push    xBP
+        mov     xBP, xSP
+
+%if TMPL_BITS == 16
+        push    es
+        push    bx
+        les     bx, [xBP + xCB + cbCurRetAddr]
+        movupd  xmm1, [es:bx]
+        pop     bx
+        pop     es
+%else
+        mov     xAX, [xBP + xCB + cbCurRetAddr]
+        movupd  xmm1, [xAX]
+%endif
+
+        leave
+        BS3_CALL_CONV_EPILOG 1
+        BS3_HYBRID_RET
+BS3_PROC_END_CMN   bs3CpuDecoding1_LoadXmm1
+
+
+BS3_PROC_BEGIN_CMN bs3CpuDecoding1_SaveXmm0, BS3_PBC_NEAR
+        BS3_CALL_CONV_PROLOG 1
+        push    xBP
+        mov     xBP, xSP
+
+%if TMPL_BITS == 16
+        push    es
+        push    bx
+        les     bx, [xBP + xCB + cbCurRetAddr]
+        movupd  [es:bx], xmm0
+        pop     bx
+        pop     es
+%else
+        mov     xAX, [xBP + xCB + cbCurRetAddr]
+        movupd  [xAX], xmm0
+%endif
+
+        leave
+        BS3_CALL_CONV_EPILOG 1
+        BS3_HYBRID_RET
+BS3_PROC_END_CMN   bs3CpuDecoding1_SaveXmm0
+
+
+%endif
+
 %include "bs3kit-template-footer.mac"   ; reset environment
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1.c32 b/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1.c32
index 10e029a..8a794ef 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1.c32
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1.c32
@@ -31,15 +31,25 @@
 #include <bs3kit.h>
 #include <iprt/asm-amd64-x86.h>
 
+
+/* bs3-cpu-decoding-1-template.mac: */
+BS3_DECL_NEAR(void) BS3_CMN_NM(bs3CpuDecoding1_LoadXmm0)(PCRTUINT128U);
+BS3_DECL_NEAR(void) BS3_CMN_NM(bs3CpuDecoding1_LoadXmm1)(PCRTUINT128U);
+BS3_DECL_NEAR(void) BS3_CMN_NM(bs3CpuDecoding1_SaveXmm0)(PRTUINT128U);
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
 /**
  * Simple test.
  */
 typedef struct CPUDECODE1TST
 {
-    uint8_t fFlags;
-    uint8_t cbUd;
-    uint8_t cbOpcodes;
-    uint8_t abOpcodes[21];
+    uint16_t fFlags;        /**< F_XXX flags. */
+    uint8_t  cbOpcodes;     /**< Number of valid bytes in abOpcodes. */
+    uint8_t  abOpcodes[20]; /**< The instruction bytes (prefixes, opcode, operands). */
+    uint8_t  cbUd;          /**< Currently unused; the F_UD flag has taken over. */
 } CPUDECODE1TST;
 typedef CPUDECODE1TST BS3_FAR *PCPUDECODE1TST;
 
@@ -52,79 +62,183 @@ typedef CPUDECODE1TST BS3_FAR *PCPUDECODE1TST;
 #define P_OZ  X86_OP_PRF_SIZE_OP
 #define P_AZ  X86_OP_PRF_SIZE_ADDR
 #define P_LK  X86_OP_PRF_LOCK
-#define P_RZ  X86_OP_PRF_REPZ
 #define P_RN  X86_OP_PRF_REPNZ
+#define P_RZ  X86_OP_PRF_REPZ
+
+#define RM_EAX_EAX              ((3 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xAX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xAX))
+#define RM_ECX_EAX              ((3 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xCX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xAX))
+#define RM_EDX_EAX              ((3 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xAX))
+#define RM_EBX_EAX              ((3 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xAX))
+#define RM_ESP_EAX              ((3 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSP <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xAX))
+#define RM_EBP_EAX              ((3 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBP <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xAX))
+#define RM_ESI_EAX              ((3 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSI <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xAX))
+#define RM_EDI_EAX              ((3 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDI <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xAX))
+
+#define RM_EAX_DEREF_EBX        ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xAX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_ECX_DEREF_EBX        ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xCX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EDX_DEREF_EBX        ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EBX_DEREF_EBX        ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_ESP_DEREF_EBX        ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSP <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EBP_DEREF_EBX        ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBP <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_ESI_DEREF_EBX        ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSI <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EDI_DEREF_EBX        ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDI <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
 
-#define RM_EAX_EAX          ((3 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xAX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xAX))
-#define RM_EAX_DEREF_EBX    ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xAX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EAX_DEREF_EBX_DISP8  ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xAX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_ECX_DEREF_EBX_DISP8  ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xCX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EDX_DEREF_EBX_DISP8  ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EBX_DEREF_EBX_DISP8  ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_ESP_DEREF_EBX_DISP8  ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSP <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EBP_DEREF_EBX_DISP8  ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBP <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_ESI_DEREF_EBX_DISP8  ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSI <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EDI_DEREF_EBX_DISP8  ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDI <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
 
-#define F_486   0
-#define F_SSE2  1
-#define F_SSE3  2
-#define F_SSE42 4
-#define F_MOVBE 80
+#define RM_EAX_DEREF_EBX_DISP32 ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xAX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_ECX_DEREF_EBX_DISP32 ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xCX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EDX_DEREF_EBX_DISP32 ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EBX_DEREF_EBX_DISP32 ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBX <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_ESP_DEREF_EBX_DISP32 ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSP <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EBP_DEREF_EBX_DISP32 ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBP <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_ESI_DEREF_EBX_DISP32 ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSI <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
+#define RM_EDI_DEREF_EBX_DISP32 ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDI <<  X86_MODRM_REG_SHIFT) | (X86_GREG_xBX))
 
+#define RM_EAX_SIB              ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xAX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_ECX_SIB              ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xCX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EDX_SIB              ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EBX_SIB              ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_ESP_SIB              ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSP <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EBP_SIB              ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBP <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_ESI_SIB              ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSI <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EDI_SIB              ((0 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDI <<  X86_MODRM_REG_SHIFT) | 4)
+
+#define RM_EAX_SIB_DISP8        ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xAX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_ECX_SIB_DISP8        ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xCX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EDX_SIB_DISP8        ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EBX_SIB_DISP8        ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_ESP_SIB_DISP8        ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSP <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EBP_SIB_DISP8        ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBP <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_ESI_SIB_DISP8        ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSI <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EDI_SIB_DISP8        ((1 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDI <<  X86_MODRM_REG_SHIFT) | 4)
+
+#define RM_EAX_SIB_DISP32       ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xAX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_ECX_SIB_DISP32       ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xCX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EDX_SIB_DISP32       ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EBX_SIB_DISP32       ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBX <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_ESP_SIB_DISP32       ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSP <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EBP_SIB_DISP32       ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xBP <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_ESI_SIB_DISP32       ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xSI <<  X86_MODRM_REG_SHIFT) | 4)
+#define RM_EDI_SIB_DISP32       ((2 << X86_MODRM_MOD_SHIFT) | (X86_GREG_xDI <<  X86_MODRM_REG_SHIFT) | 4)
+
+#define RM_XMM0_XMM1            ((3 << X86_MODRM_MOD_SHIFT) | (0            <<  X86_MODRM_REG_SHIFT) | 1)
+
+#define SIB_EBX_X1_NONE         ((0 << X86_SIB_SCALE_SHIFT) | (4            <<  X86_SIB_INDEX_SHIFT) | (X86_GREG_xBX))
+#define SIB_EBX_X2_NONE         ((1 << X86_SIB_SCALE_SHIFT) | (4            <<  X86_SIB_INDEX_SHIFT) | (X86_GREG_xBX))
+#define SIB_EBX_X4_NONE         ((2 << X86_SIB_SCALE_SHIFT) | (4            <<  X86_SIB_INDEX_SHIFT) | (X86_GREG_xBX))
+#define SIB_EBX_X8_NONE         ((3 << X86_SIB_SCALE_SHIFT) | (4            <<  X86_SIB_INDEX_SHIFT) | (X86_GREG_xBX))
+
+#define F_486   UINT16_C(0x0000)    /**< Requires a 486 or later. */
+#define F_SSE2  UINT16_C(0x0001)    /**< Requires SSE2. */
+#define F_SSE3  UINT16_C(0x0002)    /**< Requires SSE3. */
+#define F_SSE42 UINT16_C(0x0004)    /**< Requires SSE4.2. */
+#define F_MOVBE UINT16_C(0x0080)    /**< Requires MOVBE. */
+#define F_CBUD  UINT16_C(0x4000)
+#define F_UD    UINT16_C(0x8000)    /**< The sequence is expected to \#UD. */
+#define F_OK    UINT16_C(0x0000)    /**< The sequence is expected to decode fine. */
+
+
+/**
+ * This is an exploratory testcase.  It tries to figure out exactly how the
+ * different Intel and AMD CPUs implement SSE and similar instructions that
+ * use the size, repz, repnz and lock prefixes in the encoding.
+ */
 CPUDECODE1TST const g_aSimpleTests[] =
 {
     /*
      *  fFlags, cbOpcodes, abOpcodes
      */
-#if 1
+#if 0
     /* Using currently undefined 0x0f 0x7a sequences. */
-    {        0, 3,   3,               { 0x0f, 0x7a, RM_EAX_EAX, } },
-    {        0, 3+1, 3+1,       { P_LK, 0x0f, 0x7a, RM_EAX_EAX, } },
-    {        0, 3+1, 3+1,       { P_RN, 0x0f, 0x7a, RM_EAX_EAX, } },
-    {        0, 3+1, 3+1,       { P_RZ, 0x0f, 0x7a, RM_EAX_EAX, } },
-    {        0, 3+2, 3+2, { P_LK, P_LK, 0x0f, 0x7a, RM_EAX_EAX, } },
+    {           F_UD, 3,               { 0x0f, 0x7a, RM_EAX_EAX, } },
+    {           F_UD, 3+1,       { P_LK, 0x0f, 0x7a, RM_EAX_EAX, } },
+    {           F_UD, 3+1,       { P_RZ, 0x0f, 0x7a, RM_EAX_EAX, } },
+    {           F_UD, 3+1,       { P_RN, 0x0f, 0x7a, RM_EAX_EAX, } },
+    {           F_UD, 3+2, { P_LK, P_LK, 0x0f, 0x7a, RM_EAX_EAX, } },
+    {           F_UD, 4,               { 0x0f, 0x7a, RM_EAX_DEREF_EBX_DISP8, 0 } },
+    {           F_UD, 4+1,       { P_LK, 0x0f, 0x7a, RM_EAX_DEREF_EBX_DISP8, 0 } },
+    {           F_UD, 4+1,       { P_RZ, 0x0f, 0x7a, RM_EAX_DEREF_EBX_DISP8, 0 } },
+    {           F_UD, 4+1,       { P_RN, 0x0f, 0x7a, RM_EAX_DEREF_EBX_DISP8, 0 } },
+    {           F_UD, 4+2, { P_LK, P_LK, 0x0f, 0x7a, RM_EAX_DEREF_EBX_DISP8, 0 } },
+    {           F_UD, 7,               { 0x0f, 0x7a, RM_EAX_DEREF_EBX_DISP32, 0, 0, 0, 0 } },
+    {           F_UD, 7+1,       { P_LK, 0x0f, 0x7a, RM_EAX_DEREF_EBX_DISP32, 0, 0, 0, 0 } },
+    {           F_UD, 7+1,       { P_RZ, 0x0f, 0x7a, RM_EAX_DEREF_EBX_DISP32, 0, 0, 0, 0 } },
+    {           F_UD, 7+1,       { P_RN, 0x0f, 0x7a, RM_EAX_DEREF_EBX_DISP32, 0, 0, 0, 0 } },
+    {           F_UD, 7+2, { P_LK, P_LK, 0x0f, 0x7a, RM_EAX_DEREF_EBX_DISP32, 0, 0, 0, 0 } },
+#endif
+#if 0
+    /* Ditto for currently undefined sequence: 0x0f 0x7b */
+    {           F_UD, 3,               { 0x0f, 0x7b, RM_EAX_EAX, } },
+    {           F_UD, 3+1,       { P_LK, 0x0f, 0x7b, RM_EAX_EAX, } },
+    {           F_UD, 3+1,       { P_RZ, 0x0f, 0x7b, RM_EAX_EAX, } },
+    {           F_UD, 3+1,       { P_RN, 0x0f, 0x7b, RM_EAX_EAX, } },
+    {           F_UD, 3+2, { P_LK, P_LK, 0x0f, 0x7b, RM_EAX_EAX, } },
+#endif
+#if 1
+    /* Ditto for currently undefined sequence: 0x0f 0x24 */
+    {           F_UD, 3,               { 0x0f, 0x24, RM_EAX_EAX, } },
+    {           F_UD, 3+1,       { P_LK, 0x0f, 0x24, RM_EAX_EAX, } },
+    {           F_UD, 3+1,       { P_RZ, 0x0f, 0x24, RM_EAX_EAX, } },
+    {           F_UD, 3+1,       { P_RN, 0x0f, 0x24, RM_EAX_EAX, } },
+    {           F_UD, 3+2, { P_LK, P_LK, 0x0f, 0x24, RM_EAX_EAX, } },
 #endif
 #if 0
     /* The XADD instruction has empty lines for 66, f3 and f2 prefixes.
        AMD doesn't do anything special for XADD Ev,Gv as the intel table would indicate. */
-    {    F_486,   99,  3,             { 0x0f, 0xc1, RM_EAX_EAX, } },
-    {    F_486,   99,  4,       { P_OZ, 0x0f, 0xc1, RM_EAX_EAX, } },
-    {    F_486,   99,  4,       { P_RN, 0x0f, 0xc1, RM_EAX_EAX, } },
-    {    F_486,   99,  5, { P_OZ, P_RN, 0x0f, 0xc1, RM_EAX_EAX, } },
-    {    F_486,   99,  5, { P_RN, P_OZ, 0x0f, 0xc1, RM_EAX_EAX, } },
-    {    F_486,   99,  4,       { P_RZ, 0x0f, 0xc1, RM_EAX_EAX, } },
-    {    F_486,   99,  5, { P_OZ, P_RZ, 0x0f, 0xc1, RM_EAX_EAX, } },
-    {    F_486,   99,  5, { P_RZ, P_OZ, 0x0f, 0xc1, RM_EAX_EAX, } },
+    {    F_486 | F_OK,  3,             { 0x0f, 0xc1, RM_EAX_EAX, } },
+    {    F_486 | F_OK,  4,       { P_OZ, 0x0f, 0xc1, RM_EAX_EAX, } },
+    {    F_486 | F_OK,  4,       { P_RZ, 0x0f, 0xc1, RM_EAX_EAX, } },
+    {    F_486 | F_OK,  5, { P_OZ, P_RZ, 0x0f, 0xc1, RM_EAX_EAX, } },
+    {    F_486 | F_OK,  5, { P_RZ, P_OZ, 0x0f, 0xc1, RM_EAX_EAX, } },
+    {    F_486 | F_OK,  4,       { P_RN, 0x0f, 0xc1, RM_EAX_EAX, } },
+    {    F_486 | F_OK,  5, { P_OZ, P_RN, 0x0f, 0xc1, RM_EAX_EAX, } },
+    {    F_486 | F_OK,  5, { P_RN, P_OZ, 0x0f, 0xc1, RM_EAX_EAX, } },
 #endif
 #if 0
     /* The movnti instruction is confined to the unprefixed line in the intel manuals. Check how the other lines work. */
-    {   F_SSE2,    3,  3,             { 0x0f, 0xc3, RM_EAX_EAX, } },        /* invalid - reg,reg */
-    {   F_SSE2,   99,  3,             { 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },
-    {   F_SSE2,    4,  4,       { P_OZ, 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },  /* invalid */
-    {   F_SSE2,    4,  4,       { P_RN, 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },  /* invalid */
-    {   F_SSE2,    4,  4,       { P_RZ, 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },  /* invalid */
-    {   F_SSE2,    4,  4,       { P_LK, 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },  /* invalid */
-    {   F_SSE2,    5,  5, { P_RZ, P_LK, 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },  /* invalid */
+    {   F_SSE2 | F_UD,  3,             { 0x0f, 0xc3, RM_EAX_EAX, } },        /* invalid - reg,reg */
+    {   F_SSE2 | F_OK,  3,             { 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },
+    {   F_SSE2 | F_UD,  4,       { P_OZ, 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },  /* invalid */
+    {   F_SSE2 | F_UD,  4,       { P_RZ, 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },  /* invalid */
+    {   F_SSE2 | F_UD,  4,       { P_RN, 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },  /* invalid */
+    {   F_SSE2 | F_UD,  4,       { P_LK, 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },  /* invalid */
+    {   F_SSE2 | F_UD,  5, { P_RN, P_LK, 0x0f, 0xc3, RM_EAX_DEREF_EBX, } },  /* invalid */
 #endif
-#if 1
+#if 0
     /* The lddqu instruction requires a 0xf2 prefix, intel only lists 0x66 and empty
        prefix for it.  Check what they really mean by that. */
-    {   F_SSE3,    4,  4,            { P_RZ, 0x0f, 0xf0, RM_EAX_EAX, } },          /* invalid - reg, reg  */
-    {   F_SSE3,   99,  4,            { P_RZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,   99,  5,      { P_RZ, P_RZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,    3,  3,      {             0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,    4,  4,      {       P_RN, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,    4,  4,      {       P_OZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,    4,  4,      {       P_LK, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,    5,  5,      { P_RZ, P_RN, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,   99,  5,      { P_RZ, P_OZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } }, // AMD,why?
-    {   F_SSE3,    5,  5,      { P_RZ, P_LK, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,   99,  5,      { P_RN, P_RZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,   99,  5,      { P_OZ, P_RZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,    5,  5,      { P_LK, P_RZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,   99,  5,      { P_OZ, P_RZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-    {   F_SSE3,   99,  6,{ P_OZ, P_RN, P_RZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
-#endif
-    {   F_SSE2,   99,  3,            { 0x0f, 0x7e, RM_EAX_EAX, } },
-    {   F_SSE2,   99,  4,      { P_OZ, 0x0f, 0x7e, RM_EAX_EAX, } },
-    {   F_SSE2,    5,  5,{ P_RZ, P_OZ, 0x0f, 0x7e, RM_EAX_EAX, } }, // WTF?
-    {   F_SSE2,    5,  5,{ P_OZ, P_RZ, 0x0f, 0x7e, RM_EAX_EAX, } },
-    {   F_SSE2,   99,  5,{ P_RN, P_OZ, 0x0f, 0x7e, RM_EAX_EAX, } },
-    {   F_SSE2,   99,  4,      { P_RN, 0x0f, 0x7e, RM_EAX_EAX, } },
-    {   F_SSE2,    4,  4,      { P_RZ, 0x0f, 0x7e, RM_EAX_EAX, } },
+    {   F_SSE3 | F_UD,  4,            { P_RN, 0x0f, 0xf0, RM_EAX_EAX, } },          /* invalid - reg, reg  */
+    {   F_SSE3 | F_OK,  4,            { P_RN, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_OK,  5,      { P_RN, P_RN, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_UD,  3,      {             0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_UD,  4,      {       P_RZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_UD,  4,      {       P_OZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_UD,  4,      {       P_LK, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_UD,  5,      { P_RN, P_RZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_OK,  5,      { P_RN, P_OZ, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } }, // AMD,why?
+    {   F_SSE3 | F_UD,  5,      { P_RN, P_LK, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_OK,  5,      { P_RZ, P_RN, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_OK,  5,      { P_OZ, P_RN, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_UD,  5,      { P_LK, P_RN, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_OK,  5,      { P_OZ, P_RN, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+    {   F_SSE3 | F_OK,  6,{ P_OZ, P_RZ, P_RN, 0x0f, 0xf0, RM_EAX_DEREF_EBX, } },
+#endif
+#if 0
+    {   F_SSE2 | F_OK,  3,            { 0x0f, 0x7e, RM_EAX_EAX, } },
+    {   F_SSE2 | F_OK,  4,      { P_OZ, 0x0f, 0x7e, RM_EAX_EAX, } },
+    {   F_SSE2 | F_UD,  5,{ P_RN, P_OZ, 0x0f, 0x7e, RM_EAX_EAX, } }, // WTF?
+    {   F_SSE2 | F_UD,  5,{ P_OZ, P_RN, 0x0f, 0x7e, RM_EAX_EAX, } },
+    {   F_SSE2 | F_OK,  5,{ P_RZ, P_OZ, 0x0f, 0x7e, RM_EAX_EAX, } },
+    {   F_SSE2 | F_OK,  4,      { P_RZ, 0x0f, 0x7e, RM_EAX_EAX, } },
+    {   F_SSE2 | F_UD,  4,      { P_RN, 0x0f, 0x7e, RM_EAX_EAX, } },
+#endif
 /** @todo crc32 / movbe  */
 };
 
@@ -152,7 +266,16 @@ void DecodeEdgeTest(void)
 
         for (i = 0; i < RT_ELEMENTS(g_aSimpleTests); i++)
         {
-            unsigned cb = g_aSimpleTests[i].cbOpcodes;
+            unsigned const cbOpcodes = g_aSimpleTests[i].cbOpcodes;
+            uint16_t const fFlags    = g_aSimpleTests[i].fFlags;
+            unsigned cb;
+            /** @todo check if supported. */
+
+            /*
+             * Place the instruction right up against the page boundary, then
+             * move it across the boundary and check that we get #PFs when we do.
+             */
+            cb = cbOpcodes;
             while (cb >= 1)
             {
                 unsigned const   cErrorsBefore = Bs3TestSubErrorCount();
@@ -160,44 +283,39 @@ void DecodeEdgeTest(void)
                 Bs3MemCpy(pbRip, &g_aSimpleTests[i].abOpcodes[0], cb);
                 Bs3RegCtxSetRipCsFromFlat(&Ctx, (uintptr_t)pbRip);
                 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame);
-#if 0
-                Bs3TestPrintf("\ni=%d cb=%#x (cbUd=%#x cbOpcodes=%#x)\n", i, cb, g_aSimpleTests[i].cbUd, g_aSimpleTests[i].cbOpcodes);
-                Bs3TrapPrintFrame(&TrapFrame);
+#if 1
+                Bs3TestPrintf("\ni=%d cb=%#x (cbOpcodes=%#x fFlags=%#x)\n", i, cb, cbOpcodes, fFlags);
+//                Bs3TrapPrintFrame(&TrapFrame);
 #endif
-                if (cb >= g_aSimpleTests[i].cbUd)
+                if (cb >= cbOpcodes && (g_aSimpleTests[i].fFlags & F_UD))
                 {
                     if (TrapFrame.bXcpt != X86_XCPT_UD)
-                        Bs3TestFailedF("i=%d cb=%d cbUd=%d cbOp=%d: expected #UD got %#x at %RX32\n",
-                                       i, cb, g_aSimpleTests[i].cbUd, g_aSimpleTests[i].cbOpcodes,
-                                       TrapFrame.bXcpt, TrapFrame.Ctx.rip.u32);
+                        Bs3TestFailedF("i=%d cb=%d cbOp=%d fFlags=%#x: expected #UD got %#x at %RX32\n",
+                                       i, cb, cbOpcodes, fFlags, TrapFrame.bXcpt, TrapFrame.Ctx.rip.u32);
                 }
-                else if (cb < g_aSimpleTests[i].cbOpcodes)
+                else if (cb < cbOpcodes)
                 {
                     if (TrapFrame.bXcpt != X86_XCPT_PF)
-                        Bs3TestFailedF("i=%d cb=%d cbUd=%d cbOp=%d: expected #PF (on) got %#x at %RX32\n",
-                                       i, cb, g_aSimpleTests[i].cbUd, g_aSimpleTests[i].cbOpcodes,
-                                       TrapFrame.bXcpt, TrapFrame.Ctx.rip.u32);
+                        Bs3TestFailedF("i=%d cb=%d cbOp=%d fFlags=%#x: expected #PF (on) got %#x at %RX32\n",
+                                       i, cb, cbOpcodes, fFlags, TrapFrame.bXcpt, TrapFrame.Ctx.rip.u32);
                     else if (TrapFrame.Ctx.rip.u32 != (uintptr_t)pbRip)
-                        Bs3TestFailedF("i=%d cb=%d cbUd=%d cbOp=%d: expected #PF rip of %p (on) got %#RX32\n",
-                                       i, cb, g_aSimpleTests[i].cbUd, g_aSimpleTests[i].cbOpcodes,
-                                       pbRip, TrapFrame.Ctx.rip.u32);
+                        Bs3TestFailedF("i=%d cb=%d cbOp=%d fFlags=%#x: expected #PF rip of %p (on) got %#RX32\n",
+                                       i, cb, cbOpcodes, fFlags, pbRip, TrapFrame.Ctx.rip.u32);
                 }
                 else
                 {
                     if (TrapFrame.bXcpt != X86_XCPT_PF)
-                        Bs3TestFailedF("i=%d cb=%d cbUd=%d cbOp=%d: expected #PF (after) got %#x at %RX32\n",
-                                       i, cb, g_aSimpleTests[i].cbUd, g_aSimpleTests[i].cbOpcodes,
-                                       TrapFrame.bXcpt, TrapFrame.Ctx.rip.u32);
+                        Bs3TestFailedF("i=%d cb=%d cbOp=%d fFlags=%#x: expected #PF (after) got %#x at %RX32\n",
+                                       i, cb, cbOpcodes, fFlags, TrapFrame.bXcpt, TrapFrame.Ctx.rip.u32);
                     else if (TrapFrame.Ctx.rip.u32 != (uintptr_t)&pbPages[X86_PAGE_SIZE])
-                        Bs3TestFailedF("i=%d cb=%d cbUd=%d cbOp=%d: expected #PF rip of %p (after) got %#RX32\n",
-                                       i, cb, g_aSimpleTests[i].cbUd, g_aSimpleTests[i].cbOpcodes,
-                                       &pbPages[X86_PAGE_SIZE], TrapFrame.Ctx.rip.u32);
+                        Bs3TestFailedF("i=%d cb=%d cbOp=%d fFlags=%#x: expected #PF rip of %p (after) got %#RX32\n",
+                                       i, cb, cbOpcodes, fFlags, &pbPages[X86_PAGE_SIZE], TrapFrame.Ctx.rip.u32);
                 }
                 if (Bs3TestSubErrorCount() != cErrorsBefore)
                 {
                     Bs3TestPrintf("  %.*Rhxs", cb, &g_aSimpleTests[i].abOpcodes[0]);
-                    if (cb < g_aSimpleTests[i].cbOpcodes)
-                        Bs3TestPrintf("[%.*Rhxs]", g_aSimpleTests[i].cbOpcodes - cb, &g_aSimpleTests[i].abOpcodes[cb]);
+                    if (cb < cbOpcodes)
+                        Bs3TestPrintf("[%.*Rhxs]", cbOpcodes - cb, &g_aSimpleTests[i].abOpcodes[cb]);
                     Bs3TestPrintf("\n");
                 }
 
@@ -219,15 +337,1390 @@ void DecodeEdgeTest(void)
 }
 
 
+/**
+ * Undefined opcode test.
+ */
+typedef struct CPUDECODE1UDTST
+{
+    /** Type of undefined opcode decoding logic - UD_T_XXX. */
+    uint8_t     enmType;
+    /** Core opcodes length. */
+    uint8_t     cbOpcodes;
+    /** Core opcodes. */
+    uint8_t     abOpcodes[5];
+    /** UD_F_XXX. */
+    uint8_t     fFlags;
+} CPUDECODE1UDTST;
+typedef CPUDECODE1UDTST const BS3_FAR *PCCPUDECODE1UDTST;
+
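+/* Reading of the UD_T_MODRM_RRn/MRn values below (inferred from the values,
+   as an illustration aid): the low three bits give the /r (reg field) value
+   under test, 0x10 selects a register-form mod r/m byte, 0x20 a memory-form
+   one, and bit 3 (the _I8 names) appends an immediate byte. */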
+#define UD_T_EXACT          0
+#define UD_T_NOAMD          0x80        /**< AMD does not decode unnecessary bytes, Intel does. */
+#define UD_T_MODRM          1
+#define UD_T_MODRM_I8       2
+#define UD_T_MODRM_M        3
+#define UD_T_MODRM_M_I8     4
+#define UD_T_MODRM_RR0      0x10
+#define UD_T_MODRM_RR1      0x11
+#define UD_T_MODRM_RR2      0x12
+#define UD_T_MODRM_RR3      0x13
+#define UD_T_MODRM_RR4      0x14
+#define UD_T_MODRM_RR5      0x15
+#define UD_T_MODRM_RR6      0x16
+#define UD_T_MODRM_RR7      0x17
+#define UD_T_MODRM_RR0_I8   0x18
+#define UD_T_MODRM_RR1_I8   0x19
+#define UD_T_MODRM_RR2_I8   0x1a
+#define UD_T_MODRM_RR3_I8   0x1b
+#define UD_T_MODRM_RR4_I8   0x1c
+#define UD_T_MODRM_RR5_I8   0x1d
+#define UD_T_MODRM_RR6_I8   0x1e
+#define UD_T_MODRM_RR7_I8   0x1f
+#define UD_T_MODRM_MR0      0x20
+#define UD_T_MODRM_MR1      0x21
+#define UD_T_MODRM_MR2      0x22
+#define UD_T_MODRM_MR3      0x23
+#define UD_T_MODRM_MR4      0x24
+#define UD_T_MODRM_MR5      0x25
+#define UD_T_MODRM_MR6      0x26
+#define UD_T_MODRM_MR7      0x27
+#define UD_T_MODRM_MR0_I8   0x28
+#define UD_T_MODRM_MR1_I8   0x29
+#define UD_T_MODRM_MR2_I8   0x2a
+#define UD_T_MODRM_MR3_I8   0x2b
+#define UD_T_MODRM_MR4_I8   0x2c
+#define UD_T_MODRM_MR5_I8   0x2d
+#define UD_T_MODRM_MR6_I8   0x2e
+#define UD_T_MODRM_MR7_I8   0x2f
+
+#define UD_F_ANY_PFX 0
+#define UD_F_NOT_NO_PFX     UINT8_C(0x01)  /**< Must have some kind of prefix to be \#UD. */
+#define UD_F_NOT_OZ_PFX     UINT8_C(0x02)  /**< Skip the size prefix. */
+#define UD_F_NOT_RZ_PFX     UINT8_C(0x04)  /**< Skip the REPZ prefix. */
+#define UD_F_NOT_RN_PFX     UINT8_C(0x08)  /**< Skip the REPNZ prefix. */
+#define UD_F_NOT_LK_PFX     UINT8_C(0x10)  /**< Skip the LOCK prefix. */
+#define UD_F_3BYTE_ESC      UINT8_C(0x20)  /**< Unused 3-byte escape table; test all 256 entries. */
+
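+/*
+ * Hedged sketch (an illustration, not the shipped harness): expanding one
+ * table entry into single-prefix variants while honouring the UD_F_NOT_*
+ * skip flags could look roughly like this.
+ */
+#if 0
+static unsigned BuildUdVariant(PCCPUDECODE1UDTST pTst, unsigned iPfx, uint8_t BS3_FAR *pbDst)
+{
+    static uint8_t const s_abPfx[]  = { 0, P_OZ, P_RZ, P_RN, P_LK };
+    static uint8_t const s_afSkip[] = { UD_F_NOT_NO_PFX, UD_F_NOT_OZ_PFX,
+                                        UD_F_NOT_RZ_PFX, UD_F_NOT_RN_PFX, UD_F_NOT_LK_PFX };
+    unsigned cb = 0;
+    if (!(pTst->fFlags & s_afSkip[iPfx]))
+    {
+        if (s_abPfx[iPfx])
+            pbDst[cb++] = s_abPfx[iPfx];                /* the prefix variant under test */
+        Bs3MemCpy(&pbDst[cb], &pTst->abOpcodes[0], pTst->cbOpcodes);
+        cb += pTst->cbOpcodes;                          /* caller executes this and expects #UD */
+    }
+    return cb;                                          /* 0 = variant skipped */
+}
+#endif
+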
+/**
+ * Two byte opcodes.
+ */
+CPUDECODE1UDTST const g_aUdTest2Byte_0f[] =
+{
+#if 0
+    {  UD_T_EXACT, 2, { 0x0f, 0x04 }, UD_F_ANY_PFX },
+    {  UD_T_EXACT, 2, { 0x0f, 0x0a }, UD_F_ANY_PFX },
+    {  UD_T_EXACT, 2, { 0x0f, 0x0c }, UD_F_ANY_PFX },
+    {  UD_T_EXACT, 2, { 0x0f, 0x0e }, UD_F_ANY_PFX },
+    {  UD_T_EXACT, 2, { 0x0f, 0x0f }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x13 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x14 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x15 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x16 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x17 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    /** @todo figure out when 0f 19 and 0f 0c-0f were made into NOPs. */
+    {  UD_T_EXACT, 2, { 0x0f, 0x24 }, UD_F_ANY_PFX },
+    {  UD_T_EXACT, 2, { 0x0f, 0x25 }, UD_F_ANY_PFX },
+    {  UD_T_EXACT, 2, { 0x0f, 0x26 }, UD_F_ANY_PFX },
+    {  UD_T_EXACT, 2, { 0x0f, 0x27 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x28 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x29 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x2b }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x2e }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x2f }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_EXACT, 2, { 0x0f, 0x36 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM,    3, { 0x0f, 0x39, 0x00 }, UD_F_3BYTE_ESC | UD_F_ANY_PFX }, /* Three byte escape table, just unused.  */
+    {  UD_T_MODRM_I8, 3, { 0x0f, 0x3b, 0x00 }, UD_F_3BYTE_ESC | UD_F_ANY_PFX }, /* Three byte escape table, just unused.  */
+    {  UD_T_MODRM,    3, { 0x0f, 0x3c, 0x00 }, UD_F_3BYTE_ESC | UD_F_ANY_PFX }, /* Three byte escape table, just unused.  */
+    {  UD_T_MODRM,    3, { 0x0f, 0x3d, 0x00 }, UD_F_3BYTE_ESC | UD_F_ANY_PFX }, /* Three byte escape table, just unused.  */
+    {  UD_T_MODRM_I8, 3, { 0x0f, 0x3e, 0x00 }, UD_F_3BYTE_ESC | UD_F_ANY_PFX }, /* Three byte escape table, just unused.  */
+    {  UD_T_MODRM_I8, 3, { 0x0f, 0x3f, 0x00 }, UD_F_3BYTE_ESC | UD_F_ANY_PFX }, /* Three byte escape table, just unused.  */
+    {  UD_T_MODRM, 2, { 0x0f, 0x50 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x52 }, UD_F_NOT_NO_PFX | UD_F_NOT_RZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x53 }, UD_F_NOT_NO_PFX | UD_F_NOT_RZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x54 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x55 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x56 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x57 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x5b }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x60 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x61 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x62 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x63 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x64 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x65 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x66 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x67 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x68 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x69 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x6a }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x6b }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x6c }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x6d }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x6e }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x6f }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RZ_PFX },
+    {  UD_T_MODRM_M_I8, 2, { 0x0f, 0x71 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR0_I8, 2, { 0x0f, 0x71 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR1_I8, 2, { 0x0f, 0x71 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR2_I8, 2, { 0x0f, 0x71 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_RR3_I8, 2, { 0x0f, 0x71 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR4_I8, 2, { 0x0f, 0x71 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_RR5_I8, 2, { 0x0f, 0x71 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR6_I8, 2, { 0x0f, 0x71 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_RR7_I8, 2, { 0x0f, 0x71 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_M_I8, 2, { 0x0f, 0x72 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR0_I8, 2, { 0x0f, 0x72 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR1_I8, 2, { 0x0f, 0x72 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR2_I8, 2, { 0x0f, 0x72 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_RR3_I8, 2, { 0x0f, 0x72 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR4_I8, 2, { 0x0f, 0x72 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_RR5_I8, 2, { 0x0f, 0x72 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR6_I8, 2, { 0x0f, 0x72 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_RR7_I8, 2, { 0x0f, 0x72 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_M_I8, 2, { 0x0f, 0x73 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR0_I8, 2, { 0x0f, 0x73 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR1_I8, 2, { 0x0f, 0x73 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR2_I8, 2, { 0x0f, 0x73 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_RR3_I8, 2, { 0x0f, 0x73 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_RR4_I8, 2, { 0x0f, 0x73 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR5_I8, 2, { 0x0f, 0x73 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR6_I8, 2, { 0x0f, 0x73 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_RR7_I8, 2, { 0x0f, 0x73 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x74 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x75 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x76 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    /* 0f 77: WTF? OZ, RZ and RN are all empty in the intel tables and LK isn't mentioned at all. */
+    {  UD_T_MODRM, 2, { 0x0f, 0x77 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RN_PFX | UD_F_NOT_RZ_PFX | UD_F_NOT_LK_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x78 }, UD_F_NOT_NO_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x79 }, UD_F_NOT_NO_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x7a }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x7b }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x7c }, UD_F_NOT_OZ_PFX | UD_F_NOT_RN_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x7d }, UD_F_NOT_OZ_PFX | UD_F_NOT_RN_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x7e }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0x7f }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xa6 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xa7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_MR0, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX }, /* fxsave only checks REX.W */
+    {  UD_T_MODRM_MR1, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX }, /* fxrstor ditto  */
+    {  UD_T_MODRM_MR2, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX }, /* ldmxcsr */
+    {  UD_T_MODRM_MR3, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX }, /* stmxcsr */
+    {  UD_T_MODRM_MR4, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX }, /* xsave */
+    {  UD_T_MODRM_MR5, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX }, /* xrstor */
+    {  UD_T_MODRM_MR6, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX }, /* xsaveopt */
+    {  UD_T_MODRM_MR7, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX }, /* clflush (none) and clflushopt (66) */
+    {  UD_T_MODRM_RR0, 2, { 0x0f, 0xae }, UD_F_ANY_PFX }, /* f3=rdfsbase is 64-bit  */
+    {  UD_T_MODRM_RR1, 2, { 0x0f, 0xae }, UD_F_ANY_PFX }, /* f3=rdgsbase is 64-bit  */
+    {  UD_T_MODRM_RR2, 2, { 0x0f, 0xae }, UD_F_ANY_PFX }, /* f3=wrfsbase is 64-bit  */
+    {  UD_T_MODRM_RR3, 2, { 0x0f, 0xae }, UD_F_ANY_PFX }, /* f3=wrgsbase is 64-bit  */
+    {  UD_T_MODRM_RR4, 2, { 0x0f, 0xae }, UD_F_ANY_PFX }, /* unused */
+    {  UD_T_MODRM_RR5, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX }, /* 00=lfence */
+    {  UD_T_MODRM_RR6, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX }, /* 00=mfence */
+    {  UD_T_MODRM_RR7, 2, { 0x0f, 0xae }, UD_F_NOT_NO_PFX }, /* 00=sfence */
+    {  UD_T_MODRM, 2, { 0x0f, 0xb8 }, UD_F_NOT_RZ_PFX },
+    {  UD_T_MODRM | UD_T_NOAMD, 2, { 0x0f, 0xb9 }, UD_F_ANY_PFX }, /* UD1 */
+    {  UD_T_MODRM_MR0_I8, 2, { 0x0f, 0xba }, UD_F_ANY_PFX }, /* grp8 */
+    {  UD_T_MODRM_MR1_I8, 2, { 0x0f, 0xba }, UD_F_ANY_PFX }, /* grp8 */
+    {  UD_T_MODRM_MR2_I8, 2, { 0x0f, 0xba }, UD_F_ANY_PFX }, /* grp8 */
+    {  UD_T_MODRM_MR3_I8, 2, { 0x0f, 0xba }, UD_F_ANY_PFX }, /* grp8 */
+    /** @todo f3 0f bb rm and f2 0f bb rm do stuff on skylake even though they are blank in the intel and AMD tables! */
+    //{  UD_T_MODRM, 2, { 0x0f, 0xbb }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    /** @todo AMD tables indicates that f2 0f bc rm is invalid, but on skylake it works differently (BSF?)  */
+    {  UD_T_MODRM, 2, { 0x0f, 0xbc }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RZ_PFX /* figure: */ | UD_F_NOT_RN_PFX },
+    /** @todo AMD tables indicates that f3 0f bc rm is invalid, but on skylake it works differently (BSR?) */
+    {  UD_T_MODRM, 2, { 0x0f, 0xbd }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RZ_PFX /* figure: */ | UD_F_NOT_RN_PFX },
+    /* Note! Intel incorrectly states that XADD (0f c0 and 0f c1) are sensitive to OZ, RN and RZ.  AMD and skylake hw disagree. */
+    {  UD_T_MODRM, 2, { 0x0f, 0xc3 }, UD_F_NOT_NO_PFX },
+    {  UD_T_MODRM_I8, 2, { 0x0f, 0xc4 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_I8, 2, { 0x0f, 0xc5 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM_I8, 2, { 0x0f, 0xc6 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+#endif
+    {  UD_T_MODRM_MR0, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR0, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    //{  UD_T_MODRM_MR1, 2, { 0x0f, 0xc7 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RN_PFX | UD_F_NOT_RZ_PFX | UD_F_NOT_LK_PFX }, - cmpxchg8b ignores everything.
+    {  UD_T_MODRM_RR1, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_MR2, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR2, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_MR3, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR3, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_MR4, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR4, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_MR5, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_RR5, 2, { 0x0f, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM_MR6, 2, { 0x0f, 0xc7 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RZ_PFX }, /* f2? */
+    {  UD_T_MODRM_RR6, 2, { 0x0f, 0xc7 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX }, /* (rdrand Rv) */
+    {  UD_T_MODRM_MR7, 2, { 0x0f, 0xc7 }, UD_F_NOT_NO_PFX }, /* vmptrst Mq (f2?); */
+    {  UD_T_MODRM_RR7, 2, { 0x0f, 0xc7 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RZ_PFX }, /* rdrand Rv; rdpid Rd/q (f2,66??); */
+#if 0
+    {  UD_T_MODRM, 2, { 0x0f, 0xd0 }, UD_F_NOT_OZ_PFX | UD_F_NOT_RN_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xd1 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xd2 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xd3 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xd4 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xd5 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xd6 }, UD_F_NOT_OZ_PFX | UD_F_NOT_RN_PFX | UD_F_NOT_RZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xd7 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xd8 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xd9 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xda }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xdb }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xdc }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xdd }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xde }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xdf }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xe0 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xe1 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xe2 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xe3 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xe4 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xe5 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xe6 }, UD_F_NOT_OZ_PFX | UD_F_NOT_RN_PFX | UD_F_NOT_RZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xe7 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xe8 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xe9 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xea }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xeb }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xec }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xed }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xee }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xef }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xf0 }, UD_F_NOT_RN_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xf1 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xf2 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xf3 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xf4 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xf5 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xf6 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xf7 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xf8 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xf9 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xfa }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xfb }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xfc }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xfd }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xfe }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 2, { 0x0f, 0xff }, UD_F_ANY_PFX },
+#endif
+};
+
+
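+/* Flag conventions, as inferred from the prefix table in DecodeUdEdgeTest below:
+   the UD_F_NOT_xxx flags exclude prefix variants from the #UD expectation -
+      UD_F_NOT_NO_PFX - skip the variant without any prefix;
+      UD_F_NOT_OZ_PFX - skip the operand size prefix (66h) variants;
+      UD_F_NOT_RN_PFX - skip the REPNE prefix (F2h) variants;
+      UD_F_NOT_RZ_PFX - skip the REPE prefix (F3h) variants;
+      UD_F_NOT_LK_PFX - skip the LOCK prefix (F0h) variants;
+      UD_F_ANY_PFX    - no exclusions, every prefix variant is expected to #UD. */
+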
+/**
+ * Three byte opcodes.
+ */
+CPUDECODE1UDTST const g_aUdTest3Byte_0f_38[] =
+{
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x00 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x01 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x02 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x03 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x04 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x05 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x06 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x07 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x08 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x09 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x0a }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x0b }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x0c }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x0d }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x0e }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x0f }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x10 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x11 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x12 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x13 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x14 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x15 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x16 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x17 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x18 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x19 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x1a }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x1b }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x1c }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x1d }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x1e }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x1f }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x20 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x21 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x22 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x23 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x24 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x25 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x26 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x27 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x28 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x29 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x2a }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x2b }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x2c }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x2d }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x2e }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x2f }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x30 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x31 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x32 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x33 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x34 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x35 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x36 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x37 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x38 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x39 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x3a }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x3b }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x3c }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x3d }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x3e }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x3f }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x40 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x41 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x42 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x43 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x44 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x45 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x46 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x47 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x48 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x49 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x4a }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x4b }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x4c }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x4d }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x4e }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x4f }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x50 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x51 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x52 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x53 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x54 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x55 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x56 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x57 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x58 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x59 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x5a }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x5b }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x5c }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x5d }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x5e }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x5f }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x60 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x61 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x62 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x63 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x64 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x65 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x66 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x67 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x68 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x69 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x6a }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x6b }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x6c }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x6d }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x6e }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x6f }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x70 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x71 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x72 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x73 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x74 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x75 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x76 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x77 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x78 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x79 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x7a }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x7b }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x7c }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x7d }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x7e }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x7f }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x80 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x81 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x82 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x83 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x84 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x85 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x86 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x87 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x88 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x89 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x8a }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x8b }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x8c }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x8d }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x8e }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x8f }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x90 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x91 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x92 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x93 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x94 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x95 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x96 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x97 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x98 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x99 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x9a }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x9b }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x9c }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x9d }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x9e }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0x9f }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xa0 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xa1 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xa2 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xa3 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xa4 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xa5 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xa6 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xa7 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xa8 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xa9 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xaa }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xab }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xac }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xad }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xae }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xaf }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xb0 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xb1 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xb2 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xb3 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xb4 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xb5 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xb6 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xb7 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xb8 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xb9 }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xba }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xbb }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xbc }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xbd }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xbe }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xbf }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xc0 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xc1 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xc2 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xc3 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xc4 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xc5 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xc6 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xc7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xc8 }, UD_F_NOT_NO_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xc9 }, UD_F_NOT_NO_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xca }, UD_F_NOT_NO_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xcb }, UD_F_NOT_NO_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xcc }, UD_F_NOT_NO_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xcd }, UD_F_NOT_NO_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xce }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xcf }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xd0 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xd1 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xd2 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xd3 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xd4 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xd5 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xd6 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xd7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xd8 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xd9 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xda }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xdb }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xdc }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xdd }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xde }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xdf }, UD_F_NOT_OZ_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xe0 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xe1 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xe2 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xe3 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xe4 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xe5 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xe6 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xe7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xe8 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xe9 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xea }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xeb }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xec }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xed }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xee }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xef }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xf0 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RN_PFX }, /// @todo crc32 weirdness
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xf1 }, UD_F_NOT_NO_PFX | UD_F_NOT_OZ_PFX | UD_F_NOT_RN_PFX }, /// @todo crc32 weirdness
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xf2 }, UD_F_NOT_NO_PFX },
+
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xf4 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xf5 }, UD_F_NOT_NO_PFX | UD_F_NOT_RZ_PFX | UD_F_NOT_RN_PFX },
+
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xf7 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xf8 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xf9 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xfa }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xfb }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xfc }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xfd }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xfe }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 3, { 0x0f, 0x38, 0xff }, UD_F_ANY_PFX },
+
+    /* This is going to be interesting: */
+    {  UD_T_MODRM, 5, { 0x66, 0xf2, 0x0f, 0x38, 0xf5 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 5, { 0x66, 0xf3, 0x0f, 0x38, 0xf5 }, UD_F_ANY_PFX },
+    {  UD_T_MODRM, 5, { 0x66, 0xf2, 0x0f, 0x38, 0xf6 }, UD_F_ANY_PFX },
+    //{  UD_T_MODRM, 5, { 0x66, 0xf3, 0x0f, 0x38, 0xf6 }, UD_F_ANY_PFX }, - not this one.
+};
+
+
+void DecodeUdEdgeTest(PCCPUDECODE1UDTST paTests, unsigned cTests)
+{
+    uint8_t BS3_FAR *pbPages;
+
+    /*
+     * Detect AMD.
+     */
+    bool fIsAmd = false;
+    if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
+        fIsAmd = ASMIsAmdCpu();
+    Bs3TestPrintf("fIsAmd=%d\n", fIsAmd);
+
+    /*
+     * Allocate and initialize a page pair
+     */
+    pbPages  = Bs3MemGuardedTestPageAlloc(BS3MEMKIND_FLAT32);
+    if (pbPages)
+    {
+        unsigned        iTest;
+        BS3REGCTX       Ctx;
+        BS3REGCTX       ExpectCtx;
+        BS3TRAPFRAME    TrapFrame;
+        uint32_t        iStep;
+
+        Bs3MemZero(&Ctx, sizeof(Ctx));
+        Bs3MemZero(&ExpectCtx, sizeof(ExpectCtx));
+        Bs3MemZero(&TrapFrame, sizeof(TrapFrame));
+
+        /* Enable SSE. */
+        ASMSetCR0((ASMGetCR0() & ~(X86_CR0_EM | X86_CR0_TS)) | X86_CR0_MP);
+        ASMSetCR4(ASMGetCR4() | X86_CR4_OSFXSR);
+
+        /* Create a test context. */
+        Bs3RegCtxSaveEx(&Ctx, BS3_MODE_CODE_32, 512);
+        Ctx.rbx.u = (uintptr_t)pbPages;
+        Ctx.rcx.u = (uintptr_t)pbPages;
+        Ctx.rdx.u = (uintptr_t)pbPages;
+        Ctx.rax.u = (uintptr_t)pbPages;
+        Ctx.rbp.u = (uintptr_t)pbPages;
+        Ctx.rsi.u = (uintptr_t)pbPages;
+        Ctx.rdi.u = (uintptr_t)pbPages;
+
+        Bs3MemCpy(&ExpectCtx, &Ctx, sizeof(ExpectCtx));
+        ExpectCtx.rflags.u32 |= X86_EFL_RF;
+
+        /* Loop thru the tests. */
+        iStep = g_usBs3TestStep = 0;
+        for (iTest = 0; iTest < cTests; iTest++)
+        {
+            typedef struct CPUDECODE1UDSEQ
+            {
+                uint8_t cb;
+                uint8_t ab[10];
+                uint8_t fIncompatible;
+            } CPUDECODE1UDSEQ;
+            typedef CPUDECODE1UDSEQ const BS3_FAR *PCCPUDECODE1UDSEQ;
+
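+            /* Prefix byte sequences to prepend to each opcode; fIncompatible
+               holds the UD_F_NOT_xxx flags that make a test skip the sequence. */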
+            static CPUDECODE1UDSEQ const s_aPrefixes[] =
+            {
+                { 0, { 0    },              UD_F_NOT_NO_PFX },
+                { 1, { P_OZ },              UD_F_NOT_OZ_PFX },
+                { 1, { P_RN },              UD_F_NOT_RN_PFX },
+                { 1, { P_RZ },              UD_F_NOT_RZ_PFX },
+                { 1, { P_LK },              UD_F_NOT_LK_PFX },
+                { 2, { P_OZ, P_OZ },        UD_F_NOT_OZ_PFX | UD_F_NOT_OZ_PFX },
+                { 2, { P_RN, P_OZ },        UD_F_NOT_RN_PFX | UD_F_NOT_OZ_PFX },
+                { 2, { P_RZ, P_OZ },        UD_F_NOT_RZ_PFX | UD_F_NOT_OZ_PFX },
+                { 2, { P_LK, P_OZ },        UD_F_NOT_LK_PFX | UD_F_NOT_OZ_PFX },
+                { 2, { P_OZ, P_RN },        UD_F_NOT_OZ_PFX | UD_F_NOT_RN_PFX },
+                { 2, { P_RN, P_RN },        UD_F_NOT_RN_PFX | UD_F_NOT_RN_PFX },
+                { 2, { P_RZ, P_RN },        UD_F_NOT_RZ_PFX | UD_F_NOT_RN_PFX },
+                { 2, { P_LK, P_RN },        UD_F_NOT_LK_PFX | UD_F_NOT_RN_PFX },
+                { 2, { P_OZ, P_RZ },        UD_F_NOT_OZ_PFX | UD_F_NOT_RZ_PFX },
+                { 2, { P_RN, P_RZ },        UD_F_NOT_RN_PFX | UD_F_NOT_RZ_PFX },
+                { 2, { P_RZ, P_RZ },        UD_F_NOT_RZ_PFX | UD_F_NOT_RZ_PFX },
+                { 2, { P_LK, P_RZ },        UD_F_NOT_LK_PFX | UD_F_NOT_RZ_PFX },
+                { 2, { P_OZ, P_LK },        UD_F_NOT_OZ_PFX | UD_F_NOT_LK_PFX },
+                { 2, { P_RN, P_LK },        UD_F_NOT_RN_PFX | UD_F_NOT_LK_PFX },
+                { 2, { P_RZ, P_LK },        UD_F_NOT_RZ_PFX | UD_F_NOT_LK_PFX },
+                { 2, { P_LK, P_LK },        UD_F_NOT_LK_PFX | UD_F_NOT_LK_PFX },
+            };
+
+            static CPUDECODE1UDSEQ const s_aExact[] = { { 0, { 0 }, 0 } };
+            static CPUDECODE1UDSEQ const s_aModRm[] =
+            {
+                { 1, { RM_EAX_EAX, },                                       0 },
+                /* Mem forms (hardcoded indexed later): */
+                { 2, { RM_EAX_DEREF_EBX_DISP8, 0 },                         0 },
+                { 5, { RM_EAX_DEREF_EBX_DISP32, 0, 0, 0, 0 },               0 },
+                { 2, { RM_EAX_SIB, SIB_EBX_X1_NONE, },                      0 },
+                { 3, { RM_EAX_SIB_DISP8,  SIB_EBX_X1_NONE, 0 },             0 },
+                { 6, { RM_EAX_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0 },    0 },
+            };
+            static CPUDECODE1UDSEQ const s_aModRmImm8[] =
+            {
+                { 1 + 1, { RM_EAX_EAX, 0x11 },                                      0 },
+                /* Mem forms (hardcoded indexed later): */
+                { 2 + 1, { RM_EAX_DEREF_EBX_DISP8, 0, 0x11 },                       0 },
+                { 5 + 1, { RM_EAX_DEREF_EBX_DISP32, 0, 0, 0, 0, 0x11 },             0 },
+                { 2 + 1, { RM_EAX_SIB, SIB_EBX_X1_NONE, 0x11 },                     0 },
+                { 3 + 1, { RM_EAX_SIB_DISP8,  SIB_EBX_X1_NONE, 0, 0x11 },           0 },
+                { 6 + 1, { RM_EAX_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0, 0x11 },  0 },
+            };
+            static CPUDECODE1UDSEQ const s_aModRmRRx[] =
+            {
+                { 1, { RM_EAX_EAX, },                                       0 },
+                { 1, { RM_ECX_EAX, },                                       0 },
+                { 1, { RM_EDX_EAX, },                                       0 },
+                { 1, { RM_EBX_EAX, },                                       0 },
+                { 1, { RM_ESP_EAX, },                                       0 },
+                { 1, { RM_EBP_EAX, },                                       0 },
+                { 1, { RM_ESI_EAX, },                                       0 },
+                { 1, { RM_EDI_EAX, },                                       0 },
+            };
+            static CPUDECODE1UDSEQ const s_aModRmRRxImm8[] =
+            {
+                { 2, { RM_EAX_EAX, 0x11 },                                  0 },
+                { 2, { RM_ECX_EAX, 0x11 },                                  0 },
+                { 2, { RM_EDX_EAX, 0x11 },                                  0 },
+                { 2, { RM_EBX_EAX, 0x11 },                                  0 },
+                { 2, { RM_ESP_EAX, 0x11 },                                  0 },
+                { 2, { RM_EBP_EAX, 0x11 },                                  0 },
+                { 2, { RM_ESI_EAX, 0x11 },                                  0 },
+                { 2, { RM_EDI_EAX, 0x11 },                                  0 },
+            };
+            static CPUDECODE1UDSEQ const s_aModRmMRx[] = /* index*5 */
+            {
+                { 2, { RM_EAX_DEREF_EBX_DISP8, 0 },                         0 },
+                { 5, { RM_EAX_DEREF_EBX_DISP32, 0, 0, 0, 0 },               0 },
+                { 2, { RM_EAX_SIB, SIB_EBX_X1_NONE, },                      0 },
+                { 3, { RM_EAX_SIB_DISP8,  SIB_EBX_X1_NONE, 0 },             0 },
+                { 6, { RM_EAX_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0 },    0 },
+
+                { 2, { RM_ECX_DEREF_EBX_DISP8, 0 },                         0 },
+                { 5, { RM_ECX_DEREF_EBX_DISP32, 0, 0, 0, 0 },               0 },
+                { 2, { RM_ECX_SIB, SIB_EBX_X1_NONE, },                      0 },
+                { 3, { RM_ECX_SIB_DISP8,  SIB_EBX_X1_NONE, 0 },             0 },
+                { 6, { RM_ECX_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0 },    0 },
+
+                { 2, { RM_EDX_DEREF_EBX_DISP8, 0 },                         0 },
+                { 5, { RM_EDX_DEREF_EBX_DISP32, 0, 0, 0, 0 },               0 },
+                { 2, { RM_EDX_SIB, SIB_EBX_X1_NONE, },                      0 },
+                { 3, { RM_EDX_SIB_DISP8,  SIB_EBX_X1_NONE, 0 },             0 },
+                { 6, { RM_EDX_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0 },    0 },
+
+                { 2, { RM_EBX_DEREF_EBX_DISP8, 0 },                         0 },
+                { 5, { RM_EBX_DEREF_EBX_DISP32, 0, 0, 0, 0 },               0 },
+                { 2, { RM_EBX_SIB, SIB_EBX_X1_NONE, },                      0 },
+                { 3, { RM_EBX_SIB_DISP8,  SIB_EBX_X1_NONE, 0 },             0 },
+                { 6, { RM_EBX_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0 },    0 },
+
+                { 2, { RM_ESP_DEREF_EBX_DISP8, 0 },                         0 },
+                { 5, { RM_ESP_DEREF_EBX_DISP32, 0, 0, 0, 0 },               0 },
+                { 2, { RM_ESP_SIB, SIB_EBX_X1_NONE, },                      0 },
+                { 3, { RM_ESP_SIB_DISP8,  SIB_EBX_X1_NONE, 0 },             0 },
+                { 6, { RM_ESP_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0 },    0 },
+
+                { 2, { RM_EBP_DEREF_EBX_DISP8, 0 },                         0 },
+                { 5, { RM_EBP_DEREF_EBX_DISP32, 0, 0, 0, 0 },               0 },
+                { 2, { RM_EBP_SIB, SIB_EBX_X1_NONE, },                      0 },
+                { 3, { RM_EBP_SIB_DISP8,  SIB_EBX_X1_NONE, 0 },             0 },
+                { 6, { RM_EBP_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0 },    0 },
+
+                { 2, { RM_ESI_DEREF_EBX_DISP8, 0 },                         0 },
+                { 5, { RM_ESI_DEREF_EBX_DISP32, 0, 0, 0, 0 },               0 },
+                { 2, { RM_ESI_SIB, SIB_EBX_X1_NONE, },                      0 },
+                { 3, { RM_ESI_SIB_DISP8,  SIB_EBX_X1_NONE, 0 },             0 },
+                { 6, { RM_ESI_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0 },    0 },
+
+                { 2, { RM_EDI_DEREF_EBX_DISP8, 0 },                         0 },
+                { 5, { RM_EDI_DEREF_EBX_DISP32, 0, 0, 0, 0 },               0 },
+                { 2, { RM_EDI_SIB, SIB_EBX_X1_NONE, },                      0 },
+                { 3, { RM_EDI_SIB_DISP8,  SIB_EBX_X1_NONE, 0 },             0 },
+                { 6, { RM_EDI_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0 },    0 },
+            };
+            static CPUDECODE1UDSEQ const s_aModRmMRxImm8[] = /* index*5 */
+            {
+                { 2+1, { RM_EAX_DEREF_EBX_DISP8, 0,                      0x11 }, 0 },
+                { 5+1, { RM_EAX_DEREF_EBX_DISP32, 0, 0, 0, 0,            0x11 }, 0 },
+                { 2+1, { RM_EAX_SIB, SIB_EBX_X1_NONE,                    0x11 }, 0 },
+                { 3+1, { RM_EAX_SIB_DISP8,  SIB_EBX_X1_NONE, 0,          0x11 }, 0 },
+                { 6+1, { RM_EAX_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0, 0x11 }, 0 },
+
+                { 2+1, { RM_ECX_DEREF_EBX_DISP8, 0,                      0x11 }, 0 },
+                { 5+1, { RM_ECX_DEREF_EBX_DISP32, 0, 0, 0, 0,            0x11 }, 0 },
+                { 2+1, { RM_ECX_SIB, SIB_EBX_X1_NONE,                    0x11 }, 0 },
+                { 3+1, { RM_ECX_SIB_DISP8,  SIB_EBX_X1_NONE, 0,          0x11 }, 0 },
+                { 6+1, { RM_ECX_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0, 0x11 }, 0 },
+
+                { 2+1, { RM_EDX_DEREF_EBX_DISP8, 0,                      0x11 }, 0 },
+                { 5+1, { RM_EDX_DEREF_EBX_DISP32, 0, 0, 0, 0,            0x11 }, 0 },
+                { 2+1, { RM_EDX_SIB, SIB_EBX_X1_NONE,                    0x11 }, 0 },
+                { 3+1, { RM_EDX_SIB_DISP8,  SIB_EBX_X1_NONE, 0,          0x11 }, 0 },
+                { 6+1, { RM_EDX_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0, 0x11 }, 0 },
+
+                { 2+1, { RM_EBX_DEREF_EBX_DISP8, 0,                      0x11 }, 0 },
+                { 5+1, { RM_EBX_DEREF_EBX_DISP32, 0, 0, 0, 0,            0x11 }, 0 },
+                { 2+1, { RM_EBX_SIB, SIB_EBX_X1_NONE,                    0x11 }, 0 },
+                { 3+1, { RM_EBX_SIB_DISP8,  SIB_EBX_X1_NONE, 0,          0x11 }, 0 },
+                { 6+1, { RM_EBX_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0, 0x11 }, 0 },
+
+                { 2+1, { RM_ESP_DEREF_EBX_DISP8, 0,                      0x11 }, 0 },
+                { 5+1, { RM_ESP_DEREF_EBX_DISP32, 0, 0, 0, 0,            0x11 }, 0 },
+                { 2+1, { RM_ESP_SIB, SIB_EBX_X1_NONE,                    0x11 }, 0 },
+                { 3+1, { RM_ESP_SIB_DISP8,  SIB_EBX_X1_NONE, 0,          0x11 }, 0 },
+                { 6+1, { RM_ESP_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0, 0x11 }, 0 },
+
+                { 2+1, { RM_EBP_DEREF_EBX_DISP8, 0,                      0x11 }, 0 },
+                { 5+1, { RM_EBP_DEREF_EBX_DISP32, 0, 0, 0, 0,            0x11 }, 0 },
+                { 2+1, { RM_EBP_SIB, SIB_EBX_X1_NONE,                    0x11 }, 0 },
+                { 3+1, { RM_EBP_SIB_DISP8,  SIB_EBX_X1_NONE, 0,          0x11 }, 0 },
+                { 6+1, { RM_EBP_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0, 0x11 }, 0 },
+
+                { 2+1, { RM_ESI_DEREF_EBX_DISP8, 0,                      0x11 }, 0 },
+                { 5+1, { RM_ESI_DEREF_EBX_DISP32, 0, 0, 0, 0,            0x11 }, 0 },
+                { 2+1, { RM_ESI_SIB, SIB_EBX_X1_NONE,                    0x11 }, 0 },
+                { 3+1, { RM_ESI_SIB_DISP8,  SIB_EBX_X1_NONE, 0,          0x11 }, 0 },
+                { 6+1, { RM_ESI_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0, 0x11 }, 0 },
+
+                { 2+1, { RM_EDI_DEREF_EBX_DISP8, 0,                      0x11 }, 0 },
+                { 5+1, { RM_EDI_DEREF_EBX_DISP32, 0, 0, 0, 0,            0x11 }, 0 },
+                { 2+1, { RM_EDI_SIB, SIB_EBX_X1_NONE,                    0x11 }, 0 },
+                { 3+1, { RM_EDI_SIB_DISP8,  SIB_EBX_X1_NONE, 0,          0x11 }, 0 },
+                { 6+1, { RM_EDI_SIB_DISP32, SIB_EBX_X1_NONE, 0, 0, 0, 0, 0x11 }, 0 },
+            };
+            unsigned            iPrefix;
+            unsigned            cSuffixes;
+            PCCPUDECODE1UDSEQ   paSuffixes;
+            unsigned const      cSubTabEntries = paTests[iTest].fFlags & UD_F_3BYTE_ESC ? 256 : 1;
+            unsigned            cImmEntries    = 1;
+
+            /*
+             * Skip if implemented.  (Placeholder: nothing is filtered out yet,
+             * so every table entry is exercised.)
+             */
+
+            /*
+             * Produce a number of opcode sequences by varying the prefixes and
+             * ModR/M parts.  Each opcode sequence is then treated to the edge test.
+             */
+            switch (paTests[iTest].enmType)
+            {
+                case UD_T_EXACT:
+                l_case_exact:
+                    cSuffixes   = RT_ELEMENTS(s_aExact);
+                    paSuffixes  = s_aExact;
+                    break;
+                case UD_T_MODRM | UD_T_NOAMD:
+                    if (fIsAmd)
+                        goto l_case_exact;
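+                    /* fall thru */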
+                case UD_T_MODRM:
+                    cSuffixes   = RT_ELEMENTS(s_aModRm);
+                    paSuffixes  = s_aModRm;
+                    break;
+                case UD_T_MODRM_I8:
+                    cSuffixes   = RT_ELEMENTS(s_aModRmImm8);
+                    paSuffixes  = s_aModRmImm8;
+                    cImmEntries = 256;
+                    break;
+                case UD_T_MODRM_M:
+                    cSuffixes   = RT_ELEMENTS(s_aModRm) - 1;
+                    paSuffixes  = &s_aModRm[1];
+                    break;
+                case UD_T_MODRM_M_I8:
+                    cSuffixes   = RT_ELEMENTS(s_aModRmImm8) - 1;
+                    paSuffixes  = &s_aModRmImm8[1];
+                    break;
+                case UD_T_MODRM_RR0:
+                case UD_T_MODRM_RR1:
+                case UD_T_MODRM_RR2:
+                case UD_T_MODRM_RR3:
+                case UD_T_MODRM_RR4:
+                case UD_T_MODRM_RR5:
+                case UD_T_MODRM_RR6:
+                case UD_T_MODRM_RR7:
+                    cSuffixes   = 1;
+                    paSuffixes  = &s_aModRmRRx[paTests[iTest].enmType - UD_T_MODRM_RR0];
+                    break;
+                case UD_T_MODRM_RR0_I8:
+                case UD_T_MODRM_RR1_I8:
+                case UD_T_MODRM_RR2_I8:
+                case UD_T_MODRM_RR3_I8:
+                case UD_T_MODRM_RR4_I8:
+                case UD_T_MODRM_RR5_I8:
+                case UD_T_MODRM_RR6_I8:
+                case UD_T_MODRM_RR7_I8:
+                    cSuffixes   = 1;
+                    paSuffixes  = &s_aModRmRRxImm8[paTests[iTest].enmType - UD_T_MODRM_RR0_I8];
+                    break;
+                case UD_T_MODRM_MR0:
+                case UD_T_MODRM_MR1:
+                case UD_T_MODRM_MR2:
+                case UD_T_MODRM_MR3:
+                case UD_T_MODRM_MR4:
+                case UD_T_MODRM_MR5:
+                case UD_T_MODRM_MR6:
+                case UD_T_MODRM_MR7:
+                    cSuffixes   = 5;
+                    paSuffixes  = &s_aModRmMRx[(paTests[iTest].enmType - UD_T_MODRM_MR0) * 5];
+                    break;
+                case UD_T_MODRM_MR0_I8:
+                case UD_T_MODRM_MR1_I8:
+                case UD_T_MODRM_MR2_I8:
+                case UD_T_MODRM_MR3_I8:
+                case UD_T_MODRM_MR4_I8:
+                case UD_T_MODRM_MR5_I8:
+                case UD_T_MODRM_MR6_I8:
+                case UD_T_MODRM_MR7_I8:
+                    cSuffixes   = 5;
+                    paSuffixes  = &s_aModRmMRxImm8[(paTests[iTest].enmType - UD_T_MODRM_MR0_I8) * 5];
+                    break;
+                default:
+                    Bs3TestPrintf("#%u: enmType=%d\n", iTest, paTests[iTest].enmType);
+                    continue;
+            }
+
+            for (iPrefix = 0; iPrefix < RT_ELEMENTS(s_aPrefixes); iPrefix++)
+                if (!(s_aPrefixes[iPrefix].fIncompatible & paTests[iTest].fFlags))
+                {
+                    unsigned iSubTab;
+                    unsigned cbOpcodesLead;
+                    uint8_t  abOpcodes[32];
+
+                    Bs3MemCpy(&abOpcodes[0], &s_aPrefixes[iPrefix].ab[0], s_aPrefixes[iPrefix].cb);
+                    cbOpcodesLead  = s_aPrefixes[iPrefix].cb;
+                    Bs3MemCpy(&abOpcodes[cbOpcodesLead], &paTests[iTest].abOpcodes[0], paTests[iTest].cbOpcodes);
+                    cbOpcodesLead += paTests[iTest].cbOpcodes;
+
+                    for (iSubTab = 0; iSubTab < cSubTabEntries; iSubTab++)
+                    {
+                        unsigned iSuffix;
+
+                        if (cSubTabEntries > 1)
+                            abOpcodes[cbOpcodesLead - 1] = iSubTab;
+
+                        for (iSuffix = 0; iSuffix < cSuffixes; iSuffix++)
+                            if (!(paSuffixes[iSuffix].fIncompatible & paTests[iTest].fFlags))
+                            {
+                                unsigned const  cbOpcodes    = cbOpcodesLead + paSuffixes[iSuffix].cb;
+                                unsigned        cbOpcodesMin = 1;
+                                unsigned        iImm;
+                                Bs3MemCpy(&abOpcodes[cbOpcodesLead], paSuffixes[iSuffix].ab, paSuffixes[iSuffix].cb);
+
+                                for (iImm = 0; iImm < cImmEntries; iImm++)
+                                {
+                                    unsigned cb;
+
+                                    if (cImmEntries > 1)
+                                        abOpcodes[cbOpcodes - 1] = iImm;
+
+                                    /*
+                                     * Do the edge thing: place the bytes so the
+                                     * sequence ends right at the guard page.  The
+                                     * full sequence must #UD; truncated copies must
+                                     * #PF when fetching runs into the guard page.
+                                     */
+                                    cb = cbOpcodes;
+                                    while (cb >= cbOpcodesMin)
+                                    {
+                                        uint8_t BS3_FAR *pbRip = &pbPages[X86_PAGE_SIZE - cb];
+                                        uint8_t          bXcptExpected;
+
+                                        Bs3RegCtxSetRipCsFromFlat(&Ctx, (uintptr_t)pbRip);
+                                        ExpectCtx.rip = Ctx.rip;
+                                        ExpectCtx.cs  = Ctx.cs;
+                                        if (cb >= cbOpcodes)
+                                        {
+                                            ExpectCtx.cr2 = Ctx.cr2;
+                                            bXcptExpected = X86_XCPT_UD;
+                                        }
+                                        else
+                                        {
+                                            ExpectCtx.cr2.u = (uintptr_t)&pbPages[X86_PAGE_SIZE];
+                                            bXcptExpected = X86_XCPT_PF;
+                                        }
+
+                                        Bs3MemCpy(pbRip, &abOpcodes[0], cb);
+                                        Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame);
+#if 0
+                                        Bs3TestPrintf("iTest=%d iPrefix=%d (%d/%#x) iSubTab=%d iSuffix=%d (%d/%#x) iImm=%d cb=%d cbOp=%d: %.*Rhxs\n",
+                                                       iTest, iPrefix, s_aPrefixes[iPrefix].cb, s_aPrefixes[iPrefix].fIncompatible,
+                                                       iSubTab, iSuffix, paSuffixes[iSuffix].cb, paSuffixes[iSuffix].fIncompatible, iImm,
+                                                       cb, cbOpcodes,
+                                                       cbOpcodes, abOpcodes);
+#endif
+
+                                        if (   !Bs3TestCheckRegCtxEx(&TrapFrame.Ctx, &ExpectCtx, 0 /*cbPcAdjust*/,
+                                                                     0 /*cbSpAdjust*/, 0 /*fExtraEfl*/, "mode",  0)
+                                            || TrapFrame.bXcpt != bXcptExpected)
+                                        {
+                                            Bs3TestFailedF("iTest=%d iPrefix=%d (%d/%#x) iSubTab=%u iSuffix=%d (%d/%#x) cb=%d cbOp=%d: %.*Rhxs\n",
+                                                           iTest, iPrefix, s_aPrefixes[iPrefix].cb, s_aPrefixes[iPrefix].fIncompatible,
+                                                           iSubTab, iSuffix, paSuffixes[iSuffix].cb, paSuffixes[iSuffix].fIncompatible,
+                                                           cb, cbOpcodes,
+                                                           cbOpcodes, abOpcodes);
+                                            if (TrapFrame.bXcpt != bXcptExpected)
+                                                Bs3TestFailedF("Expected bXcpt=%#x got %#x\n", bXcptExpected, TrapFrame.bXcpt);
+                                            Bs3TrapPrintFrame(&TrapFrame);
+                                            Bs3Shutdown();
+                                        }
+
+                                        /* next */
+                                        g_usBs3TestStep++;
+                                        iStep++;
+                                        cb--;
+                                    }
+
+                                    /* For iImm > 0, only test cb == cbOpcodes since the immediate byte isn't included when cb < cbOpcodes. */
+                                    cbOpcodesMin = cbOpcodes;
+                                }
+                            }
+                    }
+                }
+        }
+        Bs3TestPrintf("%RU32 (%#RX32) test steps\n", iStep, iStep);
+
+        Bs3MemGuardedTestPageFree(pbPages);
+    }
+    else
+        Bs3TestFailed("Failed to allocate two pages!\n");
+}
+
+
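+#if 0
+/* A minimal usage sketch (hypothetical, not upstream code): this is roughly how
+   the table above could be handed to DecodeUdEdgeTest; the actual invocation is
+   assumed to live in the test driver. */
+static void DecodeUdEdgeTestAll(void)
+{
+    DecodeUdEdgeTest(g_aUdTest3Byte_0f_38, RT_ELEMENTS(g_aUdTest3Byte_0f_38));
+}
+#endif
+
+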
+#if 0
+/**
+ * Checks how prefixes affect cmpxchg8b and cmpxchg16b.
+ *
+ * The thing here is that the Intel opcode tables indicate that the 66h and F3h
+ * prefix encodings are reserved and cause \#UD, whereas AMD's tables don't.
+ * It seems, though, that the F2h, F3h and 66h prefixes are ignored on Skylake
+ * Intel CPUs.  Need to make sure this is the case, also in 64-bit mode and for
+ * the 16-byte version.
+ */
+static void DecodeCmpXchg8bVs16b(void)
+{
+    uint8_t BS3_FAR *pbPages;
+
+    /* Check that the instructions are supported. */
+    if (   !(g_uBs3CpuDetected & BS3CPU_F_CPUID)
+        || !(ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_CX8))
+    {
+        Bs3TestSkipped("not supported");
+        return;
+    }
+
+    /* Setup a guarded page. */
+    pbPages = Bs3MemGuardedTestPageAlloc(BS3MEMKIND_FLAT32);
+    if (pbPages)
+    {
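+        /// @todo Write the actual cmpxchg8b/cmpxchg16b prefix probes; only the
+        ///       guarded page setup/teardown skeleton is in place here.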
+
+        Bs3MemGuardedTestPageFree(pbPages);
+    }
+    else
+        Bs3TestFailed("Failed to allocate two pages!\n");
+}
+#endif
+
+
+/**
+ * Checks various prefix encodings with the MOVBE and CRC32 instructions to try
+ * to figure out how they are decoded.
+ *
+ * The issue here is that both MOVBE and CRC32 are sensitive to the operand size
+ * prefix, which helps us identify whether the F2h and F3h prefixes take
+ * precedence over 66h in this case.  (As it turned out they do, and their order
+ * doesn't matter.)
+ */
+static void DecodeMovbeVsCrc32(void)
+{
+    uint8_t BS3_FAR *pbPages;
+
+    /* Check that the instructions are supported. */
+    if (   !(g_uBs3CpuDetected & BS3CPU_F_CPUID)
+        || (ASMCpuId_ECX(1) & (X86_CPUID_FEATURE_ECX_MOVBE | X86_CPUID_FEATURE_ECX_SSE4_2))
+           !=                 (X86_CPUID_FEATURE_ECX_MOVBE | X86_CPUID_FEATURE_ECX_SSE4_2) )
+    {
+        Bs3TestSkipped("not supported");
+        return;
+    }
+
+    /* Setup a guarded page. */
+    pbPages = Bs3MemGuardedTestPageAlloc(BS3MEMKIND_FLAT32);
+    if (pbPages)
+    {
+        unsigned        iTest;
+        BS3REGCTX       Ctx;
+        BS3TRAPFRAME    TrapFrame;
+        BS3REGCTX       ExpectCtxMovbe_m32_eax; /*       0f 38 f1 /r */
+        BS3REGCTX       ExpectCtxMovbe_m16_ax;  /*    66 0f 38 f1 /r */
+        BS3REGCTX       ExpectCtxCrc32_eax_m32; /*    f2 0f 38 f1 /r */
+        BS3REGCTX       ExpectCtxCrc32_eax_m16; /* 66 f2 0f 38 f1 /r */
+        BS3REGCTX       ExpectCtxUd;
+        PBS3REGCTX      apExpectCtxs[5];
+        static const struct
+        {
+            uint32_t    u32Stored;
+            uint8_t     iExpectCtx;
+            uint8_t     bXcpt;
+            uint8_t     cbOpcodes;
+            uint8_t     abOpcodes[18];
+        } s_aTests[] =
+        {
+#define BECRC_EAX           UINT32_C(0x11223344)
+#define BECRC_MEM_ORG       UINT32_C(0x55667788)
+#define BECRC_MEM_BE16      UINT32_C(0x55664433)
+#define BECRC_MEM_BE32      UINT32_C(0x44332211)
+
+            /* base forms. */
+            { BECRC_MEM_BE32, 0, X86_XCPT_PF, 4, {             0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_BE16, 1, X86_XCPT_PF, 5, {       P_OZ, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  2, X86_XCPT_PF, 5, {       P_RN, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 6, { P_OZ, P_RN, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  4, X86_XCPT_UD, 5, {       P_RZ, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } }, /* undefined F3 (P_RZ) */
+            { BECRC_MEM_ORG,  4, X86_XCPT_UD, 6, { P_OZ, P_RZ, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } }, /* undefined F3 (P_RZ) */
+
+            /* CRC32 eax, [word ebx]: Simple variations showing it doesn't matter where the prefixes are placed. */
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 6, { P_RN, P_OZ, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 7, { P_RN, P_OZ, P_ES, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 8, { P_RN, P_SS, P_OZ, P_ES, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 8, { P_RN, P_SS, P_ES, P_OZ, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 8, { P_SS, P_RN, P_ES, P_OZ, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 8, { P_SS, P_ES, P_RN, P_OZ, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 8, { P_SS, P_ES, P_OZ, P_RN, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 8, { P_SS, P_OZ, P_ES, P_RN, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 8, { P_OZ, P_SS, P_ES, P_RN, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+
+            /* CRC32 eax, [word ebx]: Throw the F3h prefix into the mix.  The last of F3 and F2 wins on skylake+jaguar. */
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 7, { P_RZ, P_OZ, P_RN, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 7, { P_OZ, P_RZ, P_RN, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  4, X86_XCPT_UD, 7, { P_OZ, P_RN, P_RZ, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 8, { P_OZ, P_RN, P_RZ, P_RN, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 8, { P_RN, P_RZ, P_OZ, P_RN, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+            { BECRC_MEM_ORG,  3, X86_XCPT_PF, 8, { P_RN, P_RZ, P_RN, P_OZ, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+
+            { BECRC_MEM_ORG,  4, X86_XCPT_UD, 7, { P_OZ, P_RN, P_RZ, 0x0f, 0x38, 0xf1, RM_EAX_DEREF_EBX } },
+        };
+
+        apExpectCtxs[0] = &ExpectCtxMovbe_m32_eax;
+        apExpectCtxs[1] = &ExpectCtxMovbe_m16_ax;
+        apExpectCtxs[2] = &ExpectCtxCrc32_eax_m32;
+        apExpectCtxs[3] = &ExpectCtxCrc32_eax_m16;
+        apExpectCtxs[4] = &ExpectCtxUd;
+
+        Bs3MemZero(&Ctx, sizeof(Ctx));
+        Bs3MemZero(&ExpectCtxMovbe_m32_eax, sizeof(ExpectCtxMovbe_m32_eax));
+        Bs3MemZero(&ExpectCtxMovbe_m16_ax, sizeof(ExpectCtxMovbe_m16_ax));
+        Bs3MemZero(&ExpectCtxCrc32_eax_m32, sizeof(ExpectCtxCrc32_eax_m32));
+        Bs3MemZero(&ExpectCtxCrc32_eax_m16, sizeof(ExpectCtxCrc32_eax_m16));
+        Bs3MemZero(&ExpectCtxUd, sizeof(ExpectCtxUd));
+        Bs3MemZero(&TrapFrame, sizeof(TrapFrame));
+
+        /* Create a test context. */
+        Bs3RegCtxSaveEx(&Ctx, BS3_MODE_CODE_32, 512);
+        Ctx.rax.u = BECRC_EAX;
+        Ctx.rbx.u = (uintptr_t)pbPages;
+
+        /* Create expected result contexts. */
+        Bs3MemCpy(&ExpectCtxMovbe_m32_eax, &Ctx, sizeof(ExpectCtxMovbe_m32_eax));
+        ExpectCtxMovbe_m32_eax.rflags.u32 |= X86_EFL_RF;
+        ExpectCtxMovbe_m32_eax.rip.u = (uintptr_t)&pbPages[X86_PAGE_SIZE];
+        ExpectCtxMovbe_m32_eax.cr2.u = (uintptr_t)&pbPages[X86_PAGE_SIZE];
+
+        Bs3MemCpy(&ExpectCtxMovbe_m16_ax, &ExpectCtxMovbe_m32_eax, sizeof(ExpectCtxMovbe_m16_ax));
+
+        Bs3MemCpy(&ExpectCtxCrc32_eax_m32, &Ctx, sizeof(ExpectCtxCrc32_eax_m32));
+        ExpectCtxCrc32_eax_m32.rflags.u32 |= X86_EFL_RF;
+        ExpectCtxCrc32_eax_m32.rip.u = (uintptr_t)&pbPages[X86_PAGE_SIZE];
+        ExpectCtxCrc32_eax_m32.cr2.u = (uintptr_t)&pbPages[X86_PAGE_SIZE];
+        ExpectCtxCrc32_eax_m32.rax.u32 = 0x1aa7cd75;
+        Bs3MemCpy(&ExpectCtxCrc32_eax_m16, &ExpectCtxCrc32_eax_m32, sizeof(ExpectCtxCrc32_eax_m16));
+        ExpectCtxCrc32_eax_m16.rax.u32 = 0x51ab0518;
+
+        Bs3MemCpy(&ExpectCtxUd, &Ctx, sizeof(ExpectCtxUd));
+        ExpectCtxUd.rflags.u32 |= X86_EFL_RF;
+
+        /* Loop thru the tests. */
+        g_usBs3TestStep = 0;
+        for (iTest = 0; iTest < RT_ELEMENTS(s_aTests); iTest++)
+        {
+            unsigned const   cbOpcodes = s_aTests[iTest].cbOpcodes;
+            uint8_t BS3_FAR *pbRip     = &pbPages[X86_PAGE_SIZE - cbOpcodes];
+
+            Bs3MemCpy(pbRip, s_aTests[iTest].abOpcodes, cbOpcodes);
+            Bs3RegCtxSetRipCsFromFlat(&Ctx, (uintptr_t)pbRip);
+            *(uint32_t *)pbPages = BECRC_MEM_ORG;
+
+#if 0
+            Bs3TestPrintf("iTest=%d pbRip=%p cbOpcodes=%d: %.*Rhxs\n",
+                          iTest, pbRip, cbOpcodes, cbOpcodes, s_aTests[iTest].abOpcodes);
+            //Bs3RegCtxPrint(&Ctx);
+#endif
+            Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame);
+            if (s_aTests[iTest].bXcpt == X86_XCPT_UD)
+                ExpectCtxUd.rip = Ctx.rip;
+            if (   !Bs3TestCheckRegCtxEx(&TrapFrame.Ctx, apExpectCtxs[s_aTests[iTest].iExpectCtx],
+                                         0 /*cbPcAdjust*/, 0 /*cbSpAdjust*/, 0 /*fExtraEfl*/, "mode", iTest)
+                || TrapFrame.bXcpt      != s_aTests[iTest].bXcpt
+                || *(uint32_t *)pbPages != s_aTests[iTest].u32Stored)
+            {
+                Bs3TestFailedF("iTest=%d cbOpcodes=%d: %.*Rhxs\n", iTest, cbOpcodes, cbOpcodes, s_aTests[iTest].abOpcodes);
+                if (TrapFrame.bXcpt != s_aTests[iTest].bXcpt)
+                    Bs3TestFailedF("Expected bXcpt=%#x, got %#x\n", s_aTests[iTest].bXcpt, TrapFrame.bXcpt);
+                if (*(uint32_t *)pbPages != s_aTests[iTest].u32Stored)
+                    Bs3TestFailedF("Expected %#RX32 stored at %p, found: %RX32\n",
+                                   s_aTests[iTest].u32Stored, pbPages, *(uint32_t *)pbPages);
+            }
+        }
+
+        Bs3MemGuardedTestPageFree(pbPages);
+    }
+    else
+        Bs3TestFailed("Failed to allocate two pages!\n");
+}
+
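+
+#if 0
+/* A plain C model (sketch, not upstream code) of the SSE4.2 CRC32 instruction,
+   assuming the standard CRC-32C polynomial (0x82f63b78 bit-reflected).  Under
+   that assumption, decodeCrc32Model(BECRC_EAX, BECRC_MEM_ORG, 32) and
+   decodeCrc32Model(BECRC_EAX, BECRC_MEM_ORG, 16) should reproduce the
+   0x1aa7cd75 and 0x51ab0518 constants used for the CRC32 expectation
+   contexts above. */
+static uint32_t decodeCrc32Model(uint32_t uCrc, uint32_t uData, unsigned cBits)
+{
+    unsigned iBit;
+    uCrc ^= cBits == 16 ? (uint16_t)uData : uData;  /* xor the data into the CRC */
+    for (iBit = 0; iBit < cBits; iBit++)            /* one step per data bit */
+        uCrc = (uCrc >> 1) ^ (uCrc & 1 ? UINT32_C(0x82f63b78) : 0);
+    return uCrc;
+}
+#endif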
+
+
+/**
+ * Checks various prefix encodings with the CMPPS, CMPPD, CMPSS and CMPSD
+ * instructions to try to figure out how they are decoded.
+ *
+ * The important thing to check here is that, unlike with CRC32/MOVBE, the
+ * operand size prefix (66h) is ignored when the F2h and F3h prefixes are used.
+ * We also check that the prefix ordering is irrelevant and that the last one of
+ * F2h and F3h wins.
+ */
+static void DecodeCmppsCmppdCmpssCmpsd(void)
+{
+    uint8_t BS3_FAR *pbPages;
+
+    /* Check that the instructions are supported. */
+    if (   !(g_uBs3CpuDetected & BS3CPU_F_CPUID)
+        ||    (ASMCpuId_EDX(1) & (X86_CPUID_FEATURE_EDX_SSE | X86_CPUID_FEATURE_EDX_SSE2))
+           !=                    (X86_CPUID_FEATURE_EDX_SSE | X86_CPUID_FEATURE_EDX_SSE2) )
+    {
+        Bs3TestSkipped("SSE and/or SSE2 are not supported");
+        return;
+    }
+
+    /* Setup a guarded page. */
+    pbPages = Bs3MemGuardedTestPageAlloc(BS3MEMKIND_FLAT32);
+    if (pbPages)
+    {
+        unsigned        iTest;
+        BS3REGCTX       Ctx;
+        BS3TRAPFRAME    TrapFrame;
+        BS3REGCTX       ExpectCtxPf;
+        BS3REGCTX       ExpectCtxUd;
+        static const struct
+        {
+            RTUINT128U  Xmm0Expect;
+            uint8_t     bXcpt;
+            uint8_t     cbOpcodes;
+            uint8_t     abOpcodes[18];
+        } s_aTests[] =
+        {
+#define BECRC_IN_XMM1       RTUINT128_INIT_C(0x76547654bbaa9988, 0x7766554433221100)
+#define BECRC_IN_XMM0       RTUINT128_INIT_C(0x765476549988bbaa, 0x7766554400112233)
+#define BECRC_OUT_PS        RTUINT128_INIT_C(0xffffffff00000000, 0xffffffff00000000) /* No prefix. */
+#define BECRC_OUT_PD        RTUINT128_INIT_C(0x0000000000000000, 0x0000000000000000) /* P_OZ (66h) */
+#define BECRC_OUT_SS        RTUINT128_INIT_C(0x765476549988bbaa, 0x7766554400000000) /* P_RZ (f3h) */
+#define BECRC_OUT_SD        RTUINT128_INIT_C(0x765476549988bbaa, 0x0000000000000000) /* P_RN (f2h) */
+
+            /* We use imm8=0, which checks for equality, with the subvalue result being all
+               F's if equal and all zeros if not equal.  The input values are chosen such
+               that the 4 variants produce different results in xmm0. */
+            /* CMPPS xmm0, xmm1, 0:    0f c2 /r ib ; Compares four 32-bit subvalues. */
+            /* CMPPD xmm0, xmm1, 0: 66 0f c2 /r ib ; Compares two 64-bit subvalues. */
+            /* CMPSS xmm0, xmm1, 0: f3 0f c2 /r ib ; Compares one 32-bit subvalue, top 96 bits remain unchanged. */
+            /* CMPSD xmm0, xmm1, 0: f2 0f c2 /r ib ; Compares one 64-bit subvalue, top 64-bit remains unchanged. */
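+            /* Worked example for CMPPS: comparing the input dwords pairwise gives
+               76547654==76547654, 9988bbaa!=bbaa9988, 77665544==77665544 and
+               00112233!=33221100, i.e. xmm0 = ffffffff:00000000:ffffffff:00000000,
+               which is exactly BECRC_OUT_PS. */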
+
+            /* base forms. */
+            { BECRC_OUT_PS, X86_XCPT_PF, 4, {                         0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PD, X86_XCPT_PF, 5, {                   P_OZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 5, {                   P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 5, {                   P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+
+            /* Skylake+jaguar ignores the 66h prefix with both f3h (P_RZ) and f2h (P_RN). */
+            { BECRC_OUT_SS, X86_XCPT_PF, 6, {             P_OZ, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 6, {             P_RZ, P_OZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 6, {             P_OZ, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 6, {             P_RN, P_OZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+
+            /* Throw in segment prefixes and address size prefixes. */
+            { BECRC_OUT_PS, X86_XCPT_PF, 5, {                   P_ES, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PS, X86_XCPT_PF, 6, {             P_ES, P_SS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PS, X86_XCPT_PF, 5, {                   P_AZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PS, X86_XCPT_PF, 6, {             P_AZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+
+            { BECRC_OUT_PD, X86_XCPT_PF, 6, {             P_ES, P_OZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PD, X86_XCPT_PF, 6, {             P_OZ, P_ES, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PD, X86_XCPT_PF, 7, {       P_ES, P_SS, P_OZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PD, X86_XCPT_PF, 7, {       P_ES, P_OZ, P_SS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PD, X86_XCPT_PF, 7, {       P_OZ, P_ES, P_SS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PD, X86_XCPT_PF, 6, {             P_AZ, P_OZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PD, X86_XCPT_PF, 6, {             P_OZ, P_AZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PD, X86_XCPT_PF, 7, {       P_AZ, P_CS, P_OZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PD, X86_XCPT_PF, 7, {       P_AZ, P_OZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_PD, X86_XCPT_PF, 7, {       P_OZ, P_AZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+
+            { BECRC_OUT_SS, X86_XCPT_PF, 6, {             P_ES, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 6, {             P_RZ, P_ES, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 7, {       P_ES, P_SS, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 7, {       P_ES, P_RZ, P_SS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 7, {       P_RZ, P_ES, P_SS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 6, {             P_AZ, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 6, {             P_RZ, P_AZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 7, {       P_AZ, P_CS, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 7, {       P_AZ, P_RZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 7, {       P_RZ, P_AZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 8, { P_OZ, P_RZ, P_AZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 8, { P_RZ, P_OZ, P_AZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 8, { P_RZ, P_AZ, P_OZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 8, { P_RZ, P_AZ, P_CS, P_OZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+
+            { BECRC_OUT_SD, X86_XCPT_PF, 6, {             P_ES, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 6, {             P_RN, P_ES, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 7, {       P_ES, P_SS, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 7, {       P_ES, P_RN, P_SS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 7, {       P_RN, P_ES, P_SS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 6, {             P_AZ, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 6, {             P_RN, P_AZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 7, {       P_AZ, P_CS, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 7, {       P_AZ, P_RN, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 7, {       P_RN, P_AZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 8, { P_OZ, P_RN, P_AZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 8, { P_RN, P_OZ, P_AZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 8, { P_RN, P_AZ, P_OZ, P_CS, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 8, { P_RN, P_AZ, P_CS, P_OZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+
+            /* Pit F2h against F3h; on Skylake and Jaguar the last prefix wins. */
+            { BECRC_OUT_SS, X86_XCPT_PF, 6, {             P_RN, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 7, {       P_RN, P_RN, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 7, {       P_RZ, P_RN, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 7, {       P_RN, P_RZ, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 8, { P_RN, P_RN, P_RN, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 8, { P_RN, P_RN, P_RZ, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 8, { P_RN, P_RZ, P_RN, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 8, { P_RZ, P_RN, P_RN, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 8, { P_RZ, P_RZ, P_RN, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SS, X86_XCPT_PF, 8, { P_RN, P_RZ, P_RZ, P_RZ, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+
+            { BECRC_OUT_SD, X86_XCPT_PF, 6, {             P_RZ, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 7, {       P_RZ, P_RZ, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 7, {       P_RN, P_RZ, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 7, {       P_RZ, P_RN, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 8, { P_RZ, P_RZ, P_RZ, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 8, { P_RZ, P_RZ, P_RN, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 8, { P_RZ, P_RN, P_RZ, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 8, { P_RN, P_RZ, P_RZ, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 8, { P_RN, P_RN, P_RZ, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+            { BECRC_OUT_SD, X86_XCPT_PF, 8, { P_RZ, P_RN, P_RN, P_RN, 0x0f, 0xc2, RM_XMM0_XMM1, 0 } },
+        };
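+        /* Table columns, as consumed by the loop below: expected XMM0 result
+         * (Xmm0Expect), expected exception (bXcpt), opcode byte count
+         * (cbOpcodes), and the opcode bytes themselves (abOpcodes). */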
+        RTUINT128U  InXmm0 = BECRC_IN_XMM0;
+        RTUINT128U  InXmm1 = BECRC_IN_XMM1;
+        RTUINT128U  OutXmm0 = RTUINT128_INIT_C(0xeeeeeeeeeeeeeeee, 0xcccccccccccccccc);
+
+        Bs3MemZero(&Ctx, sizeof(Ctx));
+        Bs3MemZero(&ExpectCtxPf, sizeof(ExpectCtxPf));
+        Bs3MemZero(&ExpectCtxUd, sizeof(ExpectCtxUd));
+        Bs3MemZero(&TrapFrame, sizeof(TrapFrame));
+
+        /* Enable SSE. */
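+        /* Clearing CR0.EM and CR0.TS while setting CR0.MP and CR4.OSFXSR lets
+         * SSE instructions execute without raising #UD or #NM. */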
+        ASMSetCR0((ASMGetCR0() & ~(X86_CR0_EM | X86_CR0_TS)) | X86_CR0_MP);
+        ASMSetCR4(ASMGetCR4() | X86_CR4_OSFXSR);
+
+        /* Create a test context. */
+        Bs3RegCtxSaveEx(&Ctx, BS3_MODE_CODE_32, 512);
+        Ctx.rax.u = BECRC_EAX;
+        Ctx.rbx.u = (uintptr_t)pbPages;
+
+        /* Create expected result contexts. */
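+        /* The opcode bytes of each test are copied so they end exactly at the
+         * guarded page boundary; once the instruction has executed, fetching
+         * the next one faults, so RIP and CR2 both equal
+         * &pbPages[X86_PAGE_SIZE], which also verifies exactly how many bytes
+         * the CPU consumed. */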
+        Bs3MemCpy(&ExpectCtxPf, &Ctx, sizeof(ExpectCtxPf));
+        ExpectCtxPf.rflags.u32 |= X86_EFL_RF;
+        ExpectCtxPf.rip.u = (uintptr_t)&pbPages[X86_PAGE_SIZE];
+        ExpectCtxPf.cr2.u = (uintptr_t)&pbPages[X86_PAGE_SIZE];
+
+        Bs3MemCpy(&ExpectCtxUd, &Ctx, sizeof(ExpectCtxUd));
+        ExpectCtxUd.rflags.u32 |= X86_EFL_RF;
+
+        /* Loop through the tests. */
+        g_usBs3TestStep = 0;
+        for (iTest = 0; iTest < RT_ELEMENTS(s_aTests); iTest++)
+        {
+            unsigned const   cbOpcodes = s_aTests[iTest].cbOpcodes;
+            uint8_t BS3_FAR *pbRip     = &pbPages[X86_PAGE_SIZE - cbOpcodes];
+
+            Bs3MemCpy(pbRip, s_aTests[iTest].abOpcodes, cbOpcodes);
+            Bs3RegCtxSetRipCsFromFlat(&Ctx, (uintptr_t)pbRip);
+            ExpectCtxUd.rip = Ctx.rip;
+#if 0
+            Bs3TestPrintf("iTest=%d pbRip=%p cbOpcodes=%d: %.*Rhxs\n",
+                          iTest, pbRip, cbOpcodes, cbOpcodes, s_aTests[iTest].abOpcodes);
+            //Bs3RegCtxPrint(&Ctx);
+#endif
+            BS3_CMN_NM(bs3CpuDecoding1_LoadXmm0)(&InXmm0);
+            BS3_CMN_NM(bs3CpuDecoding1_LoadXmm1)(&InXmm1);
+            Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame);
+            BS3_CMN_NM(bs3CpuDecoding1_SaveXmm0)(&OutXmm0);
+
+            if (   !Bs3TestCheckRegCtxEx(&TrapFrame.Ctx, s_aTests[iTest].bXcpt == X86_XCPT_UD ? &ExpectCtxUd : &ExpectCtxPf,
+                                         0 /*cbPcAdjust*/, 0 /*cbSpAdjust*/, 0 /*fExtraEfl*/, "mode", iTest)
+                || TrapFrame.bXcpt      != s_aTests[iTest].bXcpt
+                || OutXmm0.s.Lo         != s_aTests[iTest].Xmm0Expect.s.Lo
+                || OutXmm0.s.Hi         != s_aTests[iTest].Xmm0Expect.s.Hi)
+            {
+                Bs3TestFailedF("iTest=%d cbOpcodes=%d: %.*Rhxs\n", iTest, cbOpcodes, cbOpcodes, s_aTests[iTest].abOpcodes);
+                if (TrapFrame.bXcpt != s_aTests[iTest].bXcpt)
+                    Bs3TestFailedF("Expected bXcpt=%#x, got %#x\n", s_aTests[iTest].bXcpt, TrapFrame.bXcpt);
+                if (   OutXmm0.s.Lo         != s_aTests[iTest].Xmm0Expect.s.Lo
+                    || OutXmm0.s.Hi         != s_aTests[iTest].Xmm0Expect.s.Hi)
+                    Bs3TestFailedF("Expected XMM0=%08RX32:%08RX32:%08RX32:%08RX32, not %08RX32:%08RX32:%08RX32:%08RX32\n",
+                                   s_aTests[iTest].Xmm0Expect.DWords.dw3, s_aTests[iTest].Xmm0Expect.DWords.dw2,
+                                   s_aTests[iTest].Xmm0Expect.DWords.dw1, s_aTests[iTest].Xmm0Expect.DWords.dw0,
+                                   OutXmm0.DWords.dw3, OutXmm0.DWords.dw2, OutXmm0.DWords.dw1, OutXmm0.DWords.dw0);
+            }
+        }
+
+        Bs3MemGuardedTestPageFree(pbPages);
+    }
+    else
+        Bs3TestFailed("Failed to allocate two pages!\n");
+}
+
+
 BS3_DECL(void) Main_pp32()
 {
     Bs3TestInit("bs3-cpu-decoding-1");
     Bs3TestPrintf("g_uBs3CpuDetected=%#x\n", g_uBs3CpuDetected);
 
+#if 0
+    Bs3TestSub("CMPPS, CMPPD, CMPSS, CMPSD");
+    DecodeCmppsCmppdCmpssCmpsd();
+
+    Bs3TestSub("MOVBE vs CRC32");
+    DecodeMovbeVsCrc32();
+#endif
+
+    //Bs3TestSub("CMPXCHG8B/16B");
+    //DecodeCmpXchg8bVs16b();
+
+#if 1
+    Bs3TestSub("2 byte undefined opcodes 0f");
+    DecodeUdEdgeTest(g_aUdTest2Byte_0f, RT_ELEMENTS(g_aUdTest2Byte_0f));
+#endif
+#if 0
+    Bs3TestSub("3 byte undefined opcodes 0f 38");
+    DecodeUdEdgeTest(g_aUdTest3Byte_0f_38, RT_ELEMENTS(g_aUdTest3Byte_0f_38));
+#endif
+
+#if 0
+    Bs3TestSub("misc");
     DecodeEdgeTest();
+#endif
 
     Bs3TestTerm();
-
-    //for (;;) ASMHalt();
 }
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm b/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1-asm.asm
similarity index 83%
copy from src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
copy to src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1-asm.asm
index d9ea987..cf6fa84 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1-asm.asm
@@ -1,10 +1,10 @@
-; $Id: bs3-cpu-decoding-1-asm.asm $
+; $Id: bs3-cpu-generated-1-asm.asm $
 ;; @file
-; BS3Kit - bs3-cpu-decoding-1, assembly helpers and template instantiation.
+; BS3Kit - bs3-cpu-generated-1, assembly helpers and template instantiation.
 ;
 
 ;
-; Copyright (C) 2007-2016 Oracle Corporation
+; Copyright (C) 2007-2017 Oracle Corporation
 ;
 ; This file is part of VirtualBox Open Source Edition (OSE), as
 ; available from http://www.virtualbox.org. This file is free software;
@@ -30,9 +30,5 @@
 ;*********************************************************************************************************************************
 %include "bs3kit.mac"
 
-
-;
-; Instantiate code templates.
-;
-BS3_INSTANTIATE_TEMPLATE_ESSENTIALS      "bs3-cpu-decoding-1-template.mac"
+; later maybe.
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1-data.py b/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1-data.py
new file mode 100755
index 0000000..617c62d
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1-data.py
@@ -0,0 +1,611 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# $Id: bs3-cpu-generated-1-data.py $
+# pylint: disable=invalid-name
+
+"""
+Generates testcases from @optest specifications in IEM.
+"""
+
+from __future__ import print_function;
+
+__copyright__ = \
+"""
+Copyright (C) 2017 Oracle Corporation
+
+This file is part of VirtualBox Open Source Edition (OSE), as
+available from http://www.virtualbox.org. This file is free software;
+you can redistribute it and/or modify it under the terms of the GNU
+General Public License (GPL) as published by the Free Software
+Foundation, in version 2 as it comes in the "COPYING" file of the
+VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+
+The contents of this file may alternatively be used under the terms
+of the Common Development and Distribution License Version 1.0
+(CDDL) only, as it comes in the "COPYING.CDDL" file of the
+VirtualBox OSE distribution, in which case the provisions of the
+CDDL are applicable instead of those of the GPL.
+
+You may elect to license modified versions of this file under the
+terms and conditions of either the GPL or the CDDL or both.
+"""
+__version__ = "$Revision: 114989 $"
+
+# Standard python imports.
+import os;
+import sys;
+
+# Only the main script needs to modify the path.
+g_ksValidationKitDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)));
+g_ksVmmAllDir = os.path.join(os.path.dirname(g_ksValidationKitDir), 'VMM', 'VMMAll');
+sys.path.append(g_ksVmmAllDir);
+
+import IEMAllInstructionsPython as iai; # pylint: disable=import-error
+
+
+# Python 3 hacks:
+if sys.version_info[0] >= 3:
+    long = int;     # pylint: disable=redefined-builtin,invalid-name
+
+
+class Bs3Cg1TestEncoder(object):
+    """
+    Does the encoding of a single test.
+    """
+
+    def __init__(self, fLast):
+        self.fLast      = fLast;
+        # Each list member (in all lists) is a C expression for one byte.
+        self.asHdr       = [];
+        self.asSelectors = [];
+        self.asInputs    = [];
+        self.asOutputs   = [];
+
+    @staticmethod
+    def _compileSelectors(aoSelectors): # (list(iai.TestSelector)) -> list(str)
+        """
+        Compiles a list of iai.TestSelector predicate checks.
+        Returns C byte expression strings.
+        """
+        asRet = [];
+        for oSelector in aoSelectors:
+            sConstant = oSelector.kdVariables[oSelector.sVariable][oSelector.sValue];
+            sConstant = sConstant.upper().replace('.', '_');
+            if oSelector.sOp == '==':
+                sByte = '(BS3CG1PRED_%s << BS3CG1SEL_OP_PRED_SHIFT) | BS3CG1SEL_OP_IS_TRUE' % (sConstant,);
+            elif oSelector.sOp == '!=':
+                sByte = '(BS3CG1PRED_%s << BS3CG1SEL_OP_PRED_SHIFT) | BS3CG1SEL_OP_IS_FALSE' % (sConstant,);
+            else:
+                raise Exception('Unknown selector operator: %s' % (oSelector.sOp,));
+            asRet.append(sByte);
+        return asRet;
+
+    kdSmallFields = {
+        'op1':  'BS3CG1_CTXOP_OP1',
+        'op2':  'BS3CG1_CTXOP_OP2',
+        'efl':  'BS3CG1_CTXOP_EFL',
+    };
+    kdOperators = {
+        '=':    'BS3CG1_CTXOP_ASSIGN',
+        '|=':   'BS3CG1_CTXOP_OR',
+        '&=':   'BS3CG1_CTXOP_AND',
+        '&~=':  'BS3CG1_CTXOP_AND_INV',
+    };
+    kdSmallSizes = {
+        1:      'BS3CG1_CTXOP_1_BYTE',
+        2:      'BS3CG1_CTXOP_2_BYTES',
+        4:      'BS3CG1_CTXOP_4_BYTES',
+        8:      'BS3CG1_CTXOP_8_BYTES',
+        16:     'BS3CG1_CTXOP_16_BYTES',
+        32:     'BS3CG1_CTXOP_32_BYTES',
+        12:     'BS3CG1_CTXOP_12_BYTES',
+    };
+
+    @staticmethod
+    def _amendOutputs(aoOutputs, oInstr): # type: (list(iai.TestInOut), iai.Instruction) -> list(iai.TestInOut)
+        """
+        Amends aoOutputs for instructions with special flag behaviour (undefined,
+        always set, always clear).
+
+        Undefined flags are copied from the result context as the very first
+        operation so they can be set to CPU vendor specific values later if
+        desired.
+
+        Always set or cleared flags are applied at the very end of the
+        modification operations so that we spot incorrect specifications.
+        """
+        if oInstr.asFlUndefined or oInstr.asFlClear or oInstr.asFlSet:
+            aoOutputs = list(aoOutputs);
+
+            if oInstr.asFlUndefined:
+                fFlags = oInstr.getUndefinedFlagsMask();
+                assert fFlags != 0;
+                aoOutputs.insert(0, iai.TestInOut('efl_undef', '=', str(fFlags), 'uint'));
+
+            if oInstr.asFlClear:
+                fFlags = oInstr.getClearedFlagsMask();
+                assert fFlags != 0;
+                aoOutputs.append(iai.TestInOut('efl', '&~=', str(fFlags), 'uint'));
+
+            if oInstr.asFlSet:
+                fFlags = oInstr.getSetFlagsMask();
+                assert fFlags != 0;
+                aoOutputs.append(iai.TestInOut('efl', '|=', str(fFlags), 'uint'));
+
+        return aoOutputs;
+
+    @staticmethod
+    def _compileContextModifers(aoOperations): # (list(iai.TestInOut))
+        """
+        Compile a list of iai.TestInOut context modifiers.
+        """
+        asRet = [];
+        for oOperation in aoOperations:
+            oType = iai.TestInOut.kdTypes[oOperation.sType];
+            aaoValues = oType.get(oOperation.sValue);
+            assert len(aaoValues) == 1 or len(aaoValues) == 2;
+
+            sOp = oOperation.sOp;
+            if sOp == '&|=':
+                sOp = '|=' if len(aaoValues) == 1 else '&~=';
+
+            for fSignExtend, abValue in aaoValues:
+                cbValue = len(abValue);
+
+                # The opcode byte.
+                sOpcode = Bs3Cg1TestEncoder.kdOperators[sOp];
+                sOpcode += ' | ';
+                if oOperation.sField in Bs3Cg1TestEncoder.kdSmallFields:
+                    sOpcode += Bs3Cg1TestEncoder.kdSmallFields[oOperation.sField];
+                else:
+                    sOpcode += 'BS3CG1_CTXOP_DST_ESC';
+                sOpcode += ' | ';
+                if cbValue in Bs3Cg1TestEncoder.kdSmallSizes:
+                    sOpcode += Bs3Cg1TestEncoder.kdSmallSizes[cbValue];
+                else:
+                    sOpcode += 'BS3CG1_CTXOP_SIZE_ESC';
+                if fSignExtend:
+                    sOpcode += ' | BS3CG1_CTXOP_SIGN_EXT';
+                asRet.append(sOpcode);
+
+                # Escaped field identifier.
+                if oOperation.sField not in Bs3Cg1TestEncoder.kdSmallFields:
+                    asRet.append('BS3CG1DST_%s' % (oOperation.sField.upper().replace('.', '_'),));
+
+                # Escaped size byte?
+                if cbValue not in Bs3Cg1TestEncoder.kdSmallSizes:
+                    if cbValue >= 256 or cbValue not in [ 1, 2, 4, 6, 8, 12, 16, 32, 64, 128, ]:
+                        raise Exception('Invalid value size: %s' % (cbValue,));
+                    asRet.append('0x%02x' % (cbValue,));
+
+                # The value bytes.
+                for b in abValue:
+                    asRet.append('0x%02x' % (b,));
+
+                sOp = '|=';
+
+        return asRet;
+
+    def _constructHeader(self):
+        """
+        Returns C byte expression strings for BS3CG1TESTHDR.
+        """
+        cbSelectors = len(self.asSelectors);
+        if cbSelectors >=  256:
+            raise Exception('Too many selectors: %s bytes, max 255 bytes' % (cbSelectors,))
+
+        cbInputs = len(self.asInputs);
+        if cbInputs >= 4096:
+            raise Exception('Too many input context modifiers: %s bytes, max 4095 bytes' % (cbInputs,))
+
+        cbOutputs = len(self.asOutputs);
+        if cbOutputs >= 2048:
+            raise Exception('Too many output context modifiers: %s bytes, max 2047 bytes' % (cbOutputs,))
+
+        return [
+            '%#04x' % (cbSelectors,),                                     # 8-bit
+            '%#05x & 0xff' % (cbInputs,),                                 # first 8 bits of cbInputs
+            '(%#05x >> 8) | ((%#05x & 0xf) << 4)' % (cbInputs, cbOutputs,),  # last 4 bits of cbInputs, lower 4 bits of cbOutputs.
+            '(%#05x >> 4) | (%#05x << 7)' % (cbOutputs, self.fLast),         # last 7 bits of cbOutputs and 1 bit fLast.
+        ];
+
+    def encodeTest(self, oTest): # type: (iai.InstructionTest)
+        """
+        Does the encoding.
+        """
+        self.asSelectors = self._compileSelectors(oTest.aoSelectors);
+        self.asInputs    = self._compileContextModifers(oTest.aoInputs);
+        self.asOutputs   = self._compileContextModifers(self._amendOutputs(oTest.aoOutputs, oTest.oInstr));
+        self.asHdr       = self._constructHeader();
+
+
+class Bs3Cg1EncodedTests(object):
+    """
+    Encodes the tests for an instruction.
+    """
+
+    def __init__(self, oInstr):
+        self.offTests       = -1;
+        self.cbTests        = 0;
+        self.asLines        = [];       # type: list(str)
+        self.aoInstructions = [];       # type: list(iai.Instruction)
+
+        # Encode the tests.
+        for iTest, oTest in enumerate(oInstr.aoTests):
+            oEncodedTest = Bs3Cg1TestEncoder(iTest + 1 == len(oInstr.aoTests));
+            oEncodedTest.encodeTest(oTest);
+
+            self.cbTests += len(oEncodedTest.asHdr) + len(oEncodedTest.asSelectors) \
+                          + len(oEncodedTest.asInputs) + len(oEncodedTest.asOutputs);
+
+            self.asLines.append('    /* test #%s: %s */' % (iTest, oTest,));
+            self.asLines += self.bytesToLines('             ', oEncodedTest.asHdr);
+            if oEncodedTest.asSelectors:
+                self.asLines += self.bytesToLines('    /*sel:*/ ', oEncodedTest.asSelectors);
+            if oEncodedTest.asInputs:
+                self.asLines += self.bytesToLines('    /* in:*/ ', oEncodedTest.asInputs);
+            if oEncodedTest.asOutputs:
+                self.asLines += self.bytesToLines('    /*out:*/ ', oEncodedTest.asOutputs);
+
+    @staticmethod
+    def bytesToLines(sPrefix, asBytes):
+        """
+        Formats a series of bytes into one or more lines.
+        A byte ending with a newline indicates that we should start a new line,
+        and prefix it by len(sPrefix) spaces.
+
+        Returns list of lines.
+        """
+        asRet = [];
+        sLine = sPrefix;
+        for sByte in asBytes:
+            if sByte[-1] == '\n':
+                sLine += sByte[:-1] + ',';
+                asRet.append(sLine);
+                sLine = ' ' * len(sPrefix);
+            else:
+                if len(sLine) + 2 + len(sByte) > 132 and len(sLine) > len(sPrefix):
+                    asRet.append(sLine[:-1]);
+                    sLine = ' ' * len(sPrefix);
+                sLine += sByte + ', ';
+
+        if len(sLine) > len(sPrefix):
+            asRet.append(sLine);
+        return asRet;
+
+
+    def isEqual(self, oOther):
+        """ Compares two encoded tests. """
+        if self.cbTests != oOther.cbTests:
+            return False;
+        if len(self.asLines) != len(oOther.asLines):
+            return False;
+        for iLine, sLines in enumerate(self.asLines):
+            if sLines != oOther.asLines[iLine]:
+                return False;
+        return True;
+
+
+
+class Bs3Cg1Instruction(object):
+    """
+    An instruction with tests.
+    """
+
+    def __init__(self, oMap, oInstr, oTests):
+        self.oMap   = oMap;             # type: iai.InstructionMap
+        self.oInstr = oInstr;           # type: iai.Instruction
+        self.oTests = oTests;           # type: Bs3Cg1EncodedTests
+
+        self.asOpcodes          = oMap.asLeadOpcodes + [ '0x%02x' % (oInstr.getOpcodeByte(),) ];
+        self.sEncoding          = iai.g_kdEncodings[oInstr.sEncoding][0];
+        for oOp in oInstr.aoOperands:
+            self.sEncoding     += '_' + oOp.sType;
+        if oInstr.fUnused:
+            if oInstr.sInvalidStyle == 'immediate' and oInstr.sSubOpcode:
+                self.sEncoding += '_MOD_EQ_3' if oInstr.sSubOpcode == '11 mr/reg' else '_MOD_NE_3';
+
+        self.asFlags            = [];
+        if 'invalid_64' in oInstr.dHints:
+            self.asFlags.append('BS3CG1INSTR_F_INVALID_64BIT');
+        if oInstr.fUnused:
+            self.asFlags.append('BS3CG1INSTR_F_UNUSED');
+        elif oInstr.fInvalid:
+            self.asFlags.append('BS3CG1INSTR_F_INVALID');
+        if oInstr.sInvalidStyle and oInstr.sInvalidStyle.startswith('intel-'):
+            self.asFlags.append('BS3CG1INSTR_F_INTEL_DECODES_INVALID');
+
+        self.fAdvanceMnemonic   = True; ##< Set by the caller.
+        if oInstr.sPrefix:
+            if oInstr.sPrefix == 'none':
+                self.sPfxKind = 'BS3CG1PFXKIND_NO_F2_F3_66';
+            else:
+                self.sPfxKind = 'BS3CG1PFXKIND_REQ_' + oInstr.sPrefix[-2:].upper();
+        elif oInstr.sEncoding == 'ModR/M':
+            if 'ignores_op_size' not in oInstr.dHints:
+                self.sPfxKind   = 'BS3CG1PFXKIND_MODRM';
+            else:
+                self.sPfxKind   = 'BS3CG1PFXKIND_MODRM_NO_OP_SIZES';
+        else:
+            self.sPfxKind       = '0';
+
+        self.sCpu = 'BS3CG1CPU_';
+        assert len(oInstr.asCpuIds) in [0, 1], str(oInstr);
+        if oInstr.asCpuIds:
+            self.sCpu += oInstr.asCpuIds[0].upper();
+        elif oInstr.sMinCpu:
+            self.sCpu += 'GE_' + oInstr.sMinCpu;
+        else:
+            self.sCpu += 'ANY';
+
+        if oInstr.sXcptType:
+            self.sXcptType = 'BS3CG1XCPTTYPE_' + oInstr.sXcptType.upper();
+        else:
+            self.sXcptType = 'BS3CG1XCPTTYPE_NONE';
+
+    def getOperands(self):
+        """ Returns comma separated string of operand values for g_abBs3Cg1Operands. """
+        return ', '.join(['(uint8_t)BS3CG1OP_%s' % (oOp.sType,) for oOp in self.oInstr.aoOperands]);
+
+    def getInstructionEntry(self):
+        """ Returns an array of BS3CG1INSTR member initializers. """
+        sOperands = ', '.join([oOp.sType for oOp in self.oInstr.aoOperands]);
+        if sOperands:
+            sOperands = ' /* ' + sOperands + ' */';
+        return [
+            '        /* cbOpcodes = */        %s, /* %s */' % (len(self.asOpcodes), ' '.join(self.asOpcodes),),
+            '        /* cOperands = */        %s,%s' % (len(self.oInstr.aoOperands), sOperands,),
+            '        /* cchMnemonic = */      %s, /* %s */' % (len(self.oInstr.sMnemonic), self.oInstr.sMnemonic,),
+            '        /* fAdvanceMnemonic = */ %s,' % ('true' if self.fAdvanceMnemonic else 'false',),
+            '        /* offTests = */         %s,' % (self.oTests.offTests,),
+            '        /* enmEncoding = */      (unsigned)%s,' % (self.sEncoding,),
+            '        /* enmPrefixKind = */    (unsigned)%s,' % (self.sPfxKind,),
+            '        /* enmCpuTest = */       (unsigned)%s,' % (self.sCpu,),
+            '        /* enmXcptType = */      (unsigned)%s,' % (self.sXcptType,),
+            '        /* uUnused = */          0,',
+            '        /* fFlags = */           %s' % (' | '.join(self.asFlags) if self.asFlags else '0'),
+        ];
+
+
+class Bs3CpuGenerated1Generator(object):
+    """
+    The generator code for bs3-cpu-generated-1.
+    """
+
+    def __init__(self):
+        self.aoInstructions = [];       # type: list(Bs3Cg1Instruction)
+        self.aoTests        = [];       # type: list(Bs3Cg1EncodedTests)
+        self.cbTests        = 0;
+
+    def addTests(self, oTests, oInstr): # type: (Bs3Cg1EncodedTests, iai.Instruction) -> Bs3Cg1EncodedTests
+        """
+        Adds oTests to self.aoTests, setting the oTests.offTests member.
+        Checks for and eliminates duplicates.
+        Returns the tests to use.
+        """
+        # Check for duplicates.
+        for oExisting in self.aoTests:
+            if oTests.isEqual(oExisting):
+                oExisting.aoInstructions.append(oInstr);
+                return oExisting;
+
+        # New test, so add it.
+        oTests.offTests = self.cbTests;
+        self.aoTests.append(oTests);
+        self.cbTests   += oTests.cbTests;
+
+        assert not oTests.aoInstructions;
+        oTests.aoInstructions.append(oInstr);
+
+        return oTests;
+
+    def processInstruction(self):
+        """
+        Processes the IEM specified instructions.
+        Returns success indicator.
+        """
+
+        #
+        # Group instructions by mnemonic to reduce the number of sub-tests.
+        #
+        for oInstr in sorted(iai.g_aoAllInstructions,
+                             key = lambda oInstr: oInstr.sMnemonic + ''.join([oOp.sType for oOp in oInstr.aoOperands])
+                                                                   + (oInstr.sOpcode if oInstr.sOpcode else 'zz')):
+            if oInstr.aoTests:
+                oTests = Bs3Cg1EncodedTests(oInstr);
+                oTests = self.addTests(oTests, oInstr);
+
+                for oMap in oInstr.aoMaps:
+                    self.aoInstructions.append(Bs3Cg1Instruction(oMap, oInstr, oTests));
+
+        # Set fAdvanceMnemonic.
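+        # It is set on the last entry carrying a given mnemonic, so the C side
+        # advances its pointer into g_achBs3Cg1Mnemonics only once per group
+        # (e.g. three consecutive hypothetical 'xyz' entries become
+        # false, false, true).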
+        for iInstr, oInstr in enumerate(self.aoInstructions):
+            oInstr.fAdvanceMnemonic = iInstr + 1 >= len(self.aoInstructions) \
+                                   or oInstr.oInstr.sMnemonic != self.aoInstructions[iInstr + 1].oInstr.sMnemonic;
+
+        return True;
+
+    def generateCode(self, oOut):
+        """
+        Generates the C code.
+        Returns success indicator.
+        """
+
+        # First, a file header.
+        asLines = [
+            '/*',
+            ' * Autogenerated by  $Id: bs3-cpu-generated-1-data.py $ ',
+            ' * Do not edit!',
+            ' */',
+            '',
+            '/*',
+            ' * Copyright (C) 2017 Oracle Corporation',
+            ' *',
+            ' * This file is part of VirtualBox Open Source Edition (OSE), as',
+            ' * available from http://www.virtualbox.org. This file is free software;',
+            ' * you can redistribute it and/or modify it under the terms of the GNU',
+            ' * General Public License (GPL) as published by the Free Software',
+            ' * Foundation, in version 2 as it comes in the "COPYING" file of the',
+            ' * VirtualBox OSE distribution. VirtualBox OSE is distributed in the',
+            ' * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.',
+            ' * ',
+            ' * The contents of this file may alternatively be used under the terms',
+            ' * of the Common Development and Distribution License Version 1.0',
+            ' * (CDDL) only, as it comes in the "COPYING.CDDL" file of the',
+            ' * VirtualBox OSE distribution, in which case the provisions of the',
+            ' * CDDL are applicable instead of those of the GPL.',
+            ' * ',
+            ' * You may elect to license modified versions of this file under the',
+            ' * terms and conditions of either the GPL or the CDDL or both.',
+            ' */',
+            '',
+            '',
+            '#include "bs3-cpu-generated-1.h"',
+            '',
+            '',
+            '#pragma data_seg ("BS3DATA16")',
+        ];
+
+        # Generate the g_achBs3Cg1Mnemonics array.
+        asLines += [
+            'const char BS3_FAR_DATA g_achBs3Cg1Mnemonics[] = ',
+            '{',
+        ];
+        fAdvanceMnemonic = True;
+        for oInstr in self.aoInstructions:
+            if fAdvanceMnemonic:
+                asLines.append('    \"%s\"' % (oInstr.oInstr.sMnemonic,));
+            fAdvanceMnemonic = oInstr.fAdvanceMnemonic;
+        asLines += [
+            '};',
+            '',
+            '',
+        ];
+
+        # Generate the g_abBs3Cg1Opcodes array.
+        asLines += [
+            'const uint8_t BS3_FAR_DATA g_abBs3Cg1Opcodes[] = ',
+            '{',
+        ];
+        for oInstr in self.aoInstructions:
+            asLines.append('    ' + ', '.join(oInstr.asOpcodes) + ',');
+        asLines += [
+            '};',
+            '',
+            '',
+        ];
+
+        # Generate the g_abBs3Cg1Operands array.
+        asLines += [
+            'const uint8_t BS3_FAR_DATA g_abBs3Cg1Operands[] = ',
+            '{',
+        ];
+        cOperands = 0;
+        for oInstr in self.aoInstructions:
+            if oInstr.oInstr.aoOperands:
+                cOperands += len(oInstr.oInstr.aoOperands);
+                asLines.append('    ' + oInstr.getOperands() + ', /* %s */' % (oInstr.oInstr.sStats,));
+            else:
+                asLines.append('    /* none */');
+        if not cOperands:
+            asLines.append('    0 /* dummy */');
+        asLines += [
+            '};',
+            '',
+            '',
+        ];
+
+        # Generate the g_aBs3Cg1Instructions array.
+        asLines += [
+            'const BS3CG1INSTR BS3_FAR_DATA g_aBs3Cg1Instructions[] = ',
+            '{',
+        ];
+        for oInstr in self.aoInstructions:
+            asLines.append('    {');
+            asLines += oInstr.getInstructionEntry();
+            asLines.append('    },');
+        asLines += [
+            '};',
+            'const uint16_t BS3_FAR_DATA g_cBs3Cg1Instructions = RT_ELEMENTS(g_aBs3Cg1Instructions);',
+            '',
+            '',
+        ];
+
+        # Generate the g_abBs3Cg1Tests array.
+        asLines += [
+            'const uint8_t BS3_FAR_DATA g_abBs3Cg1Tests[] = ',
+            '{',
+        ];
+        for oTests in self.aoTests:
+            asLines.append('    /*');
+            asLines.append('     * offTests=%s' % (oTests.offTests,));
+            asLines.append('     * Instructions: %s' % (', '.join([oInstr.sStats for oInstr in oTests.aoInstructions]),));
+            asLines.append('     */');
+            asLines += oTests.asLines;
+        asLines += [
+            '};',
+            '',
+        ];
+
+
+        #/** The test data that BS3CG1INSTR.offTests refers to.
+        # * In order to simplify generating these, we use a byte array. */
+        #extern const uint8_t BS3_FAR_DATA   g_abBs3Cg1Tests[];
+
+
+        oOut.write('\n'.join(asLines));
+        return True;
+
+
+    def usage(self):
+        """ Prints usage. """
+        print('usage: bs3-cpu-generated-1-data.py [output file|-]');
+        return 0;
+
+    def main(self, asArgs):
+        """
+        C-like main function.
+        Returns exit code.
+        """
+
+        #
+        # Quick argument parsing.
+        #
+        if len(asArgs) == 1:
+            sOutFile = '-';
+        elif len(asArgs) != 2:
+            print('syntax error! Expected at most one argument.');
+            return 2;
+        elif asArgs[1] in [ '-h', '-?', '--help' ]:
+            return self.usage();
+        else:
+            sOutFile = asArgs[1];
+
+        #
+        # Process the instructions specified in the IEM sources.
+        #
+        if self.processInstruction():
+
+            #
+            # Open the output file and generate the code.
+            #
+            if sOutFile == '-':
+                oOut = sys.stdout;
+            else:
+                try:
+                    oOut = open(sOutFile, 'w');
+                except Exception as oXcpt:
+                    print('error! Failed to open "%s" for writing: %s' % (sOutFile, oXcpt,));
+                    return 1;
+            if self.generateCode(oOut):
+                return 0;
+
+        return 1;
+
+
+if __name__ == '__main__':
+    sys.exit(Bs3CpuGenerated1Generator().main(sys.argv));
+
+
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1-template.c b/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1-template.c
new file mode 100644
index 0000000..e94f4ba
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1-template.c
@@ -0,0 +1,3829 @@
+/* $Id: bs3-cpu-generated-1-template.c $ */
+/** @file
+ * BS3Kit - bs3-cpu-generated-1, C code template.
+ */
+
+/*
+ * Copyright (C) 2007-2017 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+#ifndef BS3_INSTANTIATING_CMN
+# error "BS3_INSTANTIATING_CMN not defined"
+#endif
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <iprt/asm.h>
+#include <iprt/asm-amd64-x86.h>
+
+#include "bs3-cpu-generated-1.h"
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+#define P_CS        X86_OP_PRF_CS
+#define P_SS        X86_OP_PRF_SS
+#define P_DS        X86_OP_PRF_DS
+#define P_ES        X86_OP_PRF_ES
+#define P_FS        X86_OP_PRF_FS
+#define P_GS        X86_OP_PRF_GS
+#define P_OZ        X86_OP_PRF_SIZE_OP
+#define P_AZ        X86_OP_PRF_SIZE_ADDR
+#define P_LK        X86_OP_PRF_LOCK
+#define P_RN        X86_OP_PRF_REPNZ
+#define P_RZ        X86_OP_PRF_REPZ
+
+#define REX_WRBX    (X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B | X86_OP_REX_X)
+#define REX_W___    (X86_OP_REX_W)
+#define REX_WR__    (X86_OP_REX_W | X86_OP_REX_R)
+#define REX_W_B_    (X86_OP_REX_W | X86_OP_REX_B)
+#define REX_W__X    (X86_OP_REX_W | X86_OP_REX_X)
+#define REX_WRB_    (X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B)
+#define REX_WR_X    (X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_X)
+#define REX_W_BX    (X86_OP_REX_W | X86_OP_REX_B | X86_OP_REX_X)
+#define REX__R__    (X86_OP_REX_R)
+#define REX__RB_    (X86_OP_REX_R | X86_OP_REX_B)
+#define REX__R_X    (X86_OP_REX_R | X86_OP_REX_X)
+#define REX__RBX    (X86_OP_REX_R | X86_OP_REX_B | X86_OP_REX_X)
+#define REX___B_    (X86_OP_REX_B)
+#define REX___BX    (X86_OP_REX_B | X86_OP_REX_X)
+#define REX____X    (X86_OP_REX_X)
+#define REX_____    (0x40)
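+/* Naming: the four positions after REX_ stand for the W, R, B and X bits; an
+ * underscore means that bit is clear, so REX_W_B_ sets W and B only, while
+ * REX_____ is the bare 0x40 prefix byte. */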
+
+
+/** @def  BS3CG1_DPRINTF
+ * Debug print macro.
+ */
+#if 0
+# define BS3CG1_DPRINTF(a_ArgList) Bs3TestPrintf a_ArgList
+# define BS3CG1_DEBUG_CTX_MOD
+#else
+# define BS3CG1_DPRINTF(a_ArgList) do { } while (0)
+#endif
+
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+/** Operand value location. */
+typedef enum BS3CG1OPLOC
+{
+    BS3CG1OPLOC_INVALID = 0,
+    BS3CG1OPLOC_CTX,
+    BS3CG1OPLOC_IMM,
+    BS3CG1OPLOC_MEM,
+    BS3CG1OPLOC_MEM_RW,
+    BS3CG1OPLOC_END
+} BS3CG1OPLOC;
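+/* Note: judging by the names and the aOperands::off docs below,
+ * BS3CG1OPLOC_MEM is a read-only memory operand whereas BS3CG1OPLOC_MEM_RW
+ * is read-write; both are placed at the end of the data page. */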
+
+
+/**
+ * The state.
+ */
+typedef struct BS3CG1STATE
+{
+    /** @name Instruction details (expanded from BS3CG1INSTR).
+     * @{ */
+    /** Pointer to the mnemonic string (not terminated) (g_achBs3Cg1Mnemonics). */
+    const char BS3_FAR     *pchMnemonic;
+    /** Pointer to the test header. */
+    PCBS3CG1TESTHDR         pTestHdr;
+    /** Pointer to the per operand flags (g_abBs3Cg1Operands). */
+    const uint8_t BS3_FAR  *pabOperands;
+    /** Opcode bytes (g_abBs3Cg1Opcodes). */
+    const uint8_t BS3_FAR  *pabOpcodes;
+    /** The current instruction number in the input array (for error reporting). */
+    uint32_t                iInstr;
+
+    /** The instruction flags. */
+    uint32_t                fFlags;
+    /** The encoding. */
+    BS3CG1ENC               enmEncoding;
+    /** The non-invalid encoding.  This differs from enmEncoding when
+     * Bs3Cg1CalcNoneIntelInvalidEncoding has been called. */
+    BS3CG1ENC               enmEncodingNonInvalid;
+    /** The CPU test / CPU ID. */
+    BS3CG1CPU               enmCpuTest;
+    /** Prefix sensitivity and requirements. */
+    BS3CG1PFXKIND           enmPrefixKind;
+    /** Exception type (SSE, AVX). */
+    BS3CG1XCPTTYPE          enmXcptType;
+    /** Operand types (BS3CG1OP values). */
+    BS3CG1OP                aenmOperands[4];
+    /** Opcode bytes. */
+    uint8_t                 abOpcodes[4];
+
+    /** The length of the mnemonic. */
+    uint8_t                 cchMnemonic;
+    /** Whether to advance the mnemonic pointer or not. */
+    uint8_t                 fAdvanceMnemonic;
+    /** The number of opcode bytes.   */
+    uint8_t                 cbOpcodes;
+    /** Number of operands. */
+    uint8_t                 cOperands;
+    /** @} */
+
+    /** Operand size in bytes (0 if not applicable). */
+    uint8_t                 cbOperand;
+    /** Current target ring (0..3). */
+    uint8_t                 uCpl;
+
+    /** The current test number. */
+    uint8_t                 iTest;
+
+    /** Target mode (g_bBs3CurrentMode).  */
+    uint8_t                 bMode;
+    /** The CPU vendor (BS3CPUVENDOR). */
+    uint8_t                 bCpuVendor;
+    /** First ring being tested. */
+    uint8_t                 iFirstRing;
+    /** End of rings being tested. */
+    uint8_t                 iEndRing;
+
+
+    /** @name Current encoded instruction.
+     * @{ */
+    /** The size of the current instruction that we're testing. */
+    uint8_t                 cbCurInstr;
+    /** The size of the prefixes. */
+    uint8_t                 cbCurPrefix;
+    /** The offset into abCurInstr of the immediate. */
+    uint8_t                 offCurImm;
+    /** Buffer for assembling the current instruction. */
+    uint8_t                 abCurInstr[23];
+
+    /** Set if the encoding can't be tested in the same ring as this test code.
+     *  This is used to deal with encodings modifying SP/ESP/RSP. */
+    bool                    fSameRingNotOkay;
+    /** Whether to work the extended context too. */
+    bool                    fWorkExtCtx;
+    /** The aOperands index of the modrm.reg operand (if applicable). */
+    uint8_t                 iRegOp;
+    /** The aOperands index of the modrm.rm operand (if applicable). */
+    uint8_t                 iRmOp;
+
+    /** Operands details. */
+    struct
+    {
+        uint8_t             cbOp;
+        /** BS3CG1OPLOC_XXX. */
+        uint8_t             enmLocation;
+        /** The BS3CG1DST value for this field.
+         * Set to BS3CG1DST_INVALID if memory or immediate.  */
+        uint8_t             idxField;
+        /** Depends on enmLocation.
+         * - BS3CG1OPLOC_IMM:       offset relative to start of the instruction.
+         * - BS3CG1OPLOC_MEM:       offset should be subtracted from &pbDataPg[_4K].
+         * - BS3CG1OPLOC_MEM_RW:    offset should be subtracted from &pbDataPg[_4K].
+         * - BS3CG1OPLOC_CTX:       not used (use idxField instead).
+         */
+        uint8_t             off;
+    } aOperands[4];
+    /** @} */
+
+    /** Page to put code in.  When paging is enabled, the page before and after
+     * are marked not-present. */
+    uint8_t BS3_FAR        *pbCodePg;
+    /** The flat address corresponding to pbCodePg.  */
+    uintptr_t               uCodePgFlat;
+    /** The 16-bit address corresponding to pbCodePg if relevant for bMode.  */
+    RTFAR16                 CodePgFar;
+    /** The IP/EIP/RIP value for pbCodePg[0] relative to CS (bMode). */
+    uintptr_t               CodePgRip;
+
+    /** Page for placing data operands in.  When paging is enabled, the page before
+     * and after are marked not-present.  */
+    uint8_t BS3_FAR        *pbDataPg;
+    /** The flat address corresponding to pbDataPg.  */
+    uintptr_t               uDataPgFlat;
+    /** The 16-bit address corresponding to pbDataPg.  */
+    RTFAR16                 DataPgFar;
+
+    /** The name corresponding to bMode. */
+    const char BS3_FAR     *pszMode;
+    /** The short name corresponding to bMode. */
+    const char BS3_FAR     *pszModeShort;
+
+    /** @name Expected result (modifiable by output program).
+     * @{ */
+    /** The expected exception based on operand values or result.
+     * UINT8_MAX if no special exception expected. */
+    uint8_t                 bValueXcpt;
+    /** @} */
+    /** Alignment exception expected by the encoder.
+     * UINT8_MAX if no special exception expected. */
+    uint8_t                 bAlignmentXcpt;
+    /** Set by the encoding method to indicate an invalid encoding. */
+    bool                    fInvalidEncoding;
+
+    /** The context we're working on. */
+    BS3REGCTX               Ctx;
+    /** The trap context and frame. */
+    BS3TRAPFRAME            TrapFrame;
+    /** Initial contexts, one for each ring. */
+    BS3REGCTX               aInitialCtxs[4];
+
+    /** The extended context we're working on (input, expected output). */
+    PBS3EXTCTX              pExtCtx;
+    /** The extended result context (analogous to TrapFrame). */
+    PBS3EXTCTX              pResultExtCtx;
+    /** The initial extended context. */
+    PBS3EXTCTX              pInitialExtCtx;
+
+    /** Memory operand scratch space. */
+    union
+    {
+        uint8_t             ab[128];
+        uint16_t            au16[128 / sizeof(uint16_t)];
+        uint32_t            au32[128 / sizeof(uint32_t)];
+        uint64_t            au64[128 / sizeof(uint64_t)];
+    } MemOp;
+
+    /** Array parallel to aInitialCtxs for saving segment registers. */
+    struct
+    {
+        RTSEL               ds;
+    } aSavedSegRegs[4];
+
+} BS3CG1STATE;
+/** Pointer to the generated test state. */
+typedef BS3CG1STATE *PBS3CG1STATE;
+
+
+#define BS3CG1_PF_OZ  UINT16_C(0x0001)
+#define BS3CG1_PF_AZ  UINT16_C(0x0002)
+#define BS3CG1_PF_CS  UINT16_C(0x0004)
+#define BS3CG1_PF_DS  UINT16_C(0x0008)
+#define BS3CG1_PF_ES  UINT16_C(0x0010)
+#define BS3CG1_PF_FS  UINT16_C(0x0020)
+#define BS3CG1_PF_GS  UINT16_C(0x0040)
+#define BS3CG1_PF_SS  UINT16_C(0x0080)
+#define BS3CG1_PF_SEGS (BS3CG1_PF_CS | BS3CG1_PF_DS | BS3CG1_PF_ES | BS3CG1_PF_FS | BS3CG1_PF_GS | BS3CG1_PF_SS)
+#define BS3CG1_PF_MEM  (BS3CG1_PF_SEGS | BS3CG1_PF_AZ)
+#define BS3CG1_PF_LK  UINT16_C(0x0100)
+#define BS3CG1_PF_RN  UINT16_C(0x0200)
+#define BS3CG1_PF_RZ  UINT16_C(0x0400)
+#define BS3CG1_PF_W   UINT16_C(0x0800) /**< REX.W */
+#define BS3CG1_PF_R   UINT16_C(0x1000) /**< REX.R */
+#define BS3CG1_PF_B   UINT16_C(0x2000) /**< REX.B */
+#define BS3CG1_PF_X   UINT16_C(0x4000) /**< REX.X */
+
+
+/** Used in g_acbBs3Cg1DstFields to indicate that it's one of the 4 operands. */
+#define BS3CG1DSTSIZE_OPERAND               UINT8_C(255)
+/** Used in g_acbBs3Cg1DstFields to indicate that the operand size determines
+ * the field size (2, 4, or 8). */
+#define BS3CG1DSTSIZE_OPERAND_SIZE_GRP      UINT8_C(254)
+
+
+
+/*********************************************************************************************************************************
+*   Global Variables                                                                                                             *
+*********************************************************************************************************************************/
+/** Destination field sizes indexed by BS3CG1DST value.
+ * The special BS3CG1DSTSIZE_OPERAND* values mark operand-sized entries.  */
+static const uint8_t g_acbBs3Cg1DstFields[] =
+{
+    /* [BS3CG1DST_INVALID] = */ BS3CG1DSTSIZE_OPERAND,
+
+    /* [BS3CG1DST_OP1] = */     BS3CG1DSTSIZE_OPERAND,
+    /* [BS3CG1DST_OP2] = */     BS3CG1DSTSIZE_OPERAND,
+    /* [BS3CG1DST_OP3] = */     BS3CG1DSTSIZE_OPERAND,
+    /* [BS3CG1DST_OP4] = */     BS3CG1DSTSIZE_OPERAND,
+    /* [BS3CG1DST_EFL] = */     4,
+    /* [BS3CG1DST_EFL_UNDEF]=*/ 4,
+
+    /* [BS3CG1DST_AL] = */      1,
+    /* [BS3CG1DST_CL] = */      1,
+    /* [BS3CG1DST_DL] = */      1,
+    /* [BS3CG1DST_BL] = */      1,
+    /* [BS3CG1DST_AH] = */      1,
+    /* [BS3CG1DST_CH] = */      1,
+    /* [BS3CG1DST_DH] = */      1,
+    /* [BS3CG1DST_BH] = */      1,
+    /* [BS3CG1DST_SPL] = */     1,
+    /* [BS3CG1DST_BPL] = */     1,
+    /* [BS3CG1DST_SIL] = */     1,
+    /* [BS3CG1DST_DIL] = */     1,
+    /* [BS3CG1DST_R8L] = */     1,
+    /* [BS3CG1DST_R9L] = */     1,
+    /* [BS3CG1DST_R10L] = */    1,
+    /* [BS3CG1DST_R11L] = */    1,
+    /* [BS3CG1DST_R12L] = */    1,
+    /* [BS3CG1DST_R13L] = */    1,
+    /* [BS3CG1DST_R14L] = */    1,
+    /* [BS3CG1DST_R15L] = */    1,
+
+    /* [BS3CG1DST_AX] = */      2,
+    /* [BS3CG1DST_CX] = */      2,
+    /* [BS3CG1DST_DX] = */      2,
+    /* [BS3CG1DST_BX] = */      2,
+    /* [BS3CG1DST_SP] = */      2,
+    /* [BS3CG1DST_BP] = */      2,
+    /* [BS3CG1DST_SI] = */      2,
+    /* [BS3CG1DST_DI] = */      2,
+    /* [BS3CG1DST_R8W] = */     2,
+    /* [BS3CG1DST_R9W] = */     2,
+    /* [BS3CG1DST_R10W] = */    2,
+    /* [BS3CG1DST_R11W] = */    2,
+    /* [BS3CG1DST_R12W] = */    2,
+    /* [BS3CG1DST_R13W] = */    2,
+    /* [BS3CG1DST_R14W] = */    2,
+    /* [BS3CG1DST_R15W] = */    2,
+
+    /* [BS3CG1DST_EAX] = */     4,
+    /* [BS3CG1DST_ECX] = */     4,
+    /* [BS3CG1DST_EDX] = */     4,
+    /* [BS3CG1DST_EBX] = */     4,
+    /* [BS3CG1DST_ESP] = */     4,
+    /* [BS3CG1DST_EBP] = */     4,
+    /* [BS3CG1DST_ESI] = */     4,
+    /* [BS3CG1DST_EDI] = */     4,
+    /* [BS3CG1DST_R8D] = */     4,
+    /* [BS3CG1DST_R9D] = */     4,
+    /* [BS3CG1DST_R10D] = */    4,
+    /* [BS3CG1DST_R11D] = */    4,
+    /* [BS3CG1DST_R12D] = */    4,
+    /* [BS3CG1DST_R13D] = */    4,
+    /* [BS3CG1DST_R14D] = */    4,
+    /* [BS3CG1DST_R15D] = */    4,
+
+    /* [BS3CG1DST_RAX] = */     8,
+    /* [BS3CG1DST_RCX] = */     8,
+    /* [BS3CG1DST_RDX] = */     8,
+    /* [BS3CG1DST_RBX] = */     8,
+    /* [BS3CG1DST_RSP] = */     8,
+    /* [BS3CG1DST_RBP] = */     8,
+    /* [BS3CG1DST_RSI] = */     8,
+    /* [BS3CG1DST_RDI] = */     8,
+    /* [BS3CG1DST_R8] = */      8,
+    /* [BS3CG1DST_R9] = */      8,
+    /* [BS3CG1DST_R10] = */     8,
+    /* [BS3CG1DST_R11] = */     8,
+    /* [BS3CG1DST_R12] = */     8,
+    /* [BS3CG1DST_R13] = */     8,
+    /* [BS3CG1DST_R14] = */     8,
+    /* [BS3CG1DST_R15] = */     8,
+
+    /* [BS3CG1DST_OZ_RAX] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_RCX] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_RDX] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_RBX] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_RSP] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_RBP] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_RSI] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_RDI] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_R8] = */   BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_R9] = */   BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_R10] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_R11] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_R12] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_R13] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_R14] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+    /* [BS3CG1DST_OZ_R15] = */  BS3CG1DSTSIZE_OPERAND_SIZE_GRP,
+
+    /* [BS3CG1DST_CR0] = */     4,
+    /* [BS3CG1DST_CR4] = */     4,
+    /* [BS3CG1DST_XCR0] = */    8,
+
+    /* [BS3CG1DST_FCW] = */         2,
+    /* [BS3CG1DST_FSW] = */         2,
+    /* [BS3CG1DST_FTW] = */         2,
+    /* [BS3CG1DST_FOP] = */         2,
+    /* [BS3CG1DST_FPUIP] = */       2,
+    /* [BS3CG1DST_FPUCS] = */       2,
+    /* [BS3CG1DST_FPUDP] = */       2,
+    /* [BS3CG1DST_FPUDS] = */       2,
+    /* [BS3CG1DST_MXCSR] = */       4,
+    /* [BS3CG1DST_ST0] = */         12,
+    /* [BS3CG1DST_ST1] = */         12,
+    /* [BS3CG1DST_ST2] = */         12,
+    /* [BS3CG1DST_ST3] = */         12,
+    /* [BS3CG1DST_ST4] = */         12,
+    /* [BS3CG1DST_ST5] = */         12,
+    /* [BS3CG1DST_ST6] = */         12,
+    /* [BS3CG1DST_ST7] = */         12,
+    /* [BS3CG1DST_MM0] = */         8,
+    /* [BS3CG1DST_MM1] = */         8,
+    /* [BS3CG1DST_MM2] = */         8,
+    /* [BS3CG1DST_MM3] = */         8,
+    /* [BS3CG1DST_MM4] = */         8,
+    /* [BS3CG1DST_MM5] = */         8,
+    /* [BS3CG1DST_MM6] = */         8,
+    /* [BS3CG1DST_MM7] = */         8,
+    /* [BS3CG1DST_XMM0] = */        16,
+    /* [BS3CG1DST_XMM1] = */        16,
+    /* [BS3CG1DST_XMM2] = */        16,
+    /* [BS3CG1DST_XMM3] = */        16,
+    /* [BS3CG1DST_XMM4] = */        16,
+    /* [BS3CG1DST_XMM5] = */        16,
+    /* [BS3CG1DST_XMM6] = */        16,
+    /* [BS3CG1DST_XMM7] = */        16,
+    /* [BS3CG1DST_XMM8] = */        16,
+    /* [BS3CG1DST_XMM9] = */        16,
+    /* [BS3CG1DST_XMM10] = */       16,
+    /* [BS3CG1DST_XMM11] = */       16,
+    /* [BS3CG1DST_XMM12] = */       16,
+    /* [BS3CG1DST_XMM13] = */       16,
+    /* [BS3CG1DST_XMM14] = */       16,
+    /* [BS3CG1DST_XMM15] = */       16,
+    /* [BS3CG1DST_XMM0_LO] = */     8,
+    /* [BS3CG1DST_XMM1_LO] = */     8,
+    /* [BS3CG1DST_XMM2_LO] = */     8,
+    /* [BS3CG1DST_XMM3_LO] = */     8,
+    /* [BS3CG1DST_XMM4_LO] = */     8,
+    /* [BS3CG1DST_XMM5_LO] = */     8,
+    /* [BS3CG1DST_XMM6_LO] = */     8,
+    /* [BS3CG1DST_XMM7_LO] = */     8,
+    /* [BS3CG1DST_XMM8_LO] = */     8,
+    /* [BS3CG1DST_XMM9_LO] = */     8,
+    /* [BS3CG1DST_XMM10_LO] = */    8,
+    /* [BS3CG1DST_XMM11_LO] = */    8,
+    /* [BS3CG1DST_XMM12_LO] = */    8,
+    /* [BS3CG1DST_XMM13_LO] = */    8,
+    /* [BS3CG1DST_XMM14_LO] = */    8,
+    /* [BS3CG1DST_XMM15_LO] = */    8,
+    /* [BS3CG1DST_XMM0_HI] = */     8,
+    /* [BS3CG1DST_XMM1_HI] = */     8,
+    /* [BS3CG1DST_XMM2_HI] = */     8,
+    /* [BS3CG1DST_XMM3_HI] = */     8,
+    /* [BS3CG1DST_XMM4_HI] = */     8,
+    /* [BS3CG1DST_XMM5_HI] = */     8,
+    /* [BS3CG1DST_XMM6_HI] = */     8,
+    /* [BS3CG1DST_XMM7_HI] = */     8,
+    /* [BS3CG1DST_XMM8_HI] = */     8,
+    /* [BS3CG1DST_XMM9_HI] = */     8,
+    /* [BS3CG1DST_XMM10_HI] = */    8,
+    /* [BS3CG1DST_XMM11_HI] = */    8,
+    /* [BS3CG1DST_XMM12_HI] = */    8,
+    /* [BS3CG1DST_XMM13_HI] = */    8,
+    /* [BS3CG1DST_XMM14_HI] = */    8,
+    /* [BS3CG1DST_XMM15_HI] = */    8,
+    /* [BS3CG1DST_XMM0_LO_ZX] = */  8,
+    /* [BS3CG1DST_XMM1_LO_ZX] = */  8,
+    /* [BS3CG1DST_XMM2_LO_ZX] = */  8,
+    /* [BS3CG1DST_XMM3_LO_ZX] = */  8,
+    /* [BS3CG1DST_XMM4_LO_ZX] = */  8,
+    /* [BS3CG1DST_XMM5_LO_ZX] = */  8,
+    /* [BS3CG1DST_XMM6_LO_ZX] = */  8,
+    /* [BS3CG1DST_XMM7_LO_ZX] = */  8,
+    /* [BS3CG1DST_XMM8_LO_ZX] = */  8,
+    /* [BS3CG1DST_XMM9_LO_ZX] = */  8,
+    /* [BS3CG1DST_XMM10_LO_ZX] = */ 8,
+    /* [BS3CG1DST_XMM11_LO_ZX] = */ 8,
+    /* [BS3CG1DST_XMM12_LO_ZX] = */ 8,
+    /* [BS3CG1DST_XMM13_LO_ZX] = */ 8,
+    /* [BS3CG1DST_XMM14_LO_ZX] = */ 8,
+    /* [BS3CG1DST_XMM15_LO_ZX] = */ 8,
+    /* [BS3CG1DST_XMM0_DW0] = */    4,
+    /* [BS3CG1DST_XMM1_DW0] = */    4,
+    /* [BS3CG1DST_XMM2_DW0] = */    4,
+    /* [BS3CG1DST_XMM3_DW0] = */    4,
+    /* [BS3CG1DST_XMM4_DW0] = */    4,
+    /* [BS3CG1DST_XMM5_DW0] = */    4,
+    /* [BS3CG1DST_XMM6_DW0] = */    4,
+    /* [BS3CG1DST_XMM7_DW0] = */    4,
+    /* [BS3CG1DST_XMM8_DW0] = */    4,
+    /* [BS3CG1DST_XMM9_DW0] = */    4,
+    /* [BS3CG1DST_XMM10_DW0] = */   4,
+    /* [BS3CG1DST_XMM11_DW0] = */   4,
+    /* [BS3CG1DST_XMM12_DW0] = */   4,
+    /* [BS3CG1DST_XMM13_DW0] = */   4,
+    /* [BS3CG1DST_XMM14_DW0] = */   4,
+    /* [BS3CG1DST_XMM15_DW0] = */   4,
+    /* [BS3CG1DST_XMM0_DW0_ZX] = */ 4,
+    /* [BS3CG1DST_XMM1_DW0_ZX] = */ 4,
+    /* [BS3CG1DST_XMM2_DW0_ZX] = */ 4,
+    /* [BS3CG1DST_XMM3_DW0_ZX] = */ 4,
+    /* [BS3CG1DST_XMM4_DW0_ZX] = */ 4,
+    /* [BS3CG1DST_XMM5_DW0_ZX] = */ 4,
+    /* [BS3CG1DST_XMM6_DW0_ZX] = */ 4,
+    /* [BS3CG1DST_XMM7_DW0_ZX] = */ 4,
+    /* [BS3CG1DST_XMM8_DW0_ZX] = */ 4,
+    /* [BS3CG1DST_XMM9_DW0_ZX] = */ 4,
+    /* [BS3CG1DST_XMM10_DW0_ZX] =*/ 4,
+    /* [BS3CG1DST_XMM11_DW0_ZX] =*/ 4,
+    /* [BS3CG1DST_XMM12_DW0_ZX] =*/ 4,
+    /* [BS3CG1DST_XMM13_DW0_ZX] =*/ 4,
+    /* [BS3CG1DST_XMM14_DW0_ZX] =*/ 4,
+    /* [BS3CG1DST_XMM15_DW0_ZX] =*/ 4,
+    /* [BS3CG1DST_YMM0] = */        32,
+    /* [BS3CG1DST_YMM1] = */        32,
+    /* [BS3CG1DST_YMM2] = */        32,
+    /* [BS3CG1DST_YMM3] = */        32,
+    /* [BS3CG1DST_YMM4] = */        32,
+    /* [BS3CG1DST_YMM5] = */        32,
+    /* [BS3CG1DST_YMM6] = */        32,
+    /* [BS3CG1DST_YMM7] = */        32,
+    /* [BS3CG1DST_YMM8] = */        32,
+    /* [BS3CG1DST_YMM9] = */        32,
+    /* [BS3CG1DST_YMM10] = */       32,
+    /* [BS3CG1DST_YMM11] = */       32,
+    /* [BS3CG1DST_YMM12] = */       32,
+    /* [BS3CG1DST_YMM13] = */       32,
+    /* [BS3CG1DST_YMM14] = */       32,
+    /* [BS3CG1DST_YMM15] = */       32,
+
+    /* [BS3CG1DST_VALUE_XCPT] = */ 1,
+};
+AssertCompile(RT_ELEMENTS(g_acbBs3Cg1DstFields) == BS3CG1DST_END);
+
+/** Destination field offsets indexed by BS3CG1DST value.
+ * ~0U marks special entries that have no direct context offset.  */
+static const unsigned g_aoffBs3Cg1DstFields[] =
+{
+    /* [BS3CG1DST_INVALID] = */     ~0U,
+    /* [BS3CG1DST_OP1] = */         ~0U,
+    /* [BS3CG1DST_OP2] = */         ~0U,
+    /* [BS3CG1DST_OP3] = */         ~0U,
+    /* [BS3CG1DST_OP4] = */         ~0U,
+    /* [BS3CG1DST_EFL] = */         RT_OFFSETOF(BS3REGCTX, rflags),
+    /* [BS3CG1DST_EFL_UNDEF]=*/     ~0U, /* special field */
+
+    /* [BS3CG1DST_AL] = */          RT_OFFSETOF(BS3REGCTX, rax.u8),
+    /* [BS3CG1DST_CL] = */          RT_OFFSETOF(BS3REGCTX, rcx.u8),
+    /* [BS3CG1DST_DL] = */          RT_OFFSETOF(BS3REGCTX, rdx.u8),
+    /* [BS3CG1DST_BL] = */          RT_OFFSETOF(BS3REGCTX, rbx.u8),
+    /* [BS3CG1DST_AH] = */          RT_OFFSETOF(BS3REGCTX, rax.b.bHi),
+    /* [BS3CG1DST_CH] = */          RT_OFFSETOF(BS3REGCTX, rcx.b.bHi),
+    /* [BS3CG1DST_DH] = */          RT_OFFSETOF(BS3REGCTX, rdx.b.bHi),
+    /* [BS3CG1DST_BH] = */          RT_OFFSETOF(BS3REGCTX, rbx.b.bHi),
+    /* [BS3CG1DST_SPL] = */         RT_OFFSETOF(BS3REGCTX, rsp.u8),
+    /* [BS3CG1DST_BPL] = */         RT_OFFSETOF(BS3REGCTX, rbp.u8),
+    /* [BS3CG1DST_SIL] = */         RT_OFFSETOF(BS3REGCTX, rsi.u8),
+    /* [BS3CG1DST_DIL] = */         RT_OFFSETOF(BS3REGCTX, rdi.u8),
+    /* [BS3CG1DST_R8L] = */         RT_OFFSETOF(BS3REGCTX, r8.u8),
+    /* [BS3CG1DST_R9L] = */         RT_OFFSETOF(BS3REGCTX, r9.u8),
+    /* [BS3CG1DST_R10L] = */        RT_OFFSETOF(BS3REGCTX, r10.u8),
+    /* [BS3CG1DST_R11L] = */        RT_OFFSETOF(BS3REGCTX, r11.u8),
+    /* [BS3CG1DST_R12L] = */        RT_OFFSETOF(BS3REGCTX, r12.u8),
+    /* [BS3CG1DST_R13L] = */        RT_OFFSETOF(BS3REGCTX, r13.u8),
+    /* [BS3CG1DST_R14L] = */        RT_OFFSETOF(BS3REGCTX, r14.u8),
+    /* [BS3CG1DST_R15L] = */        RT_OFFSETOF(BS3REGCTX, r15.u8),
+
+    /* [BS3CG1DST_AX] = */          RT_OFFSETOF(BS3REGCTX, rax.u16),
+    /* [BS3CG1DST_CX] = */          RT_OFFSETOF(BS3REGCTX, rcx.u16),
+    /* [BS3CG1DST_DX] = */          RT_OFFSETOF(BS3REGCTX, rdx.u16),
+    /* [BS3CG1DST_BX] = */          RT_OFFSETOF(BS3REGCTX, rbx.u16),
+    /* [BS3CG1DST_SP] = */          RT_OFFSETOF(BS3REGCTX, rsp.u16),
+    /* [BS3CG1DST_BP] = */          RT_OFFSETOF(BS3REGCTX, rbp.u16),
+    /* [BS3CG1DST_SI] = */          RT_OFFSETOF(BS3REGCTX, rsi.u16),
+    /* [BS3CG1DST_DI] = */          RT_OFFSETOF(BS3REGCTX, rdi.u16),
+    /* [BS3CG1DST_R8W] = */         RT_OFFSETOF(BS3REGCTX, r8.u16),
+    /* [BS3CG1DST_R9W] = */         RT_OFFSETOF(BS3REGCTX, r9.u16),
+    /* [BS3CG1DST_R10W] = */        RT_OFFSETOF(BS3REGCTX, r10.u16),
+    /* [BS3CG1DST_R11W] = */        RT_OFFSETOF(BS3REGCTX, r11.u16),
+    /* [BS3CG1DST_R12W] = */        RT_OFFSETOF(BS3REGCTX, r12.u16),
+    /* [BS3CG1DST_R13W] = */        RT_OFFSETOF(BS3REGCTX, r13.u16),
+    /* [BS3CG1DST_R14W] = */        RT_OFFSETOF(BS3REGCTX, r14.u16),
+    /* [BS3CG1DST_R15W] = */        RT_OFFSETOF(BS3REGCTX, r15.u16),
+
+    /* [BS3CG1DST_EAX] = */         RT_OFFSETOF(BS3REGCTX, rax.u32),
+    /* [BS3CG1DST_ECX] = */         RT_OFFSETOF(BS3REGCTX, rcx.u32),
+    /* [BS3CG1DST_EDX] = */         RT_OFFSETOF(BS3REGCTX, rdx.u32),
+    /* [BS3CG1DST_EBX] = */         RT_OFFSETOF(BS3REGCTX, rbx.u32),
+    /* [BS3CG1DST_ESP] = */         RT_OFFSETOF(BS3REGCTX, rsp.u32),
+    /* [BS3CG1DST_EBP] = */         RT_OFFSETOF(BS3REGCTX, rbp.u32),
+    /* [BS3CG1DST_ESI] = */         RT_OFFSETOF(BS3REGCTX, rsi.u32),
+    /* [BS3CG1DST_EDI] = */         RT_OFFSETOF(BS3REGCTX, rdi.u32),
+    /* [BS3CG1DST_R8D] = */         RT_OFFSETOF(BS3REGCTX, r8.u32),
+    /* [BS3CG1DST_R9D] = */         RT_OFFSETOF(BS3REGCTX, r9.u32),
+    /* [BS3CG1DST_R10D] = */        RT_OFFSETOF(BS3REGCTX, r10.u32),
+    /* [BS3CG1DST_R11D] = */        RT_OFFSETOF(BS3REGCTX, r11.u32),
+    /* [BS3CG1DST_R12D] = */        RT_OFFSETOF(BS3REGCTX, r12.u32),
+    /* [BS3CG1DST_R13D] = */        RT_OFFSETOF(BS3REGCTX, r13.u32),
+    /* [BS3CG1DST_R14D] = */        RT_OFFSETOF(BS3REGCTX, r14.u32),
+    /* [BS3CG1DST_R15D] = */        RT_OFFSETOF(BS3REGCTX, r15.u32),
+
+    /* [BS3CG1DST_RAX] = */         RT_OFFSETOF(BS3REGCTX, rax.u64),
+    /* [BS3CG1DST_RCX] = */         RT_OFFSETOF(BS3REGCTX, rcx.u64),
+    /* [BS3CG1DST_RDX] = */         RT_OFFSETOF(BS3REGCTX, rdx.u64),
+    /* [BS3CG1DST_RBX] = */         RT_OFFSETOF(BS3REGCTX, rbx.u64),
+    /* [BS3CG1DST_RSP] = */         RT_OFFSETOF(BS3REGCTX, rsp.u64),
+    /* [BS3CG1DST_RBP] = */         RT_OFFSETOF(BS3REGCTX, rbp.u64),
+    /* [BS3CG1DST_RSI] = */         RT_OFFSETOF(BS3REGCTX, rsi.u64),
+    /* [BS3CG1DST_RDI] = */         RT_OFFSETOF(BS3REGCTX, rdi.u64),
+    /* [BS3CG1DST_R8] = */          RT_OFFSETOF(BS3REGCTX, r8.u64),
+    /* [BS3CG1DST_R9] = */          RT_OFFSETOF(BS3REGCTX, r9.u64),
+    /* [BS3CG1DST_R10] = */         RT_OFFSETOF(BS3REGCTX, r10.u64),
+    /* [BS3CG1DST_R11] = */         RT_OFFSETOF(BS3REGCTX, r11.u64),
+    /* [BS3CG1DST_R12] = */         RT_OFFSETOF(BS3REGCTX, r12.u64),
+    /* [BS3CG1DST_R13] = */         RT_OFFSETOF(BS3REGCTX, r13.u64),
+    /* [BS3CG1DST_R14] = */         RT_OFFSETOF(BS3REGCTX, r14.u64),
+    /* [BS3CG1DST_R15] = */         RT_OFFSETOF(BS3REGCTX, r15.u64),
+
+    /* [BS3CG1DST_OZ_RAX] = */      RT_OFFSETOF(BS3REGCTX, rax),
+    /* [BS3CG1DST_OZ_RCX] = */      RT_OFFSETOF(BS3REGCTX, rcx),
+    /* [BS3CG1DST_OZ_RDX] = */      RT_OFFSETOF(BS3REGCTX, rdx),
+    /* [BS3CG1DST_OZ_RBX] = */      RT_OFFSETOF(BS3REGCTX, rbx),
+    /* [BS3CG1DST_OZ_RSP] = */      RT_OFFSETOF(BS3REGCTX, rsp),
+    /* [BS3CG1DST_OZ_RBP] = */      RT_OFFSETOF(BS3REGCTX, rbp),
+    /* [BS3CG1DST_OZ_RSI] = */      RT_OFFSETOF(BS3REGCTX, rsi),
+    /* [BS3CG1DST_OZ_RDI] = */      RT_OFFSETOF(BS3REGCTX, rdi),
+    /* [BS3CG1DST_OZ_R8] = */       RT_OFFSETOF(BS3REGCTX, r8),
+    /* [BS3CG1DST_OZ_R9] = */       RT_OFFSETOF(BS3REGCTX, r9),
+    /* [BS3CG1DST_OZ_R10] = */      RT_OFFSETOF(BS3REGCTX, r10),
+    /* [BS3CG1DST_OZ_R11] = */      RT_OFFSETOF(BS3REGCTX, r11),
+    /* [BS3CG1DST_OZ_R12] = */      RT_OFFSETOF(BS3REGCTX, r12),
+    /* [BS3CG1DST_OZ_R13] = */      RT_OFFSETOF(BS3REGCTX, r13),
+    /* [BS3CG1DST_OZ_R14] = */      RT_OFFSETOF(BS3REGCTX, r14),
+    /* [BS3CG1DST_OZ_R15] = */      RT_OFFSETOF(BS3REGCTX, r15),
+
+    /* [BS3CG1DST_CR0] = */         RT_OFFSETOF(BS3REGCTX, cr0),
+    /* [BS3CG1DST_CR4] = */         RT_OFFSETOF(BS3REGCTX, cr4),
+    /* [BS3CG1DST_XCR0] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, fXcr0Saved),
+
+    /* [BS3CG1DST_FCW] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.FCW),
+    /* [BS3CG1DST_FSW] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.FSW),
+    /* [BS3CG1DST_FTW] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.FTW),
+    /* [BS3CG1DST_FOP] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.FOP),
+    /* [BS3CG1DST_FPUIP] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.FPUIP),
+    /* [BS3CG1DST_FPUCS] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.CS),
+    /* [BS3CG1DST_FPUDP] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.FPUDP),
+    /* [BS3CG1DST_FPUDS] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.DS),
+    /* [BS3CG1DST_MXCSR] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.MXCSR),
+    /* [BS3CG1DST_ST0] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[0]),
+    /* [BS3CG1DST_ST1] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[1]),
+    /* [BS3CG1DST_ST2] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[2]),
+    /* [BS3CG1DST_ST3] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[3]),
+    /* [BS3CG1DST_ST4] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[4]),
+    /* [BS3CG1DST_ST5] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[5]),
+    /* [BS3CG1DST_ST6] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[6]),
+    /* [BS3CG1DST_ST7] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[7]),
+    /* [BS3CG1DST_MM0] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[0]),
+    /* [BS3CG1DST_MM1] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[1]),
+    /* [BS3CG1DST_MM2] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[2]),
+    /* [BS3CG1DST_MM3] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[3]),
+    /* [BS3CG1DST_MM4] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[4]),
+    /* [BS3CG1DST_MM5] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[5]),
+    /* [BS3CG1DST_MM6] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[6]),
+    /* [BS3CG1DST_MM7] = */         sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aRegs[7]),
+
+    /* [BS3CG1DST_XMM0] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[0]),
+    /* [BS3CG1DST_XMM1] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[1]),
+    /* [BS3CG1DST_XMM2] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[2]),
+    /* [BS3CG1DST_XMM3] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[3]),
+    /* [BS3CG1DST_XMM4] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[4]),
+    /* [BS3CG1DST_XMM5] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[5]),
+    /* [BS3CG1DST_XMM6] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[6]),
+    /* [BS3CG1DST_XMM7] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[7]),
+    /* [BS3CG1DST_XMM8] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[8]),
+    /* [BS3CG1DST_XMM9] = */        sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[9]),
+    /* [BS3CG1DST_XMM10] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[10]),
+    /* [BS3CG1DST_XMM11] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[11]),
+    /* [BS3CG1DST_XMM12] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[12]),
+    /* [BS3CG1DST_XMM13] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[13]),
+    /* [BS3CG1DST_XMM14] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[14]),
+    /* [BS3CG1DST_XMM15] = */       sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[15]),
+    /* [BS3CG1DST_XMM0_LO] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[0]),
+    /* [BS3CG1DST_XMM1_LO] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[1]),
+    /* [BS3CG1DST_XMM2_LO] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[2]),
+    /* [BS3CG1DST_XMM3_LO] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[3]),
+    /* [BS3CG1DST_XMM4_LO] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[4]),
+    /* [BS3CG1DST_XMM5_LO] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[5]),
+    /* [BS3CG1DST_XMM6_LO] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[6]),
+    /* [BS3CG1DST_XMM7_LO] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[7]),
+    /* [BS3CG1DST_XMM8_LO] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[8]),
+    /* [BS3CG1DST_XMM9_LO] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[9]),
+    /* [BS3CG1DST_XMM10_LO] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[10]),
+    /* [BS3CG1DST_XMM11_LO] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[11]),
+    /* [BS3CG1DST_XMM12_LO] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[12]),
+    /* [BS3CG1DST_XMM13_LO] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[13]),
+    /* [BS3CG1DST_XMM14_LO] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[14]),
+    /* [BS3CG1DST_XMM15_LO] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[15]),
+    /* [BS3CG1DST_XMM0_HI] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[0])  + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM1_HI] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[1])  + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM2_HI] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[2])  + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM3_HI] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[3])  + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM4_HI] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[4])  + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM5_HI] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[5])  + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM6_HI] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[6])  + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM7_HI] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[7])  + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM8_HI] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[8])  + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM9_HI] = */     sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[9])  + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM10_HI] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[10]) + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM11_HI] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[11]) + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM12_HI] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[12]) + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM13_HI] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[13]) + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM14_HI] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[14]) + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM15_HI] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[15]) + sizeof(uint64_t),
+    /* [BS3CG1DST_XMM0_LO_ZX] = */  sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[0]),
+    /* [BS3CG1DST_XMM1_LO_ZX] = */  sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[1]),
+    /* [BS3CG1DST_XMM2_LO_ZX] = */  sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[2]),
+    /* [BS3CG1DST_XMM3_LO_ZX] = */  sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[3]),
+    /* [BS3CG1DST_XMM4_LO_ZX] = */  sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[4]),
+    /* [BS3CG1DST_XMM5_LO_ZX] = */  sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[5]),
+    /* [BS3CG1DST_XMM6_LO_ZX] = */  sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[6]),
+    /* [BS3CG1DST_XMM7_LO_ZX] = */  sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[7]),
+    /* [BS3CG1DST_XMM8_LO_ZX] = */  sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[8]),
+    /* [BS3CG1DST_XMM9_LO_ZX] = */  sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[9]),
+    /* [BS3CG1DST_XMM10_LO_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[10]),
+    /* [BS3CG1DST_XMM11_LO_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[11]),
+    /* [BS3CG1DST_XMM12_LO_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[12]),
+    /* [BS3CG1DST_XMM13_LO_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[13]),
+    /* [BS3CG1DST_XMM14_LO_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[14]),
+    /* [BS3CG1DST_XMM15_LO_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[15]),
+    /* [BS3CG1DST_XMM0_DW0] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[0]),
+    /* [BS3CG1DST_XMM1_DW0] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[1]),
+    /* [BS3CG1DST_XMM2_DW0] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[2]),
+    /* [BS3CG1DST_XMM3_DW0] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[3]),
+    /* [BS3CG1DST_XMM4_DW0] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[4]),
+    /* [BS3CG1DST_XMM5_DW0] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[5]),
+    /* [BS3CG1DST_XMM6_DW0] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[6]),
+    /* [BS3CG1DST_XMM7_DW0] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[7]),
+    /* [BS3CG1DST_XMM8_DW0] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[8]),
+    /* [BS3CG1DST_XMM9_DW0] = */    sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[9]),
+    /* [BS3CG1DST_XMM10_DW0] = */   sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[10]),
+    /* [BS3CG1DST_XMM11_DW0] = */   sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[11]),
+    /* [BS3CG1DST_XMM12_DW0] = */   sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[12]),
+    /* [BS3CG1DST_XMM13_DW0] = */   sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[13]),
+    /* [BS3CG1DST_XMM14_DW0] = */   sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[14]),
+    /* [BS3CG1DST_XMM15_DW0] = */   sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[15]),
+    /* [BS3CG1DST_XMM0_DW0_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[0]),
+    /* [BS3CG1DST_XMM1_DW0_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[1]),
+    /* [BS3CG1DST_XMM2_DW0_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[2]),
+    /* [BS3CG1DST_XMM3_DW0_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[3]),
+    /* [BS3CG1DST_XMM4_DW0_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[4]),
+    /* [BS3CG1DST_XMM5_DW0_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[5]),
+    /* [BS3CG1DST_XMM6_DW0_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[6]),
+    /* [BS3CG1DST_XMM7_DW0_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[7]),
+    /* [BS3CG1DST_XMM8_DW0_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[8]),
+    /* [BS3CG1DST_XMM9_DW0_ZX] = */ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[9]),
+    /* [BS3CG1DST_XMM10_DW0_ZX] =*/ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[10]),
+    /* [BS3CG1DST_XMM11_DW0_ZX] =*/ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[11]),
+    /* [BS3CG1DST_XMM12_DW0_ZX] =*/ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[12]),
+    /* [BS3CG1DST_XMM13_DW0_ZX] =*/ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[13]),
+    /* [BS3CG1DST_XMM14_DW0_ZX] =*/ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[14]),
+    /* [BS3CG1DST_XMM15_DW0_ZX] =*/ sizeof(BS3REGCTX) + RT_OFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[15]),
+
+    /* [BS3CG1DST_YMM0] = */        ~0U,
+    /* [BS3CG1DST_YMM1] = */        ~0U,
+    /* [BS3CG1DST_YMM2] = */        ~0U,
+    /* [BS3CG1DST_YMM3] = */        ~0U,
+    /* [BS3CG1DST_YMM4] = */        ~0U,
+    /* [BS3CG1DST_YMM5] = */        ~0U,
+    /* [BS3CG1DST_YMM6] = */        ~0U,
+    /* [BS3CG1DST_YMM7] = */        ~0U,
+    /* [BS3CG1DST_YMM8] = */        ~0U,
+    /* [BS3CG1DST_YMM9] = */        ~0U,
+    /* [BS3CG1DST_YMM10] = */       ~0U,
+    /* [BS3CG1DST_YMM11] = */       ~0U,
+    /* [BS3CG1DST_YMM12] = */       ~0U,
+    /* [BS3CG1DST_YMM13] = */       ~0U,
+    /* [BS3CG1DST_YMM14] = */       ~0U,
+    /* [BS3CG1DST_YMM15] = */       ~0U,
+
+    /* [BS3CG1DST_VALUE_XCPT] = */  ~0U,
+};
+AssertCompile(RT_ELEMENTS(g_aoffBs3Cg1DstFields) == BS3CG1DST_END);
+
+#ifdef BS3CG1_DEBUG_CTX_MOD
+/** Destination field names. */
+static const struct { char sz[12]; } g_aszBs3Cg1DstFields[] =
+{
+    { "INVALID" },
+    { "OP1" },
+    { "OP2" },
+    { "OP3" },
+    { "OP4" },
+    { "EFL" },
+    { "EFL_UND" },
+
+    { "AL" },
+    { "CL" },
+    { "DL" },
+    { "BL" },
+    { "AH" },
+    { "CH" },
+    { "DH" },
+    { "BH" },
+    { "SPL" },
+    { "BPL" },
+    { "SIL" },
+    { "DIL" },
+    { "R8L" },
+    { "R9L" },
+    { "R10L" },
+    { "R11L" },
+    { "R12L" },
+    { "R13L" },
+    { "R14L" },
+    { "R15L" },
+
+    { "AX" },
+    { "CX" },
+    { "DX" },
+    { "BX" },
+    { "SP" },
+    { "BP" },
+    { "SI" },
+    { "DI" },
+    { "R8W" },
+    { "R9W" },
+    { "R10W" },
+    { "R11W" },
+    { "R12W" },
+    { "R13W" },
+    { "R14W" },
+    { "R15W" },
+
+    { "EAX" },
+    { "ECX" },
+    { "EDX" },
+    { "EBX" },
+    { "ESP" },
+    { "EBP" },
+    { "ESI" },
+    { "EDI" },
+    { "R8D" },
+    { "R9D" },
+    { "R10D" },
+    { "R11D" },
+    { "R12D" },
+    { "R13D" },
+    { "R14D" },
+    { "R15D" },
+
+    { "RAX" },
+    { "RCX" },
+    { "RDX" },
+    { "RBX" },
+    { "RSP" },
+    { "RBP" },
+    { "RSI" },
+    { "RDI" },
+    { "R8"  },
+    { "R9"  },
+    { "R10" },
+    { "R11" },
+    { "R12" },
+    { "R13" },
+    { "R14" },
+    { "R15" },
+
+    { "OZ_RAX" },
+    { "OZ_RCX" },
+    { "OZ_RDX" },
+    { "OZ_RBX" },
+    { "OZ_RSP" },
+    { "OZ_RBP" },
+    { "OZ_RSI" },
+    { "OZ_RDI" },
+    { "OZ_R8"  },
+    { "OZ_R9"  },
+    { "OZ_R10" },
+    { "OZ_R11" },
+    { "OZ_R12" },
+    { "OZ_R13" },
+    { "OZ_R14" },
+    { "OZ_R15" },
+
+    { "CR0" },
+    { "CR4" },
+    { "XCR0" },
+
+    { "FCW" },
+    { "FSW" },
+    { "FTW" },
+    { "FOP" },
+    { "FPUIP" },
+    { "FPUCS" },
+    { "FPUDP" },
+    { "FPUDS" },
+    { "MXCSR" },
+    { "ST0" },
+    { "ST1" },
+    { "ST2" },
+    { "ST3" },
+    { "ST4" },
+    { "ST5" },
+    { "ST6" },
+    { "ST7" },
+    { "MM0" },
+    { "MM1" },
+    { "MM2" },
+    { "MM3" },
+    { "MM4" },
+    { "MM5" },
+    { "MM6" },
+    { "MM7" },
+    { "XMM0" },
+    { "XMM1" },
+    { "XMM2" },
+    { "XMM3" },
+    { "XMM4" },
+    { "XMM5" },
+    { "XMM6" },
+    { "XMM7" },
+    { "XMM8" },
+    { "XMM9" },
+    { "XMM10" },
+    { "XMM11" },
+    { "XMM12" },
+    { "XMM13" },
+    { "XMM14" },
+    { "XMM15" },
+    { "XMM0_LO" },
+    { "XMM1_LO" },
+    { "XMM2_LO" },
+    { "XMM3_LO" },
+    { "XMM4_LO" },
+    { "XMM5_LO" },
+    { "XMM6_LO" },
+    { "XMM7_LO" },
+    { "XMM8_LO" },
+    { "XMM9_LO" },
+    { "XMM10_LO" },
+    { "XMM11_LO" },
+    { "XMM12_LO" },
+    { "XMM13_LO" },
+    { "XMM14_LO" },
+    { "XMM15_LO" },
+    { "XMM0_HI" },
+    { "XMM1_HI" },
+    { "XMM2_HI" },
+    { "XMM3_HI" },
+    { "XMM4_HI" },
+    { "XMM5_HI" },
+    { "XMM6_HI" },
+    { "XMM7_HI" },
+    { "XMM8_HI" },
+    { "XMM9_HI" },
+    { "XMM10_HI" },
+    { "XMM11_HI" },
+    { "XMM12_HI" },
+    { "XMM13_HI" },
+    { "XMM14_HI" },
+    { "XMM15_HI" },
+    { "XMM0_LO_ZX" },
+    { "XMM1_LO_ZX" },
+    { "XMM2_LO_ZX" },
+    { "XMM3_LO_ZX" },
+    { "XMM4_LO_ZX" },
+    { "XMM5_LO_ZX" },
+    { "XMM6_LO_ZX" },
+    { "XMM7_LO_ZX" },
+    { "XMM8_LO_ZX" },
+    { "XMM9_LO_ZX" },
+    { "XMM10_LO_ZX" },
+    { "XMM11_LO_ZX" },
+    { "XMM12_LO_ZX" },
+    { "XMM13_LO_ZX" },
+    { "XMM14_LO_ZX" },
+    { "XMM15_LO_ZX" },
+    { "XMM0_DW0" },
+    { "XMM1_DW0" },
+    { "XMM2_DW0" },
+    { "XMM3_DW0" },
+    { "XMM4_DW0" },
+    { "XMM5_DW0" },
+    { "XMM6_DW0" },
+    { "XMM7_DW0" },
+    { "XMM8_DW0" },
+    { "XMM9_DW0" },
+    { "XMM10_DW0" },
+    { "XMM11_DW0" },
+    { "XMM12_DW0" },
+    { "XMM13_DW0" },
+    { "XMM14_DW0" },
+    { "XMM15_DW0" },
+    { "XMM0_DW0_ZX" },
+    { "XMM1_DW0_ZX" },
+    { "XMM2_DW0_ZX" },
+    { "XMM3_DW0_ZX" },
+    { "XMM4_DW0_ZX" },
+    { "XMM5_DW0_ZX" },
+    { "XMM6_DW0_ZX" },
+    { "XMM7_DW0_ZX" },
+    { "XMM8_DW0_ZX" },
+    { "XMM9_DW0_ZX" },
+    { "XMM10_DW0_ZX" },
+    { "XMM11_DW0_ZX" },
+    { "XMM12_DW0_ZX" },
+    { "XMM13_DW0_ZX" },
+    { "XMM14_DW0_ZX" },
+    { "XMM15_DW0_ZX" },
+    { "YMM0" },
+    { "YMM1" },
+    { "YMM2" },
+    { "YMM3" },
+    { "YMM4" },
+    { "YMM5" },
+    { "YMM6" },
+    { "YMM7" },
+    { "YMM8" },
+    { "YMM9" },
+    { "YMM10" },
+    { "YMM11" },
+    { "YMM12" },
+    { "YMM13" },
+    { "YMM14" },
+    { "YMM15" },
+
+    { "VALXCPT" },
+};
+AssertCompile(RT_ELEMENTS(g_aszBs3Cg1DstFields) == BS3CG1DST_END);
+
+#endif
+
+#if 0
+static const struct
+{
+    uint8_t     cbPrefixes;
+    uint8_t     abPrefixes[14];
+    uint16_t    fEffective;
+} g_aPrefixVariations[] =
+{
+    { 0, { 0x00 }, BS3CG1_PF_NONE },
+
+    { 1, { P_OZ }, BS3CG1_PF_OZ },
+    { 1, { P_CS }, BS3CG1_PF_CS },
+    { 1, { P_DS }, BS3CG1_PF_DS },
+    { 1, { P_ES }, BS3CG1_PF_ES },
+    { 1, { P_FS }, BS3CG1_PF_FS },
+    { 1, { P_GS }, BS3CG1_PF_GS },
+    { 1, { P_SS }, BS3CG1_PF_SS },
+    { 1, { P_LK }, BS3CG1_PF_LK },
+
+    { 2, { P_CS, P_OZ, }, BS3CG1_PF_CS | BS3CG1_PF_OZ },
+    { 2, { P_DS, P_OZ, }, BS3CG1_PF_DS | BS3CG1_PF_OZ },
+    { 2, { P_ES, P_OZ, }, BS3CG1_PF_ES | BS3CG1_PF_OZ },
+    { 2, { P_FS, P_OZ, }, BS3CG1_PF_FS | BS3CG1_PF_OZ },
+    { 2, { P_GS, P_OZ, }, BS3CG1_PF_GS | BS3CG1_PF_OZ },
+    { 2, { P_SS, P_OZ, }, BS3CG1_PF_SS | BS3CG1_PF_OZ },
+
+    { 2, { P_OZ, P_CS, }, BS3CG1_PF_CS | BS3CG1_PF_OZ },
+    { 2, { P_OZ, P_DS, }, BS3CG1_PF_DS | BS3CG1_PF_OZ },
+    { 2, { P_OZ, P_ES, }, BS3CG1_PF_ES | BS3CG1_PF_OZ },
+    { 2, { P_OZ, P_FS, }, BS3CG1_PF_FS | BS3CG1_PF_OZ },
+    { 2, { P_OZ, P_GS, }, BS3CG1_PF_GS | BS3CG1_PF_OZ },
+    { 2, { P_OZ, P_SS, }, BS3CG1_PF_SS | BS3CG1_PF_OZ },
+};
+
+static const uint16_t g_afPfxKindToIgnoredFlags[BS3CG1PFXKIND_END] =
+{
+    /* [BS3CG1PFXKIND_INVALID] = */              UINT16_MAX,
+    /* [BS3CG1PFXKIND_MODRM] = */                0,
+    /* [BS3CG1PFXKIND_MODRM_NO_OP_SIZES] = */    BS3CG1_PF_OZ | BS3CG1_PF_W,
+};
+
+#endif
+
+
+/**
+ * Checks whether the exception type is exempt from the >= 16 byte SSE/AVX alignment requirements.
+ *
+ * @returns true / false.
+ * @param   enmXcptType         The type to check.
+ */
+static bool BS3_NEAR_CODE Bs3Cg1XcptTypeIsUnaligned(BS3CG1XCPTTYPE enmXcptType)
+{
+    switch (enmXcptType)
+    {
+        case BS3CG1XCPTTYPE_4UA:
+        case BS3CG1XCPTTYPE_5:
+            return true;
+        default:
+            return false;
+    }
+}
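+
+/* For illustration, the 16-byte operand encoders below pair this check with
+   a deliberately misaligned operand:
+       if (!Bs3Cg1XcptTypeIsUnaligned(pThis->enmXcptType))
+           pThis->bAlignmentXcpt = X86_XCPT_GP;
+   so a #GP is only expected when the exception type demands alignment. */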
+
+
+/**
+ * Inserts a 2-byte VEX prefix.
+ *
+ * @returns New offDst value.
+ * @param   pThis       The state.
+ * @param   offDst      The current instruction offset.
+ * @param   uVexV       The VEX.V value (caller inverted it already).
+ * @param   uVexL       The VEX.L value.
+ * @param   uVexR       The VEX.R value (caller inverted it already).
+ */
+DECLINLINE(unsigned) BS3_NEAR_CODE Bs3Cg1InsertVex2bPrefix(PBS3CG1STATE pThis, unsigned offDst,
+                                                           uint8_t uVexV, uint8_t uVexL, uint8_t uVexR)
+{
+    uint8_t b = uVexR << 7;
+    b        |= uVexV << 3;
+    b        |= uVexL << 2;
+    switch (pThis->enmPrefixKind)
+    {
+        case BS3CG1PFXKIND_NO_F2_F3_66:     b |= 0; break;
+        case BS3CG1PFXKIND_REQ_66:          b |= 1; break;
+        case BS3CG1PFXKIND_REQ_F3:          b |= 2; break;
+        case BS3CG1PFXKIND_REQ_F2:          b |= 3; break;
+        default:
+            Bs3TestFailedF("enmPrefixKind=%d not supported for VEX!\n", pThis->enmPrefixKind);
+            break;
+    }
+
+    pThis->abCurInstr[offDst]     = 0xc5; /* vex2 */
+    pThis->abCurInstr[offDst + 1] = b;
+    return offDst + 2;
+}
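+
+/* Byte layout produced above (standard 2-byte VEX encoding; the sample
+   values are illustrative only):
+       byte 0: 0xc5
+       byte 1: ~R (bit 7) | ~vvvv (bits 6:3) | L (bit 2) | pp (bits 1:0)
+   E.g. uVexV=0xf, uVexL=0, uVexR=1 with a required 66h prefix (pp=1)
+   yields the familiar 0xc5 0xf9 prefix. */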
+
+
+/**
+ * Inserts a 3-byte VEX prefix.
+ *
+ * @returns New offDst value.
+ * @param   pThis       The state.
+ * @param   offDst      The current instruction offset.
+ * @param   uVexV       The VEX.V value (caller inverted it already).
+ * @param   uVexL       The VEX.L value.
+ * @param   uVexR       The VEX.R value (caller inverted it already).
+ * @param   uVexX       The VEX.X value (caller inverted it already).
+ * @param   uVexB       The VEX.B value (caller inverted it already).
+ * @param   uVexW       The VEX.W value (straight).
+ */
+DECLINLINE(unsigned) BS3_NEAR_CODE Bs3Cg1InsertVex3bPrefix(PBS3CG1STATE pThis, unsigned offDst, uint8_t uVexV, uint8_t uVexL,
+                                                           uint8_t uVexR, uint8_t uVexX, uint8_t uVexB, uint8_t uVexW)
+{
+    uint8_t b1;
+    uint8_t b2;
+    b1        = uVexR << 7;
+    b1       |= uVexX << 6;
+    b1       |= uVexB << 5;
+    b1       |= 1; /* VEX.mmmmm = 1 */ /** @todo three byte opcode tables */
+    b2        = uVexV << 3;
+    b2       |= uVexW << 7;
+    b2       |= uVexL << 2;
+    switch (pThis->enmPrefixKind)
+    {
+        case BS3CG1PFXKIND_NO_F2_F3_66:     b2 |= 0; break;
+        case BS3CG1PFXKIND_REQ_66:          b2 |= 1; break;
+        case BS3CG1PFXKIND_REQ_F3:          b2 |= 2; break;
+        case BS3CG1PFXKIND_REQ_F2:          b2 |= 3; break;
+        default:
+            Bs3TestFailedF("enmPrefixKind=%d not supported for VEX!\n", pThis->enmPrefixKind);
+            break;
+    }
+
+    pThis->abCurInstr[offDst]     = 0xc4; /* vex3 */
+    pThis->abCurInstr[offDst + 1] = b1;
+    pThis->abCurInstr[offDst + 2] = b2;
+    return offDst + 3;
+}
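+
+/* Byte layout produced above (standard 3-byte VEX encoding; the sample
+   values are illustrative only):
+       byte 0: 0xc4
+       byte 1: ~R (bit 7) | ~X (bit 6) | ~B (bit 5) | mmmmm (bits 4:0)
+       byte 2: W (bit 7) | ~vvvv (bits 6:3) | L (bit 2) | pp (bits 1:0)
+   E.g. uVexR=uVexX=uVexB=1, uVexW=0, uVexV=0xf, uVexL=0 and pp=1 (66h)
+   give 0xc4 0xe1 0x79, equivalent to the 0xc5 0xf9 form above. */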
+
+
+DECLINLINE(unsigned) BS3_NEAR_CODE Bs3Cg1InsertReqPrefix(PBS3CG1STATE pThis, unsigned offDst)
+{
+    switch (pThis->enmPrefixKind)
+    {
+        case BS3CG1PFXKIND_REQ_66:
+            pThis->abCurInstr[offDst] = 0x66;
+            break;
+        case BS3CG1PFXKIND_REQ_F2:
+            pThis->abCurInstr[offDst] = 0xf2;
+            break;
+        case BS3CG1PFXKIND_REQ_F3:
+            pThis->abCurInstr[offDst] = 0xf3;
+            break;
+        default:
+            return offDst;
+    }
+    return offDst + 1;
+}
+
+
+DECLINLINE(unsigned) BS3_NEAR_CODE Bs3Cg1InsertOpcodes(PBS3CG1STATE pThis, unsigned offDst)
+{
+    switch (pThis->cbOpcodes)
+    {
+        case 4: pThis->abCurInstr[offDst + 3] = pThis->abOpcodes[3]; /* fall thru */
+        case 3: pThis->abCurInstr[offDst + 2] = pThis->abOpcodes[2]; /* fall thru */
+        case 2: pThis->abCurInstr[offDst + 1] = pThis->abOpcodes[1]; /* fall thru */
+        case 1: pThis->abCurInstr[offDst]     = pThis->abOpcodes[0];
+            return offDst + pThis->cbOpcodes;
+
+        default:
+            BS3_ASSERT(0);
+            return 0;
+    }
+}
+
+
+/**
+ * Cleans up state and context changes made by the encoder.
+ *
+ * @param   pThis       The state.
+ */
+static void BS3_NEAR_CODE Bs3Cg1EncodeCleanup(PBS3CG1STATE pThis)
+{
+    /* Restore the DS registers in the contexts. */
+    unsigned iRing = 4;
+    while (iRing-- > 0)
+        pThis->aInitialCtxs[iRing].ds = pThis->aSavedSegRegs[iRing].ds;
+
+    switch (pThis->enmEncoding)
+    {
+        /* Most encodings currently don't need any special cleaning up. */
+        default:
+            return;
+    }
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cfg1EncodeMemMod0Disp(PBS3CG1STATE pThis, bool fAddrOverride, unsigned off, uint8_t iReg,
+                                                       uint8_t cbOp, uint8_t cbMissalign, BS3CG1OPLOC enmLocation)
+{
+    pThis->aOperands[pThis->iRmOp].idxField     = BS3CG1DST_INVALID;
+    pThis->aOperands[pThis->iRmOp].enmLocation  = enmLocation;
+    pThis->aOperands[pThis->iRmOp].cbOp         = cbOp;
+    pThis->aOperands[pThis->iRmOp].off          = cbOp + cbMissalign;
+
+    if (   BS3_MODE_IS_16BIT_CODE(pThis->bMode)
+        || (fAddrOverride && BS3_MODE_IS_32BIT_CODE(pThis->bMode)) )
+    {
+        /*
+         * 16-bit code doing 16-bit or 32-bit addressing,
+         * or 32-bit code doing 16-bit addressing.
+         */
+        unsigned iRing = 4;
+        if (BS3_MODE_IS_RM_OR_V86(pThis->bMode))
+            while (iRing-- > 0)
+                pThis->aInitialCtxs[iRing].ds = pThis->DataPgFar.sel;
+        else
+            while (iRing-- > 0)
+                pThis->aInitialCtxs[iRing].ds = pThis->DataPgFar.sel | iRing;
+        if (!fAddrOverride || BS3_MODE_IS_32BIT_CODE(pThis->bMode))
+        {
+            pThis->abCurInstr[off++] = X86_MODRM_MAKE(0, iReg, 6 /*disp16*/);
+            *(uint16_t *)&pThis->abCurInstr[off] = pThis->DataPgFar.off + X86_PAGE_SIZE - cbOp - cbMissalign;
+            off += 2;
+        }
+        else
+        {
+            pThis->abCurInstr[off++] = X86_MODRM_MAKE(0, iReg, 5 /*disp32*/);
+            *(uint32_t *)&pThis->abCurInstr[off] = pThis->DataPgFar.off + X86_PAGE_SIZE - cbOp - cbMissalign;
+            off += 4;
+        }
+    }
+    else
+    {
+        /*
+         * 32-bit code doing 32-bit addressing,
+         * or 64-bit code doing either 64-bit or 32-bit addressing.
+         */
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(0, iReg, 5 /*disp32*/);
+        *(uint32_t *)&pThis->abCurInstr[off] = BS3_FP_OFF(pThis->pbDataPg) + X86_PAGE_SIZE - cbOp - cbMissalign;
+
+        /* In 64-bit mode we always have a rip relative encoding regardless of fAddrOverride. */
+        if (BS3_MODE_IS_64BIT_CODE(pThis->bMode))
+            *(uint32_t *)&pThis->abCurInstr[off] -= BS3_FP_OFF(&pThis->pbCodePg[X86_PAGE_SIZE]);
+        off += 4;
+    }
+
+    /*
+     * Fill the memory with 0xcc.
+     */
+    switch (cbOp + cbMissalign)
+    {
+        case 8: pThis->pbDataPg[X86_PAGE_SIZE - 8] = 0xcc;  /* fall thru */
+        case 7: pThis->pbDataPg[X86_PAGE_SIZE - 7] = 0xcc;  /* fall thru */
+        case 6: pThis->pbDataPg[X86_PAGE_SIZE - 6] = 0xcc;  /* fall thru */
+        case 5: pThis->pbDataPg[X86_PAGE_SIZE - 5] = 0xcc;  /* fall thru */
+        case 4: pThis->pbDataPg[X86_PAGE_SIZE - 4] = 0xcc;  /* fall thru */
+        case 3: pThis->pbDataPg[X86_PAGE_SIZE - 3] = 0xcc;  /* fall thru */
+        case 2: pThis->pbDataPg[X86_PAGE_SIZE - 2] = 0xcc;  /* fall thru */
+        case 1: pThis->pbDataPg[X86_PAGE_SIZE - 1] = 0xcc;  /* fall thru */
+        case 0: break;
+        default:
+        {
+            BS3CG1_DPRINTF(("Bs3MemSet(%p,%#x,%#x)\n", &pThis->pbDataPg[X86_PAGE_SIZE - cbOp - cbMissalign], 0xcc, cbOp + cbMissalign));
+            Bs3MemSet(&pThis->pbDataPg[X86_PAGE_SIZE - cbOp - cbMissalign], 0xcc, cbOp + cbMissalign);
+            break;
+        }
+    }
+
+    return off;
+}
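+
+/* Sketch of the bytes this helper emits for a 32-bit context with iReg=2
+   (values for illustration only): ModRM 0x15 (mod=0, reg=2, rm=5/disp32)
+   followed by a disp32 addressing the last cbOp+cbMissalign bytes of the
+   data page; in 64-bit mode the same disp32 is RIP-relative, hence the
+   extra subtraction above. */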
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Eb_Gb(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    /* Start by reg,reg encoding. */
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, X86_GREG_xAX, X86_GREG_xCX);
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_AL;
+        pThis->aOperands[pThis->iRmOp ].idxField = BS3CG1DST_CL;
+    }
+    else if (iEncoding == 1)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_CH;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, X86_GREG_xBP, 1, 0, BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 2 && (g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80386)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_BH;
+        pThis->abCurInstr[0] = P_AZ;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 1));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, true, off, X86_GREG_xDI, 1, 0, BS3CG1OPLOC_MEM_RW);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
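+
+/* Worked example (illustrative, assuming a one-byte Eb,Gb opcode such as
+   00h / ADD): iEncoding 0 emits 00 C1 (mod=3, reg=AL, rm=CL), iEncoding 1
+   switches the register operand to CH with a mod=0 displacement-only
+   memory operand, and iEncoding 2 switches to BH and prepends an
+   address-size override (P_AZ). */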
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Gb_Eb(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    /* Start by reg,reg encoding. */
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, X86_GREG_xAX, X86_GREG_xCX);
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_AL;
+        pThis->aOperands[pThis->iRmOp ].idxField = BS3CG1DST_CL;
+    }
+    else if (iEncoding == 1)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_CH;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, X86_GREG_xBP, 1, 0, BS3CG1OPLOC_MEM);
+    }
+    else if (iEncoding == 2 && (g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80386)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_BH;
+        pThis->abCurInstr[0] = P_AZ;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 1));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, true, off, X86_GREG_xDI, 1, 0, BS3CG1OPLOC_MEM);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Gv_Ev__OR__BS3CG1ENC_MODRM_Ev_Gv(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    unsigned cbOp;
+    if (iEncoding == 0)
+    {
+        cbOp = BS3_MODE_IS_16BIT_CODE(pThis->bMode) ? 2 : 4;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, X86_GREG_xBX, X86_GREG_xDX);
+        pThis->aOperands[pThis->iRegOp].idxField    = BS3CG1DST_OZ_RBX;
+        pThis->aOperands[pThis->iRmOp ].idxField    = BS3CG1DST_OZ_RDX;
+    }
+    else if (iEncoding == 1)
+    {
+        cbOp = BS3_MODE_IS_16BIT_CODE(pThis->bMode) ? 2 : 4;
+        pThis->aOperands[pThis->iRegOp].idxField    = BS3CG1DST_OZ_RBP;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, X86_GREG_xBP, cbOp, 0,
+                                       pThis->enmEncoding == BS3CG1ENC_MODRM_Gv_Ev ? BS3CG1OPLOC_MEM : BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 2 && (g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80386)
+    {
+        cbOp = BS3_MODE_IS_16BIT_CODE(pThis->bMode) ? 4 : 2;
+        pThis->abCurInstr[0] = P_OZ;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 1));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, X86_GREG_xBX, X86_GREG_xDX);
+        pThis->aOperands[pThis->iRegOp].idxField    = BS3CG1DST_OZ_RBX;
+        pThis->aOperands[pThis->iRmOp ].idxField    = BS3CG1DST_OZ_RDX;
+        pThis->aOperands[pThis->iRmOp ].enmLocation = BS3CG1OPLOC_CTX;
+    }
+    else if (iEncoding == 3)
+    {
+        cbOp = BS3_MODE_IS_16BIT_CODE(pThis->bMode) ? 4 : 2;
+        pThis->aOperands[pThis->iRegOp].idxField    = BS3CG1DST_OZ_RSI;
+        pThis->abCurInstr[0] = P_OZ;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 1));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, X86_GREG_xSI, cbOp, 0,
+                                       pThis->enmEncoding == BS3CG1ENC_MODRM_Gv_Ev ? BS3CG1OPLOC_MEM : BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 4)
+    {
+        cbOp = BS3_MODE_IS_16BIT_CODE(pThis->bMode) ? 2 : 4;
+        pThis->aOperands[pThis->iRegOp].idxField    = BS3CG1DST_OZ_RDI;
+        pThis->abCurInstr[0] = P_AZ;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 1));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, true, off, X86_GREG_xDI, cbOp, 0,
+                                       pThis->enmEncoding == BS3CG1ENC_MODRM_Gv_Ev ? BS3CG1OPLOC_MEM : BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 5)
+    {
+        cbOp = BS3_MODE_IS_16BIT_CODE(pThis->bMode) ? 4 : 2;
+        pThis->aOperands[pThis->iRegOp].idxField    = BS3CG1DST_OZ_RSI;
+        pThis->abCurInstr[0] = P_OZ;
+        pThis->abCurInstr[1] = P_AZ;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 2));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, true, off, X86_GREG_xSI, cbOp, 0,
+                                       pThis->enmEncoding == BS3CG1ENC_MODRM_Gv_Ev ? BS3CG1OPLOC_MEM : BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 6 && BS3_MODE_IS_64BIT_CODE(pThis->bMode))
+    {
+        cbOp = 8;
+        off = Bs3Cg1InsertReqPrefix(pThis, 0);
+        pThis->abCurInstr[off++] = REX_W___;
+        off = Bs3Cg1InsertOpcodes(pThis, off);
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, X86_GREG_xBX, X86_GREG_xDX);
+        pThis->aOperands[pThis->iRegOp].idxField    = BS3CG1DST_RBX;
+        pThis->aOperands[pThis->iRmOp ].idxField    = BS3CG1DST_RDX;
+        pThis->aOperands[pThis->iRmOp ].enmLocation = BS3CG1OPLOC_CTX;
+    }
+    else
+        return 0;
+    pThis->aOperands[0].cbOp = cbOp;
+    pThis->aOperands[1].cbOp = cbOp;
+    pThis->cbOperand  = cbOp;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Wss_Vss(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, 1, 0);
+        pThis->aOperands[pThis->iRmOp ].idxField = BS3CG1DST_XMM0_DW0;
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM1_DW0;
+    }
+    else if (iEncoding == 1)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM2_DW0;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 2 /*iReg*/, 4, 0, BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 2)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM3_DW0;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 3 /*iReg*/, 4, 1 /*cbMissalign*/, BS3CG1OPLOC_MEM_RW);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Wsd_Vsd(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, 1, 0);
+        pThis->aOperands[pThis->iRmOp ].idxField = BS3CG1DST_XMM0_LO;
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM1_LO;
+    }
+    else if (iEncoding == 1)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM2_LO;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 2 /*iReg*/, 8, 0, BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 2)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM3_LO;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 3 /*iReg*/, 8, 1 /*cbMissalign*/, BS3CG1OPLOC_MEM_RW);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Wps_Vps__OR__BS3CG1ENC_MODRM_Wpd_Vpd(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, 1, 0);
+        pThis->aOperands[pThis->iRmOp ].idxField = BS3CG1DST_XMM0;
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM1;
+    }
+    else if (iEncoding == 1)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM2;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 2 /*iReg*/, 16, 0, BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 2)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM3;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 3 /*iReg*/, 16, 1 /*cbMissalign*/, BS3CG1OPLOC_MEM_RW);
+        if (!Bs3Cg1XcptTypeIsUnaligned(pThis->enmXcptType))
+            pThis->bAlignmentXcpt = X86_XCPT_GP;
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_WqZxReg_Vq(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, 1, 0);
+        pThis->aOperands[pThis->iRmOp ].idxField = BS3CG1DST_XMM0_LO_ZX;
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM1_LO;
+    }
+    else if (iEncoding == 1)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM2_LO;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 2 /*iReg*/, 8, 0, BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 2)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM3_LO;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 3 /*iReg*/, 8, 1 /*cbMissalign*/, BS3CG1OPLOC_MEM_RW);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Vq_UqHi(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, 1, 0);
+        pThis->aOperands[pThis->iRmOp ].idxField = BS3CG1DST_XMM0_HI;
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM1_LO;
+    }
+    else if (iEncoding == 1)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, 2, 2);
+        pThis->aOperands[pThis->iRmOp ].idxField = BS3CG1DST_XMM2_HI;
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM2_LO;
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Vq_Mq(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM2_LO;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 2 /*iReg*/, 8, 0, BS3CG1OPLOC_MEM);
+    }
+    else if (iEncoding == 1)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM3_LO;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 3 /*iReg*/, 8, 1 /*cbMissalign*/, BS3CG1OPLOC_MEM);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Vdq_Wdq(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, 1, 0);
+        pThis->aOperands[pThis->iRmOp ].idxField = BS3CG1DST_XMM0;
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM1;
+    }
+    else if (iEncoding == 1)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM2;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 2 /*iReg*/, 16, 0, BS3CG1OPLOC_MEM);
+    }
+    else if (iEncoding == 2)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM3;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 3 /*iReg*/, 16, 1 /*cbMissalign*/, BS3CG1OPLOC_MEM);
+        if (!Bs3Cg1XcptTypeIsUnaligned(pThis->enmXcptType))
+            pThis->bAlignmentXcpt = X86_XCPT_GP;
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_VssZxReg_Wss(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, 1, 0);
+        pThis->aOperands[pThis->iRmOp ].idxField = BS3CG1DST_XMM0_LO;
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM1_DW0_ZX;
+    }
+    else if (iEncoding == 1)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM2_DW0_ZX;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 2 /*iReg*/, 4, 0, BS3CG1OPLOC_MEM);
+    }
+    else if (iEncoding == 2)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_XMM3_DW0_ZX;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, 3 /*iReg*/, 4, 1 /*cbMissalign*/, BS3CG1OPLOC_MEM);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Gv_Ma(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    unsigned cbOp = BS3_MODE_IS_16BIT_CODE(pThis->bMode) ? 2 : 4;
+    if (iEncoding == 0)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_OZ_RBP;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, X86_GREG_xBP, cbOp * 2, 0, BS3CG1OPLOC_MEM);
+    }
+    else if (iEncoding == 1 && (g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80386)
+    {
+        cbOp = cbOp == 2 ? 4 : 2;
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_OZ_RBP;
+        pThis->abCurInstr[0] = P_OZ;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 1));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off, X86_GREG_xBP, cbOp * 2, 0, BS3CG1OPLOC_MEM);
+    }
+    else if (iEncoding == 2)
+    {
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_OZ_RBP;
+        pThis->abCurInstr[0] = P_AZ;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 1));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, true, off, X86_GREG_xBP, cbOp * 2, 0, BS3CG1OPLOC_MEM);
+    }
+    else if (iEncoding == 3)
+    {
+        cbOp = cbOp == 2 ? 4 : 2;
+        pThis->aOperands[pThis->iRegOp].idxField = BS3CG1DST_OZ_RBP;
+        pThis->abCurInstr[0] = P_AZ;
+        pThis->abCurInstr[1] = P_OZ;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 2));
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, true, off, X86_GREG_xBP, cbOp * 2, 0, BS3CG1OPLOC_MEM);
+    }
+    else
+        return 0;
+    pThis->aOperands[pThis->iRegOp].cbOp = cbOp;
+    pThis->cbOperand  = cbOp;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_MbRO(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0)) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       1, 0, BS3CG1OPLOC_MEM);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_MdRO(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0)) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       4, 0, BS3CG1OPLOC_MEM);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_MdWO(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0)) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       4, 0, BS3CG1OPLOC_MEM_RW);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_VEX_MODRM_MdWO(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        /** @todo three-byte opcode needs some tweaking. */
+        off = Bs3Cg1InsertVex2bPrefix(pThis, 0 /*offDst*/, 0xf /*~V*/, 0 /*L*/, 1 /*~R*/);
+        off = Bs3Cg1InsertOpcodes(pThis, off) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       4, 0, BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 1)
+    {
+        off = Bs3Cg1InsertVex3bPrefix(pThis, 0 /*offDst*/, 0xf /*~V*/, 0 /*L*/, 1 /*~R*/, 1 /*~X*/, 1 /*~B*/, 0 /*W*/);
+        off = Bs3Cg1InsertOpcodes(pThis, off) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       4, 0, BS3CG1OPLOC_MEM_RW);
+    }
+    else if (iEncoding == 2)
+    {
+        off = Bs3Cg1InsertVex3bPrefix(pThis, 0 /*offDst*/, 0x7 /*~V*/, 0 /*L*/, 1 /*~R*/, 1 /*~X*/, 1 /*~B*/, 0 /*W*/);
+        off = Bs3Cg1InsertOpcodes(pThis, off) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       4, 0, BS3CG1OPLOC_MEM_RW);
+        pThis->fInvalidEncoding = true;
+    }
+    else if (iEncoding == 3)
+    {
+        off = Bs3Cg1InsertVex3bPrefix(pThis, 0 /*offDst*/, 0xf /*~V*/, 1 /*L*/, 1 /*~R*/, 1 /*~X*/, 1 /*~B*/, 0 /*W*/);
+        off = Bs3Cg1InsertOpcodes(pThis, off) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       4, 0, BS3CG1OPLOC_MEM_RW);
+        pThis->fInvalidEncoding = true;
+    }
+    else if (iEncoding == 4)
+    {
+        pThis->abCurInstr[0] = P_OZ;
+        off = Bs3Cg1InsertVex3bPrefix(pThis, 1 /*offDst*/, 0xf /*~V*/, 0 /*L*/, 1 /*~R*/, 1 /*~X*/, 1 /*~B*/, 0 /*W*/);
+        off = Bs3Cg1InsertOpcodes(pThis, off) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       4, 0, BS3CG1OPLOC_MEM_RW);
+        pThis->fInvalidEncoding = true;
+    }
+    else if (iEncoding == 5)
+    {
+        pThis->abCurInstr[0] = P_RZ;
+        off = Bs3Cg1InsertVex3bPrefix(pThis, 1 /*offDst*/, 0xf /*~V*/, 0 /*L*/, 1 /*~R*/, 1 /*~X*/, 1 /*~B*/, 0 /*W*/);
+        off = Bs3Cg1InsertOpcodes(pThis, off) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       4, 0, BS3CG1OPLOC_MEM_RW);
+        pThis->fInvalidEncoding = true;
+    }
+    else if (iEncoding == 6)
+    {
+        pThis->abCurInstr[0] = P_RN;
+        off = Bs3Cg1InsertVex3bPrefix(pThis, 1 /*offDst*/, 0xf /*~V*/, 0 /*L*/, 1 /*~R*/, 1 /*~X*/, 1 /*~B*/, 0 /*W*/);
+        off = Bs3Cg1InsertOpcodes(pThis, off) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       4, 0, BS3CG1OPLOC_MEM_RW);
+        pThis->fInvalidEncoding = true;
+    }
+    else if (iEncoding == 7)
+    {
+        off = Bs3Cg1InsertVex3bPrefix(pThis, 0 /*offDst*/, 0xf /*~V*/, 0 /*L*/, 1 /*~R*/, 1 /*~X*/, 1 /*~B*/, 1 /*W*/);
+        off = Bs3Cg1InsertOpcodes(pThis, off) - 1;
+        off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                       (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                       4, 0, BS3CG1OPLOC_MEM_RW);
+    }
+#if ARCH_BITS == 64
+    else if (BS3_MODE_IS_64BIT_CODE(pThis->bMode))
+    {
+        if (iEncoding == 8)
+        {
+            pThis->abCurInstr[0] = REX_____;
+            off = Bs3Cg1InsertVex3bPrefix(pThis, 1 /*offDst*/, 0xf /*~V*/, 0 /*L*/, 1 /*~R*/, 1 /*~X*/, 1 /*~B*/, 0 /*W*/);
+            off = Bs3Cg1InsertOpcodes(pThis, off) - 1;
+            off = Bs3Cfg1EncodeMemMod0Disp(pThis, false, off,
+                                           (pThis->abCurInstr[off] & X86_MODRM_REG_MASK) >> X86_MODRM_REG_SHIFT,
+                                           4, 0, BS3CG1OPLOC_MEM_RW);
+            pThis->fInvalidEncoding = true;
+        }
+        else
+            return 0;
+    }
+#endif
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_FIXED(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->cbCurInstr = off;
+        iEncoding++;
+    }
+    else
+        return 0;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_FIXED_AL_Ib(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->aOperands[1].off = (uint8_t)off;
+        pThis->abCurInstr[off++] = 0xff;
+        pThis->cbCurInstr = off;
+    }
+    else
+        return 0;
+    return iEncoding + 1;
+}
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_FIXED_rAX_Iz(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding == 0)
+    {
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 0));
+        pThis->aOperands[1].off = (uint8_t)off;
+        if (BS3_MODE_IS_16BIT_CODE(pThis->bMode))
+        {
+            *(uint16_t *)&pThis->abCurInstr[off] = UINT16_MAX;
+            off += 2;
+            pThis->aOperands[0].cbOp = 2;
+            pThis->aOperands[1].cbOp = 2;
+            pThis->cbOperand         = 2;
+        }
+        else
+        {
+            *(uint32_t *)&pThis->abCurInstr[off] = UINT32_MAX;
+            off += 4;
+            pThis->aOperands[0].cbOp = 4;
+            pThis->aOperands[1].cbOp = 4;
+            pThis->cbOperand         = 4;
+        }
+    }
+    else if (iEncoding == 1 && (g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80386)
+    {
+        pThis->abCurInstr[0] = P_OZ;
+        off = Bs3Cg1InsertOpcodes(pThis, Bs3Cg1InsertReqPrefix(pThis, 1));
+        pThis->aOperands[1].off = (uint8_t)off;
+        if (!BS3_MODE_IS_16BIT_CODE(pThis->bMode))
+        {
+            *(uint16_t *)&pThis->abCurInstr[off] = UINT16_MAX;
+            off += 2;
+            pThis->aOperands[0].cbOp = 2;
+            pThis->aOperands[1].cbOp = 2;
+            pThis->cbOperand         = 2;
+        }
+        else
+        {
+            *(uint32_t *)&pThis->abCurInstr[off] = UINT32_MAX;
+            off += 4;
+            pThis->aOperands[0].cbOp = 4;
+            pThis->aOperands[1].cbOp = 4;
+            pThis->cbOperand         = 4;
+        }
+    }
+    else if (iEncoding == 2 && BS3_MODE_IS_64BIT_CODE(pThis->bMode))
+    {
+        off = Bs3Cg1InsertReqPrefix(pThis, 0);
+        pThis->abCurInstr[off++] = REX_W___;
+        off = Bs3Cg1InsertOpcodes(pThis, off);
+        pThis->aOperands[1].off = (uint8_t)off;
+        *(uint32_t *)&pThis->abCurInstr[off] = UINT32_MAX;
+        off += 4;
+        pThis->aOperands[0].cbOp = 8;
+        pThis->aOperands[1].cbOp = 4;
+        pThis->cbOperand         = 8;
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
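+
+/* Worked example (assumed opcode for illustration; not from the patch): for
+ * a hypothetical rAX,Iz instruction like 05 /Iz (ADD rAX, Iz), the encodings
+ * above come out as follows:
+ *      iEncoding 0, 16-bit code: 05 FF FF              (o16, Iw)
+ *      iEncoding 0, 32-bit code: 05 FF FF FF FF        (o32, Id)
+ *      iEncoding 1, 32-bit code: 66 05 FF FF           (P_OZ flips to o16)
+ *      iEncoding 2, 64-bit code: 48 05 FF FF FF FF     (REX.W=1, o64, Id) */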
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_MOD_EQ_3(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding < 8)
+    {
+        off = Bs3Cg1InsertReqPrefix(pThis, 0);
+        off = Bs3Cg1InsertOpcodes(pThis, off);
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, iEncoding, 1);
+    }
+    else if (iEncoding < 16)
+    {
+        off = Bs3Cg1InsertReqPrefix(pThis, 0);
+        off = Bs3Cg1InsertOpcodes(pThis, off);
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(3, 0, iEncoding);
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+
+    return iEncoding + 1;
+}
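+
+/* Worked example (illustration only): X86_MODRM_MAKE(bMod, bReg, bRm) packs
+ * the byte as (bMod << 6) | (bReg << 3) | bRm.  So iEncoding 5 in the first
+ * branch above yields X86_MODRM_MAKE(3, 5, 1) = 0xC0 | 0x28 | 0x01 = 0xE9,
+ * i.e. mod=3 (register operand), reg=5, r/m=1. */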
+
+
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_MOD_NE_3(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    unsigned off;
+    if (iEncoding < 3)
+    {
+        off = Bs3Cg1InsertReqPrefix(pThis, 0);
+        off = Bs3Cg1InsertOpcodes(pThis, off);
+        pThis->abCurInstr[off++] = X86_MODRM_MAKE(iEncoding, 0, 1);
+        if (iEncoding >= 1)
+            pThis->abCurInstr[off++] = 0x7f;
+        if (iEncoding == 2)
+        {
+            pThis->abCurInstr[off++] = 0x5f;
+            if (!BS3_MODE_IS_16BIT_CODE(pThis->bMode))
+            {
+                pThis->abCurInstr[off++] = 0x3f;
+                pThis->abCurInstr[off++] = 0x1f;
+            }
+        }
+    }
+    else
+        return 0;
+    pThis->cbCurInstr = off;
+    return iEncoding + 1;
+}
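+
+/* Illustration (not from the patch): the three iEncoding values above walk
+ * the memory forms of the ModR/M byte with reg=0, r/m=1:
+ *      iEncoding 0: mod=0, no displacement
+ *      iEncoding 1: mod=1, disp8  = 0x7f
+ *      iEncoding 2: mod=2, disp16 = 0x5f7f in 16-bit code,
+ *                          disp32 = 0x1f3f5f7f otherwise */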
+
+
+/**
+ * Encodes the next instruction.
+ *
+ * @returns Next iEncoding value.  Returns @a iEncoding unchanged (or zero)
+ *          to indicate that there are no more encodings to test.
+ * @param   pThis           The state.
+ * @param   iEncoding       The encoding to produce.  Meaning is specific to
+ *                          each BS3CG1ENC_XXX value and should be considered
+ *                          internal.
+ */
+static unsigned BS3_NEAR_CODE Bs3Cg1EncodeNext(PBS3CG1STATE pThis, unsigned iEncoding)
+{
+    pThis->bAlignmentXcpt = UINT8_MAX;
+
+    switch (pThis->enmEncoding)
+    {
+        case BS3CG1ENC_MODRM_Eb_Gb:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Eb_Gb(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_Gb_Eb:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Gb_Eb(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_Gv_Ev:
+        case BS3CG1ENC_MODRM_Ev_Gv:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Gv_Ev__OR__BS3CG1ENC_MODRM_Ev_Gv(pThis, iEncoding);
+
+        case BS3CG1ENC_MODRM_Wss_Vss:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Wss_Vss(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_Wsd_Vsd:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Wsd_Vsd(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_Wps_Vps:
+        case BS3CG1ENC_MODRM_Wpd_Vpd:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Wps_Vps__OR__BS3CG1ENC_MODRM_Wpd_Vpd(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_WqZxReg_Vq:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_WqZxReg_Vq(pThis, iEncoding);
+
+        case BS3CG1ENC_MODRM_Vq_UqHi:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Vq_UqHi(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_Vq_Mq:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Vq_Mq(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_Vdq_Wdq:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Vdq_Wdq(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_VssZxReg_Wss:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_VssZxReg_Wss(pThis, iEncoding);
+
+        case BS3CG1ENC_MODRM_Gv_Ma:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_Gv_Ma(pThis, iEncoding);
+
+        case BS3CG1ENC_MODRM_MbRO:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_MbRO(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_MdRO:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_MdRO(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_MdWO:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_MdWO(pThis, iEncoding);
+        case BS3CG1ENC_VEX_MODRM_MdWO:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_VEX_MODRM_MdWO(pThis, iEncoding);
+
+        case BS3CG1ENC_FIXED:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_FIXED(pThis, iEncoding);
+        case BS3CG1ENC_FIXED_AL_Ib:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_FIXED_AL_Ib(pThis, iEncoding);
+        case BS3CG1ENC_FIXED_rAX_Iz:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_FIXED_rAX_Iz(pThis, iEncoding);
+
+        case BS3CG1ENC_MODRM_MOD_EQ_3:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_MOD_EQ_3(pThis, iEncoding);
+        case BS3CG1ENC_MODRM_MOD_NE_3:
+            return Bs3Cg1EncodeNext_BS3CG1ENC_MODRM_MOD_NE_3(pThis, iEncoding);
+
+        default:
+            Bs3TestFailedF("Internal error! BS3CG1ENC_XXX = %u not implemented", pThis->enmEncoding);
+            break;
+    }
+
+
+    return iEncoding;
+}
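+
+/* Sketch of the intended calling pattern (an assumption based on the return
+ * contract documented above; not part of the patch):
+ *
+ *      unsigned iEncoding = 0;
+ *      for (;;)
+ *      {
+ *          unsigned iEncodingNext = Bs3Cg1EncodeNext(pThis, iEncoding);
+ *          if (iEncodingNext <= iEncoding)
+ *              break;              // zero/unchanged: no more encodings
+ *          // ... copy abCurInstr, execute and verify the result here ...
+ *          iEncoding = iEncodingNext;
+ *      }
+ */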
+
+
+/**
+ * Prepares doing instruction encodings.
+ *
+ * This is in part specific to how the instruction is encoded, but generally it
+ * sets up basic operand values that don't change (much) when Bs3Cg1EncodeNext
+ * is called from within the loop.
+ *
+ * @returns Success indicator (true/false).
+ * @param   pThis           The state.
+ */
+#define Bs3Cg1EncodePrep BS3_CMN_NM(Bs3Cg1EncodePrep)
+bool BS3_NEAR_CODE Bs3Cg1EncodePrep(PBS3CG1STATE pThis)
+{
+    unsigned iRing = 4;
+    while (iRing-- > 0)
+        pThis->aSavedSegRegs[iRing].ds = pThis->aInitialCtxs[iRing].ds;
+
+    pThis->iRmOp            = RT_ELEMENTS(pThis->aOperands) - 1;
+    pThis->iRegOp           = RT_ELEMENTS(pThis->aOperands) - 1;
+    pThis->fSameRingNotOkay = false;
+    pThis->cbOperand        = 0;
+
+    switch (pThis->enmEncoding)
+    {
+        case BS3CG1ENC_MODRM_Eb_Gb:
+            pThis->iRmOp             = 0;
+            pThis->iRegOp            = 1;
+            pThis->aOperands[0].cbOp = 1;
+            pThis->aOperands[1].cbOp = 1;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_CTX;
+            break;
+
+        case BS3CG1ENC_MODRM_Ev_Gv:
+            pThis->iRmOp             = 0;
+            pThis->iRegOp            = 1;
+            pThis->cbOperand         = 2;
+            pThis->aOperands[0].cbOp = 2;
+            pThis->aOperands[1].cbOp = 2;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_CTX;
+            break;
+
+        case BS3CG1ENC_MODRM_Gb_Eb:
+            pThis->iRmOp             = 1;
+            pThis->iRegOp            = 0;
+            pThis->aOperands[0].cbOp = 1;
+            pThis->aOperands[1].cbOp = 1;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_CTX;
+            break;
+
+        case BS3CG1ENC_MODRM_Gv_Ev:
+            pThis->iRmOp             = 1;
+            pThis->iRegOp            = 0;
+            pThis->cbOperand         = 2;
+            pThis->aOperands[0].cbOp = 2;
+            pThis->aOperands[1].cbOp = 2;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_CTX;
+            break;
+
+        case BS3CG1ENC_MODRM_Gv_Ma:
+            pThis->iRmOp             = 1;
+            pThis->iRegOp            = 0;
+            pThis->cbOperand         = 2;
+            pThis->aOperands[0].cbOp = 2;
+            pThis->aOperands[1].cbOp = 4;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_MEM;
+            pThis->aOperands[1].idxField    = BS3CG1DST_INVALID;
+            break;
+
+        case BS3CG1ENC_MODRM_Wss_Vss:
+            pThis->iRmOp             = 0;
+            pThis->iRegOp            = 1;
+            pThis->aOperands[0].cbOp = 4;
+            pThis->aOperands[1].cbOp = 4;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_CTX;
+            break;
+
+        case BS3CG1ENC_MODRM_Wsd_Vsd:
+        case BS3CG1ENC_MODRM_WqZxReg_Vq:
+            pThis->iRmOp             = 0;
+            pThis->iRegOp            = 1;
+            pThis->aOperands[0].cbOp = 8;
+            pThis->aOperands[1].cbOp = 8;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_CTX;
+            break;
+
+        case BS3CG1ENC_MODRM_Wps_Vps:
+        case BS3CG1ENC_MODRM_Wpd_Vpd:
+            pThis->iRmOp             = 0;
+            pThis->iRegOp            = 1;
+            pThis->aOperands[0].cbOp = 16;
+            pThis->aOperands[1].cbOp = 16;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_CTX;
+            break;
+
+        case BS3CG1ENC_MODRM_Vdq_Wdq:
+            pThis->iRmOp             = 1;
+            pThis->iRegOp            = 0;
+            pThis->aOperands[0].cbOp = 16;
+            pThis->aOperands[1].cbOp = 16;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_CTX;
+            break;
+
+        case BS3CG1ENC_MODRM_Vq_UqHi:
+            pThis->iRmOp             = 1;
+            pThis->iRegOp            = 0;
+            pThis->aOperands[0].cbOp = 8;
+            pThis->aOperands[1].cbOp = 8;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_CTX;
+            break;
+
+        case BS3CG1ENC_MODRM_Vq_Mq:
+            pThis->iRmOp             = 1;
+            pThis->iRegOp            = 0;
+            pThis->aOperands[0].cbOp = 8;
+            pThis->aOperands[1].cbOp = 8;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_MEM;
+            break;
+
+        case BS3CG1ENC_MODRM_VssZxReg_Wss:
+            pThis->iRmOp             = 1;
+            pThis->iRegOp            = 0;
+            pThis->aOperands[0].cbOp = 4;
+            pThis->aOperands[1].cbOp = 4;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_CTX;
+            break;
+
+        case BS3CG1ENC_MODRM_MbRO:
+            pThis->iRmOp             = 0;
+            pThis->aOperands[0].cbOp = 1;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_MEM;
+            break;
+
+        case BS3CG1ENC_MODRM_MdRO:
+            pThis->iRmOp             = 0;
+            pThis->aOperands[0].cbOp = 4;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_MEM;
+            break;
+
+        case BS3CG1ENC_MODRM_MdWO:
+        case BS3CG1ENC_VEX_MODRM_MdWO:
+            pThis->iRmOp             = 0;
+            pThis->aOperands[0].cbOp = 4;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_MEM_RW;
+            break;
+
+        case BS3CG1ENC_FIXED:
+            /* nothing to do here */
+            break;
+
+        case BS3CG1ENC_FIXED_AL_Ib:
+            pThis->aOperands[0].cbOp = 1;
+            pThis->aOperands[1].cbOp = 1;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_IMM;
+            pThis->aOperands[0].idxField    = BS3CG1DST_AL;
+            pThis->aOperands[1].idxField    = BS3CG1DST_INVALID;
+            break;
+
+        case BS3CG1ENC_FIXED_rAX_Iz:
+            pThis->aOperands[0].cbOp = 2;
+            pThis->aOperands[1].cbOp = 2;
+            pThis->aOperands[0].enmLocation = BS3CG1OPLOC_CTX;
+            pThis->aOperands[1].enmLocation = BS3CG1OPLOC_IMM;
+            pThis->aOperands[0].idxField    = BS3CG1DST_OZ_RAX;
+            pThis->aOperands[1].idxField    = BS3CG1DST_INVALID;
+            break;
+
+        case BS3CG1ENC_MODRM_MOD_EQ_3:
+        case BS3CG1ENC_MODRM_MOD_NE_3:
+            /* Unused or invalid instructions mostly. */
+            break;
+
+        default:
+            Bs3TestFailedF("Invalid/unimplemented enmEncoding for instruction #%RU32 (%.*s): %d",
+                           pThis->iInstr, pThis->cchMnemonic, pThis->pchMnemonic, pThis->enmEncoding);
+            return false;
+    }
+    return true;
+}
+
+
+/**
+ * Calculates the appropriate non-Intel invalid instruction encoding.
+ *
+ * @returns The encoding to use instead.
+ * @param   enmEncoding         The Intel invalid instruction encoding.
+ */
+static BS3CG1ENC Bs3Cg1CalcNoneIntelInvalidEncoding(BS3CG1ENC enmEncoding)
+{
+    switch (enmEncoding)
+    {
+        case BS3CG1ENC_MODRM_Gb_Eb:
+        case BS3CG1ENC_MODRM_Gv_Ma:
+        case BS3CG1ENC_FIXED:
+            return BS3CG1ENC_FIXED;
+        default:
+            Bs3TestFailedF("Bs3Cg1CalcNoneIntelInvalidEncoding: Unsupported encoding: %d\n", enmEncoding);
+            return BS3CG1ENC_FIXED;
+    }
+}
+
+
+/**
+ * Sets up SSE and maybe AVX.
+ *
+ * @returns true if successful, false if not (in which case the SSE
+ *          instruction ends up being invalid).
+ * @param   pThis               The state.
+ */
+static bool BS3_NEAR_CODE Bs3Cg3SetupSseAndAvx(PBS3CG1STATE pThis)
+{
+    if (!pThis->fWorkExtCtx)
+    {
+        unsigned i;
+        uint32_t cr0 = ASMGetCR0();
+        uint32_t cr4 = ASMGetCR4();
+
+        cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
+        cr0 |= X86_CR0_NE;
+        ASMSetCR0(cr0);
+        if (pThis->pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
+        {
+            cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
+            ASMSetCR4(cr4);
+            ASMSetXcr0(pThis->pExtCtx->fXcr0Nominal);
+        }
+        else
+        {
+            cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
+            ASMSetCR4(cr4);
+        }
+
+        for (i = 0; i < RT_ELEMENTS(pThis->aInitialCtxs); i++)
+        {
+            pThis->aInitialCtxs[i].cr0.u32 = cr0;
+            pThis->aInitialCtxs[i].cr4.u32 = cr4;
+        }
+        pThis->fWorkExtCtx = true;
+    }
+
+    return true;
+}
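+
+/* Background note (architectural facts, for orientation; not from the
+ * patch): SSE instructions raise \#UD unless CR0.EM=0 and CR4.OSFXSR=1, and
+ * raise \#NM while CR0.TS=1, which is why the function above clears
+ * TS/MP/EM and sets OSFXSR before running tests.  On XSAVE-capable CPUs,
+ * CR4.OSXSAVE must be set before XCR0 can be written via XSETBV (otherwise
+ * XSETBV itself raises \#UD). */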
+
+
+/**
+ * Next CPU configuration to test the current instruction in.
+ *
+ * This is for testing FPU, SSE and AVX instructions with the various lazy state
+ * load and enable bits in different configurations to ensure we're getting the
+ * right response.
+ *
+ * This also cleans up the CPU and test driver state.
+ *
+ * @returns true if we're to do another round, false if we're done.
+ * @param   pThis           The state.
+ * @param   iCpuSetup       The current CPU setup number.
+ * @param   pfInvalidInstr  Where to indicate whether the setup causes an
+ *                          invalid instruction or not.  This is also used as
+ *                          input to avoid unnecessary CPUID work.
+ */
+static bool BS3_NEAR_CODE Bs3Cg1CpuSetupNext(PBS3CG1STATE pThis, unsigned iCpuSetup, bool BS3_FAR *pfInvalidInstr)
+{
+    if (   (pThis->fFlags & BS3CG1INSTR_F_INVALID_64BIT)
+        && BS3_MODE_IS_64BIT_CODE(pThis->bMode))
+        return false;
+
+    switch (pThis->enmCpuTest)
+    {
+        case BS3CG1CPU_ANY:
+        case BS3CG1CPU_GE_80186:
+        case BS3CG1CPU_GE_80286:
+        case BS3CG1CPU_GE_80386:
+        case BS3CG1CPU_GE_80486:
+        case BS3CG1CPU_GE_Pentium:
+        case BS3CG1CPU_CLFSH:
+        case BS3CG1CPU_CLFLUSHOPT:
+            return false;
+
+        case BS3CG1CPU_SSE:
+        case BS3CG1CPU_SSE2:
+        case BS3CG1CPU_SSE3:
+        case BS3CG1CPU_AVX:
+        case BS3CG1CPU_AVX2:
+            if (iCpuSetup > 0 || *pfInvalidInstr)
+            {
+                /** @todo do more configs here. */
+                pThis->fWorkExtCtx = false;
+                ASMSetCR0(ASMGetCR0() | X86_CR0_EM | X86_CR0_MP);
+                ASMSetCR4(ASMGetCR4() & ~(X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE));
+                return false;
+            }
+            return false;
+
+        default:
+            Bs3TestFailedF("Invalid enmCpuTest value: %d", pThis->enmCpuTest);
+            return false;
+    }
+}
+
+
+/**
+ * Checks whether the instruction is supported by the CPU, possibly making
+ * state adjustments to enable support for it.
+ *
+ * @returns true if supported, false if not.
+ * @param   pThis               The state.
+ */
+static bool BS3_NEAR_CODE Bs3Cg1CpuSetupFirst(PBS3CG1STATE pThis)
+{
+    uint32_t fEax;
+    uint32_t fEbx;
+    uint32_t fEcx;
+    uint32_t fEdx;
+
+    if (   (pThis->fFlags & BS3CG1INSTR_F_INVALID_64BIT)
+        && BS3_MODE_IS_64BIT_CODE(pThis->bMode))
+        return false;
+
+    switch (pThis->enmCpuTest)
+    {
+        case BS3CG1CPU_ANY:
+            return true;
+
+        case BS3CG1CPU_GE_80186:
+            if ((g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80186)
+                return true;
+            return false;
+
+        case BS3CG1CPU_GE_80286:
+            if ((g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80286)
+                return true;
+            return false;
+
+        case BS3CG1CPU_GE_80386:
+            if ((g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80386)
+                return true;
+            return false;
+
+        case BS3CG1CPU_GE_80486:
+            if ((g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80486)
+                return true;
+            return false;
+
+        case BS3CG1CPU_GE_Pentium:
+            if ((g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_Pentium)
+                return true;
+            return false;
+
+        case BS3CG1CPU_SSE:
+        case BS3CG1CPU_SSE2:
+        case BS3CG1CPU_SSE3:
+        case BS3CG1CPU_AVX:
+            if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
+            {
+                ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
+                switch (pThis->enmCpuTest)
+                {
+                    case BS3CG1CPU_SSE:
+                        if (fEdx & X86_CPUID_FEATURE_EDX_SSE)
+                            return Bs3Cg3SetupSseAndAvx(pThis);
+                        return false;
+                    case BS3CG1CPU_SSE2:
+                        if (fEdx & X86_CPUID_FEATURE_EDX_SSE2)
+                            return Bs3Cg3SetupSseAndAvx(pThis);
+                        return false;
+                    case BS3CG1CPU_SSE3:
+                        if (fEcx & X86_CPUID_FEATURE_ECX_SSE3)
+                            return Bs3Cg3SetupSseAndAvx(pThis);
+                        return false;
+                    case BS3CG1CPU_AVX:
+                        if (fEcx & X86_CPUID_FEATURE_ECX_AVX)
+                            return Bs3Cg3SetupSseAndAvx(pThis) && !BS3_MODE_IS_RM_OR_V86(pThis->bMode);
+                        return false;
+                    default: BS3_ASSERT(0); /* impossible */
+                }
+            }
+            return false;
+
+        case BS3CG1CPU_AVX2:
+            if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
+            {
+                ASMCpuIdExSlow(7, 0, 0/*leaf*/, 0, &fEax, &fEbx, &fEcx, &fEdx);
+                switch (pThis->enmCpuTest)
+                {
+                    case BS3CG1CPU_AVX2:
+                        if (fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2)
+                            return Bs3Cg3SetupSseAndAvx(pThis) && !BS3_MODE_IS_RM_OR_V86(pThis->bMode);
+                        return false;
+                    default: BS3_ASSERT(0); return false; /* impossible */
+                }
+            }
+            return false;
+
+        case BS3CG1CPU_CLFSH:
+            if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
+            {
+                ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, NULL, &fEdx);
+                if (fEdx & X86_CPUID_FEATURE_EDX_CLFSH)
+                    return true;
+            }
+            return false;
+
+        case BS3CG1CPU_CLFLUSHOPT:
+            if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
+            {
+                ASMCpuIdExSlow(7, 0, 0/*leaf*/, 0, NULL, &fEbx, NULL, NULL);
+                if (fEbx & X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT)
+                    return true;
+            }
+            return false;
+
+        default:
+            Bs3TestFailedF("Invalid enmCpuTest value: %d", pThis->enmCpuTest);
+            return false;
+    }
+}
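+
+/* Reference (architectural CPUID bits, for orientation only; not from the
+ * patch): leaf 1 EDX bit 25 = SSE, bit 26 = SSE2, bit 19 = CLFSH;
+ * leaf 1 ECX bit 0 = SSE3, bit 28 = AVX; leaf 7 EBX bit 5 = AVX2,
+ * bit 23 = CLFLUSHOPT. */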
+
+
+
+/**
+ * Checks the preconditions for a test.
+ *
+ * @returns true if the test should be executed, false if not.
+ * @param   pThis       The state.
+ * @param   pHdr        The test header.
+ */
+static bool BS3_NEAR_CODE Bs3Cg1RunSelector(PBS3CG1STATE pThis, PCBS3CG1TESTHDR pHdr)
+{
+    uint8_t const BS3_FAR *pbCode = (uint8_t const BS3_FAR *)(pHdr + 1);
+    unsigned cbLeft = pHdr->cbSelector;
+    while (cbLeft-- > 0)
+    {
+        switch (*pbCode++)
+        {
+#define CASE_PRED(a_Pred, a_Expr) \
+            case ((a_Pred) << BS3CG1SEL_OP_KIND_MASK) | BS3CG1SEL_OP_IS_TRUE: \
+                if (!(a_Expr)) return false; \
+                break; \
+            case ((a_Pred) << BS3CG1SEL_OP_KIND_MASK) | BS3CG1SEL_OP_IS_FALSE: \
+                if (a_Expr) return false; \
+                break
+            CASE_PRED(BS3CG1PRED_SIZE_O16, pThis->cbOperand == 2);
+            CASE_PRED(BS3CG1PRED_SIZE_O32, pThis->cbOperand == 4);
+            CASE_PRED(BS3CG1PRED_SIZE_O64, pThis->cbOperand == 8);
+            CASE_PRED(BS3CG1PRED_RING_0, pThis->uCpl == 0);
+            CASE_PRED(BS3CG1PRED_RING_1, pThis->uCpl == 1);
+            CASE_PRED(BS3CG1PRED_RING_2, pThis->uCpl == 2);
+            CASE_PRED(BS3CG1PRED_RING_3, pThis->uCpl == 3);
+            CASE_PRED(BS3CG1PRED_RING_0_THRU_2, pThis->uCpl <= 2);
+            CASE_PRED(BS3CG1PRED_RING_1_THRU_3, pThis->uCpl >= 1);
+            CASE_PRED(BS3CG1PRED_CODE_64BIT, BS3_MODE_IS_64BIT_CODE(pThis->bMode));
+            CASE_PRED(BS3CG1PRED_CODE_32BIT, BS3_MODE_IS_32BIT_CODE(pThis->bMode));
+            CASE_PRED(BS3CG1PRED_CODE_16BIT, BS3_MODE_IS_16BIT_CODE(pThis->bMode));
+            CASE_PRED(BS3CG1PRED_MODE_REAL,  BS3_MODE_IS_RM_SYS(pThis->bMode));
+            CASE_PRED(BS3CG1PRED_MODE_PROT,  BS3_MODE_IS_PM_SYS(pThis->bMode));
+            CASE_PRED(BS3CG1PRED_MODE_LONG,  BS3_MODE_IS_64BIT_SYS(pThis->bMode));
+            CASE_PRED(BS3CG1PRED_MODE_SMM,   false);
+            CASE_PRED(BS3CG1PRED_MODE_VMX,   false);
+            CASE_PRED(BS3CG1PRED_MODE_SVM,   false);
+            CASE_PRED(BS3CG1PRED_PAGING_ON,  BS3_MODE_IS_PAGED(pThis->bMode));
+            CASE_PRED(BS3CG1PRED_PAGING_OFF, !BS3_MODE_IS_PAGED(pThis->bMode));
+            CASE_PRED(BS3CG1PRED_VENDOR_AMD,   pThis->bCpuVendor == BS3CPUVENDOR_AMD);
+            CASE_PRED(BS3CG1PRED_VENDOR_INTEL, pThis->bCpuVendor == BS3CPUVENDOR_INTEL);
+            CASE_PRED(BS3CG1PRED_VENDOR_VIA,   pThis->bCpuVendor == BS3CPUVENDOR_VIA);
+
+#undef CASE_PRED
+            default:
+                return Bs3TestFailedF("Invalid selector opcode %#x!", pbCode[-1]);
+        }
+    }
+
+    return true;
+}
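+
+/* Illustration of the selector byte code (symbolic sketch; the numeric
+ * values of the constants are not shown in this hunk): each byte encodes a
+ * predicate and a polarity, so a two-byte program such as
+ *      (BS3CG1PRED_RING_0     << BS3CG1SEL_OP_KIND_MASK) | BS3CG1SEL_OP_IS_TRUE,
+ *      (BS3CG1PRED_CODE_64BIT << BS3CG1SEL_OP_KIND_MASK) | BS3CG1SEL_OP_IS_FALSE
+ * selects tests that only run at CPL 0 outside 64-bit code. */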
+
+
+#ifdef BS3CG1_DEBUG_CTX_MOD
+/**
+ * Translates the operator into a string.
+ *
+ * @returns Read-only string pointer.
+ * @param   bOpcode             The context modifier program opcode.
+ */
+static const char BS3_FAR * BS3_NEAR_CODE Bs3Cg1CtxOpToString(uint8_t bOpcode)
+{
+    switch (bOpcode & BS3CG1_CTXOP_OPERATOR_MASK)
+    {
+        case BS3CG1_CTXOP_ASSIGN:   return "=";
+        case BS3CG1_CTXOP_OR:       return "|=";
+        case BS3CG1_CTXOP_AND:      return "&=";
+        case BS3CG1_CTXOP_AND_INV:  return "&~=";
+        default:                    return "?WTF?";
+    }
+}
+#endif
+
+
+/**
+ * Runs a context modifier program.
+ *
+ * @returns Success indicator (true/false).
+ * @param   pThis       The state.
+ * @param   pCtx        The context.
+ * @param   pHdr        The program header.
+ * @param   off         The program offset relative to the end of the header.
+ * @param   cb          The program size.
+ * @param   pEflCtx     The context to take undefined EFLAGS from.  (This is NULL
+ *                      if we're processing an input context modifier program.)
+ * @param   pbInstr     Points to the first instruction byte.  For storing
+ *                      immediate operands during input context modification.
+ *                      NULL for output contexts.
+ */
+static bool BS3_NEAR_CODE Bs3Cg1RunContextModifier(PBS3CG1STATE pThis, PBS3REGCTX pCtx, PCBS3CG1TESTHDR pHdr,
+                                                   unsigned off, unsigned cb,
+                                                   PCBS3REGCTX pEflCtx, uint8_t BS3_FAR *pbInstr)
+{
+    uint8_t const BS3_FAR *pbCode = (uint8_t const BS3_FAR *)(pHdr + 1) + off;
+    int                    cbLeft = cb;
+    while (cbLeft-- > 0)
+    {
+        /*
+         * Decode the instruction.
+         */
+        uint8_t const   bOpcode = *pbCode++;
+        unsigned        cbValue;
+        unsigned        cbDst;
+        BS3CG1DST       idxField;
+        BS3PTRUNION     PtrField;
+
+        /* Expand the destination field (can be escaped). */
+        switch (bOpcode & BS3CG1_CTXOP_DST_MASK)
+        {
+            case BS3CG1_CTXOP_OP1:
+                idxField = pThis->aOperands[0].idxField;
+                if (idxField == BS3CG1DST_INVALID)
+                    idxField = BS3CG1DST_OP1;
+                break;
+
+            case BS3CG1_CTXOP_OP2:
+                idxField = pThis->aOperands[1].idxField;
+                if (idxField == BS3CG1DST_INVALID)
+                    idxField = BS3CG1DST_OP2;
+                break;
+
+            case BS3CG1_CTXOP_EFL:
+                idxField = BS3CG1DST_EFL;
+                break;
+
+            case BS3CG1_CTXOP_DST_ESC:
+                if (cbLeft-- > 0)
+                {
+                    idxField = (BS3CG1DST)*pbCode++;
+                    if (idxField <= BS3CG1DST_OP4)
+                    {
+                        if (idxField > BS3CG1DST_INVALID)
+                        {
+                            uint8_t idxField2 = pThis->aOperands[idxField - BS3CG1DST_OP1].idxField;
+                            if (idxField2 != BS3CG1DST_INVALID)
+                                idxField = idxField2;
+                            break;
+                        }
+                    }
+                    else if (idxField < BS3CG1DST_END)
+                        break;
+                    return Bs3TestFailedF("Malformed context instruction: idxField=%d", idxField);
+                }
+                /* fall thru */
+            default:
+                return Bs3TestFailed("Malformed context instruction: Destination");
+        }
+
+
+        /* Expand value size (can be escaped). */
+        switch (bOpcode & BS3CG1_CTXOP_SIZE_MASK)
+        {
+            case BS3CG1_CTXOP_1_BYTE:   cbValue =  1; break;
+            case BS3CG1_CTXOP_2_BYTES:  cbValue =  2; break;
+            case BS3CG1_CTXOP_4_BYTES:  cbValue =  4; break;
+            case BS3CG1_CTXOP_8_BYTES:  cbValue =  8; break;
+            case BS3CG1_CTXOP_16_BYTES: cbValue = 16; break;
+            case BS3CG1_CTXOP_32_BYTES: cbValue = 32; break;
+            case BS3CG1_CTXOP_12_BYTES: cbValue = 12; break;
+            case BS3CG1_CTXOP_SIZE_ESC:
+                if (cbLeft-- > 0)
+                {
+                    cbValue = *pbCode++;
+                    if (cbValue)
+                        break;
+                }
+                /* fall thru */
+            default:
+                return Bs3TestFailed("Malformed context instruction: size");
+        }
+
+        /* Make sure there is enough instruction bytes for the value. */
+        if (cbValue <= cbLeft)
+        { /* likely */ }
+        else
+            return Bs3TestFailedF("Malformed context instruction: %u bytes value, %u bytes left", cbValue, cbLeft);
+
+        /*
+         * Do value processing specific to the target field size.
+         */
+        cbDst = g_acbBs3Cg1DstFields[idxField];
+        if (cbDst == BS3CG1DSTSIZE_OPERAND)
+            cbDst = pThis->aOperands[idxField - BS3CG1DST_OP1].cbOp;
+        else if (cbDst == BS3CG1DSTSIZE_OPERAND_SIZE_GRP)
+            cbDst = pThis->cbOperand;
+        if (cbDst <= 8)
+        {
+            unsigned const offField = g_aoffBs3Cg1DstFields[idxField];
+
+            /*
+             * Deal with fields up to 8-byte wide.
+             */
+            /* Get the value. */
+            uint64_t uValue;
+            if ((bOpcode & BS3CG1_CTXOP_SIGN_EXT))
+                switch (cbValue)
+                {
+                    case 1: uValue = *(int8_t   const BS3_FAR *)pbCode; break;
+                    case 2: uValue = *(int16_t  const BS3_FAR *)pbCode; break;
+                    case 4: uValue = *(int32_t  const BS3_FAR *)pbCode; break;
+                    default:
+                        if (cbValue >= 8)
+                        {
+                            uValue = *(uint64_t const BS3_FAR *)pbCode;
+                            break;
+                        }
+                        return Bs3TestFailedF("Malformed context instruction: %u bytes value (%u dst)", cbValue, cbDst);
+                }
+            else
+                switch (cbValue)
+                {
+                    case 1: uValue = *(uint8_t  const BS3_FAR *)pbCode; break;
+                    case 2: uValue = *(uint16_t const BS3_FAR *)pbCode; break;
+                    case 4: uValue = *(uint32_t const BS3_FAR *)pbCode; break;
+                    default:
+                        if (cbValue >= 8)
+                        {
+                            uValue = *(uint64_t const BS3_FAR *)pbCode;
+                            break;
+                        }
+                        return Bs3TestFailedF("Malformed context instruction: %u bytes value (%u dst)", cbValue, cbDst);
+                }
+
+            /* Find the field. */
+            if (offField < sizeof(BS3REGCTX))
+                PtrField.pu8 = (uint8_t BS3_FAR *)pCtx + offField;
+            /* Non-register operands: */
+            else if ((unsigned)(idxField - BS3CG1DST_OP1) < 4U)
+            {
+                unsigned const idxOp = idxField - BS3CG1DST_OP1;
+
+                switch (pThis->aOperands[idxOp].enmLocation)
+                {
+                    case BS3CG1OPLOC_IMM:
+                        if (pbInstr)
+                            PtrField.pu8 = &pbInstr[pThis->aOperands[idxOp].off];
+                        else
+                            return Bs3TestFailedF("Immediate operand referenced in output context!");
+                        break;
+
+                    case BS3CG1OPLOC_MEM:
+                        if (!pbInstr)
+                            return Bs3TestFailedF("Read only operand specified in output!");
+                        PtrField.pu8 = &pThis->pbDataPg[X86_PAGE_SIZE - pThis->aOperands[idxOp].off];
+                        break;
+
+                    case BS3CG1OPLOC_MEM_RW:
+                        if (pbInstr)
+                            PtrField.pu8 = &pThis->pbDataPg[X86_PAGE_SIZE - pThis->aOperands[idxOp].off];
+                        else
+                            PtrField.pu8 = pThis->MemOp.ab;
+                        break;
+
+                    default:
+                        if (pThis->enmEncoding != pThis->enmEncodingNonInvalid)
+                            goto l_advance_to_next;
+                        return Bs3TestFailedF("Internal error: cbDst=%u idxField=%d (%d) offField=%#x: enmLocation=%u off=%#x idxField=%u",
+                                              cbDst, idxField, idxOp, offField, pThis->aOperands[idxOp].enmLocation,
+                                              pThis->aOperands[idxOp].off, pThis->aOperands[idxOp].idxField);
+                }
+            }
+            /* Special field: Copying in undefined EFLAGS from the result context. */
+            else if (idxField == BS3CG1DST_EFL_UNDEF)
+            {
+                if (!pEflCtx || (bOpcode & BS3CG1_CTXOP_OPERATOR_MASK) != BS3CG1_CTXOP_ASSIGN)
+                    return Bs3TestFailed("Invalid BS3CG1DST_EFL_UNDEF usage");
+                PtrField.pu32 = &pCtx->rflags.u32;
+                uValue = (*PtrField.pu32 & ~(uint32_t)uValue) | (pEflCtx->rflags.u32 & (uint32_t)uValue);
+            }
+            /* Special field: Expected value (in/result) exception. */
+            else if (idxField == BS3CG1DST_VALUE_XCPT)
+            {
+                if (!pEflCtx || (bOpcode & BS3CG1_CTXOP_OPERATOR_MASK) != BS3CG1_CTXOP_ASSIGN || cbDst != 1)
+                    return Bs3TestFailed("Invalid BS3CG1DST_VALUE_XCPT usage");
+                PtrField.pu8 = &pThis->bValueXcpt;
+            }
+            /* FPU and FXSAVE format. */
+            else if (   pThis->pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT
+                     && offField - sizeof(BS3REGCTX) <= RT_UOFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[15]) )
+            {
+                if (!pThis->fWorkExtCtx)
+                    return Bs3TestFailedF("Extended context disabled: Field %d @ %#x LB %u\n", idxField, offField, cbDst);
+                PtrField.pb = (uint8_t *)pThis->pExtCtx + offField - sizeof(BS3REGCTX);
+            }
+            /** @todo other FPU fields and FPU state formats. */
+            else
+                return Bs3TestFailedF("Todo implement me: cbDst=%u idxField=%d offField=%#x", cbDst, idxField, offField);
+
+#ifdef BS3CG1_DEBUG_CTX_MOD
+            switch (cbDst)
+            {
+                case 1:
+                    BS3CG1_DPRINTF(("dbg: modify %s: %#04RX8 (LB %u) %s %#RX64 (LB %u)\n", g_aszBs3Cg1DstFields[idxField].sz,
+                                    *PtrField.pu8, cbDst, Bs3Cg1CtxOpToString(bOpcode), uValue, cbValue));
+                    break;
+                case 2:
+                    BS3CG1_DPRINTF(("dbg: modify %s: %#06RX16 (LB %u) %s %#RX64 (LB %u)\n", g_aszBs3Cg1DstFields[idxField].sz,
+                                    *PtrField.pu16, cbDst, Bs3Cg1CtxOpToString(bOpcode), uValue, cbValue));
+                    break;
+                case 4:
+                    BS3CG1_DPRINTF(("dbg: modify %s: %#010RX32 (LB %u) %s %#RX64 (LB %u)\n", g_aszBs3Cg1DstFields[idxField].sz,
+                                    *PtrField.pu32, cbDst, Bs3Cg1CtxOpToString(bOpcode), uValue, cbValue));
+                    break;
+                default:
+                    BS3CG1_DPRINTF(("dbg: modify %s: %#018RX64 (LB %u) %s %#RX64 (LB %u)\n", g_aszBs3Cg1DstFields[idxField].sz,
+                                    *PtrField.pu64, cbDst, Bs3Cg1CtxOpToString(bOpcode), uValue, cbValue));
+                    break;
+            }
+#endif
+
+            /* Modify the field. */
+            switch (cbDst)
+            {
+                case 1:
+                    switch (bOpcode & BS3CG1_CTXOP_OPERATOR_MASK)
+                    {
+                        case BS3CG1_CTXOP_ASSIGN:   *PtrField.pu8  =  (uint8_t)uValue; break;
+                        case BS3CG1_CTXOP_OR:       *PtrField.pu8 |=  (uint8_t)uValue; break;
+                        case BS3CG1_CTXOP_AND:      *PtrField.pu8 &=  (uint8_t)uValue; break;
+                        case BS3CG1_CTXOP_AND_INV:  *PtrField.pu8 &= ~(uint8_t)uValue; break;
+                    }
+                    break;
+
+                case 2:
+                    switch (bOpcode & BS3CG1_CTXOP_OPERATOR_MASK)
+                    {
+                        case BS3CG1_CTXOP_ASSIGN:   *PtrField.pu16  =  (uint16_t)uValue; break;
+                        case BS3CG1_CTXOP_OR:       *PtrField.pu16 |=  (uint16_t)uValue; break;
+                        case BS3CG1_CTXOP_AND:      *PtrField.pu16 &=  (uint16_t)uValue; break;
+                        case BS3CG1_CTXOP_AND_INV:  *PtrField.pu16 &= ~(uint16_t)uValue; break;
+                    }
+                    break;
+
+                case 4:
+                    if ((unsigned)(idxField - BS3CG1DST_XMM0_DW0_ZX) <= (unsigned)(BS3CG1DST_XMM15_DW0_ZX - BS3CG1DST_XMM0_DW0_ZX))
+                    {
+                        PtrField.pu32[1] = 0;
+                        PtrField.pu64[1] = 0;
+                    }
+                    else if (offField <= RT_OFFSETOF(BS3REGCTX, r15)) /* Clear the top dword. */
+                        PtrField.pu32[1] = 0;
+                    switch (bOpcode & BS3CG1_CTXOP_OPERATOR_MASK)
+                    {
+                        case BS3CG1_CTXOP_ASSIGN:   *PtrField.pu32  =  (uint32_t)uValue; break;
+                        case BS3CG1_CTXOP_OR:       *PtrField.pu32 |=  (uint32_t)uValue; break;
+                        case BS3CG1_CTXOP_AND:      *PtrField.pu32 &=  (uint32_t)uValue; break;
+                        case BS3CG1_CTXOP_AND_INV:  *PtrField.pu32 &= ~(uint32_t)uValue; break;
+                    }
+                    break;
+
+                case 8:
+                    if ((unsigned)(idxField - BS3CG1DST_XMM0_LO_ZX) <= (unsigned)(BS3CG1DST_XMM15_LO_ZX - BS3CG1DST_XMM0_LO_ZX))
+                        PtrField.pu64[1] = 0;
+                    switch (bOpcode & BS3CG1_CTXOP_OPERATOR_MASK)
+                    {
+                        case BS3CG1_CTXOP_ASSIGN:   *PtrField.pu64  =  (uint64_t)uValue; break;
+                        case BS3CG1_CTXOP_OR:       *PtrField.pu64 |=  (uint64_t)uValue; break;
+                        case BS3CG1_CTXOP_AND:      *PtrField.pu64 &=  (uint64_t)uValue; break;
+                        case BS3CG1_CTXOP_AND_INV:  *PtrField.pu64 &= ~(uint64_t)uValue; break;
+                    }
+                    break;
+
+                default:
+                    return Bs3TestFailedF("Malformed context instruction: cbDst=%u, expected 1, 2, 4, or 8", cbDst);
+            }
+
+#ifdef BS3CG1_DEBUG_CTX_MOD
+            switch (cbDst)
+            {
+                case 1:  BS3CG1_DPRINTF(("dbg:    --> %s: %#04RX8\n",   g_aszBs3Cg1DstFields[idxField].sz, *PtrField.pu8));  break;
+                case 2:  BS3CG1_DPRINTF(("dbg:    --> %s: %#06RX16\n",  g_aszBs3Cg1DstFields[idxField].sz, *PtrField.pu16)); break;
+                case 4:  BS3CG1_DPRINTF(("dbg:    --> %s: %#010RX32\n", g_aszBs3Cg1DstFields[idxField].sz, *PtrField.pu32)); break;
+                default: BS3CG1_DPRINTF(("dbg:    --> %s: %#018RX64\n", g_aszBs3Cg1DstFields[idxField].sz, *PtrField.pu64)); break;
+            }
+#endif
+
+        }
+        /*
+         * Deal with larger field (FPU, SSE, AVX, ...).
+         */
+        else
+        {
+            union
+            {
+                X86FPUREG   FpuReg;
+                X86XMMREG   XmmReg;
+                X86YMMREG   YmmReg;
+                X86ZMMREG   ZmmReg;
+                uint8_t     ab[sizeof(X86ZMMREG)];
+                uint32_t    au32[sizeof(X86ZMMREG) / sizeof(uint32_t)];
+            } Value;
+            unsigned const offField = g_aoffBs3Cg1DstFields[idxField];
+
+            if (!pThis->fWorkExtCtx)
+                return Bs3TestFailedF("Extended context disabled: Field %d @ %#x LB %u\n", idxField, offField, cbDst);
+
+            /* Copy the value into the union, doing the zero padding / extending. */
+            Bs3MemCpy(&Value, pbCode, cbValue);
+            if (cbValue < sizeof(Value))
+            {
+                if ((bOpcode & BS3CG1_CTXOP_SIGN_EXT) && (Value.ab[cbValue - 1] & 0x80))
+                    Bs3MemSet(&Value.ab[cbValue], 0xff, sizeof(Value) - cbValue);
+                else
+                    Bs3MemSet(&Value.ab[cbValue], 0x00, sizeof(Value) - cbValue);
+            }
+
+            /* Optimized access to XMM and STx registers. */
+            if (   pThis->pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT
+                && offField - sizeof(BS3REGCTX) <= RT_UOFFSETOF(BS3EXTCTX, Ctx.x87.aXMM[15]) )
+                PtrField.pb = (uint8_t *)pThis->pExtCtx + offField - sizeof(BS3REGCTX);
+            /* Non-register operands: */
+            else if ((unsigned)(idxField - BS3CG1DST_OP1) < 4U)
+            {
+                unsigned const idxOp = idxField - BS3CG1DST_OP1;
+                switch (pThis->aOperands[idxOp].enmLocation)
+                {
+                    case BS3CG1OPLOC_MEM:
+                        if (!pbInstr)
+                            return Bs3TestFailedF("Read only operand specified in output!");
+                        PtrField.pu8 = &pThis->pbDataPg[X86_PAGE_SIZE - pThis->aOperands[idxOp].off];
+                        break;
+
+                    case BS3CG1OPLOC_MEM_RW:
+                        if (pbInstr)
+                            PtrField.pu8 = &pThis->pbDataPg[X86_PAGE_SIZE - pThis->aOperands[idxOp].off];
+                        else
+                            PtrField.pu8 = pThis->MemOp.ab;
+                        break;
+
+                    default:
+                        return Bs3TestFailedF("Internal error: Field %d (%d) @ %#x LB %u: enmLocation=%u off=%#x idxField=%u",
+                                              idxField, idxOp, offField, cbDst, pThis->aOperands[idxOp].enmLocation,
+                                              pThis->aOperands[idxOp].off, pThis->aOperands[idxOp].idxField);
+                }
+            }
+            /* The YMM (AVX) and the first 16 ZMM (AVX512) registers have split storage in
+               the state, so they need special handling.  */
+            else
+            {
+                return Bs3TestFailedF("TODO: implement me: cbDst=%d idxField=%d (AVX and other weird state)", cbDst, idxField);
+            }
+
+            if (PtrField.pb)
+            {
+                /* Modify the field / memory. */
+                unsigned i;
+                if (cbDst & 3)
+                    return Bs3TestFailedF("Malformed context instruction: cbDst=%u, multiple of 4", cbDst);
+
+#ifdef BS3CG1_DEBUG_CTX_MOD
+                BS3CG1_DPRINTF(("dbg: modify %s: %.*Rhxs (LB %u) %s %.*Rhxs (LB %u)\n", g_aszBs3Cg1DstFields[idxField].sz,
+                                cbDst, PtrField.pb, cbDst, Bs3Cg1CtxOpToString(bOpcode), cbValue, Value.ab, cbValue));
+#endif
+
+                i = cbDst / 4;
+                while (i-- > 0)
+                {
+                    switch (bOpcode & BS3CG1_CTXOP_OPERATOR_MASK)
+                    {
+                        case BS3CG1_CTXOP_ASSIGN:   PtrField.pu32[i]  =  Value.au32[i]; break;
+                        case BS3CG1_CTXOP_OR:       PtrField.pu32[i] |=  Value.au32[i]; break;
+                        case BS3CG1_CTXOP_AND:      PtrField.pu32[i] &=  Value.au32[i]; break;
+                        case BS3CG1_CTXOP_AND_INV:  PtrField.pu32[i] &= ~Value.au32[i]; break;
+                    }
+                }
+
+#ifdef BS3CG1_DEBUG_CTX_MOD
+                BS3CG1_DPRINTF(("dbg:    --> %s: %.*Rhxs\n", g_aszBs3Cg1DstFields[idxField].sz, cbDst, PtrField.pb));
+#endif
+            }
+        }
+
+        /*
+         * Advance to the next instruction.
+         */
+l_advance_to_next:
+        pbCode += cbValue;
+        cbLeft -= cbValue;
+    }
+
+    return true;
+}
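+
+/* Illustration of the context modifier byte code (symbolic sketch, not from
+ * the patch): each instruction is an opcode byte packing a destination,
+ * operator and value size, optionally followed by escape bytes and then the
+ * value itself.  E.g. the symbolic sequence
+ *      BS3CG1_CTXOP_OP1 | BS3CG1_CTXOP_ASSIGN | BS3CG1_CTXOP_1_BYTE, 0x42
+ * assigns the byte value 0x42 to whatever field operand 1 resolves to. */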
+
+
+/**
+ * Checks the result of a run.
+ *
+ * @returns true if successful, false if not.
+ * @param   pThis                   The state.
+ * @param   bTestXcptExpected       The exception causing the test code to stop
+ *                                  executing.
+ * @param   fInvalidEncodingPgFault Set if we've cut the instruction a byte
+ *                                  short and are expecting a \#PF on the page
+ *                                  boundary rather than a \#UD.  Only set if
+ *                                  fInvalidEncoding is also set.
+ * @param   iEncoding               For error reporting.
+ */
+static bool BS3_NEAR_CODE Bs3Cg1CheckResult(PBS3CG1STATE pThis, uint8_t bTestXcptExpected,
+                                            bool fInvalidEncodingPgFault, unsigned iEncoding)
+{
+    unsigned iOperand;
+
+    /*
+     * Check the exception state first.
+     */
+    uint8_t bExpectedXcpt;
+    uint8_t cbAdjustPc;
+    if (!pThis->fInvalidEncoding)
+    {
+        bExpectedXcpt = pThis->bAlignmentXcpt;
+        if (bExpectedXcpt == UINT8_MAX)
+            bExpectedXcpt = pThis->bValueXcpt;
+        if (bExpectedXcpt == UINT8_MAX)
+        {
+            cbAdjustPc    = pThis->cbCurInstr;
+            bExpectedXcpt = bTestXcptExpected;
+            if (bTestXcptExpected == X86_XCPT_PF)
+                pThis->Ctx.cr2.u = pThis->uCodePgFlat + X86_PAGE_SIZE;
+        }
+        else
+            cbAdjustPc = 0;
+    }
+    else
+    {
+        cbAdjustPc = 0;
+        if (!fInvalidEncodingPgFault)
+            bExpectedXcpt = X86_XCPT_UD;
+        else
+        {
+            bExpectedXcpt = X86_XCPT_PF;
+            pThis->Ctx.cr2.u = pThis->uCodePgFlat + X86_PAGE_SIZE;
+        }
+    }
+    if (RT_LIKELY(   pThis->TrapFrame.bXcpt     == bExpectedXcpt
+                  && pThis->TrapFrame.Ctx.rip.u == pThis->Ctx.rip.u + cbAdjustPc))
+    {
+        /*
+         * Check the register content.
+         */
+        bool fOkay = Bs3TestCheckRegCtxEx(&pThis->TrapFrame.Ctx, &pThis->Ctx,
+                                           cbAdjustPc, 0 /*cbSpAdjust*/, 0 /*fExtraEfl*/,
+                                           pThis->pszMode, iEncoding);
+
+        /*
+         * Check memory output operands.
+         */
+        if (!pThis->fInvalidEncoding)
+        {
+            iOperand = pThis->cOperands;
+            while (iOperand-- > 0)
+                if (pThis->aOperands[iOperand].enmLocation == BS3CG1OPLOC_MEM_RW)
+                {
+                    if (pThis->aOperands[iOperand].off)
+                    {
+                        BS3PTRUNION PtrUnion;
+                        PtrUnion.pb = &pThis->pbDataPg[X86_PAGE_SIZE - pThis->aOperands[iOperand].off];
+                        switch (pThis->aOperands[iOperand].cbOp)
+                        {
+                            case 1:
+                                if (*PtrUnion.pu8 == pThis->MemOp.ab[0])
+                                    continue;
+                                Bs3TestFailedF("op%u: Wrote %#04RX8, expected %#04RX8",
+                                               iOperand, *PtrUnion.pu8, pThis->MemOp.ab[0]);
+                                break;
+                            case 2:
+                                if (*PtrUnion.pu16 == pThis->MemOp.au16[0])
+                                    continue;
+                                Bs3TestFailedF("op%u: Wrote %#06RX16, expected %#06RX16",
+                                               iOperand, *PtrUnion.pu16, pThis->MemOp.au16[0]);
+                                break;
+                            case 4:
+                                if (*PtrUnion.pu32 == pThis->MemOp.au32[0])
+                                    continue;
+                                Bs3TestFailedF("op%u: Wrote %#010RX32, expected %#010RX32",
+                                               iOperand, *PtrUnion.pu32, pThis->MemOp.au32[0]);
+                                break;
+                            case 8:
+                                if (*PtrUnion.pu64 == pThis->MemOp.au64[0])
+                                    continue;
+                                Bs3TestFailedF("op%u: Wrote %#018RX64, expected %#018RX64",
+                                               iOperand, *PtrUnion.pu64, pThis->MemOp.au64[0]);
+                                break;
+                            default:
+                                if (Bs3MemCmp(PtrUnion.pb, pThis->MemOp.ab, pThis->aOperands[iOperand].cbOp) == 0)
+                                    continue;
+                                Bs3TestFailedF("op%u: Wrote %.*Rhxs, expected %.*Rhxs",
+                                               iOperand,
+                                               pThis->aOperands[iOperand].cbOp, PtrUnion.pb,
+                                               pThis->aOperands[iOperand].cbOp, pThis->MemOp.ab);
+                                break;
+                        }
+                    }
+                    else
+                        Bs3TestFailedF("op%u: off is zero\n", iOperand);
+                    fOkay = false;
+                }
+        }
+
+        /*
+         * Check extended context if enabled.
+         */
+        if (pThis->fWorkExtCtx)
+        {
+            PBS3EXTCTX pExpect = pThis->pExtCtx;
+            PBS3EXTCTX pResult = pThis->pResultExtCtx;
+            unsigned   i;
+            if (   pExpect->enmMethod == BS3EXTCTXMETHOD_XSAVE
+                || pExpect->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
+            {
+                /* Compare the x87 state, ASSUMING XCR0 bit 1 is set. */
+#define CHECK_FIELD(a_Field, a_szFmt) \
+    if (pResult->Ctx.a_Field != pExpect->Ctx.a_Field) fOkay = Bs3TestFailedF(a_szFmt, pResult->Ctx.a_Field, pExpect->Ctx.a_Field)
+                CHECK_FIELD(x87.FCW, "FCW: %#06x, expected %#06x");
+                CHECK_FIELD(x87.FSW, "FSW: %#06x, expected %#06x");
+                CHECK_FIELD(x87.FTW, "FTW: %#06x, expected %#06x");
+                //CHECK_FIELD(x87.FOP,      "FOP: %#06x, expected %#06x");
+                //CHECK_FIELD(x87.FPUIP,    "FPUIP:  %#010RX32, expected %#010RX32");
+                //CHECK_FIELD(x87.CS,       "FPUCS:  %#06x, expected %#06x");
+                //CHECK_FIELD(x87.Rsrvd1,   "Rsrvd1: %#06x, expected %#06x");
+                //CHECK_FIELD(x87.DP,       "FPUDP:  %#010RX32, expected %#010RX32");
+                //CHECK_FIELD(x87.DS,       "FPUDS:  %#06x, expected %#06x");
+                //CHECK_FIELD(x87.Rsrvd2,   "Rsrvd2: %#06x, expected %#06x");
+                CHECK_FIELD(x87.MXCSR,      "MXCSR:  %#010RX32, expected %#010RX32");
+#undef CHECK_FIELD
+                for (i = 0; i < RT_ELEMENTS(pExpect->Ctx.x87.aRegs); i++)
+                    if (   pResult->Ctx.x87.aRegs[i].au64[0] != pExpect->Ctx.x87.aRegs[i].au64[0]
+                        || pResult->Ctx.x87.aRegs[i].au16[4] != pExpect->Ctx.x87.aRegs[i].au16[4])
+                        fOkay = Bs3TestFailedF("ST[%u]: %c m=%#RX64 e=%d, expected %c m=%#RX64 e=%d", i,
+                                               pResult->Ctx.x87.aRegs[i].r80Ex.s.fSign ? '-' : '+',
+                                               pResult->Ctx.x87.aRegs[i].r80Ex.s.u64Mantissa,
+                                               pResult->Ctx.x87.aRegs[i].r80Ex.s.uExponent,
+                                               pExpect->Ctx.x87.aRegs[i].r80Ex.s.fSign ? '-' : '+',
+                                               pExpect->Ctx.x87.aRegs[i].r80Ex.s.u64Mantissa,
+                                               pExpect->Ctx.x87.aRegs[i].r80Ex.s.uExponent);
+                for (i = 0; i < (ARCH_BITS == 64 ? 16 : 8); i++)
+                    if (   pResult->Ctx.x87.aXMM[i].au64[0] != pExpect->Ctx.x87.aXMM[i].au64[0]
+                        || pResult->Ctx.x87.aXMM[i].au64[1] != pExpect->Ctx.x87.aXMM[i].au64[1])
+                        fOkay = Bs3TestFailedF("XMM%u: %#010RX64'%016RX64, expected %#010RX64'%08RX64", i,
+                                               pResult->Ctx.x87.aXMM[i].au64[1],
+                                               pResult->Ctx.x87.aXMM[i].au64[0],
+                                               pExpect->Ctx.x87.aXMM[i].au64[1],
+                                               pExpect->Ctx.x87.aXMM[i].au64[0]);
+            }
+            else
+                fOkay = Bs3TestFailedF("Unsupported extended CPU context method: %d", pExpect->enmMethod);
+        }
+
+        /*
+         * Done.
+         */
+        if (fOkay)
+            return true;
+
+        /*
+         * Report failure.
+         */
+        Bs3TestFailedF("ins#%RU32/test#%u: encoding #%u: %.*Rhxs%s",
+                       pThis->iInstr, pThis->iTest, iEncoding, pThis->cbCurInstr, pThis->abCurInstr,
+                       fInvalidEncodingPgFault ? " (cut short)" : "");
+    }
+    else
+        Bs3TestFailedF("ins#%RU32/test#%u: bXcpt=%#x expected %#x; rip=%RX64 expected %RX64; encoding#%u: %.*Rhxs%s",
+                       pThis->iInstr, pThis->iTest,
+                       pThis->TrapFrame.bXcpt, bExpectedXcpt,
+                       pThis->TrapFrame.Ctx.rip.u, pThis->Ctx.rip.u + cbAdjustPc,
+                       iEncoding, pThis->cbCurInstr, pThis->abCurInstr, fInvalidEncodingPgFault ? " (cut short)" : "");
+    Bs3TestPrintf("cpl=%u cbOperands=%u\n", pThis->uCpl, pThis->cbOperand);
+
+    /*
+     * Display memory operands.
+     */
+    for (iOperand = 0; iOperand < pThis->cOperands; iOperand++)
+    {
+        BS3PTRUNION PtrUnion;
+        switch (pThis->aOperands[iOperand].enmLocation)
+        {
+            case BS3CG1OPLOC_CTX:
+            {
+                uint8_t  idxField = pThis->aOperands[iOperand].idxField;
+                unsigned offField = g_aoffBs3Cg1DstFields[idxField];
+                if (offField <= sizeof(BS3REGCTX))
+                    PtrUnion.pb = (uint8_t BS3_FAR *)&pThis->Ctx + offField;
+                else
+                {
+                    Bs3TestPrintf("op%u: ctx%u: xxxx\n", iOperand, pThis->aOperands[iOperand].cbOp * 8);
+                    break;
+                }
+                switch (pThis->aOperands[iOperand].cbOp)
+                {
+                    case 1: Bs3TestPrintf("op%u: ctx08: %#04RX8\n", iOperand, *PtrUnion.pu8); break;
+                    case 2: Bs3TestPrintf("op%u: ctx16: %#06RX16\n", iOperand, *PtrUnion.pu16); break;
+                    case 4: Bs3TestPrintf("op%u: ctx32: %#010RX32\n", iOperand, *PtrUnion.pu32); break;
+                    case 8: Bs3TestPrintf("op%u: ctx64: %#018RX64\n", iOperand, *PtrUnion.pu64); break;
+                    default:
+                        Bs3TestPrintf("op%u: ctx%u: %.*Rhxs\n", iOperand, pThis->aOperands[iOperand].cbOp * 8,
+                                      pThis->aOperands[iOperand].cbOp, PtrUnion.pb);
+                        break;
+                }
+                break;
+            }
+
+            case BS3CG1OPLOC_IMM:
+                PtrUnion.pb = &pThis->pbCodePg[pThis->aOperands[iOperand].off];
+                switch (pThis->aOperands[iOperand].cbOp)
+                {
+                    case 1: Bs3TestPrintf("op%u: imm08: %#04RX8\n", iOperand, *PtrUnion.pu8); break;
+                    case 2: Bs3TestPrintf("op%u: imm16: %#06RX16\n", iOperand, *PtrUnion.pu16); break;
+                    case 4: Bs3TestPrintf("op%u: imm32: %#010RX32\n", iOperand, *PtrUnion.pu32); break;
+                    case 8: Bs3TestPrintf("op%u: imm64: %#018RX64\n", iOperand, *PtrUnion.pu64); break;
+                    default:
+                        Bs3TestPrintf("op%u: imm%u: %.*Rhxs\n", iOperand, pThis->aOperands[iOperand].cbOp * 8,
+                                      pThis->aOperands[iOperand].cbOp, PtrUnion.pb);
+                        break;
+                }
+                break;
+
+            case BS3CG1OPLOC_MEM:
+            case BS3CG1OPLOC_MEM_RW:
+                if (pThis->aOperands[iOperand].off)
+                {
+                    PtrUnion.pb = &pThis->pbDataPg[X86_PAGE_SIZE - pThis->aOperands[iOperand].off];
+                    switch (pThis->aOperands[iOperand].cbOp)
+                    {
+                        case 1: Bs3TestPrintf("op%u: result mem08: %#04RX8\n", iOperand, *PtrUnion.pu8); break;
+                        case 2: Bs3TestPrintf("op%u: result mem16: %#06RX16\n", iOperand, *PtrUnion.pu16); break;
+                        case 4: Bs3TestPrintf("op%u: result mem32: %#010RX32\n", iOperand, *PtrUnion.pu32); break;
+                        case 8: Bs3TestPrintf("op%u: result mem64: %#018RX64\n", iOperand, *PtrUnion.pu64); break;
+                        default:
+                            Bs3TestPrintf("op%u: result mem%u: %.*Rhxs\n", iOperand, pThis->aOperands[iOperand].cbOp * 8,
+                                          pThis->aOperands[iOperand].cbOp, PtrUnion.pb);
+                            break;
+                    }
+                    if (pThis->aOperands[iOperand].enmLocation == BS3CG1OPLOC_MEM_RW)
+                    {
+                        PtrUnion.pb = pThis->MemOp.ab;
+                        switch (pThis->aOperands[iOperand].cbOp)
+                        {
+                            case 1: Bs3TestPrintf("op%u: expect mem08: %#04RX8\n", iOperand, *PtrUnion.pu8); break;
+                            case 2: Bs3TestPrintf("op%u: expect mem16: %#06RX16\n", iOperand, *PtrUnion.pu16); break;
+                            case 4: Bs3TestPrintf("op%u: expect mem32: %#010RX32\n", iOperand, *PtrUnion.pu32); break;
+                            case 8: Bs3TestPrintf("op%u: expect mem64: %#018RX64\n", iOperand, *PtrUnion.pu64); break;
+                            default:
+                                Bs3TestPrintf("op%u: expect mem%u: %.*Rhxs\n", iOperand, pThis->aOperands[iOperand].cbOp * 8,
+                                              pThis->aOperands[iOperand].cbOp, PtrUnion.pb);
+                                break;
+                        }
+                    }
+                }
+                else
+                    Bs3TestPrintf("op%u: mem%u: zero off value!!\n", iOperand, pThis->aOperands[iOperand].cbOp * 8);
+                break;
+        }
+    }
+
+    /*
+     * Display contexts.
+     */
+    Bs3TestPrintf("-- Expected context:\n");
+    Bs3RegCtxPrint(&pThis->Ctx);
+    if (pThis->fWorkExtCtx)
+        Bs3TestPrintf("xcr0=%RX64\n", pThis->pExtCtx->fXcr0Saved);
+    Bs3TestPrintf("-- Actual context:\n");
+    Bs3TrapPrintFrame(&pThis->TrapFrame);
+    if (pThis->fWorkExtCtx)
+        Bs3TestPrintf("xcr0=%RX64\n", pThis->pResultExtCtx->fXcr0Saved);
+    Bs3TestPrintf("\n");
+    return false;
+}
+
+
+/**
+ * Destroys the state, freeing all allocations and such.
+ *
+ * @param   pThis               The state.
+ */
+static void BS3_NEAR_CODE Bs3Cg1Destroy(PBS3CG1STATE pThis)
+{
+    if (BS3_MODE_IS_PAGED(pThis->bMode))
+    {
+#if ARCH_BITS != 16
+        Bs3MemGuardedTestPageFree(pThis->pbCodePg);
+        Bs3MemGuardedTestPageFree(pThis->pbDataPg);
+#endif
+    }
+    else
+    {
+        Bs3MemFree(pThis->pbCodePg, X86_PAGE_SIZE);
+        Bs3MemFree(pThis->pbDataPg, X86_PAGE_SIZE);
+    }
+
+    if (pThis->pExtCtx)
+        Bs3MemFree(pThis->pExtCtx, pThis->pExtCtx->cb * 3);
+
+    pThis->pbCodePg       = NULL;
+    pThis->pbDataPg       = NULL;
+    pThis->pExtCtx        = NULL;
+    pThis->pResultExtCtx  = NULL;
+    pThis->pInitialExtCtx = NULL;
+}
+
+
+/**
+ * Initializes the state.
+ *
+ * @returns Success indicator (true/false).
+ * @param   pThis               The state.
+ * @param   bMode               The mode being tested.
+ */
+bool BS3_NEAR_CODE BS3_CMN_NM(Bs3Cg1Init)(PBS3CG1STATE pThis, uint8_t bMode)
+{
+    BS3MEMKIND const    enmMemKind = BS3_MODE_IS_RM_OR_V86(bMode) ? BS3MEMKIND_REAL
+                                   : !BS3_MODE_IS_64BIT_CODE(bMode) ? BS3MEMKIND_TILED : BS3MEMKIND_FLAT32;
+    unsigned            iRing;
+    unsigned            cb;
+    unsigned            i;
+    uint64_t            fFlags;
+    PBS3EXTCTX          pExtCtx;
+
+    Bs3MemSet(pThis, 0, sizeof(*pThis));
+
+    pThis->iFirstRing         = BS3_MODE_IS_V86(bMode)    ? 3 : 0;
+    pThis->iEndRing           = BS3_MODE_IS_RM_SYS(bMode) ? 1 : 4;
+    pThis->bMode              = bMode;
+    pThis->pszMode            = Bs3GetModeName(bMode);
+    pThis->pszModeShort       = Bs3GetModeNameShortLower(bMode);
+    pThis->bCpuVendor         = Bs3GetCpuVendor();
+    pThis->pchMnemonic        = g_achBs3Cg1Mnemonics;
+    pThis->pabOperands        = g_abBs3Cg1Operands;
+    pThis->pabOpcodes         = g_abBs3Cg1Opcodes;
+    pThis->fAdvanceMnemonic   = 1;
+
+    /* Allocate extended context structures. */
+    cb = Bs3ExtCtxGetSize(&fFlags);
+    pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 3);
+    if (!pExtCtx)
+        return Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 3);
+    pThis->pExtCtx        = pExtCtx;
+    pThis->pResultExtCtx  = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx + cb);
+    pThis->pInitialExtCtx = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx + cb + cb);
+
+    Bs3ExtCtxInit(pThis->pExtCtx, cb, fFlags);
+    Bs3ExtCtxInit(pThis->pResultExtCtx, cb, fFlags);
+    Bs3ExtCtxInit(pThis->pInitialExtCtx, cb, fFlags);
+    //Bs3TestPrintf("fCR0=%RX64 cbExtCtx=%#x method=%d\n", fFlags, cb, pExtCtx->enmMethod);
+
+    /* Allocate guarded executable and data memory. */
+    if (BS3_MODE_IS_PAGED(bMode))
+    {
+#if ARCH_BITS != 16
+        pThis->pbCodePg = Bs3MemGuardedTestPageAlloc(enmMemKind);
+        pThis->pbDataPg = Bs3MemGuardedTestPageAlloc(enmMemKind);
+        if (!pThis->pbCodePg || !pThis->pbDataPg)
+        {
+            Bs3TestFailedF("Bs3MemGuardedTestPageAlloc(%d) failed", enmMemKind);
+            Bs3MemPrintInfo();
+            Bs3Shutdown();
+            return Bs3TestFailedF("Bs3MemGuardedTestPageAlloc(%d) failed", enmMemKind);
+        }
+        if (   BS3_MODE_IS_64BIT_CODE(bMode)
+            && (uintptr_t)pThis->pbDataPg >= _2G)
+            return Bs3TestFailedF("pbDataPg=%p is above 2GB and not simple to address from 64-bit code", pThis->pbDataPg);
+#else
+        return Bs3TestFailed("WTF?! #1");
+#endif
+    }
+    else
+    {
+        pThis->pbCodePg = Bs3MemAlloc(enmMemKind, X86_PAGE_SIZE);
+        pThis->pbDataPg = Bs3MemAlloc(enmMemKind, X86_PAGE_SIZE);
+        if (!pThis->pbCodePg || !pThis->pbDataPg)
+        {
+            Bs3MemPrintInfo();
+            return Bs3TestFailedF("Bs3MemAlloc(%d,Pg) failed", enmMemKind);
+        }
+    }
+    pThis->uCodePgFlat = Bs3SelPtrToFlat(pThis->pbCodePg);
+    pThis->uDataPgFlat = Bs3SelPtrToFlat(pThis->pbDataPg);
+#if ARCH_BITS == 16
+    pThis->CodePgFar.sel = BS3_FP_SEG(pThis->pbCodePg);
+    pThis->CodePgFar.off = BS3_FP_OFF(pThis->pbCodePg);
+    pThis->CodePgRip     = BS3_FP_OFF(pThis->pbCodePg);
+    pThis->DataPgFar.sel = BS3_FP_SEG(pThis->pbDataPg);
+    pThis->DataPgFar.off = BS3_FP_OFF(pThis->pbDataPg);
+#else
+    if (BS3_MODE_IS_RM_OR_V86(bMode))
+    {
+        *(uint32_t *)&pThis->DataPgFar = Bs3SelFlatDataToRealMode(pThis->uDataPgFlat);
+        ASMCompilerBarrier();
+        pThis->CodePgFar.off = 0;
+        pThis->CodePgFar.sel = pThis->uCodePgFlat >> 4;
+        pThis->CodePgRip     = pThis->CodePgFar.off;
+    }
+    else if (BS3_MODE_IS_16BIT_CODE(bMode))
+    {
+        *(uint32_t *)&pThis->DataPgFar = Bs3SelFlatDataToProtFar16(pThis->uDataPgFlat);
+        ASMCompilerBarrier();
+        pThis->CodePgFar.sel = BS3_SEL_SPARE_00;
+        pThis->CodePgFar.off = 0;
+        pThis->CodePgRip     = 0;
+    }
+    else if (BS3_MODE_IS_32BIT_CODE(bMode))
+    {
+        *(uint32_t *)&pThis->DataPgFar = Bs3SelFlatDataToProtFar16(pThis->uDataPgFlat);
+        ASMCompilerBarrier();
+        pThis->CodePgFar.sel = 0;
+        pThis->CodePgFar.off = 0;
+        pThis->CodePgRip     = (uintptr_t)pThis->pbCodePg;
+    }
+    else
+    {
+        pThis->DataPgFar.off = 0;
+        pThis->DataPgFar.sel = 0;
+        pThis->CodePgFar.off = 0;
+        pThis->CodePgFar.sel = 0;
+        pThis->CodePgRip     = (uintptr_t)pThis->pbCodePg;
+    }
+#endif
+    BS3CG1_DPRINTF(("pbDataPg=%p %04x:%04x  pbCodePg=%p %04x:%04x\n",
+                    pThis->pbDataPg, pThis->DataPgFar.sel, pThis->DataPgFar.off,
+                    pThis->pbCodePg, pThis->CodePgFar.sel, pThis->CodePgFar.off));
+
+    /*
+     * Create basic context for each target ring.
+     *
+     * In protected 16-bit code we need to set up code selectors that can access
+     * pbCodePg.
+     *
+     * In long mode we make sure the high 32 bits of the GPRs (sans RSP) have
+     * some bits set so that the implicit clearing of them can be verified.
+     */
+    Bs3RegCtxSaveEx(&pThis->aInitialCtxs[pThis->iFirstRing], bMode, 1024 * 3);
+#if ARCH_BITS == 64
+    pThis->aInitialCtxs[pThis->iFirstRing].rax.u |= UINT64_C(0x0101010100000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].rbx.u |= UINT64_C(0x0202020200000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].rcx.u |= UINT64_C(0x0303030300000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].rdx.u |= UINT64_C(0x0404040400000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].rbp.u |= UINT64_C(0x0505050500000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].rdi.u |= UINT64_C(0x0606060600000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].rsi.u |= UINT64_C(0x0707070700000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].r8.u  |= UINT64_C(0x0808080800000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].r9.u  |= UINT64_C(0x0909090900000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].r10.u |= UINT64_C(0x1010101000000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].r11.u |= UINT64_C(0x1111111100000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].r12.u |= UINT64_C(0x1212121200000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].r13.u |= UINT64_C(0x1313131300000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].r14.u |= UINT64_C(0x1414141400000000);
+    pThis->aInitialCtxs[pThis->iFirstRing].r15.u |= UINT64_C(0x1515151500000000);
+#endif
+
+    if (BS3_MODE_IS_RM_OR_V86(bMode))
+    {
+        pThis->aInitialCtxs[pThis->iFirstRing].cs = pThis->CodePgFar.sel;
+        BS3_ASSERT(pThis->iFirstRing + 1 == pThis->iEndRing);
+    }
+    else if (BS3_MODE_IS_16BIT_CODE(bMode))
+    {
+#if ARCH_BITS == 16
+        uintptr_t const uFlatCodePgSeg = Bs3SelPtrToFlat(BS3_FP_MAKE(BS3_FP_SEG(pThis->pbCodePg), 0));
+#else
+        uintptr_t const uFlatCodePgSeg = (uintptr_t)pThis->pbCodePg;
+#endif
+        for (iRing = pThis->iFirstRing + 1; iRing < pThis->iEndRing; iRing++)
+        {
+            Bs3MemCpy(&pThis->aInitialCtxs[iRing], &pThis->aInitialCtxs[pThis->iFirstRing], sizeof(pThis->aInitialCtxs[iRing]));
+            Bs3RegCtxConvertToRingX(&pThis->aInitialCtxs[iRing], iRing);
+        }
+        for (iRing = pThis->iFirstRing; iRing < pThis->iEndRing; iRing++)
+        {
+            pThis->aInitialCtxs[iRing].cs = BS3_SEL_SPARE_00 + iRing * 8 + iRing;
+            Bs3SelSetup16BitCode(&Bs3GdteSpare00 + iRing, uFlatCodePgSeg, iRing);
+        }
+    }
+    else
+    {
+        Bs3RegCtxSetRipCsFromCurPtr(&pThis->aInitialCtxs[pThis->iFirstRing], (FPFNBS3FAR)pThis->pbCodePg);
+        for (iRing = pThis->iFirstRing + 1; iRing < pThis->iEndRing; iRing++)
+        {
+            Bs3MemCpy(&pThis->aInitialCtxs[iRing], &pThis->aInitialCtxs[pThis->iFirstRing], sizeof(pThis->aInitialCtxs[iRing]));
+            Bs3RegCtxConvertToRingX(&pThis->aInitialCtxs[iRing], iRing);
+        }
+    }
+
+    /*
+     * Create an initial extended CPU context.
+     */
+    pExtCtx = pThis->pInitialExtCtx;
+    if (   pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
+        || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
+    {
+        pExtCtx->Ctx.x87.FCW   = X86_FCW_MASK_ALL | X86_FCW_PC_64 | X86_FCW_RC_NEAREST;
+        pExtCtx->Ctx.x87.FSW   = 0;
+        pExtCtx->Ctx.x87.MXCSR      = X86_MXCSR_IM | X86_MXCSR_DM | X86_MXCSR_RC_NEAREST;
+        pExtCtx->Ctx.x87.MXCSR_MASK = 0;
+        for (i = 0; i < RT_ELEMENTS(pExtCtx->Ctx.x87.aRegs); i++)
+        {
+            pExtCtx->Ctx.x87.aRegs[i].au16[0] = i << 4;
+            pExtCtx->Ctx.x87.aRegs[i].au16[1] = i << 4;
+            pExtCtx->Ctx.x87.aRegs[i].au16[2] = i << 4;
+            pExtCtx->Ctx.x87.aRegs[i].au16[3] = i << 4;
+        }
+        for (i = 0; i < RT_ELEMENTS(pExtCtx->Ctx.x87.aXMM); i++)
+        {
+            pExtCtx->Ctx.x87.aXMM[i].au16[0] = i;
+            pExtCtx->Ctx.x87.aXMM[i].au16[1] = i;
+            pExtCtx->Ctx.x87.aXMM[i].au16[2] = i;
+            pExtCtx->Ctx.x87.aXMM[i].au16[3] = i;
+            pExtCtx->Ctx.x87.aXMM[i].au16[4] = i;
+            pExtCtx->Ctx.x87.aXMM[i].au16[5] = i;
+            pExtCtx->Ctx.x87.aXMM[i].au16[6] = i;
+            pExtCtx->Ctx.x87.aXMM[i].au16[7] = i;
+        }
+        if (pExtCtx->fXcr0Nominal & XSAVE_C_YMM)
+            for (i = 0; i < RT_ELEMENTS(pExtCtx->Ctx.x.u.Intel.YmmHi.aYmmHi); i++)
+            {
+                pExtCtx->Ctx.x.u.Intel.YmmHi.aYmmHi[i].au16[0] = i << 8;
+                pExtCtx->Ctx.x.u.Intel.YmmHi.aYmmHi[i].au16[1] = i << 8;
+                pExtCtx->Ctx.x.u.Intel.YmmHi.aYmmHi[i].au16[2] = i << 8;
+                pExtCtx->Ctx.x.u.Intel.YmmHi.aYmmHi[i].au16[3] = i << 8;
+                pExtCtx->Ctx.x.u.Intel.YmmHi.aYmmHi[i].au16[4] = i << 8;
+                pExtCtx->Ctx.x.u.Intel.YmmHi.aYmmHi[i].au16[5] = i << 8;
+                pExtCtx->Ctx.x.u.Intel.YmmHi.aYmmHi[i].au16[6] = i << 8;
+                pExtCtx->Ctx.x.u.Intel.YmmHi.aYmmHi[i].au16[7] = i << 8;
+            }
+
+    }
+    //else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_ANCIENT)
+    else
+        return Bs3TestFailedF("Unsupported extended CPU context method: %d", pExtCtx->enmMethod);
+
+    return true;
+}
+
+
+static uint8_t BS3_NEAR_CODE BS3_CMN_NM(Bs3Cg1WorkerInner)(PBS3CG1STATE pThis)
+{
+    uint8_t  iRing;
+    unsigned iInstr;
+
+    /*
+     * Test the instructions.
+     */
+    for (iInstr = 0; iInstr < g_cBs3Cg1Instructions;
+         iInstr++,
+         pThis->pchMnemonic += pThis->fAdvanceMnemonic * pThis->cchMnemonic,
+         pThis->pabOperands += pThis->cOperands,
+         pThis->pabOpcodes  += pThis->cbOpcodes)
+    {
+        uint8_t const   bTestXcptExpected  = BS3_MODE_IS_PAGED(pThis->bMode) ? X86_XCPT_PF : X86_XCPT_UD;
+        bool            fOuterInvalidInstr = false;
+        unsigned        iCpuSetup;
+
+        /*
+         * Expand the instruction information into the state.
+         * Note! 16-bit will switch to a two-level test header lookup once we exceed 64KB.
+         */
+        PCBS3CG1INSTR pInstr = &g_aBs3Cg1Instructions[iInstr];
+        pThis->iInstr                   = iInstr;
+        pThis->pTestHdr                 = (PCBS3CG1TESTHDR)&g_abBs3Cg1Tests[pInstr->offTests];
+        pThis->fFlags                   = pInstr->fFlags;
+        pThis->enmEncoding              = (BS3CG1ENC)pInstr->enmEncoding;
+        pThis->enmEncodingNonInvalid    = (BS3CG1ENC)pInstr->enmEncoding;
+        pThis->enmCpuTest               = (BS3CG1CPU)pInstr->enmCpuTest;
+        pThis->enmPrefixKind            = (BS3CG1PFXKIND)pInstr->enmPrefixKind;
+        pThis->enmXcptType              = (BS3CG1XCPTTYPE)pInstr->enmXcptType;
+        pThis->cchMnemonic              = pInstr->cchMnemonic;
+        if (pThis->fAdvanceMnemonic)
+            Bs3TestSubF("%s / %.*s", pThis->pszModeShort, pThis->cchMnemonic, pThis->pchMnemonic);
+        pThis->fAdvanceMnemonic         = pInstr->fAdvanceMnemonic;
+        pThis->cOperands                = pInstr->cOperands;
+        pThis->cbOpcodes                = pInstr->cbOpcodes;
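+        /* Note: both switches below fall through deliberately, copying only the entries actually used. */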
+        switch (pThis->cOperands)
+        {
+            case 4: pThis->aenmOperands[3] = (BS3CG1OP)pThis->pabOperands[3];
+            case 3: pThis->aenmOperands[2] = (BS3CG1OP)pThis->pabOperands[2];
+            case 2: pThis->aenmOperands[1] = (BS3CG1OP)pThis->pabOperands[1];
+            case 1: pThis->aenmOperands[0] = (BS3CG1OP)pThis->pabOperands[0];
+        }
+        switch (pThis->cbOpcodes)
+        {
+            case 4: pThis->abOpcodes[3] = pThis->pabOpcodes[3];
+            case 3: pThis->abOpcodes[2] = pThis->pabOpcodes[2];
+            case 2: pThis->abOpcodes[1] = pThis->pabOpcodes[1];
+            case 1: pThis->abOpcodes[0] = pThis->pabOpcodes[0];
+        }
+
+        /*
+         * Check if the CPU supports the instruction.
+         */
+        if (   !Bs3Cg1CpuSetupFirst(pThis)
+            || (pThis->fFlags & (BS3CG1INSTR_F_UNUSED | BS3CG1INSTR_F_INVALID)))
+            fOuterInvalidInstr = true;
+
+        /* Switch the encoder for some of the invalid instructions on non-intel CPUs. */
+        if (   (pThis->fFlags & BS3CG1INSTR_F_INTEL_DECODES_INVALID)
+            && pThis->bCpuVendor != BS3CPUVENDOR_INTEL
+            && (   (pThis->fFlags & (BS3CG1INSTR_F_UNUSED | BS3CG1INSTR_F_INVALID))
+                || (BS3_MODE_IS_64BIT_CODE(pThis->bMode) && (pThis->fFlags & BS3CG1INSTR_F_INVALID_64BIT))
+                || fOuterInvalidInstr ) )
+            pThis->enmEncoding = Bs3Cg1CalcNoneIntelInvalidEncoding(pThis->enmEncoding);
+
+        for (iCpuSetup = 0;; iCpuSetup++)
+        {
+            unsigned iEncoding;
+            unsigned iEncodingNext;
+
+            /*
+             * Prep the operands and encoding handling.
+             */
+            if (!Bs3Cg1EncodePrep(pThis))
+                break;
+
+            /*
+             * Encode the instruction in various ways and check out the test values.
+             */
+            for (iEncoding = 0;; iEncoding = iEncodingNext)
+            {
+                /*
+                 * Encode the next instruction variation.
+                 */
+                pThis->fInvalidEncoding = fOuterInvalidInstr;
+                iEncodingNext = Bs3Cg1EncodeNext(pThis, iEncoding);
+                if (iEncodingNext <= iEncoding)
+                    break;
+                BS3CG1_DPRINTF(("\ndbg: Encoding #%u: cbCurInst=%u: %.*Rhxs  fInvalidEncoding=%d\n",
+                                iEncoding, pThis->cbCurInstr, pThis->cbCurInstr, pThis->abCurInstr, pThis->fInvalidEncoding));
+
+                /*
+                 * Do the rings.
+                 */
+                for (iRing = pThis->iFirstRing + pThis->fSameRingNotOkay; iRing < pThis->iEndRing; iRing++)
+                {
+                    PCBS3CG1TESTHDR pHdr;
+
+                    pThis->uCpl = iRing;
+                    BS3CG1_DPRINTF(("dbg:  Ring %u\n", iRing));
+
+                    /*
+                     * Do the tests one by one.
+                     */
+                    pHdr = pThis->pTestHdr;
+                    for (pThis->iTest = 0;; pThis->iTest++)
+                    {
+                        if (Bs3Cg1RunSelector(pThis, pHdr))
+                        {
+                            /* Okay, set up the execution context. */
+                            unsigned         offCode;
+                            uint8_t BS3_FAR *pbCode;
+
+                            Bs3MemCpy(&pThis->Ctx, &pThis->aInitialCtxs[iRing], sizeof(pThis->Ctx));
+                            if (pThis->fWorkExtCtx)
+                                Bs3ExtCtxCopy(pThis->pExtCtx, pThis->pInitialExtCtx);
+                            if (BS3_MODE_IS_PAGED(pThis->bMode))
+                            {
+                                offCode = X86_PAGE_SIZE - pThis->cbCurInstr;
+                                pbCode = &pThis->pbCodePg[offCode];
+                                //if (iEncoding > 0) { pbCode[-1] = 0xf4; offCode--; }
+                            }
+                            else
+                            {
+                                pbCode = pThis->pbCodePg;
+                                pbCode[pThis->cbCurInstr]     = 0x0f; /* UD2 */
+                                pbCode[pThis->cbCurInstr + 1] = 0x0b;
+                                offCode = 0;
+                            }
+                            pThis->Ctx.rip.u = pThis->CodePgRip + offCode;
+                            Bs3MemCpy(pbCode, pThis->abCurInstr, pThis->cbCurInstr);
+
+                            if (Bs3Cg1RunContextModifier(pThis, &pThis->Ctx, pHdr, pHdr->cbSelector, pHdr->cbInput, NULL, pbCode))
+                            {
+                                /* Run the instruction. */
+                                BS3CG1_DPRINTF(("dbg:  Running test #%u\n", pThis->iTest));
+                                //Bs3RegCtxPrint(&pThis->Ctx);
+                                if (pThis->fWorkExtCtx)
+                                    Bs3ExtCtxRestore(pThis->pExtCtx);
+                                Bs3TrapSetJmpAndRestore(&pThis->Ctx, &pThis->TrapFrame);
+                                if (pThis->fWorkExtCtx)
+                                    Bs3ExtCtxSave(pThis->pResultExtCtx);
+                                BS3CG1_DPRINTF(("dbg:  bXcpt=%#x rip=%RX64 -> %RX64\n",
+                                                pThis->TrapFrame.bXcpt, pThis->Ctx.rip.u, pThis->TrapFrame.Ctx.rip.u));
+
+                                /*
+                                 * Apply the output modification program to the context.
+                                 */
+                                pThis->Ctx.rflags.u32 &= ~X86_EFL_RF;
+                                pThis->Ctx.rflags.u32 |= pThis->TrapFrame.Ctx.rflags.u32 & X86_EFL_RF;
+                                pThis->bValueXcpt      = UINT8_MAX; //???
+                                if (   pThis->fInvalidEncoding
+                                    || pThis->bAlignmentXcpt != UINT8_MAX
+                                    || pThis->bValueXcpt     != UINT8_MAX
+                                    || Bs3Cg1RunContextModifier(pThis, &pThis->Ctx, pHdr,
+                                                                pHdr->cbSelector + pHdr->cbInput, pHdr->cbOutput,
+                                                                &pThis->TrapFrame.Ctx, NULL /*pbCode*/))
+                                {
+                                    Bs3Cg1CheckResult(pThis, bTestXcptExpected, false /*fInvalidEncodingPgFault*/, iEncoding);
+                                }
+
+                                /*
+                                 * If this is an invalid encoding or instruction, check that we
+                                 * get a page fault when shortening it by one byte.
+                                 * (Since we didn't execute the output context modifier, we don't
+                                 * need to re-initialize the start context.)
+                                 */
+                                if (   pThis->fInvalidEncoding
+                                    && BS3_MODE_IS_PAGED(pThis->bMode)
+                                    && pThis->cbCurInstr)
+                                {
+                                    pbCode  += 1;
+                                    offCode += 1;
+                                    pThis->Ctx.rip.u = pThis->CodePgRip + offCode;
+                                    Bs3MemCpy(pbCode, pThis->abCurInstr, pThis->cbCurInstr - 1);
+
+                                    /* Run the instruction. */
+                                    BS3CG1_DPRINTF(("dbg:  Running test #%u (cut short #PF)\n", pThis->iTest));
+                                    //Bs3RegCtxPrint(&pThis->Ctx);
+                                    if (pThis->fWorkExtCtx)
+                                        Bs3ExtCtxRestore(pThis->pExtCtx);
+                                    Bs3TrapSetJmpAndRestore(&pThis->Ctx, &pThis->TrapFrame);
+                                    if (pThis->fWorkExtCtx)
+                                        Bs3ExtCtxSave(pThis->pResultExtCtx);
+                                    BS3CG1_DPRINTF(("dbg:  bXcpt=%#x rip=%RX64 -> %RX64 (cut short #PF)\n",
+                                                    pThis->TrapFrame.bXcpt, pThis->Ctx.rip.u, pThis->TrapFrame.Ctx.rip.u));
+
+                                    /* Check it */
+                                    pThis->Ctx.rflags.u32 &= ~X86_EFL_RF;
+                                    pThis->Ctx.rflags.u32 |= pThis->TrapFrame.Ctx.rflags.u32 & X86_EFL_RF;
+                                    Bs3Cg1CheckResult(pThis, X86_XCPT_PF, true /*fInvalidEncodingPgFault*/, iEncoding);
+                                }
+                            }
+                        }
+                        else
+                            BS3CG1_DPRINTF(("dbg:  Skipping #%u\n", pThis->iTest));
+
+                        /* advance */
+                        if (pHdr->fLast)
+                        {
+                            BS3CG1_DPRINTF(("dbg:  Last\n\n"));
+                            break;
+                        }
+                        pHdr = (PCBS3CG1TESTHDR)((uint8_t BS3_FAR *)(pHdr + 1) + pHdr->cbInput + pHdr->cbOutput + pHdr->cbSelector);
+                    }
+                }
+            }
+
+            /*
+             * Clean up (segment registers, etc) and get the next CPU config.
+             */
+            Bs3Cg1EncodeCleanup(pThis);
+            if (!Bs3Cg1CpuSetupNext(pThis, iCpuSetup, &fOuterInvalidInstr))
+                break;
+            if (pThis->fFlags & (BS3CG1INSTR_F_UNUSED | BS3CG1INSTR_F_INVALID))
+                fOuterInvalidInstr = true;
+        }
+    }
+
+    return 0;
+}
+
+
+BS3_DECL_FAR(uint8_t) BS3_CMN_NM(Bs3Cg1Worker)(uint8_t bMode)
+{
+    uint8_t     bRet = 1;
+    BS3CG1STATE This;
+
+#if 0
+    /* (for debugging) */
+    if (bMode != BS3_MODE_PP32)
+        return BS3TESTDOMODE_SKIPPED;
+#endif
+
+    if (BS3_CMN_NM(Bs3Cg1Init)(&This, bMode))
+    {
+        bRet = BS3_CMN_NM(Bs3Cg1WorkerInner)(&This);
+        Bs3TestSubDone();
+    }
+    Bs3Cg1Destroy(&This);
+
+#if 0
+    /* (for debugging) */
+    //if (bMode == BS3_MODE_PP32)
+    {
+        Bs3TestTerm();
+        Bs3Shutdown();
+    }
+#endif
+    return bRet;
+}
+
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2.c b/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1.c
similarity index 78%
copy from src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2.c
copy to src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1.c
index 7e322d0..d0d44c8 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1.c
@@ -1,6 +1,6 @@
-/* $Id: bs3-cpu-instr-2.c $ */
+/* $Id: bs3-cpu-generated-1.c $ */
 /** @file
- * BS3Kit - bs3-cpu-instr-2, 16-bit C code.
+ * BS3Kit - bs3-cpu-generated-1, 16-bit C code.
  */
 
 /*
@@ -29,35 +29,30 @@
 *   Header Files                                                                                                                 *
 *********************************************************************************************************************************/
 #include <bs3kit.h>
+#include "bs3-cpu-generated-1.h"
 
 
 /*********************************************************************************************************************************
 *   Internal Functions                                                                                                           *
 *********************************************************************************************************************************/
-BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_mul);
-BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_imul);
-BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_div);
-BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_idiv);
+BS3TESTMODEBYMAX_PROTOTYPES_CMN(Bs3Cg1Worker);
 
 
 /*********************************************************************************************************************************
 *   Global Variables                                                                                                             *
 *********************************************************************************************************************************/
-static const BS3TESTMODEENTRY g_aModeTests[] =
+static const BS3TESTMODEBYMAXENTRY g_aModeTest[] =
 {
-    BS3TESTMODEENTRY_CMN("mul", bs3CpuInstr2_mul),
-    BS3TESTMODEENTRY_CMN("imul", bs3CpuInstr2_imul),
-    BS3TESTMODEENTRY_CMN("div", bs3CpuInstr2_div),
-    BS3TESTMODEENTRY_CMN("idiv", bs3CpuInstr2_idiv),
+    BS3TESTMODEBYMAXENTRY_CMN(NULL, Bs3Cg1Worker),
 };
 
 
 BS3_DECL(void) Main_rm()
 {
     Bs3InitAll_rm();
-    Bs3TestInit("bs3-cpu-instr-2");
+    Bs3TestInit("bs3-cpu-generated-1");
 
-    Bs3TestDoModes_rm(g_aModeTests, RT_ELEMENTS(g_aModeTests));
+    Bs3TestDoModesByMax_rm(g_aModeTest, RT_ELEMENTS(g_aModeTest));
 
     Bs3TestTerm();
 }
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1.h b/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1.h
new file mode 100644
index 0000000..99a97ee
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-generated-1.h
@@ -0,0 +1,657 @@
+/* $Id: bs3-cpu-generated-1.h $ */
+/** @file
+ * BS3Kit - bs3-cpu-generated-1, common header file.
+ */
+
+/*
+ * Copyright (C) 2007-2016 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+#ifndef ___bs3_cpu_generated_1_h___
+#define ___bs3_cpu_generated_1_h___
+
+#include <bs3kit.h>
+#include <iprt/assert.h>
+
+
+/**
+ * Operand details.
+ *
+ * Currently simply using the operand naming from the reference manuals:
+ * E = ModR/M r/m (GPR or memory), G = GPR from ModR/M reg, V = XMM register
+ * from ModR/M reg, W = XMM register or memory from ModR/M r/m, U = XMM
+ * register from ModR/M r/m, I = immediate, M = memory-only operand; the
+ * lower-case suffix (b, v, z, q, dq, ss, sd, ps, pd) gives the operand
+ * size/type.
+ */
+typedef enum BS3CG1OP
+{
+    BS3CG1OP_INVALID = 0,
+
+    BS3CG1OP_Eb,
+    BS3CG1OP_Ev,
+    BS3CG1OP_Wss,
+    BS3CG1OP_Wsd,
+    BS3CG1OP_Wps,
+    BS3CG1OP_Wpd,
+    BS3CG1OP_Wdq,
+    BS3CG1OP_WqZxReg,
+
+    BS3CG1OP_Gb,
+    BS3CG1OP_Gv,
+    BS3CG1OP_Uq,
+    BS3CG1OP_UqHi,
+    BS3CG1OP_Vss,
+    BS3CG1OP_VssZxReg,
+    BS3CG1OP_Vsd,
+    BS3CG1OP_Vps,
+    BS3CG1OP_Vpd,
+    BS3CG1OP_Vq,
+    BS3CG1OP_Vdq,
+
+    BS3CG1OP_Ib,
+    BS3CG1OP_Iz,
+
+    BS3CG1OP_AL,
+    BS3CG1OP_rAX,
+
+    BS3CG1OP_Ma,
+    BS3CG1OP_MbRO,
+    BS3CG1OP_MdRO,
+    BS3CG1OP_MdWO,
+    BS3CG1OP_Mq,
+
+    BS3CG1OP_END
+} BS3CG1OP;
+/** Pointer to a const operand enum. */
+typedef const BS3CG1OP BS3_FAR *PCBS3CG1OP;
+
+
+/**
+ * Instruction encoding format.
+ *
+ * This duplicates some of the info in the operand array, however it makes it
+ * easier to figure out encoding variations.
+ */
+typedef enum BS3CG1ENC
+{
+    BS3CG1ENC_INVALID = 0,
+
+    BS3CG1ENC_MODRM_Eb_Gb,
+    BS3CG1ENC_MODRM_Ev_Gv,
+    BS3CG1ENC_MODRM_Wss_Vss,
+    BS3CG1ENC_MODRM_Wsd_Vsd,
+    BS3CG1ENC_MODRM_Wps_Vps,
+    BS3CG1ENC_MODRM_Wpd_Vpd,
+    BS3CG1ENC_MODRM_WqZxReg_Vq,
+
+    BS3CG1ENC_MODRM_Gb_Eb,
+    BS3CG1ENC_MODRM_Gv_Ev,
+    BS3CG1ENC_MODRM_Gv_Ma, /**< bound instruction */
+    BS3CG1ENC_MODRM_Vq_UqHi,
+    BS3CG1ENC_MODRM_Vq_Mq,
+    BS3CG1ENC_MODRM_Vdq_Wdq,
+    BS3CG1ENC_MODRM_VssZxReg_Wss,
+    BS3CG1ENC_MODRM_MbRO,
+    BS3CG1ENC_MODRM_MdRO,
+    BS3CG1ENC_MODRM_MdWO,
+
+    BS3CG1ENC_VEX_MODRM_MdWO,
+
+    BS3CG1ENC_FIXED,
+    BS3CG1ENC_FIXED_AL_Ib,
+    BS3CG1ENC_FIXED_rAX_Iz,
+
+    BS3CG1ENC_MODRM_MOD_EQ_3, /**< Unused or invalid instruction. */
+    BS3CG1ENC_MODRM_MOD_NE_3, /**< Unused or invalid instruction. */
+
+    BS3CG1ENC_END
+} BS3CG1ENC;
+
+
+/**
+ * Prefix sensitivity kind.
+ */
+typedef enum BS3CG1PFXKIND
+{
+    BS3CG1PFXKIND_INVALID = 0,
+
+    BS3CG1PFXKIND_NO_F2_F3_66,           /**< No 66, F2 or F3 prefixes allowed as that would alter the meaning. */
+    BS3CG1PFXKIND_REQ_F2,                /**< Requires F2 (REPNE) prefix as part of the instr encoding. */
+    BS3CG1PFXKIND_REQ_F3,                /**< Requires F3 (REPE) prefix as part of the instr encoding. */
+    BS3CG1PFXKIND_REQ_66,                /**< Requires 66 (OP SIZE) prefix as part of the instr encoding.  */
+
+    /** @todo more work to be done here...   */
+    BS3CG1PFXKIND_MODRM,
+    BS3CG1PFXKIND_MODRM_NO_OP_SIZES,
+
+    BS3CG1PFXKIND_END
+} BS3CG1PFXKIND;
+
+/**
+ * CPU selection or CPU ID.
+ */
+typedef enum BS3CG1CPU
+{
+    /** Works with any CPU. */
+    BS3CG1CPU_ANY = 0,
+    BS3CG1CPU_GE_80186,
+    BS3CG1CPU_GE_80286,
+    BS3CG1CPU_GE_80386,
+    BS3CG1CPU_GE_80486,
+    BS3CG1CPU_GE_Pentium,
+
+    BS3CG1CPU_SSE,
+    BS3CG1CPU_SSE2,
+    BS3CG1CPU_SSE3,
+    BS3CG1CPU_AVX,
+    BS3CG1CPU_AVX2,
+    BS3CG1CPU_CLFSH,
+    BS3CG1CPU_CLFLUSHOPT,
+
+    BS3CG1CPU_END
+} BS3CG1CPU;
+
+
+/**
+ * SSE & AVX exception types.
+ */
+typedef enum BS3CG1XCPTTYPE
+{
+    BS3CG1XCPTTYPE_NONE = 0,
+    /* SSE: */
+    BS3CG1XCPTTYPE_1,
+    BS3CG1XCPTTYPE_2,
+    BS3CG1XCPTTYPE_3,
+    BS3CG1XCPTTYPE_4,
+    BS3CG1XCPTTYPE_4UA,
+    BS3CG1XCPTTYPE_5,
+    BS3CG1XCPTTYPE_6,
+    BS3CG1XCPTTYPE_7,
+    BS3CG1XCPTTYPE_8,
+    BS3CG1XCPTTYPE_11,
+    BS3CG1XCPTTYPE_12,
+    /* EVEX: */
+    BS3CG1XCPTTYPE_E1,
+    BS3CG1XCPTTYPE_E1NF,
+    BS3CG1XCPTTYPE_E2,
+    BS3CG1XCPTTYPE_E3,
+    BS3CG1XCPTTYPE_E3NF,
+    BS3CG1XCPTTYPE_E4,
+    BS3CG1XCPTTYPE_E4NF,
+    BS3CG1XCPTTYPE_E5,
+    BS3CG1XCPTTYPE_E5NF,
+    BS3CG1XCPTTYPE_E6,
+    BS3CG1XCPTTYPE_E6NF,
+    BS3CG1XCPTTYPE_E7NF,
+    BS3CG1XCPTTYPE_E9,
+    BS3CG1XCPTTYPE_E9NF,
+    BS3CG1XCPTTYPE_E10,
+    BS3CG1XCPTTYPE_E11,
+    BS3CG1XCPTTYPE_E12,
+    BS3CG1XCPTTYPE_E12NF,
+    BS3CG1XCPTTYPE_END
+} BS3CG1XCPTTYPE;
+AssertCompile(BS3CG1XCPTTYPE_END <= 32);
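+/* (The limit stems from the 5-bit BS3CG1INSTR::enmXcptType field below.) */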
+
+
+/**
+ * Generated instruction info.
+ */
+typedef struct BS3CG1INSTR
+{
+    /** The opcode size.   */
+    uint32_t    cbOpcodes : 2;
+    /** The number of operands.   */
+    uint32_t    cOperands : 2;
+    /** The length of the mnemonic. */
+    uint32_t    cchMnemonic : 4;
+    /** Whether to advance the mnemonic array pointer. */
+    uint32_t    fAdvanceMnemonic : 1;
+    /** Offset into g_abBs3Cg1Tests of the first test. */
+    uint32_t    offTests : 23;
+    /** BS3CG1ENC values. */
+    uint32_t    enmEncoding : 10;
+    /** BS3CG1PFXKIND values. */
+    uint32_t    enmPrefixKind : 4;
+    /** CPU test / CPU ID bit test (BS3CG1CPU). */
+    uint32_t    enmCpuTest : 6;
+    /** Exception type (BS3CG1XCPTTYPE)   */
+    uint32_t    enmXcptType : 5;
+    /** Currently unused bits. */
+    uint32_t    uUnused : 6;
+    /** BS3CG1INSTR_F_XXX. */
+    uint32_t    fFlags;
+} BS3CG1INSTR;
+AssertCompileSize(BS3CG1INSTR, 12);
+/** Pointer to a const instruction. */
+typedef BS3CG1INSTR const BS3_FAR *PCBS3CG1INSTR;
+
+
+/** @name BS3CG1INSTR_F_XXX
+ * @{ */
+/** Defaults to SS rather than DS. */
+#define BS3CG1INSTR_F_DEF_SS                UINT32_C(0x00000001)
+/** Invalid instruction in 64-bit mode. */
+#define BS3CG1INSTR_F_INVALID_64BIT         UINT32_C(0x00000002)
+/** Unused instruction. */
+#define BS3CG1INSTR_F_UNUSED                UINT32_C(0x00000004)
+/** Invalid instruction. */
+#define BS3CG1INSTR_F_INVALID               UINT32_C(0x00000008)
+/** Only Intel does full ModR/M (and subsequent byte) decoding for invalid instructions.
+ * Always used with BS3CG1INSTR_F_INVALID or BS3CG1INSTR_F_UNUSED. */
+#define BS3CG1INSTR_F_INTEL_DECODES_INVALID UINT32_C(0x00000010)
+/** @} */
+
+
+/**
+ * Test header.
+ */
+typedef struct BS3CG1TESTHDR
+{
+    /** The size of the selector program in bytes.
+     * This is also the offset of the input context modification program.  */
+    uint32_t    cbSelector : 8;
+    /** The size of the input context modification program in bytes.
+     * This immediately follows the selector program.  */
+    uint32_t    cbInput    : 12;
+    /** The size of the output context modification program in bytes.
+     * This immediately follows the input context modification program.  The
+     * program takes the result of the input program as starting point. */
+    uint32_t    cbOutput   : 11;
+    /** Indicates whether this is the last test or not. */
+    uint32_t    fLast      : 1;
+} BS3CG1TESTHDR;
+AssertCompileSize(BS3CG1TESTHDR, 4);
+/** Pointer to a const test header. */
+typedef BS3CG1TESTHDR const BS3_FAR *PCBS3CG1TESTHDR;
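+
+/*
+ * Layout sketch: each BS3CG1TESTHDR is immediately followed by the selector,
+ * input and output programs, in that order, and the next header follows the
+ * output program.  The two helpers below are illustrative only (hypothetical
+ * names, not part of the BS3Kit API) and merely spell out the arithmetic.
+ */
+DECLINLINE(uint8_t const BS3_FAR *) bs3Cg1TestHdrPrograms(PCBS3CG1TESTHDR pHdr)
+{
+    /* Selector program at +0, input at +cbSelector, output at +cbSelector+cbInput. */
+    return (uint8_t const BS3_FAR *)(pHdr + 1);
+}
+
+DECLINLINE(PCBS3CG1TESTHDR) bs3Cg1TestHdrNext(PCBS3CG1TESTHDR pHdr)
+{
+    return (PCBS3CG1TESTHDR)(  (uint8_t const BS3_FAR *)(pHdr + 1)
+                             + pHdr->cbSelector + pHdr->cbInput + pHdr->cbOutput);
+}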
+
+/** @name Opcode format for the BS3CG1 context modifier.
+ *
+ * Used by both the input and output context programs.
+ *
+ * The most common operations are encoded as a single byte opcode followed by
+ * one or more immediate bytes with data.
+ *
+ * @{ */
+#define BS3CG1_CTXOP_SIZE_MASK      UINT8_C(0x07)
+#define BS3CG1_CTXOP_1_BYTE         UINT8_C(0x00)
+#define BS3CG1_CTXOP_2_BYTES        UINT8_C(0x01)
+#define BS3CG1_CTXOP_4_BYTES        UINT8_C(0x02)
+#define BS3CG1_CTXOP_8_BYTES        UINT8_C(0x03)
+#define BS3CG1_CTXOP_16_BYTES       UINT8_C(0x04)
+#define BS3CG1_CTXOP_32_BYTES       UINT8_C(0x05)
+#define BS3CG1_CTXOP_12_BYTES       UINT8_C(0x06)
+#define BS3CG1_CTXOP_SIZE_ESC       UINT8_C(0x07)   /**< Separate byte encoding the value size following any destination escape byte. */
+
+#define BS3CG1_CTXOP_DST_MASK       UINT8_C(0x18)
+#define BS3CG1_CTXOP_OP1            UINT8_C(0x00)
+#define BS3CG1_CTXOP_OP2            UINT8_C(0x08)
+#define BS3CG1_CTXOP_EFL            UINT8_C(0x10)
+#define BS3CG1_CTXOP_DST_ESC        UINT8_C(0x18)   /**< Separate byte giving the destination follows immediately. */
+
+#define BS3CG1_CTXOP_SIGN_EXT       UINT8_C(0x20)   /**< Whether to sign-extend (set) the immediate value. */
+
+#define BS3CG1_CTXOP_OPERATOR_MASK  UINT8_C(0xc0)
+#define BS3CG1_CTXOP_ASSIGN         UINT8_C(0x00)   /**< Simple assignment operator (=) */
+#define BS3CG1_CTXOP_OR             UINT8_C(0x40)   /**< OR assignment operator (|=). */
+#define BS3CG1_CTXOP_AND            UINT8_C(0x80)   /**< AND assignment operator (&=). */
+#define BS3CG1_CTXOP_AND_INV        UINT8_C(0xc0)   /**< AND assignment operator of the inverted value (&~=). */
+/** @} */
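+
+/*
+ * A minimal decoding sketch for the single opcode byte described above; the
+ * helper name is hypothetical and only shows how the three bit groups are
+ * meant to be taken apart (an escaped size or destination is read from the
+ * following byte(s) when the ESC values are used).
+ */
+DECLINLINE(void) bs3Cg1CtxOpDecodeSketch(uint8_t bOpcode, uint8_t *pcbValue, uint8_t *pfDst, uint8_t *pfOperator)
+{
+    /* BS3CG1_CTXOP_1_BYTE thru BS3CG1_CTXOP_SIZE_ESC map to these value sizes (0 = escape byte follows). */
+    static const uint8_t s_acbValue[8] = { 1, 2, 4, 8, 16, 32, 12, 0 };
+    *pcbValue   = s_acbValue[bOpcode & BS3CG1_CTXOP_SIZE_MASK];
+    *pfDst      = bOpcode & BS3CG1_CTXOP_DST_MASK;      /* OP1, OP2, EFL or DST_ESC. */
+    *pfOperator = bOpcode & BS3CG1_CTXOP_OPERATOR_MASK; /* assign, OR, AND, or AND of inverted value. */
+}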
+
+/**
+ * Escaped destination values
+ *
+ * These are just uppercased versions of TestInOut.kdFields, where dots are
+ * replaced by underscores.
+ */
+typedef enum BS3CG1DST
+{
+    BS3CG1DST_INVALID = 0,
+    /* Operands. */
+    BS3CG1DST_OP1,
+    BS3CG1DST_OP2,
+    BS3CG1DST_OP3,
+    BS3CG1DST_OP4,
+    /* Flags. */
+    BS3CG1DST_EFL,
+    BS3CG1DST_EFL_UNDEF, /**< Special field only valid in output context modifiers: EFLAGS |= Value & Output.EFLAGS; */
+    /* 8-bit GPRs. */
+    BS3CG1DST_AL,
+    BS3CG1DST_CL,
+    BS3CG1DST_DL,
+    BS3CG1DST_BL,
+    BS3CG1DST_AH,
+    BS3CG1DST_CH,
+    BS3CG1DST_DH,
+    BS3CG1DST_BH,
+    BS3CG1DST_SPL,
+    BS3CG1DST_BPL,
+    BS3CG1DST_SIL,
+    BS3CG1DST_DIL,
+    BS3CG1DST_R8L,
+    BS3CG1DST_R9L,
+    BS3CG1DST_R10L,
+    BS3CG1DST_R11L,
+    BS3CG1DST_R12L,
+    BS3CG1DST_R13L,
+    BS3CG1DST_R14L,
+    BS3CG1DST_R15L,
+    /* 16-bit GPRs. */
+    BS3CG1DST_AX,
+    BS3CG1DST_CX,
+    BS3CG1DST_DX,
+    BS3CG1DST_BX,
+    BS3CG1DST_SP,
+    BS3CG1DST_BP,
+    BS3CG1DST_SI,
+    BS3CG1DST_DI,
+    BS3CG1DST_R8W,
+    BS3CG1DST_R9W,
+    BS3CG1DST_R10W,
+    BS3CG1DST_R11W,
+    BS3CG1DST_R12W,
+    BS3CG1DST_R13W,
+    BS3CG1DST_R14W,
+    BS3CG1DST_R15W,
+    /* 32-bit GPRs. */
+    BS3CG1DST_EAX,
+    BS3CG1DST_ECX,
+    BS3CG1DST_EDX,
+    BS3CG1DST_EBX,
+    BS3CG1DST_ESP,
+    BS3CG1DST_EBP,
+    BS3CG1DST_ESI,
+    BS3CG1DST_EDI,
+    BS3CG1DST_R8D,
+    BS3CG1DST_R9D,
+    BS3CG1DST_R10D,
+    BS3CG1DST_R11D,
+    BS3CG1DST_R12D,
+    BS3CG1DST_R13D,
+    BS3CG1DST_R14D,
+    BS3CG1DST_R15D,
+    /* 64-bit GPRs. */
+    BS3CG1DST_RAX,
+    BS3CG1DST_RCX,
+    BS3CG1DST_RDX,
+    BS3CG1DST_RBX,
+    BS3CG1DST_RSP,
+    BS3CG1DST_RBP,
+    BS3CG1DST_RSI,
+    BS3CG1DST_RDI,
+    BS3CG1DST_R8,
+    BS3CG1DST_R9,
+    BS3CG1DST_R10,
+    BS3CG1DST_R11,
+    BS3CG1DST_R12,
+    BS3CG1DST_R13,
+    BS3CG1DST_R14,
+    BS3CG1DST_R15,
+    /* 16-bit, 32-bit or 64-bit registers according to operand size. */
+    BS3CG1DST_OZ_RAX,
+    BS3CG1DST_OZ_RCX,
+    BS3CG1DST_OZ_RDX,
+    BS3CG1DST_OZ_RBX,
+    BS3CG1DST_OZ_RSP,
+    BS3CG1DST_OZ_RBP,
+    BS3CG1DST_OZ_RSI,
+    BS3CG1DST_OZ_RDI,
+    BS3CG1DST_OZ_R8,
+    BS3CG1DST_OZ_R9,
+    BS3CG1DST_OZ_R10,
+    BS3CG1DST_OZ_R11,
+    BS3CG1DST_OZ_R12,
+    BS3CG1DST_OZ_R13,
+    BS3CG1DST_OZ_R14,
+    BS3CG1DST_OZ_R15,
+
+    /* Control registers.*/
+    BS3CG1DST_CR0,
+    BS3CG1DST_CR4,
+    BS3CG1DST_XCR0,
+
+    /* FPU registers. */
+    BS3CG1DST_FPU_FIRST,
+    BS3CG1DST_FCW = BS3CG1DST_FPU_FIRST,
+    BS3CG1DST_FSW,
+    BS3CG1DST_FTW,
+    BS3CG1DST_FOP,
+    BS3CG1DST_FPUIP,
+    BS3CG1DST_FPUCS,
+    BS3CG1DST_FPUDP,
+    BS3CG1DST_FPUDS,
+    BS3CG1DST_MXCSR,
+    BS3CG1DST_ST0,
+    BS3CG1DST_ST1,
+    BS3CG1DST_ST2,
+    BS3CG1DST_ST3,
+    BS3CG1DST_ST4,
+    BS3CG1DST_ST5,
+    BS3CG1DST_ST6,
+    BS3CG1DST_ST7,
+    /* MMX registers. */
+    BS3CG1DST_MM0,
+    BS3CG1DST_MM1,
+    BS3CG1DST_MM2,
+    BS3CG1DST_MM3,
+    BS3CG1DST_MM4,
+    BS3CG1DST_MM5,
+    BS3CG1DST_MM6,
+    BS3CG1DST_MM7,
+    /* SSE registers. */
+    BS3CG1DST_XMM0,
+    BS3CG1DST_XMM1,
+    BS3CG1DST_XMM2,
+    BS3CG1DST_XMM3,
+    BS3CG1DST_XMM4,
+    BS3CG1DST_XMM5,
+    BS3CG1DST_XMM6,
+    BS3CG1DST_XMM7,
+    BS3CG1DST_XMM8,
+    BS3CG1DST_XMM9,
+    BS3CG1DST_XMM10,
+    BS3CG1DST_XMM11,
+    BS3CG1DST_XMM12,
+    BS3CG1DST_XMM13,
+    BS3CG1DST_XMM14,
+    BS3CG1DST_XMM15,
+    BS3CG1DST_XMM0_LO,
+    BS3CG1DST_XMM1_LO,
+    BS3CG1DST_XMM2_LO,
+    BS3CG1DST_XMM3_LO,
+    BS3CG1DST_XMM4_LO,
+    BS3CG1DST_XMM5_LO,
+    BS3CG1DST_XMM6_LO,
+    BS3CG1DST_XMM7_LO,
+    BS3CG1DST_XMM8_LO,
+    BS3CG1DST_XMM9_LO,
+    BS3CG1DST_XMM10_LO,
+    BS3CG1DST_XMM11_LO,
+    BS3CG1DST_XMM12_LO,
+    BS3CG1DST_XMM13_LO,
+    BS3CG1DST_XMM14_LO,
+    BS3CG1DST_XMM15_LO,
+    BS3CG1DST_XMM0_HI,
+    BS3CG1DST_XMM1_HI,
+    BS3CG1DST_XMM2_HI,
+    BS3CG1DST_XMM3_HI,
+    BS3CG1DST_XMM4_HI,
+    BS3CG1DST_XMM5_HI,
+    BS3CG1DST_XMM6_HI,
+    BS3CG1DST_XMM7_HI,
+    BS3CG1DST_XMM8_HI,
+    BS3CG1DST_XMM9_HI,
+    BS3CG1DST_XMM10_HI,
+    BS3CG1DST_XMM11_HI,
+    BS3CG1DST_XMM12_HI,
+    BS3CG1DST_XMM13_HI,
+    BS3CG1DST_XMM14_HI,
+    BS3CG1DST_XMM15_HI,
+    BS3CG1DST_XMM0_LO_ZX,
+    BS3CG1DST_XMM1_LO_ZX,
+    BS3CG1DST_XMM2_LO_ZX,
+    BS3CG1DST_XMM3_LO_ZX,
+    BS3CG1DST_XMM4_LO_ZX,
+    BS3CG1DST_XMM5_LO_ZX,
+    BS3CG1DST_XMM6_LO_ZX,
+    BS3CG1DST_XMM7_LO_ZX,
+    BS3CG1DST_XMM8_LO_ZX,
+    BS3CG1DST_XMM9_LO_ZX,
+    BS3CG1DST_XMM10_LO_ZX,
+    BS3CG1DST_XMM11_LO_ZX,
+    BS3CG1DST_XMM12_LO_ZX,
+    BS3CG1DST_XMM13_LO_ZX,
+    BS3CG1DST_XMM14_LO_ZX,
+    BS3CG1DST_XMM15_LO_ZX,
+    BS3CG1DST_XMM0_DW0,
+    BS3CG1DST_XMM1_DW0,
+    BS3CG1DST_XMM2_DW0,
+    BS3CG1DST_XMM3_DW0,
+    BS3CG1DST_XMM4_DW0,
+    BS3CG1DST_XMM5_DW0,
+    BS3CG1DST_XMM6_DW0,
+    BS3CG1DST_XMM7_DW0,
+    BS3CG1DST_XMM8_DW0,
+    BS3CG1DST_XMM9_DW0,
+    BS3CG1DST_XMM10_DW0,
+    BS3CG1DST_XMM11_DW0,
+    BS3CG1DST_XMM12_DW0,
+    BS3CG1DST_XMM13_DW0,
+    BS3CG1DST_XMM14_DW0,
+    BS3CG1DST_XMM15_DW0,
+    BS3CG1DST_XMM0_DW0_ZX,
+    BS3CG1DST_XMM1_DW0_ZX,
+    BS3CG1DST_XMM2_DW0_ZX,
+    BS3CG1DST_XMM3_DW0_ZX,
+    BS3CG1DST_XMM4_DW0_ZX,
+    BS3CG1DST_XMM5_DW0_ZX,
+    BS3CG1DST_XMM6_DW0_ZX,
+    BS3CG1DST_XMM7_DW0_ZX,
+    BS3CG1DST_XMM8_DW0_ZX,
+    BS3CG1DST_XMM9_DW0_ZX,
+    BS3CG1DST_XMM10_DW0_ZX,
+    BS3CG1DST_XMM11_DW0_ZX,
+    BS3CG1DST_XMM12_DW0_ZX,
+    BS3CG1DST_XMM13_DW0_ZX,
+    BS3CG1DST_XMM14_DW0_ZX,
+    BS3CG1DST_XMM15_DW0_ZX,
+    /* AVX registers. */
+    BS3CG1DST_YMM0,
+    BS3CG1DST_YMM1,
+    BS3CG1DST_YMM2,
+    BS3CG1DST_YMM3,
+    BS3CG1DST_YMM4,
+    BS3CG1DST_YMM5,
+    BS3CG1DST_YMM6,
+    BS3CG1DST_YMM7,
+    BS3CG1DST_YMM8,
+    BS3CG1DST_YMM9,
+    BS3CG1DST_YMM10,
+    BS3CG1DST_YMM11,
+    BS3CG1DST_YMM12,
+    BS3CG1DST_YMM13,
+    BS3CG1DST_YMM14,
+    BS3CG1DST_YMM15,
+
+    /* Special fields: */
+    BS3CG1DST_SPECIAL_START,
+    BS3CG1DST_VALUE_XCPT = BS3CG1DST_SPECIAL_START, /**< Expected exception based on input or result. */
+
+    BS3CG1DST_END
+} BS3CG1DST;
+AssertCompile(BS3CG1DST_END <= 256);
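+/* (Escaped destinations must fit in a single byte, see BS3CG1_CTXOP_DST_ESC.) */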
+
+/** @name Selector opcode definitions.
+ *
+ * Selector programs are very simple: zero or more predicate tests that are
+ * ANDed together.  If any predicate test fails, the test is skipped.
+ *
+ * One instruction is encoded as a byte, where the lowest bit indicates the
+ * kind of test and the seven remaining bits indicate which predicate to check.
+ *
+ * @{ */
+#define BS3CG1SEL_OP_KIND_MASK  UINT8_C(0x01)   /**< The operator part (put in lower bit to reduce switch value range). */
+#define BS3CG1SEL_OP_IS_TRUE    UINT8_C(0x00)   /**< Check that the predicate is true. */
+#define BS3CG1SEL_OP_IS_FALSE   UINT8_C(0x01)   /**< Check that the predicate is false. */
+#define BS3CG1SEL_OP_PRED_SHIFT 1               /**< Shift factor for getting/putting a BS3CG1PRED value into/from a byte. */
+/** @} */
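+
+/*
+ * Minimal sketch of evaluating one selector byte; the helper name is
+ * hypothetical and fPredicate stands in for the result of checking the
+ * BS3CG1PRED predicate encoded in the upper seven bits.
+ */
+DECLINLINE(bool) bs3Cg1SelByteSketch(uint8_t bSelOp, bool fPredicate)
+{
+    /* The predicate index is bSelOp >> BS3CG1SEL_OP_PRED_SHIFT. */
+    if ((bSelOp & BS3CG1SEL_OP_KIND_MASK) == BS3CG1SEL_OP_IS_TRUE)
+        return fPredicate;
+    return !fPredicate;
+}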
+
+/**
+ * Test selector predicates (values are shifted by BS3CG1SEL_OP_PRED_SHIFT).
+ */
+typedef enum BS3CG1PRED
+{
+    BS3CG1PRED_INVALID = 0,
+
+    /* Operand size. */
+    BS3CG1PRED_SIZE_O16,
+    BS3CG1PRED_SIZE_O32,
+    BS3CG1PRED_SIZE_O64,
+    /* Execution ring. */
+    BS3CG1PRED_RING_0,
+    BS3CG1PRED_RING_1,
+    BS3CG1PRED_RING_2,
+    BS3CG1PRED_RING_3,
+    BS3CG1PRED_RING_0_THRU_2,
+    BS3CG1PRED_RING_1_THRU_3,
+    /* Basic code mode. */
+    BS3CG1PRED_CODE_64BIT,
+    BS3CG1PRED_CODE_32BIT,
+    BS3CG1PRED_CODE_16BIT,
+    /* CPU modes. */
+    BS3CG1PRED_MODE_REAL,
+    BS3CG1PRED_MODE_PROT,
+    BS3CG1PRED_MODE_LONG,
+    BS3CG1PRED_MODE_V86,
+    BS3CG1PRED_MODE_SMM,
+    BS3CG1PRED_MODE_VMX,
+    BS3CG1PRED_MODE_SVM,
+    /* Paging on/off */
+    BS3CG1PRED_PAGING_ON,
+    BS3CG1PRED_PAGING_OFF,
+    /* CPU Vendors. */
+    BS3CG1PRED_VENDOR_AMD,
+    BS3CG1PRED_VENDOR_INTEL,
+    BS3CG1PRED_VENDOR_VIA,
+
+    BS3CG1PRED_END
+} BS3CG1PRED;
+
+
+/** The test instructions (generated). */
+extern const BS3CG1INSTR BS3_FAR_DATA   g_aBs3Cg1Instructions[];
+/** The number of test instructions (generated). */
+extern const uint16_t BS3_FAR_DATA      g_cBs3Cg1Instructions;
+/** The mnemonics (generated).
+ * Variable length sequence of mnemonics that runs in parallel to
+ * g_aBs3Cg1Instructions. */
+extern const char BS3_FAR_DATA          g_achBs3Cg1Mnemonics[];
+/** The opcodes (generated).
+ * Variable length sequence of opcode bytes that runs in parallel to
+ * g_aBs3Cg1Instructions, advancing by BS3CG1INSTR::cbOpcodes each time. */
+extern const uint8_t BS3_FAR_DATA       g_abBs3Cg1Opcodes[];
+/** The operands (generated).
+ * Variable length sequence of opcode values (BS3CG1OP) that runs in
+ * parallel to g_aBs3Cg1Instructions, advancing by BS3CG1INSTR::cOperands. */
+extern const uint8_t BS3_FAR_DATA       g_abBs3Cg1Operands[];
+/** The test data referenced by BS3CG1INSTR::offTests.
+ * In order to simplify generating these, we use a byte array. */
+extern const uint8_t BS3_FAR_DATA       g_abBs3Cg1Tests[];
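+
+/*
+ * A minimal sketch of how the parallel tables above are advanced, mirroring
+ * the outer loop of the generated-test worker (Bs3Cg1WorkerInner); the
+ * function and local names are illustrative only.
+ */
+DECLINLINE(void) bs3Cg1WalkTablesSketch(void)
+{
+    const char    BS3_FAR *pchMnemonic = g_achBs3Cg1Mnemonics;
+    const uint8_t BS3_FAR *pabOperands = g_abBs3Cg1Operands;
+    const uint8_t BS3_FAR *pabOpcodes  = g_abBs3Cg1Opcodes;
+    uint16_t               iInstr;
+    for (iInstr = 0; iInstr < g_cBs3Cg1Instructions; iInstr++)
+    {
+        PCBS3CG1INSTR pInstr = &g_aBs3Cg1Instructions[iInstr];
+        /* ... decode/execute pInstr using the three cursors here ... */
+        if (pInstr->fAdvanceMnemonic)   /* instructions may share a mnemonic */
+            pchMnemonic += pInstr->cchMnemonic;
+        pabOperands += pInstr->cOperands;
+        pabOpcodes  += pInstr->cbOpcodes;
+    }
+}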
+
+
+#endif
+
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2-template.c b/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2-template.c
index 2656539..22418ba 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2-template.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2-template.c
@@ -50,6 +50,16 @@ extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_imul_xBX_ud2);
 extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_imul_xCX_xBX_ud2);
 extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_div_xBX_ud2);
 extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_idiv_xBX_ud2);
+# if ARCH_BITS == 64
+extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_cmpxchg16b_rdi_ud2);
+extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_lock_cmpxchg16b_rdi_ud2);
+extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_o16_cmpxchg16b_rdi_ud2);
+extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_lock_o16_cmpxchg16b_rdi_ud2);
+extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_repz_cmpxchg16b_rdi_ud2);
+extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_lock_repz_cmpxchg16b_rdi_ud2);
+extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_repnz_cmpxchg16b_rdi_ud2);
+extern FNBS3FAR     BS3_CMN_NM(bs3CpuInstr2_lock_repnz_cmpxchg16b_rdi_ud2);
+# endif
 #endif
 
 
@@ -536,6 +546,131 @@ BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_idiv)(uint8_t bMode)
 }
 
 
+# if ARCH_BITS == 64
+BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_cmpxchg16b)(uint8_t bMode)
+{
+    BS3REGCTX       Ctx;
+    BS3REGCTX       ExpectCtx;
+    BS3TRAPFRAME    TrapFrame;
+    RTUINT128U      au128[3];
+    PRTUINT128U     pau128       = RT_ALIGN_PT(&au128[0], sizeof(RTUINT128U), PRTUINT128U);
+    bool const      fSupportCX16 = RT_BOOL(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16);
+    unsigned        iFlags;
+    unsigned        offBuf;
+    unsigned        iMatch;
+    unsigned        iWorker;
+    static struct
+    {
+        bool        fLocked;
+        uint8_t     offUd2;
+        FNBS3FAR   *pfnWorker;
+    } const s_aWorkers[] =
+    {
+        {   false,  4,  BS3_CMN_NM(bs3CpuInstr2_cmpxchg16b_rdi_ud2) },
+        {   false,  5,  BS3_CMN_NM(bs3CpuInstr2_o16_cmpxchg16b_rdi_ud2) },
+        {   false,  5,  BS3_CMN_NM(bs3CpuInstr2_repz_cmpxchg16b_rdi_ud2) },
+        {   false,  5,  BS3_CMN_NM(bs3CpuInstr2_repnz_cmpxchg16b_rdi_ud2) },
+        {   true, 1+4,  BS3_CMN_NM(bs3CpuInstr2_lock_cmpxchg16b_rdi_ud2) },
+        {   true, 1+5,  BS3_CMN_NM(bs3CpuInstr2_lock_o16_cmpxchg16b_rdi_ud2) },
+        {   true, 1+5,  BS3_CMN_NM(bs3CpuInstr2_lock_repz_cmpxchg16b_rdi_ud2) },
+        {   true, 1+5,  BS3_CMN_NM(bs3CpuInstr2_lock_repnz_cmpxchg16b_rdi_ud2) },
+    };
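+    /* Note: offUd2 equals the instruction length asserted in bs3-cpu-instr-2-template.mac (the LOCK prefix adds one byte). */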
+
+    /* Ensure the structures are allocated before we sample the stack pointer. */
+    Bs3MemSet(&Ctx, 0, sizeof(Ctx));
+    Bs3MemSet(&ExpectCtx, 0, sizeof(ExpectCtx));
+    Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
+    Bs3MemSet(pau128, 0, sizeof(pau128[0]) * 2);
+
+    /*
+     * Create test context.
+     */
+    Bs3RegCtxSaveEx(&Ctx, bMode, 512);
+    if (!fSupportCX16)
+        Bs3TestPrintf("Note! CMPXCHG16B is not supported by the CPU!\n");
+
+    /*
+     * Loop over the encoding variations: plain, o16, repz and repnz, each with and without a LOCK prefix.
+     */
+    g_usBs3TestStep = 0;
+    for (iWorker = 0; iWorker < RT_ELEMENTS(s_aWorkers); iWorker++)
+    {
+        Bs3RegCtxSetRipCsFromCurPtr(&Ctx, s_aWorkers[iWorker].pfnWorker);
+
+        /*
+         * One loop with all status flags set, and one with them clear.
+         */
+        Ctx.rflags.u16 |= X86_EFL_STATUS_BITS;
+        for (iFlags = 0; iFlags < 2; iFlags++)
+        {
+            Bs3MemCpy(&ExpectCtx, &Ctx, sizeof(ExpectCtx));
+
+            for (offBuf = 0; offBuf < sizeof(RTUINT128U); offBuf++)
+            {
+#  define CX16_OLD_LO       UINT64_C(0xabb6345dcc9c4bbd)
+#  define CX16_OLD_HI       UINT64_C(0x7b06ea35749549ab)
+#  define CX16_MISMATCH_LO  UINT64_C(0xbace3e3590f18981)
+#  define CX16_MISMATCH_HI  UINT64_C(0x9b385e8bfd5b4000)
+#  define CX16_STORE_LO     UINT64_C(0x5cbd27d251f6559b)
+#  define CX16_STORE_HI     UINT64_C(0x17ff434ed1b54963)
+
+                PRTUINT128U pBuf = (PRTUINT128U)&pau128->au8[offBuf];
+
+                ExpectCtx.rax.u = Ctx.rax.u = CX16_MISMATCH_LO;
+                ExpectCtx.rdx.u = Ctx.rdx.u = CX16_MISMATCH_HI;
+                for (iMatch = 0; iMatch < 2; iMatch++)
+                {
+                    uint8_t bExpectXcpt;
+                    pBuf->s.Lo = CX16_OLD_LO;
+                    pBuf->s.Hi = CX16_OLD_HI;
+                    ExpectCtx.rdi.u = Ctx.rdi.u = (uintptr_t)pBuf;
+                    Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame);
+                    g_usBs3TestStep++;
+                    //Bs3TestPrintf("Test: iFlags=%d offBuf=%d iMatch=%u iWorker=%u\n", iFlags, offBuf, iMatch, iWorker);
+                    bExpectXcpt = X86_XCPT_UD;
+                    if (fSupportCX16)
+                    {
+                        if (offBuf & 15)
+                        {
+                            bExpectXcpt = X86_XCPT_GP;
+                            ExpectCtx.rip.u = Ctx.rip.u;
+                            ExpectCtx.rflags.u32 = Ctx.rflags.u32;
+                        }
+                        else
+                        {
+                            ExpectCtx.rax.u = CX16_OLD_LO;
+                            ExpectCtx.rdx.u = CX16_OLD_HI;
+                            if (iMatch & 1)
+                                ExpectCtx.rflags.u32 = Ctx.rflags.u32 | X86_EFL_ZF;
+                            else
+                                ExpectCtx.rflags.u32 = Ctx.rflags.u32 & ~X86_EFL_ZF;
+                            ExpectCtx.rip.u = Ctx.rip.u + s_aWorkers[iWorker].offUd2;
+                        }
+                        ExpectCtx.rflags.u32 |= X86_EFL_RF;
+                    }
+                    if (   !Bs3TestCheckRegCtxEx(&TrapFrame.Ctx, &ExpectCtx, 0 /*cbPcAdjust*/, 0 /*cbSpAdjust*/,
+                                                 0 /*fExtraEfl*/, "lm64", 0 /*idTestStep*/)
+                        || TrapFrame.bXcpt != bExpectXcpt)
+                    {
+                        if (TrapFrame.bXcpt != bExpectXcpt)
+                            Bs3TestFailedF("Expected bXcpt=#%x, got %#x (%#x)", bExpectXcpt, TrapFrame.bXcpt, TrapFrame.uErrCd);
+                        Bs3TestFailedF("^^^ iWorker=%d iFlags=%d offBuf=%d iMatch=%u\n", iWorker, iFlags, offBuf, iMatch);
+                        ASMHalt();
+                    }
+
+                    ExpectCtx.rax.u = Ctx.rax.u = CX16_OLD_LO;
+                    ExpectCtx.rdx.u = Ctx.rdx.u = CX16_OLD_HI;
+                }
+            }
+            Ctx.rflags.u16 &= ~X86_EFL_STATUS_BITS;
+        }
+    }
+
+    return 0;
+}
+# endif /* ARCH_BITS == 64 */
+
+
 #endif /* BS3_INSTANTIATING_CMN */
 
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2-template.mac b/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2-template.mac
index 85bdeed..fa3422a 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2-template.mac
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2-template.mac
@@ -82,6 +82,87 @@ BS3_PROC_BEGIN_CMN bs3CpuInstr2_idiv_xBX_ud2, BS3_PBC_NEAR
         jmp     .again
 BS3_PROC_END_CMN   bs3CpuInstr2_idiv_xBX_ud2
 
+
+ %if TMPL_BITS == 64
+BS3_PROC_BEGIN_CMN bs3CpuInstr2_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR
+        cmpxchg16b [rdi]
+.again:
+        ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 4)
+BS3_PROC_END_CMN   bs3CpuInstr2_cmpxchg16b_rdi_ud2
+
+
+BS3_PROC_BEGIN_CMN bs3CpuInstr2_lock_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR
+        lock cmpxchg16b [rdi]
+.again:
+        ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 5)
+BS3_PROC_END_CMN   bs3CpuInstr2_lock_cmpxchg16b_rdi_ud2
+
+
+BS3_PROC_BEGIN_CMN bs3CpuInstr2_o16_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR
+        o16 cmpxchg16b [rdi]
+.again:
+        ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 5)
+BS3_PROC_END_CMN   bs3CpuInstr2_o16_cmpxchg16b_rdi_ud2
+
+
+BS3_PROC_BEGIN_CMN bs3CpuInstr2_lock_o16_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR
+        db 0f0h, 066h
+        cmpxchg16b [rdi]
+.again:
+        ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 6)
+BS3_PROC_END_CMN   bs3CpuInstr2_lock_o16_cmpxchg16b_rdi_ud2
+
+
+BS3_PROC_BEGIN_CMN bs3CpuInstr2_repz_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR
+        repz cmpxchg16b [rdi]
+.again:
+        ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 5)
+BS3_PROC_END_CMN   bs3CpuInstr2_repz_cmpxchg16b_rdi_ud2
+
+
+BS3_PROC_BEGIN_CMN bs3CpuInstr2_lock_repz_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR
+        db 0f0h, 0f3h
+        cmpxchg16b [rdi]
+.again:
+        ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 6)
+BS3_PROC_END_CMN   bs3CpuInstr2_lock_repz_cmpxchg16b_rdi_ud2
+
+BS3_PROC_BEGIN_CMN bs3CpuInstr2_repnz_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR
+        repnz cmpxchg16b [rdi]
+.again:
+        ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 5)
+BS3_PROC_END_CMN   bs3CpuInstr2_repnz_cmpxchg16b_rdi_ud2
+
+
+BS3_PROC_BEGIN_CMN bs3CpuInstr2_lock_repnz_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR
+        db 0f0h, 0f2h
+        cmpxchg16b [rdi]
+.again:
+        ud2
+        jmp     .again
+AssertCompile(.again - BS3_LAST_LABEL == 6)
+BS3_PROC_END_CMN   bs3CpuInstr2_lock_repnz_cmpxchg16b_rdi_ud2
+
+;; @todo figure out this fudge. sigh.
+times (348) db 0cch ; fudge to avoid 'rderr' during boot.
+
+ %endif ; TMPL_BITS == 64
+
+
 %endif ; BS3_INSTANTIATING_CMN
 
 %include "bs3kit-template-footer.mac"   ; reset environment
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2.c b/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2.c
index 7e322d0..b048752 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-2.c
@@ -38,6 +38,7 @@ BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_mul);
 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_imul);
 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_div);
 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_idiv);
+BS3TESTMODE_PROTOTYPES_CMN_64(bs3CpuInstr2_cmpxchg16b);
 
 
 /*********************************************************************************************************************************
@@ -49,6 +50,7 @@ static const BS3TESTMODEENTRY g_aModeTests[] =
     BS3TESTMODEENTRY_CMN("imul", bs3CpuInstr2_imul),
     BS3TESTMODEENTRY_CMN("div", bs3CpuInstr2_div),
     BS3TESTMODEENTRY_CMN("idiv", bs3CpuInstr2_idiv),
+    BS3TESTMODEENTRY_CMN_64("cmpxchg16b", bs3CpuInstr2_cmpxchg16b),
 };
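
The _CMN_64 prototype/entry pair above is the whole registration pattern for 64-bit-only tests; a hypothetical further test (name invented) would follow the same two steps:

    /* Hypothetical sketch only -- the test and its name are made up. */
    BS3TESTMODE_PROTOTYPES_CMN_64(bs3CpuInstr2_cmpxchg16b_misaligned);
    /* ... and the matching entry in g_aModeTests[]: */
    BS3TESTMODEENTRY_CMN_64("cmpxchg16b-misaligned", bs3CpuInstr2_cmpxchg16b_misaligned),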
 
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/Makefile.kmk b/src/VBox/ValidationKit/bootsectors/bs3kit/Makefile.kmk
index 1054907..b71422f 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/Makefile.kmk
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/Makefile.kmk
@@ -54,7 +54,9 @@ bs3-bootsector_SOURCES  = bs3-bootsector.asm
 VBOX_BS3KIT_COMMON_SOURCES = \
 	bs3-cmn-A20Disable.asm \
 	bs3-cmn-A20Enable.asm \
+	bs3-cmn-GetCpuVendor.c \
 	bs3-cmn-GetModeName.c \
+	bs3-cmn-GetModeNameShortLower.c \
 	bs3-cmn-KbdRead.asm \
 	bs3-cmn-KbdWait.asm \
 	bs3-cmn-KbdWrite.asm \
@@ -82,12 +84,14 @@ VBOX_BS3KIT_COMMON_SOURCES = \
        bs3-cmn-MemAllocZ.c \
        bs3-cmn-MemFree.c \
        bs3-cmn-MemGuardedTestPage.c \
+       bs3-cmn-MemPrintInfo.c \
        bs3-cmn-PagingData.c \
        bs3-cmn-PagingInitRootForPP.c \
        bs3-cmn-PagingInitRootForPAE.c \
        bs3-cmn-PagingInitRootForLM.c \
        bs3-cmn-PagingAlias.c \
        bs3-cmn-PagingProtect.c \
+       bs3-cmn-PagingQueryAddressInfo.c \
        bs3-cmn-PagingSetupCanonicalTraps.c \
        bs3-cmn-pic-data.c \
        bs3-cmn-PicMaskAll.c \
@@ -105,6 +109,13 @@ VBOX_BS3KIT_COMMON_SOURCES = \
        bs3-cmn-RegCtxSetRipCsFromCurPtr.c \
        bs3-cmn-RegCtxSetRipCsFromFlat.c \
        bs3-cmn-RegCtxSetRipCsFromLnkPtr.c \
+       bs3-cmn-ExtCtxInit.c \
+       bs3-cmn-ExtCtxSave.asm \
+       bs3-cmn-ExtCtxRestore.asm \
+       bs3-cmn-ExtCtxGetSize.c \
+       bs3-cmn-ExtCtxAlloc.c \
+       bs3-cmn-ExtCtxFree.c \
+       bs3-cmn-ExtCtxCopy.c \
        bs3-cmn-SelFar32ToFlat32.c \
        bs3-cmn-SelFar32ToFlat32NoClobber.asm \
        bs3-cmn-SelProtFar32ToFlat32.c \
@@ -118,6 +129,8 @@ VBOX_BS3KIT_COMMON_SOURCES = \
        bs3-cmn-SelProtFar16DataToFlat.asm \
        bs3-cmn-SelFlatDataToProtFar16.asm \
        bs3-cmn-SelFlatDataToRealMode.asm \
+       bs3-cmn-SelSetup16BitData.c \
+       bs3-cmn-SelSetup16BitCode.c \
        bs3-cmn-SlabInit.c \
        bs3-cmn-SlabAlloc.c \
        bs3-cmn-SlabAllocEx.c \
@@ -186,6 +199,10 @@ VBOX_BS3KIT_COMMON_SOURCES = \
        ../../../Runtime/common/asm/ASMSerializeInstruction-cpuid.asm \
        ../../../Runtime/common/asm/ASMSerializeInstruction-iret.asm \
        ../../../Runtime/common/asm/ASMSerializeInstruction-rdtscp.asm \
+       ../../../Runtime/common/asm/ASMCpuIdExSlow.asm \
+       ../../../Runtime/common/asm/ASMCpuId.asm \
+       ../../../Runtime/common/asm/ASMGetXcr0.asm \
+       ../../../Runtime/common/asm/ASMSetXcr0.asm \
 
 # The 16-bit BS3Kit library.
 LIBRARIES += bs3kit-common-16
@@ -222,6 +239,10 @@ bs3kit-common-16_bs3-cmn-UInt64Div.c_CFLAGS = -oh -d0 # -d1+ vs -d0 saves 0x6a3-
 
 $(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,ASMMemFirstMismatchingU8,8)
 $(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,ASMMemFirstNonZero,6)
+$(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,ASMCpuIdExSlow,32)
+$(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,ASMCpuId,20)
+$(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,ASMSetXcr0,8)
+$(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,ASMGetXcr0,0)
 -include $(PATH_SUB_CURRENT)/bs3kit-autostubs.kmk # manually generated from headers, see bottom of this file.
 
 # The 32-bit BS3Kit library.
@@ -260,6 +281,7 @@ bs3kit-common-64_SOURCES  = $(VBOX_BS3KIT_COMMON_SOURCES) \
 #
 VBOX_BS3KIT_MODE_SOURCES = \
 	bs3-mode-Name.asm \
+	bs3-mode-NameShortLower.asm \
 	bs3-mode-SwitchToRM.asm \
 	bs3-mode-SwitchToPE16.asm \
 	bs3-mode-SwitchToPE16_32.asm \
@@ -282,6 +304,7 @@ VBOX_BS3KIT_MODE_SOURCES = \
 	bs3-mode-SwitchToLM64.asm \
 	bs3-mode-SwitchToLM32.asm \
 	bs3-mode-SwitchToLM16.asm \
+	bs3-mode-SwitchTo32BitAndCallC.asm \
 	bs3-mode-EnteredMode.asm \
 	bs3-mode-PagingGetRootForPP16.asm \
 	bs3-mode-PagingGetRootForPP32.asm \
@@ -292,6 +315,7 @@ VBOX_BS3KIT_MODE_SOURCES = \
 	bs3-mode-TrapSystemCallHandler.asm \
 	bs3-mode-TestDoModes.c \
 	bs3-mode-TestDoModesByOne.c \
+	bs3-mode-TestDoModesByMax.c \
 	bs3-mode-TestDoModesHlp.asm \
 
 # The 16-bit real mode BS3Kit library.
@@ -302,7 +326,9 @@ bs3kit-rm_DEFS     = TMPL_MODE=BS3_MODE_RM
 bs3kit-rm_SOURCES  = $(VBOX_BS3KIT_MODE_SOURCES) \
 	bs3-first-rm.asm \
 	bs3-mode-CpuDetect.asm \
+	bs3-mode-TestDoModesStub.asm \
 	bs3-mode-TestDoModesByOneStub.asm \
+	bs3-mode-TestDoModesByMaxStub.asm \
 
 
 # The 16-bit BS3Kit library for 16-bit protected kernel+tss.
@@ -312,7 +338,9 @@ bs3kit-pe16_INSTTYPE = none
 bs3kit-pe16_DEFS     = TMPL_MODE=BS3_MODE_PE16
 bs3kit-pe16_SOURCES  = $(VBOX_BS3KIT_MODE_SOURCES) \
 	bs3-first-pe16.asm \
+	bs3-mode-TestDoModesStub.asm \
 	bs3-mode-TestDoModesByOneStub.asm \
+	bs3-mode-TestDoModesByMaxStub.asm \
 #	bs3-mode-CpuDetect.asm
 
 # The 32-bit BS3Kit library for 16-bit protected kernel+tss.
@@ -321,7 +349,9 @@ bs3kit-pe16_32_TEMPLATE = VBoxBS3KitImg32
 bs3kit-pe16_32_INSTTYPE = none
 bs3kit-pe16_32_DEFS     = TMPL_MODE=BS3_MODE_PE16_32
 bs3kit-pe16_32_SOURCES  = $(VBOX_BS3KIT_MODE_SOURCES) \
+	bs3-mode-TestDoModesStub.asm \
 	bs3-mode-TestDoModesByOneStub.asm \
+	bs3-mode-TestDoModesByMaxStub.asm \
 
 # The v86 BS3Kit library for 16-bit protected kernel+tss.
 LIBRARIES += bs3kit-pe16_v86
@@ -329,7 +359,9 @@ bs3kit-pe16_v86_TEMPLATE = VBoxBS3KitImg
 bs3kit-pe16_v86_INSTTYPE = none
 bs3kit-pe16_v86_DEFS     = TMPL_MODE=BS3_MODE_PE16_V86
 bs3kit-pe16_v86_SOURCES  = $(VBOX_BS3KIT_MODE_SOURCES) \
+	bs3-mode-TestDoModesStub.asm \
 	bs3-mode-TestDoModesByOneStub.asm \
+	bs3-mode-TestDoModesByMaxStub.asm \
 
 # The 32-bit BS3Kit library for 32-bit protected kernel+tss.
 LIBRARIES += bs3kit-pe32
@@ -345,7 +377,9 @@ bs3kit-pe32_16_TEMPLATE = VBoxBS3KitImg
 bs3kit-pe32_16_INSTTYPE = none
 bs3kit-pe32_16_DEFS     = TMPL_MODE=BS3_MODE_PE32_16
 bs3kit-pe32_16_SOURCES  = $(VBOX_BS3KIT_MODE_SOURCES) \
+	bs3-mode-TestDoModesStub.asm \
 	bs3-mode-TestDoModesByOneStub.asm \
+	bs3-mode-TestDoModesByMaxStub.asm \
 
 # The v8086 BS3Kit library for 32-bit protected kernel+tss.
 LIBRARIES += bs3kit-pev86
@@ -361,7 +395,9 @@ bs3kit-pp16_INSTTYPE = none
 bs3kit-pp16_DEFS     = TMPL_MODE=BS3_MODE_PP16
 bs3kit-pp16_SOURCES  = $(VBOX_BS3KIT_MODE_SOURCES) \
 	bs3-mode-CpuDetect.asm \
+	bs3-mode-TestDoModesStub.asm \
 	bs3-mode-TestDoModesByOneStub.asm \
+	bs3-mode-TestDoModesByMaxStub.asm \
 
 # The 32-bit BS3Kit library for 16-bit paged protected kernel+tss.
 LIBRARIES += bs3kit-pp16_32
@@ -391,7 +427,9 @@ bs3kit-pp32_16_TEMPLATE = VBoxBS3KitImg
 bs3kit-pp32_16_INSTTYPE = none
 bs3kit-pp32_16_DEFS     = TMPL_MODE=BS3_MODE_PP32_16
 bs3kit-pp32_16_SOURCES  = $(VBOX_BS3KIT_MODE_SOURCES) \
+	bs3-mode-TestDoModesStub.asm \
 	bs3-mode-TestDoModesByOneStub.asm \
+	bs3-mode-TestDoModesByMaxStub.asm \
 
 # The v8086 BS3Kit library for 32-bit paged protected kernel+tss.
 LIBRARIES += bs3kit-ppv86
@@ -408,7 +446,9 @@ bs3kit-pae16_INSTTYPE = none
 bs3kit-pae16_DEFS     = TMPL_MODE=BS3_MODE_PAE16
 bs3kit-pae16_SOURCES  = $(VBOX_BS3KIT_MODE_SOURCES) \
 	bs3-mode-CpuDetect.asm \
+	bs3-mode-TestDoModesStub.asm \
 	bs3-mode-TestDoModesByOneStub.asm \
+	bs3-mode-TestDoModesByMaxStub.asm \
 
 # The 16-bit BS3Kit library for 16-bit PAE paged protected kernel+tss.
 LIBRARIES += bs3kit-pae16_32
@@ -437,7 +477,9 @@ bs3kit-pae32_16_TEMPLATE = VBoxBS3KitImg
 bs3kit-pae32_16_INSTTYPE = none
 bs3kit-pae32_16_DEFS     = TMPL_MODE=BS3_MODE_PAE32_16
 bs3kit-pae32_16_SOURCES  = $(VBOX_BS3KIT_MODE_SOURCES) \
+	bs3-mode-TestDoModesStub.asm \
 	bs3-mode-TestDoModesByOneStub.asm \
+	bs3-mode-TestDoModesByMaxStub.asm \
 
 # The v8086 BS3Kit library for 32-bit PAE paged protected kernel+tss.
 LIBRARIES += bs3kit-paev86
@@ -453,7 +495,9 @@ bs3kit-lm16_TEMPLATE = VBoxBS3KitImg
 bs3kit-lm16_INSTTYPE = none
 bs3kit-lm16_DEFS     = TMPL_MODE=BS3_MODE_LM16
 bs3kit-lm16_SOURCES  = $(VBOX_BS3KIT_MODE_SOURCES) \
+	bs3-mode-TestDoModesStub.asm \
 	bs3-mode-TestDoModesByOneStub.asm \
+	bs3-mode-TestDoModesByMaxStub.asm \
 
 # The 32-bit long mode BS3Kit library.
 LIBRARIES += bs3kit-lm32
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/VBoxBs3ObjConverter.cpp b/src/VBox/ValidationKit/bootsectors/bs3kit/VBoxBs3ObjConverter.cpp
index 27913fe..1611453 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/VBoxBs3ObjConverter.cpp
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/VBoxBs3ObjConverter.cpp
@@ -70,6 +70,9 @@
 *********************************************************************************************************************************/
 /** Verbosity level. */
 static unsigned g_cVerbose = 0;
+/** Indicates that it's output from the 16-bit Watcom C or C++ compiler.
+ * We will do some massaging of fixup records when this is used.  */
+static bool     g_f16BitWatcomC = false;
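
The flag is wired to a new --wcc / -w option in main() further down. A hypothetical invocation (file name invented, exact argument conventions aside):

    VBoxBs3ObjConverter --wcc -v bs3-cmn-Something.obj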
 
 
 /**
@@ -628,14 +631,28 @@ static bool omfWriter_LNamesEnd(POMFWRITER pThis)
 }
 
 
-static bool omfWriter_SegDef(POMFWRITER pThis, uint8_t bSegAttr, uint32_t cbSeg, uint16_t idxSegName, uint16_t idxSegClass)
+static bool omfWriter_SegDef(POMFWRITER pThis, uint8_t bSegAttr, uint32_t cbSeg, uint16_t idxSegName, uint16_t idxSegClass,
+                             uint16_t idxOverlay = 1 /* NULL entry */)
 {
     return omfWriter_RecBegin(pThis, OMF_SEGDEF32)
         && omfWriter_RecAddU8(pThis, bSegAttr)
         && omfWriter_RecAddU32(pThis, cbSeg)
         && omfWriter_RecAddIdx(pThis, idxSegName)
         && omfWriter_RecAddIdx(pThis, idxSegClass)
-        && omfWriter_RecAddIdx(pThis, 1) /* overlay name index = NULL entry */
+        && omfWriter_RecAddIdx(pThis, idxOverlay)
+        && omfWriter_RecEndWithCrc(pThis);
+}
+
+static bool omfWriter_SegDef16(POMFWRITER pThis, uint8_t bSegAttr, uint32_t cbSeg, uint16_t idxSegName, uint16_t idxSegClass,
+                               uint16_t idxOverlay = 1 /* NULL entry */)
+{
+    Assert(cbSeg <= UINT16_MAX);
+    return omfWriter_RecBegin(pThis, OMF_SEGDEF16)
+        && omfWriter_RecAddU8(pThis, bSegAttr)
+        && omfWriter_RecAddU16(pThis, cbSeg)
+        && omfWriter_RecAddIdx(pThis, idxSegName)
+        && omfWriter_RecAddIdx(pThis, idxSegClass)
+        && omfWriter_RecAddIdx(pThis, idxOverlay)
         && omfWriter_RecEndWithCrc(pThis);
 }
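
Both writers take the standard OMF ACBP attribute byte; a hypothetical helper showing how it is composed, using the same bit layout this file assumes elsewhere (A = alignment in bits 5-7, C = combination in bits 2-4, B = big in bit 1, use32 in bit 0):

    #include <stdint.h>

    /* Sketch only: compose an OMF SEGDEF ACBP attribute byte. */
    static uint8_t omfSegAttrSketch(uint8_t bAlign, uint8_t bCombine,
                                    unsigned fBig, unsigned fUse32)
    {
        return (uint8_t)((bAlign << 5) | (bCombine << 2) | (fBig << 1) | fUse32);
    }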
 
@@ -1608,6 +1625,8 @@ static bool convertElfSymbolsToPubDefsAndExtDefs(POMFWRITER pThis, PCELFDETAILS
                                  bBind, iSym, pszSymName);
             }
         }
+        else if (idxSection == SHN_COMMON)
+            return error(pThis->pszSrc, "Symbol #%u (%s) is in the unsupported 'common' section.\n", iSym, pszSymName);
         else
             return error(pThis->pszSrc, "Unsupported or invalid section number %#x for symbol #%u (%s)\n",
                          idxSection, iSym, pszSymName);
@@ -3698,6 +3717,7 @@ typedef struct OMFSEGDEF
     const char *pchClass;
     const char *pchOverlay;
     bool        fUse32;
+    bool        f32bitRec;
 } OMFSEGDEF;
 typedef OMFSEGDEF *POMFSEGDEF;
 
@@ -3710,7 +3730,7 @@ typedef struct OMFGRPDEF
     uint16_t    idxName;
     uint8_t     cchName;
     uint16_t    cSegDefs;
-    uint16_t   *pidxSegDefs;
+    uint16_t   *paidxSegDefs;
 } OMFGRPDEF;
 typedef OMFGRPDEF *POMFGRPDEF;
 
@@ -3759,12 +3779,27 @@ typedef OMFSEGLINES *POMFSEGLINES;
 
 
 /**
+ * OMF details allocation that needs to be freed when done.
+ */
+typedef struct OMFDETAILSALLOC
+{
+    /** Pointer to the next allocation. */
+    struct OMFDETAILSALLOC *pNext;
+    /** The allocated bytes. */
+    uint8_t                 abData[RT_FLEXIBLE_ARRAY];
+} OMFDETAILSALLOC;
+typedef OMFDETAILSALLOC *POMFDETAILSALLOC;
+
+/**
  * OMF conversion details.
  *
  * Keeps information relevant to the conversion and CV8 debug info.
  */
 typedef struct OMFDETAILS
 {
+    /** The input file name. */
+    const char     *pszFile;
+
     /** Set if it has line numbers. */
     bool            fLineNumbers;
     /** Set if we think this may be a 32-bit OMF file. */
@@ -3801,6 +3836,8 @@ typedef struct OMFDETAILS
         const char *pszClass1;
         /** The secondary class name. */
         const char *pszClass2;
+        /** The main segment name, NULL if not applicable (CGROUP16). */
+        const char *pszSeg;
         /** The name length. */
         uint8_t     cchName;
         /** The primary class name length. */
@@ -3833,6 +3870,9 @@ typedef struct OMFDETAILS
     uint32_t        cSegLines;
     /** Segment line numbers, indexed by segment number. */
     POMFSEGLINES    paSegLines;
+
+    /** List of allocations that need freeing. */
+    POMFDETAILSALLOC    pAllocHead;
 } OMFDETAILS;
 typedef OMFDETAILS *POMFDETAILS;
 typedef OMFDETAILS const *PCOMFDETAILS;
@@ -3869,6 +3909,35 @@ typedef OMFDETAILS const *PCOMFDETAILS;
         else return error("???", "Out of memory!\n"); \
     } while (0)
 
+#define OMF_EXPLODE_LNAME(a_pOmfStuff, a_idxName, a_pchName, a_cchName, a_Name) \
+            do { \
+                if ((a_idxName) < (a_pOmfStuff)->cLNames) \
+                { \
+                    a_cchName = (uint8_t)*(a_pOmfStuff)->papchLNames[(a_idxName)]; \
+                    a_pchName = (a_pOmfStuff)->papchLNames[(a_idxName)] + 1; \
+                } \
+                else return error((a_pOmfStuff)->pszFile, "Invalid LNAME reference %#x in " #a_Name "!\n", a_idxName); \
+            } while (0)
+
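+ A note for orientation: the papchLNames entries the macro decodes are length-prefixed strings; a by-hand decode of one entry looks like this (sketch, not part of the patch):
+
+    /* An LNAMES table entry is one length byte followed by that many
+       name characters (not necessarily zero terminated). */
+    const char *pchEntry = pOmfStuff->papchLNames[idxName];
+    uint8_t     cchName  = (uint8_t)pchEntry[0];    /* length prefix */
+    const char *pchName  = pchEntry + 1;            /* the name bytes */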
+
+/**
+ * Allocates memory that will be freed when we're done converting.
+ *
+ * @returns Pointer to the memory.
+ * @param   pOmfStuff   The OMF details data.
+ * @param   cbNeeded    The amount of memory required.
+ */
+static void *omfDetails_Alloc(POMFDETAILS pOmfStuff, size_t cbNeeded)
+{
+    POMFDETAILSALLOC pAlloc = (POMFDETAILSALLOC)malloc(RT_OFFSETOF(OMFDETAILSALLOC, abData[cbNeeded]));
+    if (pAlloc)
+    {
+        pAlloc->pNext = pOmfStuff->pAllocHead;
+        pOmfStuff->pAllocHead = pAlloc;
+        return &pAlloc->abData[0];
+    }
+    return NULL;
+}
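
A minimal usage sketch (hypothetical, but matching how omfDetails_AddLName below uses the allocator); the memory stays live until convertOmfToOmf() walks pAllocHead at the end:

    /* Sketch: copy a name into conversion-lifetime memory. */
    char *pszCopy = (char *)omfDetails_Alloc(pOmfStuff, cchName + 1);
    if (!pszCopy)
        return error(pOmfStuff->pszFile, "Out of memory!\n");
    memcpy(pszCopy, pchName, cchName);
    pszCopy[cchName] = '\0';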
 
 /**
  * Adds a line number to the CV8 debug info.
@@ -4071,6 +4140,7 @@ static bool collectOmfDetails(const char *pszFile, uint8_t const *pbFile, size_t
     uint32_t        offSrcInfo = UINT32_MAX;
 
     memset(pOmfStuff, 0, sizeof(*pOmfStuff));
+    pOmfStuff->pszFile      = pszFile;
     pOmfStuff->iDebSymNm    = UINT16_MAX;
     pOmfStuff->iSymbolsNm   = UINT16_MAX;
     pOmfStuff->iSymbolsSeg  = UINT16_MAX;
@@ -4087,21 +4157,22 @@ static bool collectOmfDetails(const char *pszFile, uint8_t const *pbFile, size_t
     pOmfStuff->cGrpDefs = 1;
 
     /* Groups we seek. */
-#define OMF_INIT_WANTED_GROUP(a_idx, a_szName, a_szClass1, a_szClass2, a_idxReplace) \
+#define OMF_INIT_WANTED_GROUP(a_idx, a_szName, a_szClass1, a_szClass2, a_pszSeg, a_idxReplace) \
         pOmfStuff->aGroups[a_idx].pszName   = a_szName; \
         pOmfStuff->aGroups[a_idx].cchName   = sizeof(a_szName) - 1; \
         pOmfStuff->aGroups[a_idx].pszClass1 = a_szClass1; \
         pOmfStuff->aGroups[a_idx].cchClass1 = sizeof(a_szClass1) - 1; \
         pOmfStuff->aGroups[a_idx].pszClass2 = a_szClass2; \
         pOmfStuff->aGroups[a_idx].cchClass2 = sizeof(a_szClass2) - 1; \
+        pOmfStuff->aGroups[a_idx].pszSeg    = a_pszSeg; \
         pOmfStuff->aGroups[a_idx].fNeeded   = false; \
         pOmfStuff->aGroups[a_idx].idxGroup  = UINT16_MAX; \
         pOmfStuff->aGroups[a_idx].idxName   = UINT16_MAX; \
         pOmfStuff->aGroups[a_idx].idxReplaceGrp = a_idxReplace
-    OMF_INIT_WANTED_GROUP(0, "CGROUP16",         "BS3CLASS16CODE",   "CODE", OMF_REPLACE_GRP_CGROUP16);
-    OMF_INIT_WANTED_GROUP(1, "BS3GROUPRMTEXT16", "BS3CLASS16RMCODE", "",     OMF_REPLACE_GRP_RMCODE);
-    OMF_INIT_WANTED_GROUP(2, "BS3GROUPX0TEXT16", "BS3CLASS16X0CODE", "",     OMF_REPLACE_GRP_X0CODE);
-    OMF_INIT_WANTED_GROUP(3, "BS3GROUPX1TEXT16", "BS3CLASS16X1CODE", "",     OMF_REPLACE_GRP_X1CODE);
+    OMF_INIT_WANTED_GROUP(0, "CGROUP16",         "BS3CLASS16CODE",   "CODE", NULL,          OMF_REPLACE_GRP_CGROUP16);
+    OMF_INIT_WANTED_GROUP(1, "BS3GROUPRMTEXT16", "BS3CLASS16RMCODE", "",     "BS3RMTEXT16", OMF_REPLACE_GRP_RMCODE);
+    OMF_INIT_WANTED_GROUP(2, "BS3GROUPX0TEXT16", "BS3CLASS16X0CODE", "",     "BS3X0TEXT16", OMF_REPLACE_GRP_X0CODE);
+    OMF_INIT_WANTED_GROUP(3, "BS3GROUPX1TEXT16", "BS3CLASS16X1CODE", "",     "BS3X1TEXT16", OMF_REPLACE_GRP_X1CODE);
 
     /*
      * Process the OMF records.
@@ -4141,15 +4212,6 @@ static bool collectOmfDetails(const char *pszFile, uint8_t const *pbFile, size_t
                 a_u32 = RT_MAKE_U32_FROM_U8(pbRec[offRec], pbRec[offRec + 1], pbRec[offRec + 2], pbRec[offRec + 3]); \
                 offRec += 4; \
             } while (0)
-#define OMF_EXPLODE_LNAME(a_idxName, a_pchName, a_cchName, a_Name) \
-            do { \
-                if ((a_idxName) < pOmfStuff->cLNames) \
-                { \
-                    a_cchName = (uint8_t)*pOmfStuff->papchLNames[(a_idxName)]; \
-                    a_pchName = pOmfStuff->papchLNames[(a_idxName)] + 1; \
-                } \
-                else return error(pszFile, "Invalid LNAME reference %#x in " #a_Name "!\n", a_idxName); \
-            } while (0)
 
         switch (bRecType)
         {
@@ -4236,6 +4298,7 @@ static bool collectOmfDetails(const char *pszFile, uint8_t const *pbFile, size_t
             case OMF_PUBDEF32:
             case OMF_LPUBDEF32:
                 pOmfStuff->fProbably32bit = true;
+                /* fall thru */
             case OMF_PUBDEF16:
             case OMF_LPUBDEF16:
                 if (g_cVerbose > 0)
@@ -4304,8 +4367,9 @@ static bool collectOmfDetails(const char *pszFile, uint8_t const *pbFile, size_t
                 POMFSEGDEF pSegDef = &pOmfStuff->paSegDefs[pOmfStuff->cSegDefs++];
 
                 OMF_CHECK_RET(1 + (bRecType == OMF_SEGDEF16 ? 2 : 4) + 1 + 1 + 1, SEGDEF);
-                pSegDef->bSegAttr = pbRec[offRec++];
-                pSegDef->fUse32   = pSegDef->bSegAttr & 1;
+                pSegDef->f32bitRec  = bRecType == OMF_SEGDEF32;
+                pSegDef->bSegAttr   = pbRec[offRec++];
+                pSegDef->fUse32     = pSegDef->bSegAttr & 1;
                 if ((pSegDef->bSegAttr >> 5) == 0)
                 {
                     /* A=0: skip frame number of offset. */
@@ -4319,14 +4383,14 @@ static bool collectOmfDetails(const char *pszFile, uint8_t const *pbFile, size_t
                 OMF_READ_IDX(pSegDef->idxName, SEGDEF);
                 OMF_READ_IDX(pSegDef->idxClass, SEGDEF);
                 OMF_READ_IDX(pSegDef->idxOverlay, SEGDEF);
-                OMF_EXPLODE_LNAME(pSegDef->idxName, pSegDef->pchName, pSegDef->cchName, SEGDEF);
-                OMF_EXPLODE_LNAME(pSegDef->idxClass, pSegDef->pchClass, pSegDef->cchClass, SEGDEF);
-                OMF_EXPLODE_LNAME(pSegDef->idxOverlay, pSegDef->pchOverlay, pSegDef->cchOverlay, SEGDEF);
+                OMF_EXPLODE_LNAME(pOmfStuff, pSegDef->idxName, pSegDef->pchName, pSegDef->cchName, SEGDEF);
+                OMF_EXPLODE_LNAME(pOmfStuff, pSegDef->idxClass, pSegDef->pchClass, pSegDef->cchClass, SEGDEF);
+                OMF_EXPLODE_LNAME(pOmfStuff, pSegDef->idxOverlay, pSegDef->pchOverlay, pSegDef->cchOverlay, SEGDEF);
                 break;
             }
 
             /*
-             * Must count segment definitions to figure the index of our segment.
+             * Must count segment definitions to figure the index of our group.
              */
             case OMF_GRPDEF:
             {
@@ -4334,7 +4398,7 @@ static bool collectOmfDetails(const char *pszFile, uint8_t const *pbFile, size_t
                 POMFGRPDEF pGrpDef = &pOmfStuff->paGrpDefs[pOmfStuff->cGrpDefs];
 
                 OMF_READ_IDX(pGrpDef->idxName, GRPDEF);
-                OMF_EXPLODE_LNAME(pGrpDef->idxName, pGrpDef->pchName, pGrpDef->cchName, GRPDEF);
+                OMF_EXPLODE_LNAME(pOmfStuff, pGrpDef->idxName, pGrpDef->pchName, pGrpDef->cchName, GRPDEF);
 
                 unsigned j = RT_ELEMENTS(pOmfStuff->aGroups);
                 while (j-- > 0)
@@ -4345,14 +4409,14 @@ static bool collectOmfDetails(const char *pszFile, uint8_t const *pbFile, size_t
                     }
 
                 pGrpDef->cSegDefs    = 0;
-                pGrpDef->pidxSegDefs = NULL;
-                while (offRec + 2 + 1 < cbRec)
+                pGrpDef->paidxSegDefs = NULL;
+                while (offRec + 2 + 1 <= cbRec)
                 {
                     if (pbRec[offRec] != 0xff)
                         return error(pszFile, "Unsupported GRPDEF member type: %#x\n", pbRec[offRec]);
                     offRec++;
-                    OMF_GROW_TABLE_RET_ERR(uint16_t, pGrpDef->pidxSegDefs, pGrpDef->cSegDefs, 16);
-                    OMF_READ_IDX(pGrpDef->pidxSegDefs[pGrpDef->cSegDefs], GRPDEF);
+                    OMF_GROW_TABLE_RET_ERR(uint16_t, pGrpDef->paidxSegDefs, pGrpDef->cSegDefs, 16);
+                    OMF_READ_IDX(pGrpDef->paidxSegDefs[pGrpDef->cSegDefs], GRPDEF);
                     pGrpDef->cSegDefs++;
                 }
                 pOmfStuff->cGrpDefs++;
@@ -4441,125 +4505,284 @@ static bool collectOmfDetails(const char *pszFile, uint8_t const *pbFile, size_t
 
 
 /**
- * Writes the debug segment definitions (names too).
+ * Adds a LNAMES entry, returning the index of an existing entry if already present.
  *
  * @returns success indicator.
- * @param   pThis       The OMF writer.
- * @param   pOmfStuff   The OMF stuff with CV8 line number info.
+ * @param   pOmfStuff       The OMF stuff.
+ * @param   pszName         The name to add.
+ * @param   pidxName        Where to return the name index.
  */
-static bool convertOmfWriteDebugSegDefs(POMFWRITER pThis, POMFDETAILS pOmfStuff)
+static bool omfDetails_AddLName(POMFDETAILS pOmfStuff, const char *pszName, uint16_t *pidxName)
 {
-    if (   pOmfStuff->cSegLines == 0
-        || pOmfStuff->iSymbolsSeg != UINT16_MAX)
-        return true;
+    size_t const cchName = strlen(pszName);
 
     /*
-     * Emit the LNAMES we need.
+     * Check if we've already got the name.
      */
-#if 1
-    if (   pOmfStuff->iSymbolsNm == UINT16_MAX
-        || pOmfStuff->iDebSymNm == UINT16_MAX)
+    for (unsigned iName = 1; iName < pOmfStuff->cLNames; iName++)
+        if (   (unsigned char)pOmfStuff->papchLNames[iName][0] == cchName
+            && memcmp(pOmfStuff->papchLNames[iName] + 1, pszName, cchName) == 0)
+        {
+            *pidxName = iName;
+            return true;
+        }
+
+    /*
+     * Not found, append it.
+     */
+    char *pszCopy = (char *)omfDetails_Alloc(pOmfStuff, cchName + 2);
+    if (!pszCopy)
+        return false;
+    *(unsigned char *)&pszCopy[0] = (unsigned char)cchName;
+    memcpy(pszCopy + 1, pszName, cchName + 1);
+
+    OMF_GROW_TABLE_RET_ERR(char *, pOmfStuff->papchLNames, pOmfStuff->cLNames, 16);
+    pOmfStuff->papchLNames[pOmfStuff->cLNames] = (char *)pszCopy;
+    *pidxName = pOmfStuff->cLNames;
+    pOmfStuff->cLNames++;
+    return true;
+}
+
+
+/**
+ * Adds a SEGDEF (always adds a new one).
+ *
+ * @returns success indicator.
+ * @param   pOmfStuff   The OMF stuff.
+ * @param   bSegAttr    The OMF segment attributes.
+ * @param   cbSeg       The segment size.
+ * @param   idxSegName  The LNAMES index of the segment name.
+ * @param   idxSegClas  The LNAMES index of the segment class.
+ * @param   idxOverlay  The LNAMES index of the overlay name; pass 1.
+ * @param   fRec32      Set if SEGDEF32 should be emitted, clear for SEGDEF16.
+ * @param   pidxSeg     Where to return the segment index.
+ */
+static bool omfDetails_AddSegDef(POMFDETAILS pOmfStuff, uint8_t bSegAttr, uint32_t cbSeg, uint16_t idxSegName,
+                                 uint16_t idxSegClass, uint16_t idxOverlay, bool fRec32, uint16_t *pidxSeg)
+{
+    Assert(cbSeg <= UINT16_MAX || fRec32);
+    Assert(idxSegName < pOmfStuff->cLNames);
+    Assert(idxSegClass < pOmfStuff->cLNames);
+
+    OMF_GROW_TABLE_RET_ERR(OMFSEGDEF, pOmfStuff->paSegDefs, pOmfStuff->cSegDefs, 16);
+    POMFSEGDEF pSegDef = &pOmfStuff->paSegDefs[pOmfStuff->cSegDefs];
+
+    pSegDef->bSegAttr   = bSegAttr;
+    pSegDef->fUse32     = bSegAttr & 1;
+    pSegDef->f32bitRec  = fRec32;
+    pSegDef->cbSeg      = cbSeg;
+    pSegDef->idxName    = idxSegName;
+    pSegDef->idxClass   = idxSegClass;
+    pSegDef->idxOverlay = idxOverlay;
+
+    OMF_EXPLODE_LNAME(pOmfStuff, pSegDef->idxName, pSegDef->pchName, pSegDef->cchName, SEGDEF);
+    OMF_EXPLODE_LNAME(pOmfStuff, pSegDef->idxClass, pSegDef->pchClass, pSegDef->cchClass, SEGDEF);
+    OMF_EXPLODE_LNAME(pOmfStuff, pSegDef->idxOverlay, pSegDef->pchOverlay, pSegDef->cchOverlay, SEGDEF);
+
+    *pidxSeg = pOmfStuff->cSegDefs;
+    pOmfStuff->cSegDefs++;
+    return true;
+}
+
+
+/**
+ * Adds a SEGDEF if not found.
+ *
+ * @returns success indicator.
+ * @param   pOmfStuff   The OMF stuff.
+ * @param   bSegAttr    The OMF segment attributes.
+ * @param   cbSeg       The segment size.
+ * @param   idxSegName  The LNAMES index of the segment name.
+ * @param   idxSegClass The LNAMES index of the segment class.
+ * @param   idxOverlay  The LNAMES index of the overlay name; pass 1.
+ * @param   fRec32      Set if SEGDEF32 should be emitted, clear for SEGDEF16.
+ * @param   pidxSeg     Where to return the segment index.
+ */
+static bool omfDetails_AddSegDefIfNeeded(POMFDETAILS pOmfStuff, uint8_t bSegAttr, uint32_t cbSeg, uint16_t idxSegName,
+                                         uint16_t idxSegClass, uint16_t idxOverlay, bool fRec32, uint16_t *pidxSeg)
+{
+    /* Search for name */
+    for (unsigned iSegDef = 1; iSegDef < pOmfStuff->cSegDefs; iSegDef++)
     {
-        if (   !omfWriter_LNamesBegin(pThis, true /*fAddZeroEntry*/)
-            || (   pOmfStuff->iSymbolsNm == UINT16_MAX
-                && !omfWriter_LNamesAdd(pThis, "$$SYMBOLS", &pOmfStuff->iSymbolsNm))
-            || (   pOmfStuff->iDebSymNm == UINT16_MAX
-                && !omfWriter_LNamesAdd(pThis, "DEBSYM", &pOmfStuff->iDebSymNm))
-            || !omfWriter_LNamesEnd(pThis) )
-            return false;
+        POMFSEGDEF pSegDef = &pOmfStuff->paSegDefs[iSegDef];
+        if (pSegDef->idxName == idxSegName)
+        {
+            if (   pSegDef->bSegAttr   != bSegAttr
+                || pSegDef->f32bitRec  != fRec32
+                || pSegDef->idxName    != idxSegName
+                || pSegDef->idxClass   != idxSegClass
+                || pSegDef->idxOverlay != idxOverlay)
+                return error(pOmfStuff->pszFile,
+                             "Existing SEGDEF differs: bSegAttr=%#x vs %#x, f32bitRec=%d vs %d, idxName=%#x vs %#x, idxClass=%#x vs %#x, idxOverlay=%#x vs %#x\n",
+                             pSegDef->bSegAttr,   bSegAttr,
+                             pSegDef->f32bitRec,  fRec32,
+                             pSegDef->idxName,    idxSegName,
+                             pSegDef->idxClass,   idxSegClass,
+                             pSegDef->idxOverlay, idxOverlay);
+            *pidxSeg = iSegDef;
+            return true;
+        }
     }
-#else
-    if (   !omfWriter_LNamesBegin(pThis, true /*fAddZeroEntry*/)
-        || !omfWriter_LNamesAdd(pThis, "$$SYMBOLS2", &pOmfStuff->iSymbolsNm)
-        || !omfWriter_LNamesAdd(pThis, "DEBSYM2", &pOmfStuff->iDebSymNm)
-        || !omfWriter_LNamesEnd(pThis) )
-        return false;
+    return omfDetails_AddSegDef(pOmfStuff, bSegAttr, cbSeg, idxSegName, idxSegClass, idxOverlay, fRec32, pidxSeg);
+}
+
+
+#if 0 /* unused */
+/**
+ * Looks up a GRPDEF in the group definition table.
+ *
+ * @returns Index (0..32K) if found, UINT16_MAX if not found.
+ * @param   pOmfStuff   The OMF stuff.
+ * @param   pchName     The name to look up.
+ * @param   cchName     The length of the name.
+ */
+static uint16_t omfDetails_GrpDefLookupN(POMFDETAILS pOmfStuff, const char *pchName, size_t cchName)
+{
+    unsigned iGrpDef = pOmfStuff->cGrpDefs;
+    while (iGrpDef-- > 0)
+    {
+        if (   pOmfStuff->paGrpDefs[iGrpDef].cchName == cchName
+            && memcmp(pOmfStuff->paGrpDefs[iGrpDef].pchName, pchName, cchName) == 0)
+            return iGrpDef;
+    }
+    return UINT16_MAX;
+}
 #endif
 
-    /*
-     * Emit the segment definitions.
-     */
-    pOmfStuff->iSymbolsSeg = pOmfStuff->cSegDefs++;
 
-    uint8_t   bSegAttr = 0;
-    bSegAttr |= 5 << 5; /* A: dword alignment */
-    bSegAttr |= 0 << 2; /* C: private */
-    bSegAttr |= 0 << 1; /* B: not big */
-    bSegAttr |= 1;      /* D: use32 */
+/**
+ * Adds an empty GRPDEF (always adds a new one).
+ *
+ * @returns success indicator.
+ * @param   pOmfStuff   The OMF stuff.
+ * @param   idxGrpName  The LNAMES index of the group name.
+ * @param   pidxGrp     Where to return the group index.
+ */
+static bool omfDetails_AddGrpDef(POMFDETAILS pOmfStuff, uint16_t idxGrpName, uint16_t *pidxGrp)
+{
+    Assert(idxGrpName < pOmfStuff->cLNames);
 
-    /* calc the segment size. */
-    uint32_t  cbSeg = 4; /* dword 4 */
-    cbSeg += 4 + 4 + RT_ALIGN_32(pOmfStuff->cbStrTab, 4);
-    cbSeg += 4 + 4 + pOmfStuff->cSrcInfo * sizeof(pOmfStuff->paSrcInfo[0]);
-    uint32_t i = pOmfStuff->cSegLines;
-    while (i-- > 0)
-        if (pOmfStuff->paSegLines[i].cFiles > 0)
-            cbSeg += 4 + 4 + pOmfStuff->paSegLines[i].cb;
-    return omfWriter_SegDef(pThis, bSegAttr, cbSeg, pOmfStuff->iSymbolsNm, pOmfStuff->iDebSymNm);
+    OMF_GROW_TABLE_RET_ERR(OMFGRPDEF, pOmfStuff->paGrpDefs, pOmfStuff->cGrpDefs, 8);
+    POMFGRPDEF pGrpDef = &pOmfStuff->paGrpDefs[pOmfStuff->cGrpDefs];
+
+    pGrpDef->idxName      = idxGrpName;
+    pGrpDef->cSegDefs     = 0;
+    pGrpDef->paidxSegDefs = NULL;
+
+    *pidxGrp = pOmfStuff->cGrpDefs;
+    pOmfStuff->cGrpDefs++;
+    return true;
 }
 
 
 /**
- * Writes additional segment group definitions.
+ * Adds a segment to an existing GRPDEF.
  *
  * @returns success indicator.
- * @param   pThis       The OMF writer.
- * @param   pOmfStuff   The OMF stuff with CV8 line number info.
+ * @param   pOmfStuff   The OMF stuff.
+ * @param   idxGrp      The GRPDEF index of the group to append a member to.
+ * @param   idxSeg      The SEGDEF index of the segment name.
  */
-static bool convertOmfWriteDebugGrpDefs(POMFWRITER pThis, POMFDETAILS pOmfStuff)
+static bool omfDetails_AddSegToGrpDef(POMFDETAILS pOmfStuff, uint16_t idxGrp, uint16_t idxSeg)
 {
-    if (pOmfStuff->cSegLines == 0)
-        return true;
+    Assert(idxGrp < pOmfStuff->cGrpDefs && idxGrp > 0);
+    Assert(idxSeg < pOmfStuff->cSegDefs && idxSeg > 0);
+
+    POMFGRPDEF pGrpDef = &pOmfStuff->paGrpDefs[idxGrp];
+    OMF_GROW_TABLE_RET_ERR(uint16_t, pGrpDef->paidxSegDefs, pGrpDef->cSegDefs, 16);
+    pGrpDef->paidxSegDefs[pGrpDef->cSegDefs] = idxSeg;
+    pGrpDef->cSegDefs++;
 
+    return true;
+}
+
+
+/**
+ * Marks 16-bit code segment groups that are used in the object file as needed.
+ *
+ * @param   pOmfStuff   The OMF stuff.
+ */
+static void convertOmfLookForNeededGroups(POMFDETAILS pOmfStuff)
+{
     /*
-     * See what (if anything) we need.
+     * Consult the groups in question.  We mark as needed the groups whose
+     * segments appear among the segment definitions.
      */
-    uint8_t cNames  = 0;
-    uint8_t cGroups = 0;
-    unsigned j = RT_ELEMENTS(pOmfStuff->aGroups);
-    while (j-- > 0)
-        if (pOmfStuff->aGroups[j].fNeeded)
+    unsigned i = RT_ELEMENTS(pOmfStuff->aGroups);
+    while (i-- > 0)
+        if (pOmfStuff->aGroups[i].pszSeg)
         {
-            cNames  += pOmfStuff->aGroups[j].idxName  == UINT16_MAX;
-            cGroups += pOmfStuff->aGroups[j].idxGroup == UINT16_MAX;
+            const char * const  pszSegNm = pOmfStuff->aGroups[i].pszSeg;
+            size_t const        cchSegNm = strlen(pszSegNm);
+            for (unsigned iSegDef = 0; iSegDef < pOmfStuff->cSegDefs; iSegDef++)
+                if (   pOmfStuff->paSegDefs[iSegDef].cchName == cchSegNm
+                    && memcmp(pOmfStuff->paSegDefs[iSegDef].pchName, pszSegNm, cchSegNm) == 0)
+                {
+                    pOmfStuff->aGroups[i].fNeeded = true;
+                    break;
+                }
         }
+}
 
-    /*
-     * Add any names we need.
-     */
-    if (cNames)
-    {
-        if (!omfWriter_LNamesBegin(pThis, true))
-            return false;
-        j = RT_ELEMENTS(pOmfStuff->aGroups);
-        while (j-- > 0)
-            if (   pOmfStuff->aGroups[j].fNeeded
-                && pOmfStuff->aGroups[j].idxName == UINT16_MAX)
-                if (!omfWriter_LNamesAdd(pThis, pOmfStuff->aGroups[j].pszName, &pOmfStuff->aGroups[j].idxName))
-                    return false;
-        if (!omfWriter_LNamesEnd(pThis))
-            return false;
-    }
 
+/**
+ * Adds necessary group and segment definitions.
+ *
+ * @returns success indicator.
+ * @param   pOmfStuff   The OMF stuff.
+ */
+static bool convertOmfAddNeededGrpDefs(POMFDETAILS pOmfStuff)
+{
     /*
-     * Add any groups we need.
+     * Process the groups.
      */
-    if (cNames)
-    {
-        uint16_t iGrp = pOmfStuff->cGrpDefs; /* Shouldn't update cGrpDefs as it governs paGrpDefs. */
-        j = RT_ELEMENTS(pOmfStuff->aGroups);
-        while (j-- > 0)
+    unsigned j = RT_ELEMENTS(pOmfStuff->aGroups);
+    while (j-- > 0)
+        if (pOmfStuff->aGroups[j].fNeeded)
         {
-            if (   pOmfStuff->aGroups[j].fNeeded
-                && pOmfStuff->aGroups[j].idxGroup == UINT16_MAX)
+            if (pOmfStuff->aGroups[j].idxName == UINT16_MAX)
             {
-                if (   !omfWriter_GrpDefBegin(pThis, pOmfStuff->aGroups[j].idxName)
-                    || !omfWriter_GrpDefEnd(pThis))
+                Assert(pOmfStuff->aGroups[j].idxGroup == UINT16_MAX);
+                if (!omfDetails_AddLName(pOmfStuff, pOmfStuff->aGroups[j].pszName, &pOmfStuff->aGroups[j].idxName))
                     return false;
-                pOmfStuff->aGroups[j].idxGroup = iGrp++;
+            }
+            if (pOmfStuff->aGroups[j].idxGroup == UINT16_MAX)
+            {
+                if (!omfDetails_AddGrpDef(pOmfStuff, pOmfStuff->aGroups[j].idxName, &pOmfStuff->aGroups[j].idxGroup))
+                    return false;
+
+                if (pOmfStuff->aGroups[j].pszSeg)
+                {
+                    /* We need the segment class name. */
+                    uint16_t idxSegClass;
+                    if (!omfDetails_AddLName(pOmfStuff, pOmfStuff->aGroups[j].pszClass1, &idxSegClass))
+                        return false;
+
+                    /* Prep segment name buffer. */
+                    size_t   cchSegNm = strlen(pOmfStuff->aGroups[j].pszSeg);
+                    char     szSegNm[256+16];
+                    Assert(cchSegNm < 256);
+                    memcpy(szSegNm, pOmfStuff->aGroups[j].pszSeg, cchSegNm);
+
+                    /* Add the three segments. */
+                    static RTSTRTUPLE const s_aSuffixes[3] = { {RT_STR_TUPLE("_START")}, {RT_STR_TUPLE("")}, {RT_STR_TUPLE("_END")}, };
+                    for (unsigned iSuffix = 0; iSuffix < RT_ELEMENTS(s_aSuffixes); iSuffix++)
+                    {
+                        uint16_t idxSegNm;
+                        memcpy(&szSegNm[cchSegNm], s_aSuffixes[iSuffix].psz, s_aSuffixes[iSuffix].cch + 1);
+                        if (!omfDetails_AddLName(pOmfStuff, szSegNm, &idxSegNm))
+                            return false;
+                        uint8_t  const  fAlign = iSuffix == 1 ? OMF_SEG_ATTR_ALIGN_BYTE : OMF_SEG_ATTR_ALIGN_PARA;
+                        uint16_t        idxSeg;
+                        if (!omfDetails_AddSegDefIfNeeded(pOmfStuff, fAlign | OMF_SEG_ATTR_COMB_PUBLIC | OMF_SEG_ATTR_USE16,
+                                                          0, idxSegNm, idxSegClass, 1, false /*fRec*/, &idxSeg))
+                            return false;
+                        if (!omfDetails_AddSegToGrpDef(pOmfStuff, pOmfStuff->aGroups[j].idxGroup, idxSeg))
+                            return false;
+                    }
+                }
             }
         }
-    }
 
     /*
      * Replace group references in the segment lines table.
@@ -4575,6 +4798,50 @@ static bool convertOmfWriteDebugGrpDefs(POMFWRITER pThis, POMFDETAILS pOmfStuff)
 
 
 /**
+ * Adds the debug segment definitions (names too) to the OMF state.
+ *
+ * @returns success indicator.
+ * @param   pOmfStuff   The OMF stuff with CV8 line number info.
+ */
+static bool convertOmfAddDebugSegDefs(POMFDETAILS pOmfStuff)
+{
+    if (   pOmfStuff->cSegLines == 0
+        || pOmfStuff->iSymbolsSeg != UINT16_MAX)
+        return true;
+
+    /*
+     * Add the names we need.
+     */
+    if (   pOmfStuff->iSymbolsNm == UINT16_MAX
+        && !omfDetails_AddLName(pOmfStuff, "$$SYMBOLS", &pOmfStuff->iSymbolsNm))
+        return false;
+    if (   pOmfStuff->iDebSymNm == UINT16_MAX
+        && !omfDetails_AddLName(pOmfStuff, "DEBSYM", &pOmfStuff->iDebSymNm))
+        return false;
+
+    /*
+     * Add the segment definition.
+     */
+    uint8_t   bSegAttr = 0;
+    bSegAttr |= 5 << 5; /* A: dword alignment */
+    bSegAttr |= 0 << 2; /* C: private */
+    bSegAttr |= 0 << 1; /* B: not big */
+    bSegAttr |= 1;      /* D: use32 */
+
+    /* calc the segment size. */
+    uint32_t  cbSeg = 4; /* dword 4 */
+    cbSeg += 4 + 4 + RT_ALIGN_32(pOmfStuff->cbStrTab, 4);
+    cbSeg += 4 + 4 + pOmfStuff->cSrcInfo * sizeof(pOmfStuff->paSrcInfo[0]);
+    uint32_t i = pOmfStuff->cSegLines;
+    while (i-- > 0)
+        if (pOmfStuff->paSegLines[i].cFiles > 0)
+            cbSeg += 4 + 4 + pOmfStuff->paSegLines[i].cb;
+    return omfDetails_AddSegDef(pOmfStuff, bSegAttr, cbSeg, pOmfStuff->iSymbolsNm, pOmfStuff->iDebSymNm, 1 /*idxOverlay*/,
+                                true /*fRec32*/, &pOmfStuff->iSymbolsSeg);
+}
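
The size computed above is just the CV8 container added up: a 4-byte signature, then a 4-byte type and a 4-byte length per subsection plus its payload. A worked instance with invented numbers:

    /* Sketch: cbStrTab = 10 (aligned up to 12), cSrcInfo = 2, and one
       line-number table of 40 bytes gives:
           4                                  signature dword
         + 4 + 4 + 12                         string table subsection
         + 4 + 4 + 2 * sizeof(paSrcInfo[0])   source info subsection
         + 4 + 4 + 40                         line number subsection   */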
+
+
+/**
  * Writes the debug segment data.
  *
  * @returns success indicator.
@@ -4585,6 +4852,7 @@ static bool convertOmfWriteDebugData(POMFWRITER pThis, POMFDETAILS pOmfStuff)
 {
     if (pOmfStuff->cSegLines == 0)
         return true;
+    Assert(pOmfStuff->iSymbolsSeg != UINT16_MAX);
 
     /* Begin and write the CV version signature. */
     if (   !omfWriter_LEDataBegin(pThis, pOmfStuff->iSymbolsSeg, 0)
@@ -4680,6 +4948,62 @@ static bool convertOmfWriteDebugData(POMFWRITER pThis, POMFDETAILS pOmfStuff)
 
 
 /**
+ * Writes out all the segment definitions.
+ *
+ * @returns success indicator.
+ * @param   pThis           The OMF writer.
+ * @param   pOmfStuff       The OMF stuff containing the segment defs.
+ * @param   pfFlushState    Pointer to the flush state variable.
+ */
+static bool convertOmfWriteAllSegDefs(POMFWRITER pThis, POMFDETAILS pOmfStuff, int *pfFlushState)
+{
+    if (*pfFlushState > 0)
+    {
+        for (unsigned iSegDef = 1; iSegDef < pOmfStuff->cSegDefs; iSegDef++)
+        {
+            if (!(pOmfStuff->paSegDefs[iSegDef].f32bitRec
+                  ? omfWriter_SegDef : omfWriter_SegDef16)(pThis, pOmfStuff->paSegDefs[iSegDef].bSegAttr,
+                                                           pOmfStuff->paSegDefs[iSegDef].cbSeg,
+                                                           pOmfStuff->paSegDefs[iSegDef].idxName,
+                                                           pOmfStuff->paSegDefs[iSegDef].idxClass,
+                                                           pOmfStuff->paSegDefs[iSegDef].idxOverlay))
+                    return false;
+        }
+        *pfFlushState = -1;
+    }
+    return true;
+}
+
+
+/**
+ * Writes out all the segment group definitions.
+ *
+ * @returns success indicator.
+ * @param   pThis           The OMF writer.
+ * @param   pOmfStuff       The OMF stuff containing the group defs.
+ * @param   pfFlushState    Pointer to the flush state variable.
+ */
+static bool convertOmfWriteAllGrpDefs(POMFWRITER pThis, POMFDETAILS pOmfStuff, int *pfFlushState)
+{
+    if (*pfFlushState > 0)
+    {
+        for (unsigned iGrpDef = 1; iGrpDef < pOmfStuff->cGrpDefs; iGrpDef++)
+        {
+            if (!omfWriter_GrpDefBegin(pThis, pOmfStuff->paGrpDefs[iGrpDef].idxName))
+                return false;
+            for (unsigned iSegDef = 0; iSegDef < pOmfStuff->paGrpDefs[iGrpDef].cSegDefs; iSegDef++)
+                if (!omfWriter_GrpDefAddSegDef(pThis, pOmfStuff->paGrpDefs[iGrpDef].paidxSegDefs[iSegDef]))
+                    return false;
+            if (!omfWriter_GrpDefEnd(pThis))
+                return false;
+        }
+        *pfFlushState = -1;
+    }
+    return true;
+}
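
Both writers share the same tri-state convention for *pfFlushState; hypothetical names for it, inferred from the code (the patch itself just uses int literals):

    /* Sketch: the tri-state flush protocol used by the writers above. */
    enum OMFFLUSHSTATESKETCH
    {
        OMFFLUSH_PASSTHRU = 0,  /* copy original records straight through */
        OMFFLUSH_PENDING  = 1,  /* collected definitions not yet written */
        OMFFLUSH_DONE     = -1  /* flushed; skip the original records */
    };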
+
+
+/**
  * This does the actual converting, passthru style.
  *
  * It only modifies, removes and inserts stuff it cares about; the rest is passed
@@ -4691,11 +5015,17 @@ static bool convertOmfWriteDebugData(POMFWRITER pThis, POMFDETAILS pOmfStuff)
  * @param   cbFile      The size of the original file.
  * @param   pOmfStuff   The OMF stuff we've gathered during the first pass,
  *                      contains CV8 line number info if we converted anything.
+ * @param   fConvertLineNumbers     Whether we're converting line number debug info.
  */
-static bool convertOmfPassthru(POMFWRITER pThis, uint8_t const *pbFile, size_t cbFile, POMFDETAILS pOmfStuff)
+static bool convertOmfPassthru(POMFWRITER pThis, uint8_t const *pbFile, size_t cbFile, POMFDETAILS pOmfStuff,
+                               bool fConvertLineNumbers)
 {
-    bool const  fConvertLineNumbers = true;
-    bool        fSeenTheAdr         = false;
+    int         fFlushLNames   = 1;
+    int         fFlushSegDefs  = 1;
+    int         fFlushGrpDefs  = 1;
+    bool        fSeenTheAdr    = false;
+    bool        fConvertFixupp = false;
+
     uint32_t    off = 0;
     while (off + 3 < cbFile)
     {
@@ -4711,6 +5041,13 @@ static bool convertOmfPassthru(POMFWRITER pThis, uint8_t const *pbFile, size_t c
                     a_idx = (((a_idx) & 0x7f) << 8) | pbRec[offRec++]; \
             } while (0)
 
+#define OMF_PEEK_IDX(a_idx, a_offRec) \
+            do { \
+                a_idx = pbRec[a_offRec]; \
+                if ((a_idx) & 0x80) \
+                    a_idx = (((a_idx) & 0x7f) << 8) | pbRec[(a_offRec) + 1]; \
+            } while (0)
+
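+ Both macros assume the standard OMF index encoding; a hypothetical encoder mirroring the decode, for reference:
+
+    #include <stdint.h>
+
+    /* Sketch: indexes 0..0x7f take one byte; 0x80..0x7fff set bit 7 of the
+       first byte and carry the low eight bits in a second byte. */
+    static unsigned omfEncodeIdxSketch(uint8_t *pb, uint16_t idx)
+    {
+        if (idx < 0x80)
+        {
+            pb[0] = (uint8_t)idx;
+            return 1;
+        }
+        pb[0] = (uint8_t)(0x80 | (idx >> 8));
+        pb[1] = (uint8_t)idx;
+        return 2;
+    }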
         /*
         * Remove/insert switch.
          */
@@ -4790,38 +5127,104 @@ static bool convertOmfPassthru(POMFWRITER pThis, uint8_t const *pbFile, size_t c
                 break;
 
             /*
-             * Remove borland source file changes. Also, emit our SEGDEF
-             * before the pass marker.
+             * Remove Borland source file changes. Also, make sure the group
+             * definitions are written out.
              */
             case OMF_COMENT:
-                if (fConvertLineNumbers)
+                if (pbRec[1] == OMF_CCLS_LINK_PASS_SEP)
                 {
-                    fSkip = pbRec[1] == OMF_CCLS_BORLAND_SRC_FILE;
-                    if (pbRec[1] == OMF_CCLS_LINK_PASS_SEP)
-                        if (   !convertOmfWriteDebugSegDefs(pThis, pOmfStuff)
-                            || !convertOmfWriteDebugGrpDefs(pThis, pOmfStuff))
-                            return false;
+                    Assert(fFlushSegDefs <= 0);
+                    if (   fFlushGrpDefs > 0
+                        && !convertOmfWriteAllGrpDefs(pThis, pOmfStuff, &fFlushGrpDefs))
+                        return false;
                 }
+                if (fConvertLineNumbers)
+                    fSkip = pbRec[1] == OMF_CCLS_BORLAND_SRC_FILE;
                 break;
 
             /*
-             * Redo these to the OMF writer is on top of the index thing.
+             * Redo these so the OMF writer is on top of the index thing.
              */
             case OMF_LNAMES:
-                if (!omfWriter_LNamesBegin(pThis, false /*fAddZeroEntry*/))
-                    return false;
-                while (offRec + 1 < cbRec)
+                if (fFlushLNames >= 0)
                 {
-                    uint8_t     cch = pbRec[offRec];
-                    const char *pch = (const char *)&pbRec[offRec + 1];
-                    if (!omfWriter_LNamesAddN(pThis, pch, cch, NULL))
+                    if (!omfWriter_LNamesBegin(pThis, false /*fAddZeroEntry*/))
+                        return false;
+                    if (!fFlushLNames)
+                    {
+                        while (offRec + 1 < cbRec)
+                        {
+                            uint8_t     cch = pbRec[offRec];
+                            const char *pch = (const char *)&pbRec[offRec + 1];
+                            if (!omfWriter_LNamesAddN(pThis, pch, cch, NULL))
+                                return false;
+                            offRec += cch + 1;
+                        }
+                    }
+                    else
+                    {
+                        /* Flush all LNAMES in one go. */
+                        for (unsigned i = 1; i < pOmfStuff->cLNames; i++)
+                            if (!omfWriter_LNamesAddN(pThis, pOmfStuff->papchLNames[i] + 1, *pOmfStuff->papchLNames[i], NULL))
+                                return false;
+                        fFlushLNames = -1;
+                    }
+                    if (!omfWriter_LNamesEnd(pThis))
                         return false;
-                    offRec += cch + 1;
                 }
-                if (!omfWriter_LNamesEnd(pThis))
+                fSkip = true;
+                break;
+
+            /*
+             * We may want to flush all the segments when we see the first one.
+             */
+            case OMF_SEGDEF16:
+            case OMF_SEGDEF32:
+                fSkip = fFlushSegDefs != 0;
+                if (!convertOmfWriteAllSegDefs(pThis, pOmfStuff, &fFlushSegDefs))
                     return false;
+                break;
 
-                fSkip = true;
+            /*
+             * We may want to flush all the groups when we see the first one.
+             */
+            case OMF_GRPDEF:
+                fSkip = fFlushGrpDefs != 0;
+                if (!convertOmfWriteAllGrpDefs(pThis, pOmfStuff, &fFlushGrpDefs))
+                    return false;
+                break;
+
+            /*
+             * Hook LEDATA to flush groups and figure out when to convert FIXUPP records.
+             */
+            case OMF_LEDATA16:
+            case OMF_LEDATA32:
+                if (   fFlushGrpDefs > 0
+                    && !convertOmfWriteAllGrpDefs(pThis, pOmfStuff, &fFlushGrpDefs))
+                    return false;
+                fConvertFixupp = false;
+#if 0
+                if (   g_f16BitWatcomC
+                    && bRecType == OMF_LEDATA16)
+                {
+                    /* Check if this is a code segment. */
+                    uint16_t idxSeg;
+                    OMF_PEEK_IDX(idxSeg, offRec);
+
+                }
+#endif
+                break;
+
+
+            /*
+             * Convert fixups for 16-bit code segments to groups.
+             * Deals with switch table trouble.
+             */
+            case OMF_FIXUPP16:
+                if (fConvertFixupp)
+                {
+                    /* Gave up on this for now, easier to drop the eyecatcher in the _START segments. */
+                }
                 break;
 
             /*
@@ -4830,13 +5233,8 @@ static bool convertOmfPassthru(POMFWRITER pThis, uint8_t const *pbFile, size_t c
             case OMF_MODEND16:
             case OMF_MODEND32:
                 if (fConvertLineNumbers)
-                {
-                    if (   convertOmfWriteDebugSegDefs(pThis, pOmfStuff)
-                        && convertOmfWriteDebugGrpDefs(pThis, pOmfStuff)
-                        && convertOmfWriteDebugData(pThis, pOmfStuff))
-                    { /* likely */ }
-                    else return false;
-                }
+                    if (!convertOmfWriteDebugData(pThis, pOmfStuff))
+                        return false;
                 break;
         }
 
@@ -4881,26 +5279,41 @@ static bool convertOmfPassthru(POMFWRITER pThis, uint8_t const *pbFile, size_t c
  */
 static bool convertOmfToOmf(const char *pszFile, uint8_t const *pbFile, size_t cbFile, FILE *pDst)
 {
+    bool const fConvertLineNumbers = true;
+
     /*
-     * Collect line number information.
+     * Collect line number information, names, segment definitions, group definitions and such.
      */
     OMFDETAILS OmfStuff;
     if (!collectOmfDetails(pszFile, pbFile, cbFile, &OmfStuff))
         return false;
 
-    /*
-     * Instantiate the OMF writer and do pass-thru modifications.
-     */
-    bool fRc;
-    POMFWRITER pThis = omfWriter_Create(pszFile, 0, 0, pDst);
-    if (pThis)
+    /* Mark groups for 16-bit code segments used by this object file as needed
+       so we can reframe fixups to these segments correctly. */
+    convertOmfLookForNeededGroups(&OmfStuff);
+
+    /* Add debug segment definitions. */
+    bool fRc = true;
+    if (fConvertLineNumbers)
+        fRc = convertOmfAddDebugSegDefs(&OmfStuff);
+
+    /* Add any additional group definitions we may need (for 16-bit code segs). */
+    if (fRc)
+        fRc = convertOmfAddNeededGrpDefs(&OmfStuff);
+    if (fRc)
     {
-        fRc = convertOmfPassthru(pThis, pbFile, cbFile, &OmfStuff);
-        omfWriter_Destroy(pThis);
+        /*
+         * Instantiate the OMF writer and do pass-thru modifications.
+         */
+        POMFWRITER pThis = omfWriter_Create(pszFile, 0, 0, pDst);
+        if (pThis)
+        {
+            fRc = convertOmfPassthru(pThis, pbFile, cbFile, &OmfStuff, fConvertLineNumbers);
+            omfWriter_Destroy(pThis);
+        }
+        else
+            fRc = false;
     }
-    else
-        fRc = false;
-
 
     /*
      * Cleanup OmfStuff.
@@ -4916,6 +5329,14 @@ static bool convertOmfToOmf(const char *pszFile, uint8_t const *pbFile, size_t c
     free(OmfStuff.paSegLines);
     free(OmfStuff.paSrcInfo);
     free(OmfStuff.pchStrTab);
+
+    while (OmfStuff.pAllocHead)
+    {
+        POMFDETAILSALLOC pFreeMe = OmfStuff.pAllocHead;
+        OmfStuff.pAllocHead = OmfStuff.pAllocHead->pNext;
+        free(pFreeMe);
+    }
+
     return fRc;
 }
 
@@ -5022,7 +5443,9 @@ int main(int argc, char **argv)
             {
                 /* Convert long options to short ones. */
                 pszOpt--;
-                if (!strcmp(pszOpt, "--verbose"))
+                if (!strcmp(pszOpt, "--wcc"))
+                    pszOpt = "w";
+                else if (!strcmp(pszOpt, "--verbose"))
                     pszOpt = "v";
                 else if (!strcmp(pszOpt, "--version"))
                     pszOpt = "V";
@@ -5040,12 +5463,16 @@ int main(int argc, char **argv)
             {
                 switch (*pszOpt++)
                 {
+                    case 'w':
+                        g_f16BitWatcomC = true;
+                        break;
+
                     case 'v':
                         g_cVerbose++;
                         break;
 
                     case 'V':
-                        printf("%s\n", "$Revision: 110255 $");
+                        printf("%s\n", "$Revision: 115013 $");
                         return 0;
 
                     case '?':
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-c16-TrapRmV86Generic.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-c16-TrapRmV86Generic.asm
index f269340..94d81e1 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-c16-TrapRmV86Generic.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-c16-TrapRmV86Generic.asm
@@ -62,11 +62,7 @@ BS3_PROC_BEGIN Bs3TrapRmV86GenericEntries
 %endmacro
 
 %macro Bs3TrapRmV86GenericEntryErrCd 1
-        push    ax                      ; 1 byte:  Save AX                              (BP(+2) + 2)
-        mov     ax, i | 0ff00h          ; 2 bytes: AL = trap/interrupt number; AH=indicate have error code.
-        jmp     %1                      ; 3 bytes: Jump to handler code
-        ALIGNCODE(8)
-%assign i i+1
+        Bs3TrapRmV86GenericEntryNoErr %1    ; No error code pushed in real mode or V86 mode.
 %endmacro
 
 %assign i 0                             ; start counter.
@@ -112,6 +108,15 @@ AssertCompile(Bs3TrapRmV86GenericEntries_EndProc - Bs3TrapRmV86GenericEntries ==
 ;;
 ; Trap or interrupt with error code, faked if necessary.
 ;
+; early 386+ stack (movzx ebp, sp):
+;       [bp + 000h]     ebp
+;       [bp + 004h]     ax
+;       [bp + 006h]     errcd                   [bp'+0] <--- bp at jmp to common code.
+;       [bp + 008h]     cs                      [bp'+2]
+;       [bp + 00ah]     ip                      [bp'+4]
+;       [bp + 00ch]     flags                   [bp'+6]
+;      ([bp + 00eh]     post-iret sp value)     [bp'+8]
+;
 BS3_PROC_BEGIN _bs3TrapRmV86GenericTrapOrInt
 BS3_PROC_BEGIN bs3TrapRmV86GenericTrapOrInt
 CPU 386
@@ -159,7 +164,6 @@ CPU 386
 
         test    ah, 0ffh
         jz      .no_error_code
-;; @todo Do voodoo checks for 'int xx' or misguided hardware interrupts.
         mov     dx, [bp + 6]
         mov     [ss:bx + BS3TRAPFRAME.uErrCd], dx
 .no_error_code:
@@ -210,7 +214,6 @@ CPU 8086
 
         test    ah, 0ffh
         jz      .no_error_code
-;; @todo Do voodoo checks for 'int xx' or misguided hardware interrupts.
         mov     dx, [bp + 4]
         mov     [ss:bx + BS3TRAPFRAME.uErrCd], dx
 .no_error_code:
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxAlloc.c
similarity index 55%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxAlloc.c
index c5a8c77..3c697b4 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxAlloc.c
@@ -1,10 +1,10 @@
-/* $Id: bs3-cmn-SelFar32ToFlat32.c $ */
+/* $Id: bs3-cmn-ExtCtxAlloc.c $ */
 /** @file
- * BS3Kit - Bs3SelFar32ToFlat32
+ * BS3Kit - Bs3ExtCtxAlloc
  */
 
 /*
- * Copyright (C) 2007-2016 Oracle Corporation
+ * Copyright (C) 2007-2017 Oracle Corporation
  *
  * This file is part of VirtualBox Open Source Edition (OSE), as
  * available from http://www.virtualbox.org. This file is free software;
@@ -24,14 +24,21 @@
  * terms and conditions of either the GPL or the CDDL or both.
  */
 
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
 #include "bs3kit-template-header.h"
 
 
-#undef Bs3SelFar32ToFlat32
-BS3_CMN_DEF(uint32_t, Bs3SelFar32ToFlat32,(uint32_t off, uint16_t uSel))
+#undef Bs3ExtCtxAlloc
+BS3_CMN_DEF(PBS3EXTCTX, Bs3ExtCtxAlloc,(BS3MEMKIND enmKind))
 {
-    if (g_bBs3CurrentMode == BS3_MODE_RM)
-        return ((uint32_t)uSel << 4) + off;
-    return Bs3SelProtFar32ToFlat32(off, uSel);
+    uint64_t   fFlags;
+    uint16_t   cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
+    PBS3EXTCTX pExtCtx = (PBS3EXTCTX)Bs3MemAlloc(enmKind, cbExtCtx);
+    if (pExtCtx)
+        return Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
+    return NULL;
 }
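
Put together with the ExtCtxSave/ExtCtxRestore assembly and Bs3ExtCtxFree added elsewhere in this patch (see the Makefile list above), usage would look roughly like this -- a hypothetical sketch assuming the BS3MEMKIND_TILED kind and single-argument Save/Restore signatures:

    /* Sketch: allocate, capture, restore and free an extended context. */
    PBS3EXTCTX pExtCtx = Bs3ExtCtxAlloc(BS3MEMKIND_TILED);
    if (pExtCtx)
    {
        Bs3ExtCtxSave(pExtCtx);        /* capture FPU/SSE/AVX state */
        /* ... run code that clobbers the extended state ... */
        Bs3ExtCtxRestore(pExtCtx);     /* put the state back */
        Bs3ExtCtxFree(pExtCtx);
    }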
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxCopy.c
similarity index 53%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxCopy.c
index c5a8c77..7bdfd78 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxCopy.c
@@ -1,10 +1,10 @@
-/* $Id: bs3-cmn-SelFar32ToFlat32.c $ */
+/* $Id: bs3-cmn-ExtCtxCopy.c $ */
 /** @file
- * BS3Kit - Bs3SelFar32ToFlat32
+ * BS3Kit - Bs3ExtCtxCopy
  */
 
 /*
- * Copyright (C) 2007-2016 Oracle Corporation
+ * Copyright (C) 2007-2017 Oracle Corporation
  *
  * This file is part of VirtualBox Open Source Edition (OSE), as
  * available from http://www.virtualbox.org. This file is free software;
@@ -24,14 +24,20 @@
  * terms and conditions of either the GPL or the CDDL or both.
  */
 
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
 #include "bs3kit-template-header.h"
+#include <iprt/asm-amd64-x86.h>
 
 
-#undef Bs3SelFar32ToFlat32
-BS3_CMN_DEF(uint32_t, Bs3SelFar32ToFlat32,(uint32_t off, uint16_t uSel))
+#undef Bs3ExtCtxCopy
+BS3_CMN_DEF(PBS3EXTCTX, Bs3ExtCtxCopy,(PBS3EXTCTX pDst, PCBS3EXTCTX pSrc))
 {
-    if (g_bBs3CurrentMode == BS3_MODE_RM)
-        return ((uint32_t)uSel << 4) + off;
-    return Bs3SelProtFar32ToFlat32(off, uSel);
+    BS3_ASSERT(pDst->cb == pSrc->cb && pDst->enmMethod == pSrc->enmMethod && pDst->fXcr0Nominal == pSrc->fXcr0Nominal);
+    Bs3MemCpy(&pDst->Ctx, &pSrc->Ctx, pDst->cb - RT_OFFSETOF(BS3EXTCTX, Ctx));
+    pDst->fXcr0Saved = pSrc->fXcr0Saved;
+    return pDst;
 }
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxFree.c
similarity index 56%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxFree.c
index c5a8c77..41b7cf1 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxFree.c
@@ -1,10 +1,10 @@
-/* $Id: bs3-cmn-SelFar32ToFlat32.c $ */
+/* $Id: bs3-cmn-ExtCtxFree.c $ */
 /** @file
- * BS3Kit - Bs3SelFar32ToFlat32
+ * BS3Kit - Bs3ExtCtxFree
  */
 
 /*
- * Copyright (C) 2007-2016 Oracle Corporation
+ * Copyright (C) 2007-2017 Oracle Corporation
  *
  * This file is part of VirtualBox Open Source Edition (OSE), as
  * available from http://www.virtualbox.org. This file is free software;
@@ -24,14 +24,23 @@
  * terms and conditions of either the GPL or the CDDL or both.
  */
 
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
 #include "bs3kit-template-header.h"
 
 
-#undef Bs3SelFar32ToFlat32
-BS3_CMN_DEF(uint32_t, Bs3SelFar32ToFlat32,(uint32_t off, uint16_t uSel))
+#undef Bs3ExtCtxFree
+BS3_CMN_DEF(void, Bs3ExtCtxFree,(PBS3EXTCTX pExtCtx))
 {
-    if (g_bBs3CurrentMode == BS3_MODE_RM)
-        return ((uint32_t)uSel << 4) + off;
-    return Bs3SelProtFar32ToFlat32(off, uSel);
+    if (pExtCtx)
+    {
+        if (pExtCtx->u16Magic == BS3EXTCTX_MAGIC)
+        {
+            pExtCtx->u16Magic = ~BS3EXTCTX_MAGIC;
+            Bs3MemFree(pExtCtx, pExtCtx->cb);
+        }
+    }
 }
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxGetSize.c
similarity index 53%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxGetSize.c
index 8eede3c..b682ea0 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxGetSize.c
@@ -1,10 +1,10 @@
-/* $Id: bs3-cmn-RegCtxPrint.c $ */
+/* $Id: bs3-cmn-ExtCtxGetSize.c $ */
 /** @file
- * BS3Kit - Bs3RegCtxPrint
+ * BS3Kit - Bs3ExtCtxGetSize
  */
 
 /*
- * Copyright (C) 2007-2016 Oracle Corporation
+ * Copyright (C) 2007-2017 Oracle Corporation
  *
  * This file is part of VirtualBox Open Source Edition (OSE), as
  * available from http://www.virtualbox.org. This file is free software;
@@ -29,27 +29,31 @@
 *   Header Files                                                                                                                 *
 *********************************************************************************************************************************/
 #include "bs3kit-template-header.h"
+#include <iprt/asm-amd64-x86.h>
 
 
-#undef Bs3RegCtxPrint
-BS3_CMN_DEF(void, Bs3RegCtxPrint,(PCBS3REGCTX pRegCtx))
+#undef Bs3ExtCtxGetSize
+BS3_CMN_DEF(uint16_t, Bs3ExtCtxGetSize,(uint64_t BS3_FAR *pfFlags))
 {
-    //if (BS3_MODE_IS_64BIT_CODE(pRegCtx->bMode))
-    //{
-        Bs3TestPrintf("eax=%08RX32 ebx=%08RX32 ecx=%08RX32 edx=%08RX32 esi=%08RX32 edi=%08RX32\n",
-                      pRegCtx->rax.u32, pRegCtx->rbx.u32, pRegCtx->rcx.u32, pRegCtx->rdx.u32, pRegCtx->rsi.u32, pRegCtx->rdi.u32);
-        Bs3TestPrintf("eip=%08RX32 esp=%08RX32 ebp=%08RX32 efl=%08RX32 cr0=%08RX32 cr2=%08RX32\n",
-                      pRegCtx->rip.u32, pRegCtx->rsp.u32, pRegCtx->rbp.u32, pRegCtx->rflags.u32,
-                      pRegCtx->cr0.u32, pRegCtx->cr2.u32);
-        Bs3TestPrintf("cs=%04RX16   ds=%04RX16 es=%04RX16 fs=%04RX16 gs=%04RX16   ss=%04RX16 cr3=%08RX32 cr4=%08RX32\n",
-                      pRegCtx->cs, pRegCtx->ds, pRegCtx->es, pRegCtx->fs, pRegCtx->gs, pRegCtx->ss,
-                      pRegCtx->cr3.u32, pRegCtx->cr4.u32);
-        Bs3TestPrintf("tr=%04RX16 ldtr=%04RX16 cpl=%d   mode=%#x fbFlags=%#x\n",
-                      pRegCtx->tr, pRegCtx->ldtr, pRegCtx->bCpl, pRegCtx->bMode, pRegCtx->fbFlags);
-    //}
-    //else
-    //{
-    //
-    //}
+    uint32_t fEcx, fEdx;
+    *pfFlags = 0;
+
+    ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
+#if 1 /* Set this to 0 to disable xsave/xrstor till IEM groks it... */
+    if (fEcx & X86_CPUID_FEATURE_ECX_XSAVE)
+    {
+        uint32_t fEax;
+        ASMCpuIdExSlow(13, 0, 0, 0, &fEax, NULL, &fEcx, &fEdx);
+        if (   fEcx >= sizeof(X86FXSTATE) + sizeof(X86XSAVEHDR)
+            && fEcx < _32K)
+        {
+            *pfFlags = fEax | ((uint64_t)fEdx << 32);
+            return RT_OFFSETOF(BS3EXTCTX, Ctx) + RT_ALIGN(fEcx, 256);
+        }
+    }
+#endif
+    if (fEdx & X86_CPUID_FEATURE_EDX_FXSR)
+        return RT_OFFSETOF(BS3EXTCTX, Ctx) + sizeof(X86FXSTATE);
+    return RT_OFFSETOF(BS3EXTCTX, Ctx) + sizeof(X86FPUSTATE);
 }
 
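Bs3ExtCtxGetSize picks the context layout from CPUID: leaf 0xD reports the full
XSAVE area size in ECX and the supported XCR0 bits in EDX:EAX, with FXSAVE and
the ancient FNSAVE image as fallbacks. A hedged usage sketch, not part of the
patch:

    #include <bs3kit.h>

    static void bs3SketchExtCtxSize(void) /* hypothetical helper */
    {
        uint64_t fFlags; /* supported XCR0 mask; zero on FXSAVE/FNSAVE-only CPUs */
        uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
        Bs3TestPrintf("ext ctx: %u bytes, xcr0 mask %#RX64\n", cb, fFlags);
    }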
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxInit.c
similarity index 53%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxInit.c
index 8eede3c..95b7d9e 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxInit.c
@@ -1,10 +1,10 @@
-/* $Id: bs3-cmn-RegCtxPrint.c $ */
+/* $Id: bs3-cmn-ExtCtxInit.c $ */
 /** @file
- * BS3Kit - Bs3RegCtxPrint
+ * BS3Kit - Bs3ExtCtxInit
  */
 
 /*
- * Copyright (C) 2007-2016 Oracle Corporation
+ * Copyright (C) 2007-2017 Oracle Corporation
  *
  * This file is part of VirtualBox Open Source Edition (OSE), as
  * available from http://www.virtualbox.org. This file is free software;
@@ -29,27 +29,34 @@
 *   Header Files                                                                                                                 *
 *********************************************************************************************************************************/
 #include "bs3kit-template-header.h"
+#include <iprt/asm-amd64-x86.h>
 
 
-#undef Bs3RegCtxPrint
-BS3_CMN_DEF(void, Bs3RegCtxPrint,(PCBS3REGCTX pRegCtx))
+#undef Bs3ExtCtxInit
+BS3_CMN_DEF(PBS3EXTCTX, Bs3ExtCtxInit,(PBS3EXTCTX pExtCtx, uint16_t cbExtCtx, uint64_t fFlags))
 {
-    //if (BS3_MODE_IS_64BIT_CODE(pRegCtx->bMode))
-    //{
-        Bs3TestPrintf("eax=%08RX32 ebx=%08RX32 ecx=%08RX32 edx=%08RX32 esi=%08RX32 edi=%08RX32\n",
-                      pRegCtx->rax.u32, pRegCtx->rbx.u32, pRegCtx->rcx.u32, pRegCtx->rdx.u32, pRegCtx->rsi.u32, pRegCtx->rdi.u32);
-        Bs3TestPrintf("eip=%08RX32 esp=%08RX32 ebp=%08RX32 efl=%08RX32 cr0=%08RX32 cr2=%08RX32\n",
-                      pRegCtx->rip.u32, pRegCtx->rsp.u32, pRegCtx->rbp.u32, pRegCtx->rflags.u32,
-                      pRegCtx->cr0.u32, pRegCtx->cr2.u32);
-        Bs3TestPrintf("cs=%04RX16   ds=%04RX16 es=%04RX16 fs=%04RX16 gs=%04RX16   ss=%04RX16 cr3=%08RX32 cr4=%08RX32\n",
-                      pRegCtx->cs, pRegCtx->ds, pRegCtx->es, pRegCtx->fs, pRegCtx->gs, pRegCtx->ss,
-                      pRegCtx->cr3.u32, pRegCtx->cr4.u32);
-        Bs3TestPrintf("tr=%04RX16 ldtr=%04RX16 cpl=%d   mode=%#x fbFlags=%#x\n",
-                      pRegCtx->tr, pRegCtx->ldtr, pRegCtx->bCpl, pRegCtx->bMode, pRegCtx->fbFlags);
-    //}
-    //else
-    //{
-    //
-    //}
+    Bs3MemSet(pExtCtx, 0, cbExtCtx);
+    if (cbExtCtx >= RT_OFFSETOF(BS3EXTCTX, Ctx) + sizeof(X86FXSTATE) + sizeof(X86XSAVEHDR))
+    {
+        BS3_ASSERT(fFlags & XSAVE_C_X87);
+        pExtCtx->enmMethod = BS3EXTCTXMETHOD_XSAVE;
+        pExtCtx->Ctx.x.Hdr.bmXState = fFlags;
+    }
+    else if (cbExtCtx >= RT_OFFSETOF(BS3EXTCTX, Ctx) + sizeof(X86FXSTATE))
+    {
+        BS3_ASSERT(fFlags == 0);
+        pExtCtx->enmMethod = BS3EXTCTXMETHOD_FXSAVE;
+    }
+    else
+    {
+        BS3_ASSERT(fFlags == 0);
+        BS3_ASSERT(cbExtCtx >= RT_OFFSETOF(BS3EXTCTX, Ctx) + sizeof(X86FPUSTATE));
+        pExtCtx->enmMethod = BS3EXTCTXMETHOD_ANCIENT;
+    }
+    pExtCtx->cb             = cbExtCtx;
+    pExtCtx->u16Magic       = BS3EXTCTX_MAGIC;
+    pExtCtx->fXcr0Nominal   = fFlags;
+    pExtCtx->fXcr0Saved     = fFlags;
+    return pExtCtx;
 }
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxRestore.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxRestore.asm
new file mode 100644
index 0000000..effa8e2
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxRestore.asm
@@ -0,0 +1,124 @@
+; $Id: bs3-cmn-ExtCtxRestore.asm $
+;; @file
+; BS3Kit - Bs3ExtCtxRestore.
+;
+
+;
+; Copyright (C) 2007-2017 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+; VirtualBox OSE distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+
+%include "bs3kit-template-header.mac"
+
+
+;;
+; Restores the extended CPU context (FPU, SSE, AVX, ++).
+;
+; @param    pExtCtx
+;
+BS3_PROC_BEGIN_CMN Bs3ExtCtxRestore, BS3_PBC_NEAR
+        push    xBP
+        mov     xBP, xSP
+        push    sAX
+        push    sCX
+        push    sDX
+        push    xBX
+BONLY16 push    es
+
+%if ARCH_BITS == 16
+        les     bx, [xBP + xCB + cbCurRetAddr]
+        mov     al, [es:bx + BS3EXTCTX.enmMethod]
+        cmp     al, BS3EXTCTXMETHOD_XSAVE
+        je      .do_16_xsave
+        cmp     al, BS3EXTCTXMETHOD_FXSAVE
+        je      .do_16_fxsave
+        cmp     al, BS3EXTCTXMETHOD_ANCIENT
+        je      .do_16_ancient
+        int3
+
+.do_16_ancient:
+        frstor  [es:bx + BS3EXTCTX.Ctx]
+        jmp     .return
+
+.do_16_fxsave:
+        fxrstor [es:bx + BS3EXTCTX.Ctx]
+        jmp     .return
+
+.do_16_xsave:
+        xor     ecx, ecx
+        mov     eax, [es:bx + BS3EXTCTX.fXcr0Nominal]
+        mov     edx, [es:bx + BS3EXTCTX.fXcr0Nominal + 4]
+        xsetbv
+
+        xrstor  [es:bx + BS3EXTCTX.Ctx]
+
+        mov     eax, [es:bx + BS3EXTCTX.fXcr0Saved]
+        mov     edx, [es:bx + BS3EXTCTX.fXcr0Saved + 4]
+        xsetbv
+        ;jmp     .return
+
+%else
+BONLY32 mov     ebx, [xBP + xCB + cbCurRetAddr]
+BONLY64 mov     rbx, rcx
+
+        mov     al, [xBX + BS3EXTCTX.enmMethod]
+        cmp     al, BS3EXTCTXMETHOD_XSAVE
+        je      .do_xsave
+        cmp     al, BS3EXTCTXMETHOD_FXSAVE
+        je      .do_fxsave
+        cmp     al, BS3EXTCTXMETHOD_ANCIENT
+        je      .do_ancient
+        int3
+
+.do_ancient:
+        frstor  [xBX + BS3EXTCTX.Ctx]
+        jmp     .return
+
+.do_fxsave:
+BONLY32 fxrstor [xBX + BS3EXTCTX.Ctx]
+BONLY64 fxrstor64 [xBX + BS3EXTCTX.Ctx]
+        jmp     .return
+
+.do_xsave:
+        xor     ecx, ecx
+        mov     eax, [xBX + BS3EXTCTX.fXcr0Nominal]
+        mov     edx, [xBX + BS3EXTCTX.fXcr0Nominal + 4]
+        xsetbv
+
+BONLY32 xrstor  [xBX + BS3EXTCTX.Ctx]
+BONLY64 xrstor64 [xBX + BS3EXTCTX.Ctx]
+
+        mov     eax, [xBX + BS3EXTCTX.fXcr0Saved]
+        mov     edx, [xBX + BS3EXTCTX.fXcr0Saved + 4]
+        xsetbv
+        ;jmp     .return
+
+%endif
+
+.return:
+BONLY16 pop     es
+        pop     xBX
+        pop     sDX
+        pop     sCX
+        pop     sAX
+        mov     xSP, xBP
+        pop     xBP
+        ret
+BS3_PROC_END_CMN   Bs3ExtCtxRestore
+
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxSave.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxSave.asm
new file mode 100644
index 0000000..e092444
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-ExtCtxSave.asm
@@ -0,0 +1,130 @@
+; $Id: bs3-cmn-ExtCtxSave.asm $
+;; @file
+; BS3Kit - Bs3ExtCtxSave.
+;
+
+;
+; Copyright (C) 2007-2017 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+; VirtualBox OSE distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+
+%include "bs3kit-template-header.mac"
+
+
+;;
+; Saves the extended CPU context (FPU, SSE, AVX, ++).
+;
+; @param    pExtCtx
+;
+BS3_PROC_BEGIN_CMN Bs3ExtCtxSave, BS3_PBC_NEAR
+        push    xBP
+        mov     xBP, xSP
+        push    sAX
+        push    sCX
+        push    sDX
+        push    xBX
+BONLY16 push    es
+
+%if ARCH_BITS == 16
+        les     bx, [xBP + xCB + cbCurRetAddr]
+        mov     al, [es:bx + BS3EXTCTX.enmMethod]
+        cmp     al, BS3EXTCTXMETHOD_XSAVE
+        je      .do_16_xsave
+        cmp     al, BS3EXTCTXMETHOD_FXSAVE
+        je      .do_16_fxsave
+        cmp     al, BS3EXTCTXMETHOD_ANCIENT
+        je      .do_16_ancient
+        int3
+
+.do_16_ancient:
+        fnsave  [es:bx + BS3EXTCTX.Ctx]
+        jmp     .return
+
+.do_16_fxsave:
+        fxsave  [es:bx + BS3EXTCTX.Ctx]
+        jmp     .return
+
+.do_16_xsave:
+        xor     ecx, ecx
+        xgetbv
+        mov     [es:bx + BS3EXTCTX.fXcr0Saved], eax
+        mov     [es:bx + BS3EXTCTX.fXcr0Saved + 4], edx
+        mov     eax, [es:bx + BS3EXTCTX.fXcr0Nominal]
+        mov     edx, [es:bx + BS3EXTCTX.fXcr0Nominal + 4]
+        xsetbv
+
+        xsave   [es:bx + BS3EXTCTX.Ctx]
+
+        mov     eax, [es:bx + BS3EXTCTX.fXcr0Saved]
+        mov     edx, [es:bx + BS3EXTCTX.fXcr0Saved + 4]
+        xsetbv
+        ;jmp     .return
+
+%else
+BONLY32 mov     ebx, [xBP + xCB + cbCurRetAddr]
+BONLY64 mov     rbx, rcx
+
+        mov     al, [xBX + BS3EXTCTX.enmMethod]
+        cmp     al, BS3EXTCTXMETHOD_XSAVE
+        je      .do_xsave
+        cmp     al, BS3EXTCTXMETHOD_FXSAVE
+        je      .do_fxsave
+        cmp     al, BS3EXTCTXMETHOD_ANCIENT
+        je      .do_ancient
+        int3
+
+.do_ancient:
+        fnsave  [xBX + BS3EXTCTX.Ctx]
+        jmp     .return
+
+.do_fxsave:
+BONLY32 fxsave  [xBX + BS3EXTCTX.Ctx]
+BONLY64 fxsave64 [xBX + BS3EXTCTX.Ctx]
+        jmp     .return
+
+.do_xsave:
+        xor     ecx, ecx
+        xgetbv
+        mov     [xBX + BS3EXTCTX.fXcr0Saved], eax
+        mov     [xBX + BS3EXTCTX.fXcr0Saved + 4], edx
+        mov     eax, [xBX + BS3EXTCTX.fXcr0Nominal]
+        mov     edx, [xBX + BS3EXTCTX.fXcr0Nominal + 4]
+        xsetbv
+
+BONLY32 xsave   [xBX + BS3EXTCTX.Ctx]
+BONLY64 xsave64 [xBX + BS3EXTCTX.Ctx]
+
+        mov     eax, [xBX + BS3EXTCTX.fXcr0Saved]
+        mov     edx, [xBX + BS3EXTCTX.fXcr0Saved + 4]
+        xsetbv
+        ;jmp     .return
+
+%endif
+
+.return:
+BONLY16 pop     es
+        pop     xBX
+        pop     sDX
+        pop     sCX
+        pop     sAX
+        mov     xSP, xBP
+        pop     xBP
+        ret
+BS3_PROC_END_CMN   Bs3ExtCtxSave
+
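Bs3ExtCtxSave and Bs3ExtCtxRestore dispatch on BS3EXTCTX.enmMethod and
temporarily load the nominal XCR0 around XSAVE/XRSTOR so all enabled state
components are covered. A hedged round-trip sketch for instruction tests, not
part of the patch; Bs3ExtCtxCopy (added earlier) can clone a context to build
the expected values:

    #include <bs3kit.h>

    static void bs3SketchExtCtxRoundTrip(PBS3EXTCTX pBefore, PBS3EXTCTX pAfter)
    {
        Bs3ExtCtxSave(pBefore);    /* capture FPU/SSE/AVX state */
        /* ... execute the instruction under test ... */
        Bs3ExtCtxSave(pAfter);     /* capture the result for comparison */
        Bs3ExtCtxRestore(pBefore); /* put the original state back */
    }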
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-GetCpuVendor.c
similarity index 59%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-GetCpuVendor.c
index c5a8c77..7c42a21 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-GetCpuVendor.c
@@ -1,10 +1,10 @@
-/* $Id: bs3-cmn-SelFar32ToFlat32.c $ */
+/* $Id: bs3-cmn-GetCpuVendor.c $ */
 /** @file
- * BS3Kit - Bs3SelFar32ToFlat32
+ * BS3Kit - Bs3GetCpuVendor
  */
 
 /*
- * Copyright (C) 2007-2016 Oracle Corporation
+ * Copyright (C) 2007-2017 Oracle Corporation
  *
  * This file is part of VirtualBox Open Source Edition (OSE), as
  * available from http://www.virtualbox.org. This file is free software;
@@ -26,12 +26,24 @@
 
 #include "bs3kit-template-header.h"
 
+#include <iprt/asm-amd64-x86.h>
 
-#undef Bs3SelFar32ToFlat32
-BS3_CMN_DEF(uint32_t, Bs3SelFar32ToFlat32,(uint32_t off, uint16_t uSel))
+
+#undef Bs3GetCpuVendor
+BS3_CMN_DEF(BS3CPUVENDOR, Bs3GetCpuVendor,(void))
 {
-    if (g_bBs3CurrentMode == BS3_MODE_RM)
-        return ((uint32_t)uSel << 4) + off;
-    return Bs3SelProtFar32ToFlat32(off, uSel);
+    if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
+    {
+        uint32_t uEbx, uEcx, uEdx;
+        ASMCpuIdExSlow(0, 0, 0, 0, NULL, &uEbx, &uEcx, &uEdx);
+        if (ASMIsIntelCpuEx(uEbx, uEcx, uEdx))
+            return BS3CPUVENDOR_INTEL;
+        if (ASMIsAmdCpuEx(uEbx, uEcx, uEdx))
+            return BS3CPUVENDOR_AMD;
+        if (ASMIsViaCentaurCpuEx(uEbx, uEcx, uEdx))
+            return BS3CPUVENDOR_VIA;
+        return BS3CPUVENDOR_UNKNOWN;
+    }
+    return BS3CPUVENDOR_INTEL; /* no CPUID: assume an old Intel CPU */
 }
 
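Bs3GetCpuVendor classifies the CPU from CPUID leaf 0, falling back to Intel on
pre-CPUID hardware. A hedged sketch of the intended use, not part of the patch:

    #include <bs3kit.h>

    static void bs3SketchVendorGate(void) /* hypothetical helper */
    {
        /* Skip vendor-specific testing (e.g. AMD SVM) on other vendors. */
        if (Bs3GetCpuVendor() != BS3CPUVENDOR_AMD)
            return;
        /* ... AMD-only checks ... */
    }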
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-GetModeNameShortLower.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-GetModeNameShortLower.c
new file mode 100644
index 0000000..5fa54d5
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-GetModeNameShortLower.c
@@ -0,0 +1,62 @@
+/* $Id: bs3-cmn-GetModeNameShortLower.c $ */
+/** @file
+ * BS3Kit - Bs3GetModeNameShortLower
+ */
+
+/*
+ * Copyright (C) 2007-2016 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+#include "bs3kit-template-header.h"
+
+
+
+#undef Bs3GetModeNameShortLower
+BS3_CMN_DEF(const char BS3_FAR *, Bs3GetModeNameShortLower,(uint8_t bMode))
+{
+    switch (bMode)
+    {
+        case BS3_MODE_RM:           return g_szBs3ModeNameShortLower_rm;
+        case BS3_MODE_PE16:         return g_szBs3ModeNameShortLower_pe16;
+        case BS3_MODE_PE16_32:      return g_szBs3ModeNameShortLower_pe16_32;
+        case BS3_MODE_PE16_V86:     return g_szBs3ModeNameShortLower_pe16_v86;
+        case BS3_MODE_PE32:         return g_szBs3ModeNameShortLower_pe32;
+        case BS3_MODE_PE32_16:      return g_szBs3ModeNameShortLower_pe32_16;
+        case BS3_MODE_PEV86:        return g_szBs3ModeNameShortLower_pev86;
+        case BS3_MODE_PP16:         return g_szBs3ModeNameShortLower_pp16;
+        case BS3_MODE_PP16_32:      return g_szBs3ModeNameShortLower_pp16_32;
+        case BS3_MODE_PP16_V86:     return g_szBs3ModeNameShortLower_pp16_v86;
+        case BS3_MODE_PP32:         return g_szBs3ModeNameShortLower_pp32;
+        case BS3_MODE_PP32_16:      return g_szBs3ModeNameShortLower_pp32_16;
+        case BS3_MODE_PPV86:        return g_szBs3ModeNameShortLower_ppv86;
+        case BS3_MODE_PAE16:        return g_szBs3ModeNameShortLower_pae16;
+        case BS3_MODE_PAE16_32:     return g_szBs3ModeNameShortLower_pae16_32;
+        case BS3_MODE_PAE16_V86:    return g_szBs3ModeNameShortLower_pae16_v86;
+        case BS3_MODE_PAE32:        return g_szBs3ModeNameShortLower_pae32;
+        case BS3_MODE_PAE32_16:     return g_szBs3ModeNameShortLower_pae32_16;
+        case BS3_MODE_PAEV86:       return g_szBs3ModeNameShortLower_paev86;
+        case BS3_MODE_LM16:         return g_szBs3ModeNameShortLower_lm16;
+        case BS3_MODE_LM32:         return g_szBs3ModeNameShortLower_lm32;
+        case BS3_MODE_LM64:         return g_szBs3ModeNameShortLower_lm64;
+        case BS3_MODE_INVALID:      return "inv";
+        default:                    return "unk";
+    }
+}
+
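The lower-case short mode names complement the existing mode-name helpers and
suit log lines and generated identifiers (e.g. "lm64"). A one-line usage
sketch, not part of the patch:

    #include <bs3kit.h>

    static void bs3SketchModeName(uint8_t bMode) /* hypothetical helper */
    {
        Bs3TestPrintf("mode: %s\n", Bs3GetModeNameShortLower(bMode));
    }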
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-MemPrintInfo.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-MemPrintInfo.c
new file mode 100644
index 0000000..9ba28d2
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-MemPrintInfo.c
@@ -0,0 +1,85 @@
+/* $Id: bs3-cmn-MemPrintInfo.c $ */
+/** @file
+ * BS3Kit - Bs3MemPrintInfo
+ */
+
+/*
+ * Copyright (C) 2007-2017 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include "bs3kit-template-header.h"
+#include "bs3-cmn-memory.h"
+#include <iprt/asm.h>
+
+
+/**
+ * Prints a slab control structure with allocation map.
+ *
+ * @param   pCtl                The slab control structure to print.
+ * @param   pszPrefix           The output prefix.
+ */
+static void Bs3MemPrintInfoSlabCtl(PBS3SLABCTL pCtl, const char BS3_FAR *pszPrefix)
+{
+    unsigned iChunk;
+    Bs3TestPrintf("%s / %#06x: %u of %u chunks free", pszPrefix, pCtl->cbChunk, pCtl->cFreeChunks, pCtl->cChunks);
+    for (iChunk = 0; iChunk < pCtl->cChunks; iChunk++)
+    {
+        if ((iChunk & 63) == 0)
+            Bs3TestPrintf("\n%s:", pszPrefix);
+        if (ASMBitTest(pCtl->bmAllocated, iChunk))
+            Bs3TestPrintf((iChunk & 7) != 0 ? "x" : " x");
+        else
+            Bs3TestPrintf((iChunk & 7) != 0 ? "-" : " -");
+    }
+    Bs3TestPrintf("\n");
+}
+
+
+
+/**
+ * Prints a summary of a slab allocation list (i.e. the heap).
+ *
+ * @param   paLists             Array of BS3_MEM_SLAB_LIST_COUNT lists.
+ * @param   pszPrefix           The output prefix.
+ */
+static void Bs3MemPrintInfoSlabList(PBS3SLABHEAD paLists, const char BS3_FAR *pszPrefix)
+{
+    unsigned iSlab;
+    for (iSlab = 0; iSlab < BS3_MEM_SLAB_LIST_COUNT; iSlab++)
+        if (paLists[iSlab].cSlabs)
+            Bs3TestPrintf("%s / %#06x: %u slabs, %RU32 of %RU32 chunks free\n",
+                          pszPrefix, paLists[iSlab].cbChunk, paLists[iSlab].cSlabs,
+                          paLists[iSlab].cFreeChunks, paLists[iSlab].cChunks);
+}
+
+
+#undef Bs3MemPrintInfo
+BS3_CMN_DEF(void, Bs3MemPrintInfo,(void))
+{
+    Bs3MemPrintInfoSlabList(g_aBs3LowSlabLists,        "Lower");
+    Bs3MemPrintInfoSlabList(g_aBs3UpperTiledSlabLists, "Upper");
+    Bs3MemPrintInfoSlabCtl(&g_Bs3Mem4KLow.Core,        "4KLow");
+    Bs3MemPrintInfoSlabCtl(&g_Bs3Mem4KUpperTiled.Core, "Tiled");
+}
+
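Bs3MemPrintInfo dumps the slab lists and the 4K allocation bitmaps, which is
mainly useful when hunting allocation failures or leaks. A hedged sketch, not
part of the patch, assuming Bs3MemAlloc takes (BS3MEMKIND, size_t) as the
calls above suggest:

    #include <bs3kit.h>

    static void BS3_FAR *bs3SketchAllocOrDump(BS3MEMKIND enmKind, size_t cb)
    {
        void BS3_FAR *pv = Bs3MemAlloc(enmKind, cb); /* hypothetical wrapper */
        if (!pv)
            Bs3MemPrintInfo(); /* show heap state to diagnose the failure */
        return pv;
    }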
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingAlias.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingAlias.c
index eaf38cd..efc26e4 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingAlias.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingAlias.c
@@ -124,7 +124,7 @@ BS3_CMN_DEF(int, Bs3PagingAlias,(uint64_t uDst, uint64_t uPhysToAlias, uint32_t
              */
             while (cPages > 0)
             {
-                pPtePae = bs3PagingGetPte(cr3, g_bBs3CurrentMode, uDst, false, &rc);
+                pPtePae = bs3PagingGetPaePte(cr3, g_bBs3CurrentMode, uDst, false, &rc);
                 if (pPtePae)
                 {
                     uint32_t cLeftInPt = X86_PG_PAE_ENTRIES - ((uDst >> X86_PT_PAE_SHIFT) & X86_PT_PAE_MASK);
@@ -148,7 +148,7 @@ BS3_CMN_DEF(int, Bs3PagingAlias,(uint64_t uDst, uint64_t uPhysToAlias, uint32_t
             while (cPages > 0)
             {
                 uint32_t cLeftInPt = X86_PG_PAE_ENTRIES - ((uDst >> X86_PT_PAE_SHIFT) & X86_PT_PAE_MASK);
-                pPtePae = bs3PagingGetPte(cr3, g_bBs3CurrentMode, uDst, false, &rc);
+                pPtePae = bs3PagingGetPaePte(cr3, g_bBs3CurrentMode, uDst, false, &rc);
                 while (cLeftInPt > 0 && cPages > 0)
                 {
                     pPtePae->u = uPhysToAlias | fPte;
@@ -165,7 +165,7 @@ BS3_CMN_DEF(int, Bs3PagingAlias,(uint64_t uDst, uint64_t uPhysToAlias, uint32_t
     }
 #if ARCH_BITS == 16
     /*
-     * We can do this stuff in v8086 mode.
+     * We can't do this stuff in v8086 mode, so switch to 16-bit prot mode and do it there.
      */
     else
         return Bs3SwitchFromV86To16BitAndCallC((FPFNBS3FAR)Bs3PagingAlias_f16, sizeof(uint64_t)*3 + sizeof(uint32_t),
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingProtect.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingProtect.c
index defc3b8..db4b488 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingProtect.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingProtect.c
@@ -144,8 +144,8 @@ BS3_CMN_DEF(X86PTE BS3_FAR *, bs3PagingGetLegacyPte,(RTCCUINTXREG cr3, uint32_t
  *                              replacing large pages.
  * @param   prc                 Updated only on failure.
  */
-#undef bs3PagingGetPte
-BS3_CMN_DEF(X86PTEPAE BS3_FAR *, bs3PagingGetPte,(RTCCUINTXREG cr3, uint8_t bMode, uint64_t uFlat, bool fUseInvlPg, int *prc))
+#undef bs3PagingGetPaePte
+BS3_CMN_DEF(X86PTEPAE BS3_FAR *, bs3PagingGetPaePte,(RTCCUINTXREG cr3, uint8_t bMode, uint64_t uFlat, bool fUseInvlPg, int *prc))
 {
     X86PTEPAE BS3_FAR  *pPTE = NULL;
 #if TMPL_BITS == 16
@@ -173,7 +173,7 @@ BS3_CMN_DEF(X86PTEPAE BS3_FAR *, bs3PagingGetPte,(RTCCUINTXREG cr3, uint8_t bMod
                     if ((pPdpt->a[iPdpte].u & X86_PDPE_PG_MASK) <= uMaxAddr)
                         pPD = (X86PDPAE BS3_FAR *)Bs3XptrFlatToCurrent(pPdpt->a[iPdpte].u & ~(uint64_t)PAGE_OFFSET_MASK);
                     else
-                        BS3PAGING_DPRINTF1(("bs3PagingGetPte: out of range! iPdpte=%#x: %RX64 max=%RX32\n",
+                        BS3PAGING_DPRINTF1(("bs3PagingGetPaePte: out of range! iPdpte=%#x: %RX64 max=%RX32\n",
                                             iPdpte, pPdpt->a[iPdpte].u, (uint32_t)uMaxAddr));
                 }
                 else
@@ -202,13 +202,13 @@ BS3_CMN_DEF(X86PTEPAE BS3_FAR *, bs3PagingGetPte,(RTCCUINTXREG cr3, uint8_t bMod
             if ((pPdpt->a[iPdpte].u & X86_PDPE_PG_MASK) <= uMaxAddr)
                 pPD = (X86PDPAE BS3_FAR *)Bs3XptrFlatToCurrent(pPdpt->a[iPdpte].u & X86_PDPE_PG_MASK);
             else
-                BS3PAGING_DPRINTF1(("bs3PagingGetPte: out of range! iPdpte=%#x: %RX64 max=%RX32\n",
+                BS3PAGING_DPRINTF1(("bs3PagingGetPaePte: out of range! iPdpte=%#x: %RX64 max=%RX32\n",
                                     iPdpte, pPdpt->a[iPdpte].u, (uint32_t)uMaxAddr));
         }
         else
         {
             pPD = NULL;
-            BS3PAGING_DPRINTF1(("bs3PagingGetPte: out of range! uFlat=%#RX64 max=%RX32\n", uFlat, (uint32_t)uMaxAddr));
+            BS3PAGING_DPRINTF1(("bs3PagingGetPaePte: out of range! uFlat=%#RX64 max=%RX32\n", uFlat, (uint32_t)uMaxAddr));
         }
         if (pPD)
         {
@@ -219,7 +219,7 @@ BS3_CMN_DEF(X86PTEPAE BS3_FAR *, bs3PagingGetPte,(RTCCUINTXREG cr3, uint8_t bMod
                 if ((pPD->a[iPde].u & X86_PDE_PAE_PG_MASK) <= uMaxAddr)
                     pPTE = &((X86PTPAE BS3_FAR *)Bs3XptrFlatToCurrent(pPD->a[iPde].u & ~(uint64_t)PAGE_OFFSET_MASK))->a[iPte];
                 else
-                    BS3PAGING_DPRINTF1(("bs3PagingGetPte: out of range! iPde=%#x: %RX64 max=%RX32\n",
+                    BS3PAGING_DPRINTF1(("bs3PagingGetPaePte: out of range! iPde=%#x: %RX64 max=%RX32\n",
                                         iPde, pPD->a[iPde].u, (uint32_t)uMaxAddr));
             }
             else
@@ -248,7 +248,7 @@ BS3_CMN_DEF(X86PTEPAE BS3_FAR *, bs3PagingGetPte,(RTCCUINTXREG cr3, uint8_t bMod
         }
     }
     else
-        BS3PAGING_DPRINTF1(("bs3PagingGetPte: out of range! cr3=%#RX32 uMaxAddr=%#RX32\n", (uint32_t)cr3, (uint32_t)uMaxAddr));
+        BS3PAGING_DPRINTF1(("bs3PagingGetPaePte: out of range! cr3=%#RX32 uMaxAddr=%#RX32\n", (uint32_t)cr3, (uint32_t)uMaxAddr));
     return pPTE;
 }
 
@@ -313,7 +313,7 @@ BS3_CMN_DEF(int, Bs3PagingProtect,(uint64_t uFlat, uint64_t cb, uint64_t fSet, u
              */
             while (cb > 0)
             {
-                PX86PTEPAE pPte = BS3_CMN_FAR_NM(bs3PagingGetPte)(cr3, g_bBs3CurrentMode, uFlat, fUseInvlPg, &rc);
+                PX86PTEPAE pPte = BS3_CMN_FAR_NM(bs3PagingGetPaePte)(cr3, g_bBs3CurrentMode, uFlat, fUseInvlPg, &rc);
                 if (!pPte)
                     return rc;
 
@@ -345,7 +345,7 @@ BS3_CMN_DEF(int, Bs3PagingProtect,(uint64_t uFlat, uint64_t cb, uint64_t fSet, u
      * We can do this stuff in v8086 mode.
      */
     else
-        return Bs3SwitchFromV86To16BitAndCallC((FPFNBS3FAR)Bs3PagingProtect_f16, sizeof(uint64_t)*4, uFlat, cb, fSet, fClear);
+        return Bs3SwitchFromV86To16BitAndCallC((FPFNBS3FAR)Bs3PagingProtect_f16, sizeof(uint64_t) * 4, uFlat, cb, fSet, fClear);
 #endif
     return VINF_SUCCESS;
 }
@@ -361,3 +361,22 @@ BS3_CMN_DEF(int, Bs3PagingProtectPtr,(void *pv, size_t cb, uint64_t fSet, uint64
 #endif
 }
 
+
+#undef Bs3PagingGetPte
+BS3_CMN_DEF(void BS3_FAR *, Bs3PagingGetPte,(uint64_t uFlat, int *prc))
+{
+    RTCCUINTXREG const  cr3        = ASMGetCR3();
+    RTCCUINTXREG const  cr4        = g_uBs3CpuDetected & BS3CPU_F_CPUID ? ASMGetCR4() : 0;
+    bool const          fLegacyPTs = !(cr4 & X86_CR4_PAE);
+    bool const          fUseInvlPg = (g_uBs3CpuDetected & BS3CPU_TYPE_MASK) >= BS3CPU_80486;
+    int                 rc;
+    if (!prc)
+        prc = &rc;
+    if (!fLegacyPTs)
+        return BS3_CMN_FAR_NM(bs3PagingGetPaePte)(cr3,  g_bBs3CurrentMode, uFlat, fUseInvlPg, prc);
+    if (uFlat < _4G)
+        return BS3_CMN_FAR_NM(bs3PagingGetLegacyPte)(cr3, (uint32_t)uFlat, fUseInvlPg, prc);
+    *prc = VERR_OUT_OF_RANGE;
+    return NULL;
+}
+
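The new public Bs3PagingGetPte wrapper picks the legacy or PAE page walker
based on CR4.PAE and returns a pointer to the page table entry; the caller
must cast it according to the paging mode in force. A hedged sketch for
PAE/long-mode callers, not part of the patch:

    #include <bs3kit.h>

    static int bs3SketchMakeReadOnly(uint64_t uFlat) /* hypothetical helper */
    {
        int rc = VINF_SUCCESS; /* only updated on failure */
        X86PTEPAE BS3_FAR *pPte = (X86PTEPAE BS3_FAR *)Bs3PagingGetPte(uFlat, &rc);
        if (pPte)
            pPte->u &= ~(uint64_t)X86_PTE_RW; /* a real test would also flush the TLB entry */
        return rc;
    }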
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingQueryAddressInfo.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingQueryAddressInfo.c
new file mode 100644
index 0000000..5fd2a16
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingQueryAddressInfo.c
@@ -0,0 +1,149 @@
+/* $Id: bs3-cmn-PagingQueryAddressInfo.c $ */
+/** @file
+ * BS3Kit - Bs3PagingQueryAddressInfo
+ */
+
+/*
+ * Copyright (C) 2007-2016 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <bs3kit.h>
+#include <iprt/asm-amd64-x86.h>
+#include <VBox/err.h>
+
+
+#undef Bs3PagingQueryAddressInfo
+BS3_CMN_DEF(int, Bs3PagingQueryAddressInfo,(uint64_t uFlat, PBS3PAGINGINFO4ADDR pPgInfo))
+{
+    RTCCUINTXREG const  cr3        = ASMGetCR3();
+    RTCCUINTXREG const  cr4        = g_uBs3CpuDetected & BS3CPU_F_CPUID ? ASMGetCR4() : 0;
+    bool const          fLegacyPTs = !(cr4 & X86_CR4_PAE);
+    int                 rc = VERR_OUT_OF_RANGE;
+
+
+    pPgInfo->fFlags             = 0;
+    pPgInfo->u.apbEntries[0]    = NULL;
+    pPgInfo->u.apbEntries[1]    = NULL;
+    pPgInfo->u.apbEntries[2]    = NULL;
+    pPgInfo->u.apbEntries[3]    = NULL;
+
+    if (!fLegacyPTs)
+    {
+#if TMPL_BITS == 16
+        uint32_t const  uMaxAddr = BS3_MODE_IS_RM_OR_V86(g_bBs3CurrentMode) ? _1M - 1 : BS3_SEL_TILED_AREA_SIZE - 1;
+#else
+        uintptr_t const uMaxAddr = ~(uintptr_t)0;
+#endif
+        uint64_t const  fEfer    = g_uBs3CpuDetected & BS3CPU_F_LONG_MODE ? ASMRdMsr(MSR_K6_EFER) : 0;
+
+        pPgInfo->cEntries = fEfer & MSR_K6_EFER_LMA ? 4 : 3;
+        pPgInfo->cbEntry  = sizeof(X86PTEPAE);
+        if ((cr3 & X86_CR3_AMD64_PAGE_MASK) <= uMaxAddr)
+        {
+            if (   (fEfer & MSR_K6_EFER_LMA)
+                && X86_IS_CANONICAL(uFlat))
+            {
+                /* 48-bit long mode paging. */
+                pPgInfo->u.Pae.pPml4e  = (X86PML4E BS3_FAR *)Bs3XptrFlatToCurrent(cr3 & X86_CR3_AMD64_PAGE_MASK);
+                pPgInfo->u.Pae.pPml4e += (uFlat >> X86_PML4_SHIFT) & X86_PML4_MASK;
+                if (!pPgInfo->u.Pae.pPml4e->n.u1Present)
+                    rc = VERR_PAGE_NOT_PRESENT;
+                else if ((pPgInfo->u.Pae.pPml4e->u & X86_PML4E_PG_MASK) <= uMaxAddr)
+                {
+                    pPgInfo->u.Pae.pPdpe  = (X86PDPE BS3_FAR *)Bs3XptrFlatToCurrent(pPgInfo->u.Pae.pPml4e->u & X86_PML4E_PG_MASK);
+                    pPgInfo->u.Pae.pPdpe += (uFlat >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
+                    if (!pPgInfo->u.Pae.pPdpe->n.u1Present)
+                        rc = VERR_PAGE_NOT_PRESENT;
+                    else if (pPgInfo->u.Pae.pPdpe->b.u1Size)
+                        rc = VINF_SUCCESS;
+                    else
+                        rc = VINF_TRY_AGAIN;
+                }
+            }
+            else if (   !(fEfer & MSR_K6_EFER_LMA)
+                     && uFlat <= _4G)
+            {
+                /* 32-bit PAE paging. */
+                pPgInfo->u.Pae.pPdpe  = (X86PDPE BS3_FAR *)Bs3XptrFlatToCurrent(cr3 & X86_CR3_PAE_PAGE_MASK);
+                pPgInfo->u.Pae.pPdpe += ((uint32_t)uFlat >> X86_PDPT_SHIFT) & X86_PDPT_MASK_PAE;
+                if (!pPgInfo->u.Pae.pPdpe->n.u1Present)
+                    rc = VERR_PAGE_NOT_PRESENT;
+                else
+                    rc = VINF_TRY_AGAIN;
+            }
+
+            /* Common code for the PD and PT levels. */
+            if (   rc == VINF_TRY_AGAIN
+                && (pPgInfo->u.Pae.pPdpe->u & X86_PDPE_PG_MASK) <= uMaxAddr)
+            {
+                rc = VERR_OUT_OF_RANGE;
+                pPgInfo->u.Pae.pPde  = (X86PDEPAE BS3_FAR *)Bs3XptrFlatToCurrent(pPgInfo->u.Pae.pPdpe->u & X86_PDPE_PG_MASK);
+                pPgInfo->u.Pae.pPde += (uFlat >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK;
+                if (!pPgInfo->u.Pae.pPde->n.u1Present)
+                    rc = VERR_PAGE_NOT_PRESENT;
+                else if (pPgInfo->u.Pae.pPde->b.u1Size)
+                    rc = VINF_SUCCESS;
+                else if ((pPgInfo->u.Pae.pPde->u & X86_PDE_PAE_PG_MASK) <= uMaxAddr)
+                {
+                    pPgInfo->u.Pae.pPte = (X86PTEPAE BS3_FAR *)Bs3XptrFlatToCurrent(pPgInfo->u.Pae.pPde->u & X86_PDE_PAE_PG_MASK);
+                    rc = VINF_SUCCESS;
+                }
+            }
+            else if (rc == VINF_TRY_AGAIN)
+                rc = VERR_OUT_OF_RANGE;
+        }
+    }
+    else
+    {
+#if TMPL_BITS == 16
+        uint32_t const  uMaxAddr = BS3_MODE_IS_RM_OR_V86(g_bBs3CurrentMode) ? _1M - 1 : BS3_SEL_TILED_AREA_SIZE - 1;
+#else
+        uint32_t const  uMaxAddr = UINT32_MAX;
+#endif
+
+        pPgInfo->cEntries = 2;
+        pPgInfo->cbEntry  = sizeof(X86PTE);
+        if (   uFlat < _4G
+            && cr3 <= uMaxAddr)
+        {
+            pPgInfo->u.Legacy.pPde  = (X86PDE BS3_FAR *)Bs3XptrFlatToCurrent(cr3 & X86_CR3_PAGE_MASK);
+            pPgInfo->u.Legacy.pPde += ((uint32_t)uFlat >> X86_PD_SHIFT) & X86_PD_MASK;
+            if (!pPgInfo->u.Legacy.pPde->b.u1Present)
+                rc = VERR_PAGE_NOT_PRESENT;
+            else if (pPgInfo->u.Legacy.pPde->b.u1Size)
+                rc = VINF_SUCCESS;
+            else if (pPgInfo->u.Legacy.pPde->u <= uMaxAddr)
+            {
+                pPgInfo->u.Legacy.pPte  = (X86PTE BS3_FAR *)Bs3XptrFlatToCurrent(pPgInfo->u.Legacy.pPde->u & X86_PDE_PG_MASK);
+                pPgInfo->u.Legacy.pPte += ((uint32_t)uFlat >> X86_PT_SHIFT) & X86_PT_MASK;
+                if (pPgInfo->u.Legacy.pPte->n.u1Present)
+                    rc = VINF_SUCCESS;
+                else
+                    rc = VERR_PAGE_NOT_PRESENT;
+            }
+        }
+    }
+    return rc;
+}
+
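Bs3PagingQueryAddressInfo walks the tables for an arbitrary address without
modifying them, recording the entry pointers and reporting distinct statuses
for non-present pages and unreachable structures. A hedged sketch, not part of
the patch:

    #include <bs3kit.h>
    #include <VBox/err.h>

    static void bs3SketchQueryAddr(uint64_t uFlat) /* hypothetical helper */
    {
        BS3PAGINGINFO4ADDR PgInfo;
        int rc = Bs3PagingQueryAddressInfo(uFlat, &PgInfo);
        if (rc == VINF_SUCCESS)
            Bs3TestPrintf("%RX64: %u levels, %u-byte entries\n",
                          uFlat, (unsigned)PgInfo.cEntries, (unsigned)PgInfo.cbEntry);
        else if (rc == VERR_PAGE_NOT_PRESENT)
            Bs3TestPrintf("%RX64: not present\n", uFlat);
    }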
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingSetupCanonicalTraps.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingSetupCanonicalTraps.c
index 41c6d7e..052431f 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingSetupCanonicalTraps.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PagingSetupCanonicalTraps.c
@@ -62,8 +62,8 @@ BS3_CMN_PROTO_STUB(void BS3_FAR *, Bs3PagingSetupCanonicalTraps,(void))
             /*
              * Get the page table entries first to avoid having to unmap things.
              */
-            paLoPtes = bs3PagingGetPte(g_PhysPagingRootLM, BS3_MODE_LM64, UINT64_C(0x00007fffffffe000), false, &rc);
-            paHiPtes = bs3PagingGetPte(g_PhysPagingRootLM, BS3_MODE_LM64, UINT64_C(0xffff800000000000), false, &rc);
+            paLoPtes = bs3PagingGetPaePte(g_PhysPagingRootLM, BS3_MODE_LM64, UINT64_C(0x00007fffffffe000), false, &rc);
+            paHiPtes = bs3PagingGetPaePte(g_PhysPagingRootLM, BS3_MODE_LM64, UINT64_C(0xffff800000000000), false, &rc);
             if (!paHiPtes || !paLoPtes)
             {
                 Bs3TestPrintf("warning: Bs3PagingSetupCanonicalTraps - failed to get PTEs!\n");
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PitIrqHandler.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PitIrqHandler.c
index 3a0a335..14166cb 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PitIrqHandler.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-PitIrqHandler.c
@@ -1,6 +1,6 @@
 /* $Id: bs3-cmn-PitIrqHandler.c $ */
 /** @file
- * BS3Kit - The PIT IRQ Handler and associated data..
+ * BS3Kit - The PIT IRQ Handler and associated data.
  */
 
 /*
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxConvertToRingX.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxConvertToRingX.c
index b04128a..33d88f9 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxConvertToRingX.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxConvertToRingX.c
@@ -71,7 +71,7 @@ static uint16_t bs3RegCtxConvertRealSegToRingX(uint16_t uSeg, uint8_t bRing)
  * Transforms a protected mode selector to a different ring.
  *
  * @returns Adjusted protected mode selector.
- * @param   uSeg            The current selector value.
+ * @param   uSel            The current selector value.
  * @param   bRing           The target ring.
  * @param   iReg            Register index.
  */
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
index 8eede3c..f71f9cc 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
@@ -4,7 +4,7 @@
  */
 
 /*
- * Copyright (C) 2007-2016 Oracle Corporation
+ * Copyright (C) 2007-2017 Oracle Corporation
  *
  * This file is part of VirtualBox Open Source Edition (OSE), as
  * available from http://www.virtualbox.org. This file is free software;
@@ -34,8 +34,8 @@
 #undef Bs3RegCtxPrint
 BS3_CMN_DEF(void, Bs3RegCtxPrint,(PCBS3REGCTX pRegCtx))
 {
-    //if (BS3_MODE_IS_64BIT_CODE(pRegCtx->bMode))
-    //{
+    if (!BS3_MODE_IS_64BIT_CODE(pRegCtx->bMode))
+    {
         Bs3TestPrintf("eax=%08RX32 ebx=%08RX32 ecx=%08RX32 edx=%08RX32 esi=%08RX32 edi=%08RX32\n",
                       pRegCtx->rax.u32, pRegCtx->rbx.u32, pRegCtx->rcx.u32, pRegCtx->rdx.u32, pRegCtx->rsi.u32, pRegCtx->rdi.u32);
         Bs3TestPrintf("eip=%08RX32 esp=%08RX32 ebp=%08RX32 efl=%08RX32 cr0=%08RX32 cr2=%08RX32\n",
@@ -44,12 +44,24 @@ BS3_CMN_DEF(void, Bs3RegCtxPrint,(PCBS3REGCTX pRegCtx))
         Bs3TestPrintf("cs=%04RX16   ds=%04RX16 es=%04RX16 fs=%04RX16 gs=%04RX16   ss=%04RX16 cr3=%08RX32 cr4=%08RX32\n",
                       pRegCtx->cs, pRegCtx->ds, pRegCtx->es, pRegCtx->fs, pRegCtx->gs, pRegCtx->ss,
                       pRegCtx->cr3.u32, pRegCtx->cr4.u32);
-        Bs3TestPrintf("tr=%04RX16 ldtr=%04RX16 cpl=%d   mode=%#x fbFlags=%#x\n",
-                      pRegCtx->tr, pRegCtx->ldtr, pRegCtx->bCpl, pRegCtx->bMode, pRegCtx->fbFlags);
-    //}
-    //else
-    //{
-    //
-    //}
+    }
+    else
+    {
+        Bs3TestPrintf("rax=%016RX64 rbx=%016RX64 rcx=%016RX64 rdx=%016RX64\n",
+                      pRegCtx->rax.u64, pRegCtx->rbx.u64, pRegCtx->rcx.u64, pRegCtx->rdx.u64);
+        Bs3TestPrintf("rsi=%016RX64 rdi=%016RX64 r8 =%016RX64 r9 =%016RX64\n",
+                      pRegCtx->rsi.u64, pRegCtx->rdi.u64, pRegCtx->r8.u64, pRegCtx->r9.u64);
+        Bs3TestPrintf("r10=%016RX64 r11=%016RX64 r12=%016RX64 r13=%016RX64\n",
+                      pRegCtx->r10.u64, pRegCtx->r11.u64, pRegCtx->r12.u64, pRegCtx->r13.u64);
+        Bs3TestPrintf("r14=%016RX64 r15=%016RX64  cr0=%08RX64  cr4=%08RX64  cr3=%08RX64\n",
+                      pRegCtx->r14.u64, pRegCtx->r15.u64, pRegCtx->cr0.u64, pRegCtx->cr4.u64, pRegCtx->cr3.u64);
+        Bs3TestPrintf("rip=%016RX64 rsp=%016RX64 rbp=%016RX64 rfl=%08RX64\n",
+                      pRegCtx->rip.u64, pRegCtx->rsp.u64, pRegCtx->rbp.u64, pRegCtx->rflags.u64);
+        Bs3TestPrintf("cs=%04RX16   ds=%04RX16 es=%04RX16 fs=%04RX16 gs=%04RX16   ss=%04RX16            cr2=%016RX64\n",
+                      pRegCtx->cs, pRegCtx->ds, pRegCtx->es, pRegCtx->fs, pRegCtx->gs, pRegCtx->ss,
+                      pRegCtx->cr2.u64);
+    }
+    Bs3TestPrintf("tr=%04RX16 ldtr=%04RX16 cpl=%d   mode=%#x fbFlags=%#x\n",
+                  pRegCtx->tr, pRegCtx->ldtr, pRegCtx->bCpl, pRegCtx->bMode, pRegCtx->fbFlags);
 }
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxRestore.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxRestore.asm
index a4791e5..b4455e0 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxRestore.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxRestore.asm
@@ -308,6 +308,8 @@ BS3_PROC_BEGIN_CMN Bs3RegCtxRestore, BS3_PBC_HYBRID
         cmp     byte [BS3_ONLY_16BIT(es:) BS3_DATA16_WRT(g_bBs3CurrentMode)], BS3_MODE_RM
         je      .skip_control_regs
 %endif
+        test    byte [xBX + BS3REGCTX.fbFlags], BS3REG_CTX_F_NO_TR_LDTR
+        jnz     .skip_control_regs
 
         ; LDTR
         sldt    ax
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxSaveEx.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxSaveEx.asm
index 5d7e255..0fb906e 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxSaveEx.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxSaveEx.asm
@@ -32,6 +32,9 @@
 ;*  External Symbols                                                                                                             *
 ;*********************************************************************************************************************************
 BS3_EXTERN_DATA16   g_bBs3CurrentMode
+%if ARCH_BITS != 64
+BS3_EXTERN_DATA16   g_uBs3CpuDetected
+%endif
 
 TMPL_BEGIN_TEXT
 BS3_EXTERN_CMN      Bs3Panic
@@ -96,6 +99,9 @@ TONLY16 CPU 8086
         BS3_CALL_CONV_PROLOG 3
         push    xBP
         mov     xBP, xSP
+%if ARCH_BITS == 64
+        push    rcx                     ; Save pRegCtx
+%endif
 
         ;
         ; Get the CPU bitcount part of the current mode.
@@ -110,18 +116,19 @@ TONLY16 CPU 8086
         ; Reserve extra stack space.  Make sure we've got 20h here in case we
         ; are saving a 64-bit context.
         ;
-        mov     ax, [xBP + xCB + cbCurRetAddr + sCB + xCB]
+TONLY16 mov     ax, [xBP + xCB + cbCurRetAddr + sCB + xCB]
+TNOT16  movzx   eax, word [xBP + xCB + cbCurRetAddr + sCB + xCB]
 %ifdef BS3_STRICT
-        cmp     ax, 4096
+        cmp     xAX, 4096
         jb      .extra_stack_ok
         call    Bs3Panic
 .extra_stack_ok:
 %endif
-        cmp     ax, 20h
+        cmp     xAX, 20h
         jae     .at_least_20h_extra_stack
-        add     ax, 20h
+        add     xAX, 20h
 .at_least_20h_extra_stack:
-        sub     sp, ax
+        sub     xSP, xAX
 
         ;
         ; Are we just saving the mode we're already in?
@@ -150,7 +157,6 @@ TONLY16 CPU 8086
         BS3_CALL_CONV_EPILOG 3
         BS3_HYBRID_RET
 
-
         ;
         ; Turns out we have to do switch to a different bitcount before saving.
         ;
@@ -216,7 +222,7 @@ TONLY16 CPU 386
         call    _Bs3SwitchTo%[TMPL_BITS]Bit_c16
 %endif
         BS3_SET_BITS TMPL_BITS
-        jmp     .return
+        jmp     .supplement_and_return
         TMPL_BEGIN_TEXT
 
 TONLY16 CPU 386
@@ -254,7 +260,7 @@ TONLY16 CPU 386
 
         call    _Bs3SwitchTo%[TMPL_BITS]Bit_c16
         BS3_SET_BITS TMPL_BITS
-        jmp     .return
+        jmp     .supplement_and_return
 TMPL_BEGIN_TEXT
 %endif
 
@@ -290,15 +296,17 @@ TMPL_BEGIN_TEXT
         je      .code_32_back_to_v86
         call    _Bs3SwitchTo16Bit_c32
         BS3_SET_BITS TMPL_BITS
-        jmp     .return
+        jmp     .supplement_and_return
 .code_32_back_to_v86:
         BS3_SET_BITS 32
         call    _Bs3SwitchTo16BitV86_c32
+        BS3_SET_BITS TMPL_BITS
+        jmp     .return
  %else
         call    _Bs3SwitchTo64Bit_c32
- %endif
         BS3_SET_BITS TMPL_BITS
-        jmp     .return
+        jmp     .supplement_and_return
+ %endif
 %endif
 
 
@@ -332,6 +340,111 @@ TMPL_BEGIN_TEXT
         BS3_SET_BITS TMPL_BITS
         jmp     .return
 %endif
-BS3_PROC_END_CMN   Bs3RegCtxSaveEx
 
 
+        ;
+        ; Supplement the state out of the current context and then return.
+        ;
+.supplement_and_return:
+%if ARCH_BITS == 16
+        CPU 8086
+        ; Skip 286 and older.  Also make 101% sure we're not in real mode or v8086 mode.
+        cmp     byte [BS3_DATA16_WRT(g_uBs3CpuDetected)], BS3CPU_80386
+        jb      .return                 ; Just skip if 286 or older.
+        test    byte [BS3_DATA16_WRT(g_bBs3CurrentMode)], BS3_MODE_CODE_V86
+        jnz     .return
+        cmp     byte [BS3_DATA16_WRT(g_bBs3CurrentMode)], BS3_MODE_RM
+        je      .return                 ; paranoia: SLDT/STR would #UD in real mode
+        CPU 386
+%endif
+
+        ; Load the context pointer into a suitable register.
+%if ARCH_BITS == 64
+ %define pRegCtx rcx
+        mov     rcx, [xBP - xCB]
+%elif ARCH_BITS == 32
+ %define pRegCtx ecx
+        mov     ecx, [xBP + xCB + cbCurRetAddr]
+%else
+ %define pRegCtx es:bx
+        push    es
+        push    bx
+        les     bx, [xBP + xCB + cbCurRetAddr]
+%endif
+%if ARCH_BITS == 64
+        ; If we're in 64-bit mode we can capture and restore the high bits.
+        test    byte [pRegCtx + BS3REGCTX.fbFlags], BS3REG_CTX_F_NO_AMD64
+        jz      .supplemented_64bit_registers
+        mov     [pRegCtx + BS3REGCTX.r8], r8
+        mov     [pRegCtx + BS3REGCTX.r9], r9
+        mov     [pRegCtx + BS3REGCTX.r10], r10
+        mov     [pRegCtx + BS3REGCTX.r11], r11
+        mov     [pRegCtx + BS3REGCTX.r12], r12
+        mov     [pRegCtx + BS3REGCTX.r13], r13
+        mov     [pRegCtx + BS3REGCTX.r14], r14
+        mov     [pRegCtx + BS3REGCTX.r15], r15
+        shr     rax, 32
+        mov     [pRegCtx + BS3REGCTX.rax + 4], eax
+        mov     rax, rbx
+        shr     rax, 32
+        mov     [pRegCtx + BS3REGCTX.rbx + 4], eax
+        mov     rax, rcx
+        shr     rax, 32
+        mov     [pRegCtx + BS3REGCTX.rcx + 4], eax
+        mov     rax, rdx
+        shr     rax, 32
+        mov     [pRegCtx + BS3REGCTX.rdx + 4], eax
+        mov     rax, rsp
+        shr     rax, 32
+        mov     [pRegCtx + BS3REGCTX.rsp + 4], eax
+        mov     rax, rbp
+        shr     rax, 32
+        mov     [pRegCtx + BS3REGCTX.rbp + 4], eax
+        mov     rax, rsi
+        shr     rax, 32
+        mov     [pRegCtx + BS3REGCTX.rsi + 4], eax
+        mov     rax, rdi
+        shr     rax, 32
+        mov     [pRegCtx + BS3REGCTX.rdi + 4], eax
+        and     byte [pRegCtx + BS3REGCTX.fbFlags], ~BS3REG_CTX_F_NO_AMD64
+.supplemented_64bit_registers:
+%endif
+        ; The rest requires ring-0 (at least during restore).
+        mov     ax, ss
+        test    ax, 3
+        jnz     .done_supplementing
+
+        ; Do control registers.
+        test    byte [pRegCtx + BS3REGCTX.fbFlags], BS3REG_CTX_F_NO_CR2_CR3 | BS3REG_CTX_F_NO_CR0_IS_MSW | BS3REG_CTX_F_NO_CR4
+        jz      .supplemented_control_registers
+        mov     sAX, cr0
+        mov     [pRegCtx + BS3REGCTX.cr0], sAX
+        mov     sAX, cr2
+        mov     [pRegCtx + BS3REGCTX.cr2], sAX
+        mov     sAX, cr3
+        mov     [pRegCtx + BS3REGCTX.cr3], sAX
+        and     byte [pRegCtx + BS3REGCTX.fbFlags], ~(BS3REG_CTX_F_NO_CR2_CR3 | BS3REG_CTX_F_NO_CR0_IS_MSW)
+
+%if ARCH_BITS != 64
+        test    byte [1 + BS3_DATA16_WRT(g_uBs3CpuDetected)], (BS3CPU_F_CPUID >> 8)
+        jz      .supplemented_control_registers
+%endif
+        mov     sAX, cr4
+        mov     [pRegCtx + BS3REGCTX.cr4], sAX
+        and     byte [pRegCtx + BS3REGCTX.fbFlags], ~BS3REG_CTX_F_NO_CR4
+.supplemented_control_registers:
+
+        ; Supply tr and ldtr if necessary
+        test    byte [pRegCtx + BS3REGCTX.fbFlags], BS3REG_CTX_F_NO_TR_LDTR
+        jz      .done_supplementing
+        str     [pRegCtx + BS3REGCTX.tr]
+        sldt    [pRegCtx + BS3REGCTX.ldtr]
+        and     byte [pRegCtx + BS3REGCTX.fbFlags], ~BS3REG_CTX_F_NO_TR_LDTR
+
+.done_supplementing:
+TONLY16 pop     bx
+TONLY16 pop     es
+        jmp     .return
+%undef pRegCtx
+BS3_PROC_END_CMN   Bs3RegCtxSaveEx
+
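The new .supplement_and_return path fills in whatever the foreign-bitcount
save could not capture directly (the high halves of the 64-bit GPRs in 64-bit
mode, plus the control registers and TR/LDTR when in ring-0) and clears the
matching BS3REG_CTX_F_NO_* flags. A hedged caller sketch, not part of the
patch, assuming the C prototype (pRegCtx, bBitMode, cbExtraStack) implied by
the three-argument calling convention, and that BS3_MODE_CODE_64 names the
64-bit code attribute:

    #include <bs3kit.h>

    static void bs3SketchSaveFarCtx(void) /* hypothetical helper */
    {
        BS3REGCTX Ctx;
        /* Capture a 64-bit context from non-64-bit code, reserving 256 bytes
           of extra stack for the mode switch (values are illustrative). */
        Bs3RegCtxSaveEx(&Ctx, BS3_MODE_CODE_64, 256);
        Bs3RegCtxPrint(&Ctx); /* now prints a proper 64-bit layout, see above */
    }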
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
index c5a8c77..730a7bd 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFar32ToFlat32.c
@@ -30,7 +30,7 @@
 #undef Bs3SelFar32ToFlat32
 BS3_CMN_DEF(uint32_t, Bs3SelFar32ToFlat32,(uint32_t off, uint16_t uSel))
 {
-    if (g_bBs3CurrentMode == BS3_MODE_RM)
+    if (BS3_MODE_IS_RM_OR_V86(g_bBs3CurrentMode))
         return ((uint32_t)uSel << 4) + off;
     return Bs3SelProtFar32ToFlat32(off, uSel);
 }
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFlatDataToProtFar16.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFlatDataToProtFar16.asm
index 15d6c9b..1ee09b8 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFlatDataToProtFar16.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFlatDataToProtFar16.asm
@@ -65,7 +65,8 @@ BS3_PROC_BEGIN_CMN Bs3SelFlatDataToProtFar16, BS3_PBC_NEAR      ; Far stub gener
         jnz     .not_stack
         mov     dx, BS3_SEL_R0_SS16
 %else
-        mov     eax, [xBP + xCB + cbCurRetAddr]
+TNOT64  mov     eax, [xBP + xCB + cbCurRetAddr]
+TONLY64 mov     eax, ecx
         test    eax, 0ffff0000h
         jnz     .not_stack
         or      eax, BS3_SEL_R0_SS16 << 16
@@ -110,11 +111,8 @@ BS3_PROC_BEGIN_CMN Bs3SelFlatDataToProtFar16, BS3_PBC_NEAR      ; Far stub gener
 
 %else
         ; Convert upper 16-bit to tiled selector.
- %if TMPL_BITS == 32
-        mov     eax, [xBP + xCB + cbCurRetAddr]
- %else
-        mov     rax, rcx
- %endif
+TNOT64  mov     eax, [xBP + xCB + cbCurRetAddr]
+TONLY64 mov     rax, rcx
  %ifdef BS3_STRICT
         cmp     xAX, BS3_SEL_TILED_AREA_SIZE
         jb      .address_ok
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelProtModeCodeToRealMode.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelProtModeCodeToRealMode.asm
index 8660f3d..3866ff5 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelProtModeCodeToRealMode.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelProtModeCodeToRealMode.asm
@@ -83,7 +83,6 @@ AssertCompile(BS3_SEL_RING_SHIFT == 8)
         cmp     ax, BS3_SEL_R0_CS16_CNF_EO & 0xff
         je      .bs3text16
 .panic:
-hlt
         extern  BS3_CMN_NM(Bs3Panic)
         call    BS3_CMN_NM(Bs3Panic)
         jmp     .return
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelSetup16BitCode.c
similarity index 53%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelSetup16BitCode.c
index 8eede3c..4e0adf5 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelSetup16BitCode.c
@@ -1,6 +1,6 @@
-/* $Id: bs3-cmn-RegCtxPrint.c $ */
+/* $Id: bs3-cmn-SelSetup16BitCode.c $ */
 /** @file
- * BS3Kit - Bs3RegCtxPrint
+ * BS3Kit - Bs3SelSetup16BitCode
  */
 
 /*
@@ -28,28 +28,24 @@
 /*********************************************************************************************************************************
 *   Header Files                                                                                                                 *
 *********************************************************************************************************************************/
-#include "bs3kit-template-header.h"
+#include <bs3kit.h>
 
 
-#undef Bs3RegCtxPrint
-BS3_CMN_DEF(void, Bs3RegCtxPrint,(PCBS3REGCTX pRegCtx))
+#undef Bs3SelSetup16BitCode
+BS3_CMN_DEF(void, Bs3SelSetup16BitCode,(X86DESC BS3_FAR *pDesc, uint32_t uBaseAddr, uint8_t bDpl))
 {
-    //if (BS3_MODE_IS_64BIT_CODE(pRegCtx->bMode))
-    //{
-        Bs3TestPrintf("eax=%08RX32 ebx=%08RX32 ecx=%08RX32 edx=%08RX32 esi=%08RX32 edi=%08RX32\n",
-                      pRegCtx->rax.u32, pRegCtx->rbx.u32, pRegCtx->rcx.u32, pRegCtx->rdx.u32, pRegCtx->rsi.u32, pRegCtx->rdi.u32);
-        Bs3TestPrintf("eip=%08RX32 esp=%08RX32 ebp=%08RX32 efl=%08RX32 cr0=%08RX32 cr2=%08RX32\n",
-                      pRegCtx->rip.u32, pRegCtx->rsp.u32, pRegCtx->rbp.u32, pRegCtx->rflags.u32,
-                      pRegCtx->cr0.u32, pRegCtx->cr2.u32);
-        Bs3TestPrintf("cs=%04RX16   ds=%04RX16 es=%04RX16 fs=%04RX16 gs=%04RX16   ss=%04RX16 cr3=%08RX32 cr4=%08RX32\n",
-                      pRegCtx->cs, pRegCtx->ds, pRegCtx->es, pRegCtx->fs, pRegCtx->gs, pRegCtx->ss,
-                      pRegCtx->cr3.u32, pRegCtx->cr4.u32);
-        Bs3TestPrintf("tr=%04RX16 ldtr=%04RX16 cpl=%d   mode=%#x fbFlags=%#x\n",
-                      pRegCtx->tr, pRegCtx->ldtr, pRegCtx->bCpl, pRegCtx->bMode, pRegCtx->fbFlags);
-    //}
-    //else
-    //{
-    //
-    //}
+    pDesc->Gen.u16LimitLow = UINT16_C(0xffff);
+    pDesc->Gen.u16BaseLow  = (uint16_t)uBaseAddr;
+    pDesc->Gen.u8BaseHigh1 = (uint8_t)(uBaseAddr >> 16);
+    pDesc->Gen.u4Type      = X86_SEL_TYPE_ER_ACC;
+    pDesc->Gen.u1DescType  = 1; /* data/code */
+    pDesc->Gen.u2Dpl       = bDpl & 3;
+    pDesc->Gen.u1Present   = 1;
+    pDesc->Gen.u4LimitHigh = 0;
+    pDesc->Gen.u1Available = 0;
+    pDesc->Gen.u1Long      = 0;
+    pDesc->Gen.u1DefBig    = 0;
+    pDesc->Gen.u1Granularity = 0;
+    pDesc->Gen.u8BaseHigh2 = (uint8_t)(uBaseAddr >> 24);
 }
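
Bs3SelSetup16BitCode fills in a 16-bit, 64 KiB, execute/read code descriptor at the given base. As a sanity check, packing the same fields by hand gives the expected raw descriptor; a hedged sketch (assumes X86_SEL_TYPE_ER_ACC == 0xb, the standard "execute/read, accessed" type):

    #include <stdint.h>
    #include <stdio.h>

    /* Build the 8-byte descriptor the same way the bitfields above do. */
    static uint64_t Desc16BitCode(uint32_t uBase, uint8_t bDpl)
    {
        uint64_t uDesc = 0xffff;                        /* u16LimitLow        */
        uDesc |= (uint64_t)(uBase & 0xffffff) << 16;    /* base bits 23:0     */
        uDesc |= (uint64_t)(0xb                         /* type: ER, accessed */
                            | (1 << 4)                  /* u1DescType = 1     */
                            | ((bDpl & 3) << 5)         /* u2Dpl              */
                            | (1 << 7)) << 40;          /* u1Present          */
        uDesc |= (uint64_t)(uBase >> 24) << 56;         /* base bits 31:24    */
        return uDesc;   /* limit 19:16, AVL, L, D/B, G stay zero */
    }

    int main(void)
    {
        /* Base 0x00012340, DPL 0 -> 0x00009b012340ffff. */
        printf("%#llx\n", (unsigned long long)Desc16BitCode(0x00012340, 0));
        return 0;
    }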
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelSetup16BitData.c
similarity index 53%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelSetup16BitData.c
index 8eede3c..2659a93 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxPrint.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelSetup16BitData.c
@@ -1,6 +1,6 @@
-/* $Id: bs3-cmn-RegCtxPrint.c $ */
+/* $Id: bs3-cmn-SelSetup16BitData.c $ */
 /** @file
- * BS3Kit - Bs3RegCtxPrint
+ * BS3Kit - Bs3SelSetup16BitData
  */
 
 /*
@@ -28,28 +28,24 @@
 /*********************************************************************************************************************************
 *   Header Files                                                                                                                 *
 *********************************************************************************************************************************/
-#include "bs3kit-template-header.h"
+#include <bs3kit.h>
 
 
-#undef Bs3RegCtxPrint
-BS3_CMN_DEF(void, Bs3RegCtxPrint,(PCBS3REGCTX pRegCtx))
+#undef Bs3SelSetup16BitData
+BS3_CMN_DEF(void, Bs3SelSetup16BitData,(X86DESC BS3_FAR *pDesc, uint32_t uBaseAddr))
 {
-    //if (BS3_MODE_IS_64BIT_CODE(pRegCtx->bMode))
-    //{
-        Bs3TestPrintf("eax=%08RX32 ebx=%08RX32 ecx=%08RX32 edx=%08RX32 esi=%08RX32 edi=%08RX32\n",
-                      pRegCtx->rax.u32, pRegCtx->rbx.u32, pRegCtx->rcx.u32, pRegCtx->rdx.u32, pRegCtx->rsi.u32, pRegCtx->rdi.u32);
-        Bs3TestPrintf("eip=%08RX32 esp=%08RX32 ebp=%08RX32 efl=%08RX32 cr0=%08RX32 cr2=%08RX32\n",
-                      pRegCtx->rip.u32, pRegCtx->rsp.u32, pRegCtx->rbp.u32, pRegCtx->rflags.u32,
-                      pRegCtx->cr0.u32, pRegCtx->cr2.u32);
-        Bs3TestPrintf("cs=%04RX16   ds=%04RX16 es=%04RX16 fs=%04RX16 gs=%04RX16   ss=%04RX16 cr3=%08RX32 cr4=%08RX32\n",
-                      pRegCtx->cs, pRegCtx->ds, pRegCtx->es, pRegCtx->fs, pRegCtx->gs, pRegCtx->ss,
-                      pRegCtx->cr3.u32, pRegCtx->cr4.u32);
-        Bs3TestPrintf("tr=%04RX16 ldtr=%04RX16 cpl=%d   mode=%#x fbFlags=%#x\n",
-                      pRegCtx->tr, pRegCtx->ldtr, pRegCtx->bCpl, pRegCtx->bMode, pRegCtx->fbFlags);
-    //}
-    //else
-    //{
-    //
-    //}
+    pDesc->Gen.u16LimitLow = UINT16_C(0xffff);
+    pDesc->Gen.u16BaseLow  = (uint16_t)uBaseAddr;
+    pDesc->Gen.u8BaseHigh1 = (uint8_t)(uBaseAddr >> 16);
+    pDesc->Gen.u4Type      = X86_SEL_TYPE_RW_ACC;
+    pDesc->Gen.u1DescType  = 1; /* data/code */
+    pDesc->Gen.u2Dpl       = 3;
+    pDesc->Gen.u1Present   = 1;
+    pDesc->Gen.u4LimitHigh = 0;
+    pDesc->Gen.u1Available = 0;
+    pDesc->Gen.u1Long      = 0;
+    pDesc->Gen.u1DefBig    = 0;
+    pDesc->Gen.u1Granularity = 0;
+    pDesc->Gen.u8BaseHigh2 = (uint8_t)(uBaseAddr >> 24);
 }
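
The data variant above differs only in the type nibble (X86_SEL_TYPE_RW_ACC, read/write accessed) and a hardcoded DPL of 3, which should yield an access byte of 0xf3 instead of the 0x9b a ring-0 code descriptor gets; limit, granularity and default operand size are identical.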
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-StrFormatV.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-StrFormatV.c
index 690ed6e..1441f38 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-StrFormatV.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-StrFormatV.c
@@ -214,11 +214,13 @@ static size_t bs3StrFormatU64(PBS3FMTSTATE pState, uint64_t uValue)
 {
 #if ARCH_BITS != 64
     /* Avoid 64-bit division by formatting 64-bit numbers as hex if they're higher than _4G. */
-    if (   pState->uBase == 10
-        && !(uValue >> 32)) /* uValue <= UINT32_MAX does not work, trouble with 64-bit compile time math! */
-        return bs3StrFormatU32(pState, uValue);
-    pState->fFlags |= STR_F_SPECIAL;
-    pState->uBase = 16;
+    if (pState->uBase == 10)
+    {
+        if (!(uValue >> 32)) /* uValue <= UINT32_MAX does not work, trouble with 64-bit compile time math! */
+            return bs3StrFormatU32(pState, uValue);
+        pState->fFlags |= STR_F_SPECIAL;
+        pState->uBase   = 16;
+    }
 #endif
 
     {
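
The restructuring also fixes a subtle bug: previously a 64-bit value was forced to "special" hex output even when the caller had already asked for base 16, because the flag/base assignments sat outside the base-10 check. A hedged sketch of the corrected decision (STR_F_SPECIAL's real value lives in the BS3Kit source; the 1 here is a placeholder):

    #include <stdint.h>

    enum { STR_F_SPECIAL = 1 };              /* placeholder for the sketch */

    /* Decimal output without 64-bit division is only possible when the
       value fits in 32 bits; otherwise fall back to 0x-prefixed hex. */
    static unsigned PickBase(unsigned uBase, uint64_t uValue, unsigned *pfFlags)
    {
        if (uBase == 10 && (uValue >> 32) != 0)
        {
            *pfFlags |= STR_F_SPECIAL;       /* adds the 0x prefix */
            return 16;
        }
        return uBase;                        /* unchanged otherwise */
    }

    int main(void)
    {
        unsigned fFlags = 0;
        return PickBase(10, UINT64_C(0x123456789), &fFlags) == 16 ? 0 : 1;
    }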
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SwitchTo16Bit.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SwitchTo16Bit.asm
index 4cf4b33..4fa419a 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SwitchTo16Bit.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SwitchTo16Bit.asm
@@ -74,7 +74,7 @@ BS3_PROC_BEGIN_CMN Bs3SwitchTo16Bit, BS3_PBC_NEAR
         xRETF
 
 BS3_BEGIN_TEXT16
-.sixteen_bit:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit
 
         ; Load 16-bit segment registers.
         add     ax, BS3_SEL_R0_SS16 - BS3_SEL_R0_CS16
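
BS3_GLOBAL_LOCAL_LABEL presumably declares the label as a linker-visible symbol while keeping NASM's local-label (.name) syntax for nearby jumps; that matters for labels like this one, which are jumped to from code assembled for a different bitness or segment, a pattern repeated throughout the mode-switching files below.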
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SwitchTo32Bit.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SwitchTo32Bit.asm
index 5f23160..39526c2 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SwitchTo32Bit.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SwitchTo32Bit.asm
@@ -143,7 +143,8 @@ BS3_SET_BITS 32
  %endif
         popfd
         pop     eax
-        ret
+TONLY64 ret     4
+TNOT64  ret
 %endif
 BS3_PROC_END_CMN   Bs3SwitchTo32Bit
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestDoModesByOneHlp.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestDoModesByOneHlp.asm
index aaa8fb0..55d84be 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestDoModesByOneHlp.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestDoModesByOneHlp.asm
@@ -86,7 +86,7 @@ BS3_PROC_BEGIN _Bs3TestCallDoerTo16_c32
         push    ax                                                  ; Worker bMode argument.
 
         ; Assuming a real mode far pointer, convert it to protected mode before calling it.
-        push    word [BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent) + 2]
+        push    word [2 + BS3_DATA16_WRT(BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent))]
         call    _Bs3SelRealModeCodeToProtMode_c16
         add     sp, 2
 
@@ -94,7 +94,7 @@ BS3_PROC_BEGIN _Bs3TestCallDoerTo16_c32
         push    word .return                                        ; return address
 
         push    ax                                                  ; call converted selector
-        push    word [BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent)]  ; call offset
+        push    word [BS3_DATA16_WRT(BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent))]  ; call offset
         retf
 
 .return:
@@ -129,14 +129,14 @@ BS3_PROC_BEGIN _Bs3TestCallDoerTo16_c64
         push    ax                                                  ; Worker bMode argument.
 
         ; Assuming a real mode far pointer, convert it to protected mode before calling it.
-        push    word [BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent) + 2]
+        push    word [2 + BS3_DATA16_WRT(BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent))]
         call    _Bs3SelRealModeCodeToProtMode_c16
         add     sp, 2
 
         push    cs                                                  ; return selector
         push    word .return                                        ; return address
         push    ax                                                  ; call converted selector
-        push    word [BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent)]  ; call offset
+        push    word [BS3_DATA16_WRT(BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent))] ; call offset
         retf
 
 .return:
@@ -156,7 +156,7 @@ BS3_PROC_END   _Bs3TestCallDoerTo16_c64
         ;
 
 BS3_BEGIN_TEXT16
-BS3_SET_BITS 32
+BS3_SET_BITS 16
 BS3_PROC_BEGIN _Bs3TestCallDoerTo32_f16
         push    xBP
         mov     xBP, xSP
@@ -179,7 +179,7 @@ BS3_PROC_BEGIN _Bs3TestCallDoerTo32_f16
         test    al, BS3_MODE_CODE_V86
         jnz     .return_to_v86          ; Need to figure this while we still have the mode value.
 
-        call    [BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent)]
+        call    [BS3_DATA16_WRT(BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent))]
 
         ; Switch back to 16-bit mode.
         extern  _Bs3SwitchTo16Bit_c32
@@ -191,7 +191,7 @@ BS3_PROC_BEGIN _Bs3TestCallDoerTo32_f16
 
         BS3_SET_BITS 32
 .return_to_v86:
-        call    [BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent)]
+        call    [BS3_DATA16_WRT(BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent))]
 
         ; Switch back to v8086 mode.
         extern  _Bs3SwitchTo16BitV86_c32
@@ -221,7 +221,7 @@ BS3_PROC_BEGIN _Bs3TestCallDoerTo32_c64
         BS3_SET_BITS 32
 
         push    eax                     ; Worker bMode argument.
-        call    [BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent)]
+        call    [BS3_DATA16_WRT(BS3_CMN_NM(g_pfnBs3TestDoModesByOneCurrent))]
 
         ; Switch back to 64-bit mode.
         extern  _Bs3SwitchTo64Bit_c32
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestFailed.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestFailed.c
index 3da2fd4..fcb0ee9 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestFailed.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestFailed.c
@@ -34,7 +34,7 @@
 
 
 /**
- * @impl_callback_method{FNBS3STRFORMATOUTPUT,
+ * @callback_method_impl{FNBS3STRFORMATOUTPUT,
  *      Used by Bs3TestFailedV and Bs3TestSkippedV.}
  */
 BS3_DECL_CALLBACK(size_t) bs3TestFailedStrOutput(char ch, void BS3_FAR *pvUser)
@@ -95,7 +95,7 @@ BS3_DECL_CALLBACK(size_t) bs3TestFailedStrOutput(char ch, void BS3_FAR *pvUser)
  * Equivalent to RTTestIFailedV.
  */
 #undef Bs3TestFailedV
-BS3_CMN_DEF(void, Bs3TestFailedV,(const char *pszFormat, va_list va))
+BS3_CMN_DEF(bool, Bs3TestFailedV,(const char *pszFormat, va_list va))
 {
     BS3TESTFAILEDBUF Buf;
 
@@ -112,6 +112,7 @@ BS3_CMN_DEF(void, Bs3TestFailedV,(const char *pszFormat, va_list va))
     Buf.fNewLine = false;
     Buf.cchBuf   = 0;
     Bs3StrFormatV(pszFormat, va, bs3TestFailedStrOutput, &Buf);
+    return false;
 }
 
 
@@ -119,12 +120,13 @@ BS3_CMN_DEF(void, Bs3TestFailedV,(const char *pszFormat, va_list va))
  * Equivalent to RTTestIFailedF.
  */
 #undef Bs3TestFailedF
-BS3_CMN_DEF(void, Bs3TestFailedF,(const char *pszFormat, ...))
+BS3_CMN_DEF(bool, Bs3TestFailedF,(const char *pszFormat, ...))
 {
     va_list va;
     va_start(va, pszFormat);
     BS3_CMN_NM(Bs3TestFailedV)(pszFormat, va);
     va_end(va);
+    return false;
 }
 
 
@@ -132,8 +134,8 @@ BS3_CMN_DEF(void, Bs3TestFailedF,(const char *pszFormat, ...))
  * Equivalent to RTTestIFailed.
  */
 #undef Bs3TestFailed
-BS3_CMN_DEF(void, Bs3TestFailed,(const char *pszMessage))
+BS3_CMN_DEF(bool, Bs3TestFailed,(const char *pszMessage))
 {
-    BS3_CMN_NM(Bs3TestFailedF)("%s", pszMessage);
+    return BS3_CMN_NM(Bs3TestFailedF)("%s", pszMessage);
 }
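
The Bs3TestFailed family now returns bool (always false), so a test helper can report a failure and propagate it in a single statement. A usage sketch (CheckValue is hypothetical; the prototype matches the change above):

    #include <stdbool.h>
    #include <stdint.h>

    extern bool Bs3TestFailedF(const char *pszFormat, ...);  /* from bs3kit.h */

    /* Hypothetical caller: report a mismatch and return failure in one line. */
    static bool CheckValue(uint32_t uActual, uint32_t uExpected)
    {
        if (uActual != uExpected)
            return Bs3TestFailedF("got %RX32, expected %RX32", uActual, uExpected);
        return true;
    }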
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestPrintf.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestPrintf.c
index 15e414f..121e4a1 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestPrintf.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestPrintf.c
@@ -58,7 +58,7 @@ typedef struct BS3TESTPRINTBUF
 
 
 /**
- * @impl_callback_method{FNBS3STRFORMATOUTPUT, Prints to screen and VMMDev}
+ * @callback_method_impl{FNBS3STRFORMATOUTPUT, Prints to screen and VMMDev}
  */
 static BS3_DECL_CALLBACK(size_t) bs3TestPrintfStrOutput(char ch, void BS3_FAR *pvUser)
 {
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestSub.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestSub.c
index 8c62e17..3b271b2 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestSub.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TestSub.c
@@ -65,6 +65,9 @@ BS3_CMN_DEF(void, Bs3TestSubV,(const char *pszFormat, va_list va))
        Bs3PrintChr(' ');
     while (cch++ < 49);
     Bs3PrintStr(" TESTING\n");
+
+    /* The sub-test result is not yet reported. */
+    g_fbBs3SubTestReported = false;
 }
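
Clearing g_fbBs3SubTestReported here tells the framework that the new sub-test's verdict has not been printed yet; presumably the cleanup path (bs3TestSubCleanup, declared in bs3-cmn-test.h further down) consumes it so each verdict is emitted exactly once. A hedged sketch of such a consumer:

    #include <stdbool.h>

    extern bool g_fbBs3SubTestReported;   /* set once the verdict is printed */

    /* Hypothetical consumer: report the previous sub-test exactly once. */
    static void SubTestCleanupSketch(void)
    {
        if (!g_fbBs3SubTestReported)
        {
            /* ...print PASSED/FAILED for the previous sub-test here... */
            g_fbBs3SubTestReported = true;
        }
    }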
 
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TrapDefaultHandler.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TrapDefaultHandler.c
index 08adc13..1cf1392 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TrapDefaultHandler.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-TrapDefaultHandler.c
@@ -225,6 +225,7 @@ BS3_CMN_DEF(void, Bs3TrapDefaultHandler,(PBS3TRAPFRAME pTrapFrame))
     /*
      * Fatal.
      */
+    Bs3TestPrintf("*** GURU ***\n");
     Bs3TrapPrintFrame(pTrapFrame);
     Bs3Panic();
 }
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-paging.h b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-paging.h
index 8c1e0b9..6e84d94 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-paging.h
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-paging.h
@@ -44,9 +44,9 @@ extern uint32_t g_PhysPagingRootLM;
 
 #undef bs3PagingGetLegacyPte
 BS3_CMN_PROTO_STUB(X86PTE BS3_FAR *, bs3PagingGetLegacyPte,(RTCCUINTXREG cr3, uint32_t uFlat, bool fUseInvlPg, int *prc));
-#undef bs3PagingGetPte
-BS3_CMN_PROTO_STUB(X86PTEPAE BS3_FAR *, bs3PagingGetPte,(RTCCUINTXREG cr3, uint8_t bMode, uint64_t uFlat,
-                                                         bool fUseInvlPg, int *prc));
+#undef bs3PagingGetPaePte
+BS3_CMN_PROTO_STUB(X86PTEPAE BS3_FAR *, bs3PagingGetPaePte,(RTCCUINTXREG cr3, uint8_t bMode, uint64_t uFlat,
+                                                            bool fUseInvlPg, int *prc));
 
 RT_C_DECLS_END
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-test.h b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-test.h
index 5099161..f3563e2 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-test.h
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-test.h
@@ -137,7 +137,7 @@ BS3_DECL(bool) bs3TestIsVmmDevTestingPresent(void);
 BS3_DECL(void) bs3TestSubCleanup(void);
 
 /**
- * @impl_callback_method{FNBS3STRFORMATOUTPUT,
+ * @callback_method_impl{FNBS3STRFORMATOUTPUT,
  *      Used by Bs3TestFailedV and Bs3TestSkippedV.
  *
  *      The @a pvUser parameter must point a BS3TESTFAILEDBUF structure. }
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-first-common.mac b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-first-common.mac
index 3fb64ef..cbee35a 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-first-common.mac
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-first-common.mac
@@ -96,7 +96,7 @@ BS3_GLOBAL_DATA Bs3Data16_Size, 4
 BS3_GLOBAL_DATA Bs3Data16Thru64Text32And64_TotalSize, 4
     dd  BS3_DATA_NM(Bs3Data64_EndOfSegment) wrt BS3KIT_GRPNM_DATA16
 BS3_GLOBAL_DATA Bs3TotalImageSize, 4
-    dd  BS3_DATA_NM(Bs3Data64_EndOfSegment) wrt CGROUP16
+    dd  BS3_DATA_NM(Bs3Text64_EndOfSegment) wrt CGROUP16 ; ASSUMES TEXT64 is last.
 
 BS3_GLOBAL_DATA Bs3Text16_Size, 2
     dw  BS3_DATA_NM(Bs3Text16_EndOfSegment) wrt CGROUP16
@@ -158,7 +158,7 @@ GROUP BS3KIT_GRPNM_DATA16 \
 ;
 section BS3RMTEXT16_START   align=16 CLASS=BS3CLASS16RMCODE PUBLIC USE16
 BS3_GLOBAL_DATA Bs3RmText16_StartOfSegment, 0
-    db      10,13,'eye-catcher: BS3RMTEXT16',10,13
+    ;db      10,13,'eye-catcher: BS3RMTEXT16',10,13 - disabled: it messes up a switch in the C code; the alternative is fConvertFixupp in VBoxBs3ObjConverter.cpp.
 BS3_BEGIN_RMTEXT16
 section BS3RMTEXT16_END   align=1 CLASS=BS3CLASS16RMCODE PUBLIC USE16
 BS3_GLOBAL_DATA Bs3RmText16_EndOfSegment, 0
@@ -170,7 +170,7 @@ GROUP BS3GROUPRMTEXT16 BS3RMTEXT16_START BS3RMTEXT16 BS3RMTEXT16_END
 ;
 section BS3X0TEXT16_START   align=16 CLASS=BS3CLASS16X0CODE PUBLIC USE16
 BS3_GLOBAL_DATA Bs3X0Text16_StartOfSegment, 0
-    db      10,13,'eye-catcher: BS3X0TEXT16',10,13
+    ;db      10,13,'eye-catcher: BS3X0TEXT16',10,13 - disabled: it messes up a switch in the C code; the alternative is fConvertFixupp in VBoxBs3ObjConverter.cpp.
 BS3_BEGIN_X0TEXT16 4
 section BS3X0TEXT16_END   align=16 CLASS=BS3CLASS16X0CODE PUBLIC USE16
 BS3_GLOBAL_DATA Bs3X0Text16_EndOfSegment, 0
@@ -182,7 +182,7 @@ GROUP BS3GROUPX0TEXT16 BS3X0TEXT16_START BS3X0TEXT16 BS3X0TEXT16_END
 ;
 section BS3X1TEXT16_START   align=16 CLASS=BS3CLASS16X1CODE PUBLIC USE16
 BS3_GLOBAL_DATA Bs3X1Text16_StartOfSegment, 0
-    db      10,13,'eye-catcher: BS3X1TEXT16',10,13
+    ;db      10,13,'eye-catcher: BS3X1TEXT16',10,13 - disabled: it messes up a switch in the C code; the alternative is fConvertFixupp in VBoxBs3ObjConverter.cpp.
 BS3_BEGIN_X1TEXT16 4
 section BS3X1TEXT16_END   align=16 CLASS=BS3CLASS16X1CODE PUBLIC USE16
 BS3_GLOBAL_DATA Bs3X1Text16_EndOfSegment, 0
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-CpuDetect.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-CpuDetect.asm
index fff5447..e4736fa 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-CpuDetect.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-CpuDetect.asm
@@ -293,11 +293,16 @@ CPU 586
         pop     xAX                     ; restore PAE+PProOrNewer
         test    edx, X86_CPUID_EXT_FEATURE_EDX_LONG_MODE
         jz      .no_long_mode
-        or      ax, BS3CPU_F_CPUID_EXT_LEAVES | BS3CPU_F_LONG_MODE
-        jmp     .return
+        or      ah, ((BS3CPU_F_CPUID_EXT_LEAVES | BS3CPU_F_LONG_MODE) >> 8)
+        jmp     .no_check_for_nx
 .no_long_mode:
-        or      ax, BS3CPU_F_CPUID_EXT_LEAVES
+        or      ah, (BS3CPU_F_CPUID_EXT_LEAVES >> 8)
+.no_check_for_nx:
+        test    edx, X86_CPUID_EXT_FEATURE_EDX_NX
+        jz      .return
+        or      ax, BS3CPU_F_NX
         jmp     .return
+
 .no_ext_leaves:
         pop     xAX                     ; restore PAE+PProOrNewer
 
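CPU detection now also records NX support: X86_CPUID_EXT_FEATURE_EDX_NX is bit 20 of EDX in extended leaf 0x80000001, and the existing flags are OR-ed into ah as (value >> 8) immediates so the code can fall through to the new test. The equivalent check in C on a GCC/Clang x86 host (a sketch; BS3CPU_F_NX's encoding lives in bs3kit):

    #include <cpuid.h>
    #include <stdbool.h>

    /* Leaf 0x80000001, EDX bit 20 = NX/XD, mirroring the detection above. */
    static bool HasNx(void)
    {
        unsigned uEax, uEbx, uEcx, uEdx;
        if (!__get_cpuid(0x80000001, &uEax, &uEbx, &uEcx, &uEdx))
            return false;                /* no extended CPUID leaves */
        return (uEdx & (1u << 20)) != 0;
    }

    int main(void) { return HasNx() ? 0 : 1; }
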
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-NameShortLower.asm
similarity index 60%
copy from src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-NameShortLower.asm
index d9ea987..a2a7f1b 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-NameShortLower.asm
@@ -1,6 +1,6 @@
-; $Id: bs3-cpu-decoding-1-asm.asm $
+; $Id: bs3-mode-NameShortLower.asm $
 ;; @file
-; BS3Kit - bs3-cpu-decoding-1, assembly helpers and template instantiation.
+; BS3Kit - g_szBs3ModeNameShortLower_xxx
 ;
 
 ;
@@ -24,15 +24,13 @@
 ; terms and conditions of either the GPL or the CDDL or both.
 ;
 
+%include "bs3kit-template-header.mac"
 
-;*********************************************************************************************************************************
-;*  Header Files                                                                                                                 *
-;*********************************************************************************************************************************
-%include "bs3kit.mac"
 
-
-;
-; Instantiate code templates.
-;
-BS3_INSTANTIATE_TEMPLATE_ESSENTIALS      "bs3-cpu-decoding-1-template.mac"
+BS3_BEGIN_DATA16
+%undef  MY_MODE_NAME_STR
+%defstr MY_MODE_NAME_STR TMPL_MODE_LNAME
+BS3_GLOBAL_NAME_EX RT_CONCAT3(g_szBs3ModeNameShortLower, _, TMPL_MODE_LNAME), , %strlen(MY_MODE_NAME_STR)
+BS3_GLOBAL_NAME_EX RT_CONCAT3(_g_szBs3ModeNameShortLower, _, TMPL_MODE_LNAME), , %strlen(MY_MODE_NAME_STR)
+        db MY_MODE_NAME_STR, 0
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchTo32BitAndCallC.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchTo32BitAndCallC.asm
new file mode 100644
index 0000000..825dc8e
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchTo32BitAndCallC.asm
@@ -0,0 +1,154 @@
+; $Id: bs3-mode-SwitchTo32BitAndCallC.asm $
+;; @file
+; BS3Kit - bs3SwitchTo32BitAndCallC
+;
+
+;
+; Copyright (C) 2007-2016 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+; VirtualBox OSE distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+
+;*********************************************************************************************************************************
+;*  Header Files                                                                                                                 *
+;*********************************************************************************************************************************
+%include "bs3kit-template-header.mac"
+
+
+;*********************************************************************************************************************************
+;*  External Symbols                                                                                                             *
+;*********************************************************************************************************************************
+BS3_EXTERN_DATA16   g_bBs3CurrentMode
+TMPL_BEGIN_TEXT
+
+%ifdef BS3_STRICT
+BS3_EXTERN_CMN      Bs3Panic
+%endif
+
+%if BS3_MODE_IS_RM_OR_V86(TMPL_MODE)
+BS3_EXTERN_CMN      Bs3SelRealModeCodeToFlat
+%endif
+
+%if TMPL_MODE == BS3_MODE_RM
+extern              NAME(Bs3SwitchToPE32_rm)
+extern              NAME(Bs3SwitchToRM_pe32)
+%elif !BS3_MODE_IS_32BIT_CODE(TMPL_MODE)
+BS3_EXTERN_CMN      Bs3SwitchTo32Bit
+ %if BS3_MODE_IS_16BIT_CODE_NO_V86(TMPL_MODE)
+extern              _Bs3SwitchTo16Bit_c32
+ %elif BS3_MODE_IS_V86(TMPL_MODE)
+extern              _Bs3SwitchTo16BitV86_c32
+ %elif !BS3_MODE_IS_32BIT_CODE(TMPL_MODE)
+extern              _Bs3SwitchTo64_c32
+ %endif
+%endif
+
+
+
+;;
+; @cproto   BS3_MODE_PROTO_STUB(int, Bs3SwitchTo32BitAndCallC,(PFNBS3FARADDRCONV fpfnCall, unsigned cbParams, ...));
+;
+BS3_PROC_BEGIN_MODE Bs3SwitchTo32BitAndCallC, BS3_PBC_HYBRID
+        BS3_CALL_CONV_PROLOG 4
+TONLY16 inc     xBP
+        push    xBP
+        mov     xBP, xSP
+        push    xSI
+
+        ;
+        ; Push the arguments first.
+        ;
+TONLY16 mov     si,  [xBP + xCB + cbCurRetAddr + sCB]
+TNOT16  mov     esi, [xBP + xCB + cbCurRetAddr + sCB]
+%ifdef BS3_STRICT
+        test    xSI, 3
+        jz      .cbParams_ok
+        call    Bs3Panic
+.cbParams_ok:
+        cmp     byte [BS3_DATA16_WRT(g_bBs3CurrentMode)], TMPL_MODE
+        je      .mode_ok
+        call    Bs3Panic
+.mode_ok:
+%endif
+        add     xSI, sCB - 1            ; round it up to nearest push size / dword.
+        and     xSI, ~(sCB - 1)
+        jz      .done_pushing           ; skip if zero
+.push_more:
+        push    xPRE [xBP + xCB + cbCurRetAddr + sCB + xCB + xSI - xCB]
+        sub     xSI, xCB
+        jnz     .push_more
+        mov     xSI, xAX                ; restore xSI
+.done_pushing:
+
+        ;
+        ; Load fpfnCall into eax.
+        ;
+%if BS3_MODE_IS_RM_OR_V86(TMPL_MODE)
+        push    sPRE [xBP + xCB + cbCurRetAddr]
+        BS3_CALL Bs3SelRealModeCodeToFlat, 1
+        add     xSP, sCB
+        rol     eax, 16
+        mov     ax, dx
+        rol     eax, 16
+%else
+        mov     eax, [xBP + xCB + cbCurRetAddr]
+%endif
+
+        ;
+        ; Switch to 32-bit mode, if this is real mode pick PE32.
+        ;
+%if TMPL_MODE == BS3_MODE_RM
+        call    NAME(Bs3SwitchToPE32_rm)
+        BS3_SET_BITS 32
+%elif !BS3_MODE_IS_32BIT_CODE(TMPL_MODE)
+        call    Bs3SwitchTo32Bit
+        BS3_SET_BITS 32
+%endif
+
+        ;
+        ; Make the call.
+        ;
+        call    eax
+
+        ;
+        ; Return, preserving xAX.
+        ;
+%if BS3_MODE_IS_RM_OR_V86(TMPL_MODE)
+        mov     edx, eax
+        shr     edx, 16
+%endif
+%if TMPL_MODE == BS3_MODE_RM
+        call    NAME(Bs3SwitchToRM_pe32)
+%elif BS3_MODE_IS_16BIT_CODE_NO_V86(TMPL_MODE)
+        call    _Bs3SwitchTo16Bit_c32
+%elif BS3_MODE_IS_V86(TMPL_MODE)
+        call    _Bs3SwitchTo16BitV86_c32
+%elif !BS3_MODE_IS_32BIT_CODE(TMPL_MODE)
+        call    _Bs3SwitchTo64_c32
+%endif
+        BS3_SET_BITS TMPL_BITS
+
+        ; Epilog.
+        lea     xSP, [xBP - xCB]
+        pop     xSI
+        pop     xBP
+TONLY16 dec     xBP
+        BS3_CALL_CONV_EPILOG 4
+        BS3_HYBRID_RET
+BS3_PROC_END_MODE   Bs3SwitchTo32BitAndCallC
+
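
The new routine pushes cbParams bytes of arguments (which must be dword-aligned, per the BS3_STRICT check), converts fpfnCall to a flat 32-bit address, switches to a 32-bit mode (PE32 when starting from real mode), makes the call, and switches back with the result preserved in eax (dx:ax for 16-bit callers). A hedged usage sketch based on the @cproto above (MyWorker32 is hypothetical):

    #include <stdint.h>

    /* Sketch prototype only; bs3kit.h declares the real mode-mangled one. */
    extern int Bs3SwitchTo32BitAndCallC(void *fpfnCall, unsigned cbParams, ...);
    extern void MyWorker32(uint32_t uArg);   /* compiled as 32-bit code */

    void Example(void)
    {
        /* One dword argument; cbParams must be a multiple of 4. */
        Bs3SwitchTo32BitAndCallC((void *)&MyWorker32, sizeof(uint32_t),
                                 UINT32_C(0x42));
    }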
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToLM32.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToLM32.asm
index 0fcb910..f981a35 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToLM32.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToLM32.asm
@@ -78,7 +78,7 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToLM32, BS3_PBC_NEAR
  %if TMPL_BITS == 16
         jmp     .thirty_two_bit_segment
 BS3_BEGIN_TEXT32
-.thirty_two_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .thirty_two_bit_segment
  %endif
 
         push    eax
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16.asm
index 737fc1e..6830b0a 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16.asm
@@ -24,16 +24,25 @@
 ; terms and conditions of either the GPL or the CDDL or both.
 ;
 
+;*********************************************************************************************************************************
+;*  Header Files                                                                                                                 *
+;*********************************************************************************************************************************
 %include "bs3kit-template-header.mac"
 
+
+;*********************************************************************************************************************************
+;*  External Symbols                                                                                                             *
+;*********************************************************************************************************************************
 %ifndef TMPL_PAE16
+BS3_BEGIN_TEXT16
 extern  NAME(Bs3EnteredMode_pae16)
  %ifdef TMPL_PAE32
  BS3_EXTERN_CMN Bs3SwitchTo16Bit
- %else
  %endif
+TMPL_BEGIN_TEXT
 %endif
 
+
 ;;
 ; Switch to 16-bit PAE paged protected mode from any other mode.
 ;
@@ -74,14 +83,14 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPAE16, BS3_PBC_NEAR
         ; Switch to 16-bit text segment and prepare for returning in 16-bit mode.
         ;
  %if TMPL_BITS != 16
-        shl     xPRE [xSP + xCB], TMPL_BITS - 16    ; Adjust the return address.
+        shl     xPRE [xSP], TMPL_BITS - 16  ; Adjust the return address.
         add     xSP, xCB - 2
 
         ; Must be in 16-bit segment when calling Bs3SwitchToRM and Bs3SwitchTo16Bit.
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
 
  %ifdef TMPL_PAE32
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16_32.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16_32.asm
index c5dae1a..65134c2 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16_32.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16_32.asm
@@ -52,7 +52,7 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPAE16_32, BS3_PBC_NEAR
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
         extern  TMPL_NM(Bs3SwitchToPAE16)
         call    TMPL_NM(Bs3SwitchToPAE16)
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16_V86.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16_V86.asm
index 87f9b89..e7311fb 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16_V86.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE16_V86.asm
@@ -56,7 +56,7 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPAE16_V86, BS3_PBC_NEAR
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
 
         ;
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE32.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE32.asm
index 4ab5a3d..670011f 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE32.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE32.asm
@@ -61,16 +61,18 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPAE32, BS3_PBC_NEAR
         ;
  %if TMPL_BITS != 32
   %if TMPL_BITS > 32
-        shl     xPRE [xSP + xCB], 32    ; Adjust the return address from 64-bit to 32-bit.
+        shl     xPRE [xSP], 32          ; Adjust the return address from 64-bit to 32-bit.
         add     rsp, xCB - 4
   %else
         push    word 0                  ; Reserve space to expand the return address.
   %endif
+ %endif
+ %if TMPL_BITS != 16
         ; Must be in 16-bit segment when calling Bs3SwitchTo16Bit.
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
 
         ;
@@ -120,7 +122,7 @@ BS3_BEGIN_TEXT16
         mov     cr0, eax
         jmp     BS3_SEL_R0_CS32:dword .thirty_two_bit wrt FLAT
 BS3_BEGIN_TEXT32
-.thirty_two_bit:
+BS3_GLOBAL_LOCAL_LABEL .thirty_two_bit
 
         ;
         ; Convert the (now) real mode stack pointer to 32-bit flat.
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE32_16.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE32_16.asm
index 507b15a..0a03c79 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE32_16.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPAE32_16.asm
@@ -47,8 +47,8 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPAE32_16, BS3_PBC_NEAR
         ret
 
 %elif TMPL_MODE == BS3_MODE_PAE32
-        extern  BS3_CMN_NM(Bs3SwitchTo32Bit)
-        jmp     BS3_CMN_NM(Bs3SwitchTo32Bit)
+        extern  BS3_CMN_NM(Bs3SwitchTo16Bit)
+        jmp     BS3_CMN_NM(Bs3SwitchTo16Bit)
 
 %else
         ;
@@ -65,7 +65,7 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPAE32_16, BS3_PBC_NEAR
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
         extern  _Bs3SwitchTo16Bit_c32
  %if TMPL_BITS == 32
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16.asm
index 8c403ad..0704518 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16.asm
@@ -67,14 +67,14 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPE16, BS3_PBC_NEAR
         ; Switch to 16-bit mode and prepare for returning in 16-bit mode.
         ;
  %if TMPL_BITS != 16
-        shl     xPRE [xSP + xCB], TMPL_BITS - 16    ; Adjust the return address.
+        shl     xPRE [xSP], TMPL_BITS - 16    ; Adjust the return address.
         add     xSP, xCB - 2
 
         ; Must be in 16-bit segment when calling Bs3SwitchTo16Bit.
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
 
         ;
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16_32.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16_32.asm
index f237033..3e1fa65 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16_32.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16_32.asm
@@ -52,7 +52,7 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPE16_32, BS3_PBC_NEAR
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
         extern  TMPL_NM(Bs3SwitchToPE16)
         call    TMPL_NM(Bs3SwitchToPE16)
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16_V86.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16_V86.asm
index 4e7b8d1..a2699e4 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16_V86.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE16_V86.asm
@@ -56,7 +56,7 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPE16_V86, BS3_PBC_NEAR
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
 
         ;
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE32.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE32.asm
index 55993e3..885f72a 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE32.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE32.asm
@@ -60,18 +60,19 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPE32, BS3_PBC_NEAR
         ;
  %if TMPL_BITS != 32
   %if TMPL_BITS > 32
-        shl     xPRE [xSP + xCB], 32    ; Adjust the return address from 64-bit to 32-bit.
+        shl     xPRE [xSP], 32          ; Adjust the return address from 64-bit to 32-bit.
         add     rsp, xCB - 4
   %else
         push    word 0                  ; Reserve space to expand the return address.
   %endif
+ %endif
+ %if TMPL_BITS != 16
         ; Must be in 16-bit segment when calling Bs3SwitchTo16Bit.
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
-
         ;
         ; Switch to real mode.
         ;
@@ -98,7 +99,7 @@ BS3_BEGIN_TEXT16
         mov     cr0, eax
         jmp     BS3_SEL_R0_CS32:dword .thirty_two_bit wrt FLAT
 BS3_BEGIN_TEXT32
-.thirty_two_bit:
+BS3_GLOBAL_LOCAL_LABEL .thirty_two_bit
 
         ;
         ; Convert the (now) real mode stack pointer to 32-bit flat.
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE32_16.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE32_16.asm
index 7dac2cc..5d48fbe 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE32_16.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPE32_16.asm
@@ -47,8 +47,8 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPE32_16, BS3_PBC_NEAR
         ret
 
 %elif TMPL_MODE == BS3_MODE_PE32
-        extern  BS3_CMN_NM(Bs3SwitchTo32Bit)
-        jmp     BS3_CMN_NM(Bs3SwitchTo32Bit)
+        extern  BS3_CMN_NM(Bs3SwitchTo16Bit)
+        jmp     BS3_CMN_NM(Bs3SwitchTo16Bit)
 
 %else
         ;
@@ -65,7 +65,7 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPE32_16, BS3_PBC_NEAR
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
         extern  _Bs3SwitchTo16Bit_c32
  %if TMPL_BITS == 32
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16.asm
index e6b91d5..34c105a 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16.asm
@@ -24,16 +24,24 @@
 ; terms and conditions of either the GPL or the CDDL or both.
 ;
 
+;*********************************************************************************************************************************
+;*  Header Files                                                                                                                 *
+;*********************************************************************************************************************************
 %include "bs3kit-template-header.mac"
 
+;*********************************************************************************************************************************
+;*  External Symbols                                                                                                             *
+;*********************************************************************************************************************************
 %ifndef TMPL_PP16
+BS3_BEGIN_TEXT16
 extern  NAME(Bs3EnteredMode_pp16)
  %ifdef TMPL_PP32
  BS3_EXTERN_CMN Bs3SwitchTo16Bit
- %else
  %endif
+TMPL_BEGIN_TEXT
 %endif
 
+
 ;;
 ; Switch to 16-bit paged protected mode from any other mode.
 ;
@@ -75,14 +83,14 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPP16, BS3_PBC_NEAR
         ; Switch to 16-bit text segment and prepare for returning in 16-bit mode.
         ;
  %if TMPL_BITS != 16
-        shl     xPRE [xSP + xCB], TMPL_BITS - 16    ; Adjust the return address.
+        shl     xPRE [xSP], TMPL_BITS - 16    ; Adjust the return address.
         add     xSP, xCB - 2
 
         ; Must be in 16-bit segment when calling Bs3SwitchToRM and Bs3SwitchTo16Bit.
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
 
  %ifdef TMPL_PP32
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16_32.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16_32.asm
index aa01943..af634b8 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16_32.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16_32.asm
@@ -52,7 +52,7 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPP16_32, BS3_PBC_NEAR
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
         extern  TMPL_NM(Bs3SwitchToPP16)
         call    TMPL_NM(Bs3SwitchToPP16)
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16_V86.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16_V86.asm
index 7e7a50d..e9de5cf 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16_V86.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP16_V86.asm
@@ -56,7 +56,7 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPP16_V86, BS3_PBC_NEAR
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
 
         ;
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP32.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP32.asm
index 06d01df..b34fb74 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP32.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP32.asm
@@ -61,16 +61,18 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPP32, BS3_PBC_NEAR
         ;
  %if TMPL_BITS != 32
   %if TMPL_BITS > 32
-        shl     xPRE [xSP + xCB], 32    ; Adjust the return address from 64-bit to 32-bit.
+        shl     xPRE [xSP], 32          ; Adjust the return address from 64-bit to 32-bit.
         add     rsp, xCB - 4
   %else
         push    word 0                  ; Reserve space to expand the return address.
   %endif
+ %endif
+ %if TMPL_BITS != 16
         ; Must be in 16-bit segment when calling Bs3SwitchTo16Bit.
         jmp     .sixteen_bit_segment
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
  %endif
 
         ;
@@ -128,7 +130,7 @@ BS3_BEGIN_TEXT16
         mov     cr0, eax
         jmp     BS3_SEL_R0_CS32:dword .thirty_two_bit wrt FLAT
 BS3_BEGIN_TEXT32
-.thirty_two_bit:
+BS3_GLOBAL_LOCAL_LABEL .thirty_two_bit
         ;
         ; Convert the (now) real mode stack pointer to 32-bit flat.
         ;
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP32_16.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP32_16.asm
index 70daaf0..8e76cf6 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP32_16.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToPP32_16.asm
@@ -47,8 +47,8 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToPP32_16, BS3_PBC_NEAR
         ret
 
 %elif TMPL_MODE == BS3_MODE_PP32
-        extern  BS3_CMN_NM(Bs3SwitchTo32Bit)
-        jmp     BS3_CMN_NM(Bs3SwitchTo32Bit)
+        extern  BS3_CMN_NM(Bs3SwitchTo16Bit)
+        jmp     BS3_CMN_NM(Bs3SwitchTo16Bit)
 
 %else
         ;
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToRM.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToRM.asm
index db47075..f31f9ff 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToRM.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-SwitchToRM.asm
@@ -241,7 +241,7 @@ BS3_PROC_BEGIN_MODE Bs3SwitchToRM, BS3_PBC_NEAR
         jmp     .sixteen_bit_segment wrt FLAT
 BS3_BEGIN_TEXT16
         BS3_SET_BITS TMPL_BITS
-.sixteen_bit_segment:
+BS3_GLOBAL_LOCAL_LABEL .sixteen_bit_segment
 
         extern  BS3_CMN_NM(Bs3SwitchTo16Bit)
         call    BS3_CMN_NM(Bs3SwitchTo16Bit)
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModes.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModes.c
index c87006e..f30f7da 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModes.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModes.c
@@ -1,6 +1,6 @@
 /* $Id: bs3-mode-TestDoModes.c $ */
 /** @file
- * BS3Kit - Bs3TestDoModeTests
+ * BS3Kit - Bs3TestDoModes
  */
 
 /*
@@ -36,59 +36,7 @@
 # include "bs3kit-template-header.h"
 # include "bs3-cmn-test.h"
 #endif
-
-
-/*********************************************************************************************************************************
-*   Defined Constants And Macros                                                                                                 *
-*********************************************************************************************************************************/
-/** @def CONV_TO_FLAT
- * Get flat address.  In 16-bit the parameter is a real mode far address, while
- * in 32-bit and 64-bit modes it is already flat.
- */
-/** @def CONV_TO_PROT_FAR16
- * Get a 32-bit value that makes a protected mode far 16:16 address.
- */
-/** @def CONV_TO_RM_FAR16
- * Get a 32-bit value that makes a real mode far 16:16 address.  In 16-bit mode
- * this is already what we've got, except must be converted to uint32_t.
- */
-#if ARCH_BITS == 16
-# define CONV_TO_FLAT(a_fpfn)           (((uint32_t)BS3_FP_SEG(a_fpfn) << 4) + BS3_FP_OFF(a_fpfn))
-# define CONV_TO_PROT_FAR16(a_fpfn)     RT_MAKE_U32(BS3_FP_OFF(a_fpfn), Bs3SelRealModeCodeToProtMode(BS3_FP_SEG(a_fpfn)))
-# define CONV_TO_RM_FAR16(a_fpfn)       RT_MAKE_U32(BS3_FP_OFF(a_fpfn), BS3_FP_SEG(a_fpfn))
-#else
-# define CONV_TO_FLAT(a_fpfn)           ((uint32_t)(uintptr_t)(a_fpfn))
-# define CONV_TO_PROT_FAR16(a_fpfn)     Bs3SelFlatCodeToProtFar16((uint32_t)(uintptr_t)(a_fpfn))
-# define CONV_TO_RM_FAR16(a_fpfn)       Bs3SelFlatCodeToRealMode( (uint32_t)(uintptr_t)(a_fpfn))
-#endif
-
-
-/*********************************************************************************************************************************
-*   Assembly Symbols                                                                                                             *
-*********************************************************************************************************************************/
-/* These are in the same code segment as this code, so no FAR necessary. */
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInRM)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16_32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16_V86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE32_16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPEV86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16_32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16_V86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP32_16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPPV86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16_32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16_V86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE32_16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAEV86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM64)(uint32_t uFlatAddrCallback);
+#include "bs3-mode-TestDoModes.h"
 
 
 
@@ -205,7 +153,9 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
         const char *pszFmtStr = "Error #%u (%#x) in %s!\n";
         bool        fSkipped  = true;
         uint8_t     bErrNo;
-        Bs3TestSub(paEntries[i].pszSubTest);
+
+        if (paEntries[i].pszSubTest != NULL)
+            Bs3TestSub(paEntries[i].pszSubTest);
 
 #define PRE_DO_CALL(a_szModeName) do { if (fVerbose) Bs3TestPrintf("...%s\n", a_szModeName); } while (0)
 #define CHECK_RESULT(a_szModeName) \
@@ -252,7 +202,7 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
         if (paEntries[i].pfnDoPE16_32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe16_32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPE16_32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPE16_32), BS3_MODE_PE16_32);
             CHECK_RESULT(g_szBs3ModeName_pe16_32);
         }
 
@@ -266,7 +216,7 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
         if (paEntries[i].pfnDoPE32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32)(CONV_TO_FLAT(paEntries[i].pfnDoPE32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32)(CONV_TO_FLAT(paEntries[i].pfnDoPE32), BS3_MODE_PE32);
             CHECK_RESULT(g_szBs3ModeName_pe32);
         }
 
@@ -297,7 +247,7 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
         if (paEntries[i].pfnDoPP16_32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pp16_32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPP16_32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPP16_32), BS3_MODE_PP16_32);
             CHECK_RESULT(g_szBs3ModeName_pp16_32);
         }
 
@@ -311,7 +261,7 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
         if (paEntries[i].pfnDoPP32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pp32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32)(CONV_TO_FLAT(paEntries[i].pfnDoPP32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32)(CONV_TO_FLAT(paEntries[i].pfnDoPP32), BS3_MODE_PP32);
             CHECK_RESULT(g_szBs3ModeName_pp32);
         }
 
@@ -349,7 +299,7 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
         if (paEntries[i].pfnDoPAE16_32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pae16_32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE16_32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE16_32), BS3_MODE_PAE16_32);
             CHECK_RESULT(g_szBs3ModeName_pae16_32);
         }
 
@@ -363,7 +313,7 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
         if (paEntries[i].pfnDoPAE32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pae32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE32), BS3_MODE_PAE32);
             CHECK_RESULT(g_szBs3ModeName_pae32);
         }
 
@@ -408,7 +358,7 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
         if (paEntries[i].pfnDoLM64)
         {
             PRE_DO_CALL(g_szBs3ModeName_lm64);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInLM64)(CONV_TO_FLAT(paEntries[i].pfnDoLM64));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInLM64)(CONV_TO_FLAT(paEntries[i].pfnDoLM64), BS3_MODE_LM64);
             CHECK_RESULT(g_szBs3ModeName_lm64);
         }
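
Two behavioural tweaks land here: an entry may now leave pszSubTest NULL to keep the currently active sub-test name, and the 32-bit/64-bit doers are handed the actual BS3_MODE_XXX value instead of having to derive it. A hedged sketch of a worker on the receiving end (MyWorker_pe16_32 is hypothetical):

    #include <stdint.h>

    /* The trampoline pushes bMode as the worker's argument, so a mode-test
       worker invoked via Bs3TestCallDoerInPE16_32 sees BS3_MODE_PE16_32. */
    uint8_t MyWorker_pe16_32(uint8_t bMode)
    {
        (void)bMode;
        return 0;        /* 0 = success; non-zero is the error number */
    }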
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModes.h b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModes.h
new file mode 100644
index 0000000..a5883fe
--- /dev/null
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModes.h
@@ -0,0 +1,86 @@
+/* $Id: bs3-mode-TestDoModes.h $ */
+/** @file
+ * BS3Kit - Common header for the Bs3TestDoModes family.
+ */
+
+/*
+ * Copyright (C) 2007-2017 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+#ifndef ___bs3_mode_TestDoModes_h
+#define ___bs3_mode_TestDoModes_h
+
+#include "bs3kit.h"
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+/** @def CONV_TO_FLAT
+ * Get flat address.  In 16-bit the parameter is a real mode far address, while
+ * in 32-bit and 64-bit modes it is already flat.
+ */
+/** @def CONV_TO_PROT_FAR16
+ * Get a 32-bit value that makes a protected mode far 16:16 address.
+ */
+/** @def CONV_TO_RM_FAR16
+ * Get a 32-bit value that makes a real mode far 16:16 address.  In 16-bit mode
+ * this is already what we've got, except must be converted to uint32_t.
+ */
+#if ARCH_BITS == 16
+# define CONV_TO_FLAT(a_fpfn)           (((uint32_t)BS3_FP_SEG(a_fpfn) << 4) + BS3_FP_OFF(a_fpfn))
+# define CONV_TO_PROT_FAR16(a_fpfn)     RT_MAKE_U32(BS3_FP_OFF(a_fpfn), Bs3SelRealModeCodeToProtMode(BS3_FP_SEG(a_fpfn)))
+# define CONV_TO_RM_FAR16(a_fpfn)       RT_MAKE_U32(BS3_FP_OFF(a_fpfn), BS3_FP_SEG(a_fpfn))
+#else
+# define CONV_TO_FLAT(a_fpfn)           ((uint32_t)(uintptr_t)(a_fpfn))
+# define CONV_TO_PROT_FAR16(a_fpfn)     Bs3SelFlatCodeToProtFar16((uint32_t)(uintptr_t)(a_fpfn))
+# define CONV_TO_RM_FAR16(a_fpfn)       Bs3SelFlatCodeToRealMode( (uint32_t)(uintptr_t)(a_fpfn))
+#endif
+
+
+/*********************************************************************************************************************************
+*   Assembly Symbols                                                                                                             *
+*********************************************************************************************************************************/
+/* These are in the same code segment as the main API, so no FAR necessary. */
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInRM)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16_32)(uint32_t uFlatAddrCallback, uint8_t bMode);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16_V86)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE32)(uint32_t uFlatAddrCallback, uint8_t bMode);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE32_16)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPEV86)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16_32)(uint32_t uFlatAddrCallback, uint8_t bMode);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16_V86)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP32)(uint32_t uFlatAddrCallback, uint8_t bMode);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP32_16)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPPV86)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16_32)(uint32_t uFlatAddrCallback, uint8_t bMode);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16_V86)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE32)(uint32_t uFlatAddrCallback, uint8_t bMode);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE32_16)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAEV86)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM16)(uint32_t uCallbackFarPtr);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM32)(uint32_t uFlatAddrCallback);
+BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM64)(uint32_t uFlatAddrCallback, uint8_t bMode);
+
+#endif
+
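
The CONV_TO_* macros above only repack pointers; no memory is touched.  A
minimal standalone sketch of the 16-bit CONV_TO_FLAT arithmetic, using
hypothetical fp_seg/fp_off helpers in place of BS3_FP_SEG/BS3_FP_OFF:

    #include <stdint.h>

    /* Hypothetical stand-ins for BS3_FP_SEG/BS3_FP_OFF: here a far pointer
     * is packed into a uint32_t with the segment in the high word. */
    static uint16_t fp_seg(uint32_t uFarPtr) { return (uint16_t)(uFarPtr >> 16); }
    static uint16_t fp_off(uint32_t uFarPtr) { return (uint16_t)uFarPtr; }

    /* Real mode flat address: segment * 16 + offset, as in CONV_TO_FLAT. */
    static uint32_t conv_to_flat(uint32_t uFarPtr)
    {
        return ((uint32_t)fp_seg(uFarPtr) << 4) + fp_off(uFarPtr);
    }

    /* Example: conv_to_flat(0xF000FFF0) == 0xFFFF0, the reset vector. */
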
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModes.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByMax.c
similarity index 55%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModes.c
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByMax.c
index c87006e..cf4bbd2 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModes.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByMax.c
@@ -1,10 +1,10 @@
-/* $Id: bs3-mode-TestDoModes.c $ */
+/* $Id: bs3-mode-TestDoModesByMax.c $ */
 /** @file
- * BS3Kit - Bs3TestDoModeTests
+ * BS3Kit - Bs3TestDoModesByMax
  */
 
 /*
- * Copyright (C) 2007-2016 Oracle Corporation
+ * Copyright (C) 2007-2017 Oracle Corporation
  *
  * This file is part of VirtualBox Open Source Edition (OSE), as
  * available from http://www.virtualbox.org. This file is free software;
@@ -36,59 +36,7 @@
 # include "bs3kit-template-header.h"
 # include "bs3-cmn-test.h"
 #endif
-
-
-/*********************************************************************************************************************************
-*   Defined Constants And Macros                                                                                                 *
-*********************************************************************************************************************************/
-/** @def CONV_TO_FLAT
- * Get flat address.  In 16-bit the parameter is a real mode far address, while
- * in 32-bit and 64-bit modes it is already flat.
- */
-/** @def CONV_TO_PROT_FAR16
- * Get a 32-bit value that makes a protected mode far 16:16 address.
- */
-/** @def CONV_TO_RM_FAR16
- * Get a 32-bit value that makes a real mode far 16:16 address.  In 16-bit mode
- * this is already what we've got, except must be converted to uint32_t.
- */
-#if ARCH_BITS == 16
-# define CONV_TO_FLAT(a_fpfn)           (((uint32_t)BS3_FP_SEG(a_fpfn) << 4) + BS3_FP_OFF(a_fpfn))
-# define CONV_TO_PROT_FAR16(a_fpfn)     RT_MAKE_U32(BS3_FP_OFF(a_fpfn), Bs3SelRealModeCodeToProtMode(BS3_FP_SEG(a_fpfn)))
-# define CONV_TO_RM_FAR16(a_fpfn)       RT_MAKE_U32(BS3_FP_OFF(a_fpfn), BS3_FP_SEG(a_fpfn))
-#else
-# define CONV_TO_FLAT(a_fpfn)           ((uint32_t)(uintptr_t)(a_fpfn))
-# define CONV_TO_PROT_FAR16(a_fpfn)     Bs3SelFlatCodeToProtFar16((uint32_t)(uintptr_t)(a_fpfn))
-# define CONV_TO_RM_FAR16(a_fpfn)       Bs3SelFlatCodeToRealMode( (uint32_t)(uintptr_t)(a_fpfn))
-#endif
-
-
-/*********************************************************************************************************************************
-*   Assembly Symbols                                                                                                             *
-*********************************************************************************************************************************/
-/* These are in the same code segment as this code, so no FAR necessary. */
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInRM)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16_32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16_V86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE32_16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPEV86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16_32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16_V86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP32_16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPPV86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16_32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16_V86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE32_16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAEV86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM64)(uint32_t uFlatAddrCallback);
+#include "bs3-mode-TestDoModes.h"
 
 
 
@@ -103,7 +51,7 @@ BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM64)(uint32_t uFlatAddrCallback
  * @param   fHavePae        Whether the CPU has PAE.
  * @param   fHaveLongMode   Whether the CPU does long mode.
  */
-static void bs3TestWarnAboutSkippedModes(PCBS3TESTMODEENTRY paEntries, unsigned cEntries,
+static void bs3TestWarnAboutSkippedModes(PCBS3TESTMODEBYMAXENTRY paEntries, unsigned cEntries,
                                          uint8_t bCpuType, bool fHavePae, bool fHaveLongMode)
 {
     bool           fComplained286   = false;
@@ -129,17 +77,17 @@ static void bs3TestWarnAboutSkippedModes(PCBS3TESTMODEENTRY paEntries, unsigned
         }
 
         if (   !fComplained386
-            &&  (   paEntries[i].pfnDoPE16_32
-                 || paEntries[i].pfnDoPE16_V86
-                 || paEntries[i].pfnDoPE32
-                 || paEntries[i].pfnDoPE32_16
-                 || paEntries[i].pfnDoPEV86
-                 || paEntries[i].pfnDoPP16
-                 || paEntries[i].pfnDoPP16_32
-                 || paEntries[i].pfnDoPP16_V86
-                 || paEntries[i].pfnDoPP32
-                 || paEntries[i].pfnDoPP32_16
-                 || paEntries[i].pfnDoPPV86) )
+            &&  (   paEntries[i].fDoPE16_32
+                 || paEntries[i].fDoPE16_V86
+                 || paEntries[i].fDoPE32
+                 || paEntries[i].fDoPE32_16
+                 || paEntries[i].fDoPEV86
+                 || paEntries[i].fDoPP16
+                 || paEntries[i].fDoPP16_32
+                 || paEntries[i].fDoPP16_V86
+                 || paEntries[i].fDoPP32
+                 || paEntries[i].fDoPP32_16
+                 || paEntries[i].fDoPPV86) )
         {
             if (bCpuType < BS3CPU_80386)
             {
@@ -150,12 +98,12 @@ static void bs3TestWarnAboutSkippedModes(PCBS3TESTMODEENTRY paEntries, unsigned
         }
 
         if (   !fComplainedPAE
-            &&  (   paEntries[i].pfnDoPAE16
-                 || paEntries[i].pfnDoPAE16_32
-                 || paEntries[i].pfnDoPAE16_V86
-                 || paEntries[i].pfnDoPAE32
-                 || paEntries[i].pfnDoPAE32_16
-                 || paEntries[i].pfnDoPAEV86) )
+            &&  (   paEntries[i].fDoPAE16
+                 || paEntries[i].fDoPAE16_32
+                 || paEntries[i].fDoPAE16_V86
+                 || paEntries[i].fDoPAE32
+                 || paEntries[i].fDoPAE32_16
+                 || paEntries[i].fDoPAEV86) )
         {
             if (!fHavePae)
             {
@@ -166,9 +114,9 @@ static void bs3TestWarnAboutSkippedModes(PCBS3TESTMODEENTRY paEntries, unsigned
         }
 
         if (   !fComplainedAMD64
-            &&  (   paEntries[i].pfnDoLM16
-                 || paEntries[i].pfnDoLM32
-                 || paEntries[i].pfnDoLM64) )
+            &&  (   paEntries[i].fDoLM16
+                 || paEntries[i].fDoLM32
+                 || paEntries[i].fDoLM64) )
         {
             if (!fHaveLongMode)
             {
@@ -180,8 +128,8 @@ static void bs3TestWarnAboutSkippedModes(PCBS3TESTMODEENTRY paEntries, unsigned
     }
 }
 
-#undef Bs3TestDoModes
-BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries))
+#undef Bs3TestDoModesByMax
+BS3_MODE_DEF(void, Bs3TestDoModesByMax,(PCBS3TESTMODEBYMAXENTRY paEntries, size_t cEntries))
 {
     bool const      fVerbose         = true;
     bool const      fDoV86Modes      = true;
@@ -205,7 +153,9 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
         const char *pszFmtStr = "Error #%u (%#x) in %s!\n";
         bool        fSkipped  = true;
         uint8_t     bErrNo;
-        Bs3TestSub(paEntries[i].pszSubTest);
+
+        if (paEntries[i].pszSubTest != NULL)
+            Bs3TestSub(paEntries[i].pszSubTest);
 
 #define PRE_DO_CALL(a_szModeName) do { if (fVerbose) Bs3TestPrintf("...%s\n", a_szModeName); } while (0)
 #define CHECK_RESULT(a_szModeName) \
@@ -219,7 +169,7 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
                 } \
             } while (0)
 
-        if (paEntries[i].pfnDoRM)
+        if (paEntries[i].fDoRM)
         {
             PRE_DO_CALL(g_szBs3ModeName_rm);
             bErrNo = TMPL_NM(Bs3TestCallDoerInRM)(CONV_TO_RM_FAR16(paEntries[i].pfnDoRM));
@@ -236,7 +186,7 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
         /*
          * Unpaged prot mode.
          */
-        if (paEntries[i].pfnDoPE16)
+        if (paEntries[i].fDoPE16)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe16);
             bErrNo = TMPL_NM(Bs3TestCallDoerInPE16)(CONV_TO_PROT_FAR16(paEntries[i].pfnDoPE16));
@@ -249,83 +199,83 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
             continue;
         }
 
-        if (paEntries[i].pfnDoPE16_32)
+        if (paEntries[i].fDoPE16_32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe16_32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPE16_32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPE16_32), BS3_MODE_PE16_32);
             CHECK_RESULT(g_szBs3ModeName_pe16_32);
         }
 
-        if (paEntries[i].pfnDoPE16_V86 && fDoWeirdV86Modes)
+        if (paEntries[i].fDoPE16_V86 && fDoWeirdV86Modes)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe16_v86);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPE16_V86)(CONV_TO_RM_FAR16(paEntries[i].pfnDoPE16_V86));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPE16_32), BS3_MODE_PE16_V86);
             CHECK_RESULT(g_szBs3ModeName_pe16_v86);
         }
 
-        if (paEntries[i].pfnDoPE32)
+        if (paEntries[i].fDoPE32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32)(CONV_TO_FLAT(paEntries[i].pfnDoPE32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32)(CONV_TO_FLAT(paEntries[i].pfnDoPE32), BS3_MODE_PE32);
             CHECK_RESULT(g_szBs3ModeName_pe32);
         }
 
-        if (paEntries[i].pfnDoPE32_16)
+        if (paEntries[i].fDoPE32_16)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe32_16);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32_16)(CONV_TO_PROT_FAR16(paEntries[i].pfnDoPE32_16));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32)(CONV_TO_FLAT(paEntries[i].pfnDoPE32), BS3_MODE_PE32_16);
             CHECK_RESULT(g_szBs3ModeName_pe32_16);
         }
 
-        if (paEntries[i].pfnDoPEV86 && fDoV86Modes)
+        if (paEntries[i].fDoPEV86 && fDoV86Modes)
         {
             PRE_DO_CALL(g_szBs3ModeName_pev86);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPEV86)(CONV_TO_RM_FAR16(paEntries[i].pfnDoPEV86));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32)(CONV_TO_FLAT(paEntries[i].pfnDoPE32), BS3_MODE_PEV86);
             CHECK_RESULT(g_szBs3ModeName_pev86);
         }
 
         /*
          * Paged protected mode.
          */
-        if (paEntries[i].pfnDoPP16)
+        if (paEntries[i].fDoPP16)
         {
             PRE_DO_CALL(g_szBs3ModeName_pp16);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16)(CONV_TO_PROT_FAR16(paEntries[i].pfnDoPP16));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPP16_32), BS3_MODE_PP16);
             CHECK_RESULT(g_szBs3ModeName_pp16);
         }
 
-        if (paEntries[i].pfnDoPP16_32)
+        if (paEntries[i].fDoPP16_32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pp16_32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPP16_32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPP16_32), BS3_MODE_PP16_32);
             CHECK_RESULT(g_szBs3ModeName_pp16_32);
         }
 
-        if (paEntries[i].pfnDoPP16_V86 && fDoWeirdV86Modes)
+        if (paEntries[i].fDoPP16_V86 && fDoWeirdV86Modes)
         {
             PRE_DO_CALL(g_szBs3ModeName_pp16_v86);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_V86)(CONV_TO_RM_FAR16(paEntries[i].pfnDoPP16_V86));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPP16_32), BS3_MODE_PP16_V86);
             CHECK_RESULT(g_szBs3ModeName_pp16_v86);
         }
 
-        if (paEntries[i].pfnDoPP32)
+        if (paEntries[i].fDoPP32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pp32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32)(CONV_TO_FLAT(paEntries[i].pfnDoPP32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32)(CONV_TO_FLAT(paEntries[i].pfnDoPP32), BS3_MODE_PP32);
             CHECK_RESULT(g_szBs3ModeName_pp32);
         }
 
-        if (paEntries[i].pfnDoPP32_16)
+        if (paEntries[i].fDoPP32_16)
         {
             PRE_DO_CALL(g_szBs3ModeName_pp32_16);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32_16)(CONV_TO_PROT_FAR16(paEntries[i].pfnDoPP32_16));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32)(CONV_TO_FLAT(paEntries[i].pfnDoPP32), BS3_MODE_PP32_16);
             CHECK_RESULT(g_szBs3ModeName_pp32_16);
         }
 
-        if (paEntries[i].pfnDoPPV86 && fDoV86Modes)
+        if (paEntries[i].fDoPPV86 && fDoV86Modes)
         {
             PRE_DO_CALL(g_szBs3ModeName_ppv86);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPPV86)(CONV_TO_RM_FAR16(paEntries[i].pfnDoPPV86));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32)(CONV_TO_FLAT(paEntries[i].pfnDoPP32), BS3_MODE_PPV86);
             CHECK_RESULT(g_szBs3ModeName_ppv86);
         }
 
@@ -339,45 +289,45 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
             continue;
         }
 
-        if (paEntries[i].pfnDoPAE16)
+        if (paEntries[i].fDoPAE16)
         {
             PRE_DO_CALL(g_szBs3ModeName_pae16);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16)(CONV_TO_PROT_FAR16(paEntries[i].pfnDoPAE16));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE16_32), BS3_MODE_PAE16);
             CHECK_RESULT(g_szBs3ModeName_pae16);
         }
 
-        if (paEntries[i].pfnDoPAE16_32)
+        if (paEntries[i].fDoPAE16_32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pae16_32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE16_32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE16_32), BS3_MODE_PAE16_32);
             CHECK_RESULT(g_szBs3ModeName_pae16_32);
         }
 
-        if (paEntries[i].pfnDoPAE16_V86 && fDoWeirdV86Modes)
+        if (paEntries[i].fDoPAE16_V86 && fDoWeirdV86Modes)
         {
             PRE_DO_CALL(g_szBs3ModeName_pae16_v86);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_V86)(CONV_TO_RM_FAR16(paEntries[i].pfnDoPAE16_V86));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE16_32), BS3_MODE_PAE16_V86);
             CHECK_RESULT(g_szBs3ModeName_pae16_v86);
         }
 
-        if (paEntries[i].pfnDoPAE32)
+        if (paEntries[i].fDoPAE32)
         {
             PRE_DO_CALL(g_szBs3ModeName_pae32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE32), BS3_MODE_PAE32);
             CHECK_RESULT(g_szBs3ModeName_pae32);
         }
 
-        if (paEntries[i].pfnDoPAE32_16)
+        if (paEntries[i].fDoPAE32_16)
         {
             PRE_DO_CALL(g_szBs3ModeName_pae32_16);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32_16)(CONV_TO_PROT_FAR16(paEntries[i].pfnDoPAE32_16));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE32), BS3_MODE_PAE32_16);
             CHECK_RESULT(g_szBs3ModeName_pae32_16);
         }
 
-        if (paEntries[i].pfnDoPAEV86 && fDoV86Modes)
+        if (paEntries[i].fDoPAEV86 && fDoV86Modes)
         {
             PRE_DO_CALL(g_szBs3ModeName_paev86);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAEV86)(CONV_TO_RM_FAR16(paEntries[i].pfnDoPAEV86));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32)(CONV_TO_FLAT(paEntries[i].pfnDoPAE32), BS3_MODE_PAEV86);
             CHECK_RESULT(g_szBs3ModeName_paev86);
         }
 
@@ -391,24 +341,24 @@ BS3_MODE_DEF(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries
             continue;
         }
 
-        if (paEntries[i].pfnDoLM16)
+        if (paEntries[i].fDoLM16)
         {
             PRE_DO_CALL(g_szBs3ModeName_lm16);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInLM16)(CONV_TO_PROT_FAR16(paEntries[i].pfnDoLM16));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInLM64)(CONV_TO_FLAT(paEntries[i].pfnDoLM64), BS3_MODE_LM16);
             CHECK_RESULT(g_szBs3ModeName_lm16);
         }
 
-        if (paEntries[i].pfnDoLM32)
+        if (paEntries[i].fDoLM32)
         {
             PRE_DO_CALL(g_szBs3ModeName_lm32);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInLM32)(CONV_TO_FLAT(paEntries[i].pfnDoLM32));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInLM64)(CONV_TO_FLAT(paEntries[i].pfnDoLM64), BS3_MODE_LM32);
             CHECK_RESULT(g_szBs3ModeName_lm32);
         }
 
-        if (paEntries[i].pfnDoLM64)
+        if (paEntries[i].fDoLM64)
         {
             PRE_DO_CALL(g_szBs3ModeName_lm64);
-            bErrNo = TMPL_NM(Bs3TestCallDoerInLM64)(CONV_TO_FLAT(paEntries[i].pfnDoLM64));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInLM64)(CONV_TO_FLAT(paEntries[i].pfnDoLM64), BS3_MODE_LM64);
             CHECK_RESULT(g_szBs3ModeName_lm64);
         }
 
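
The hunks above are the crux of the ByMax variant: instead of one worker per
CPU mode, each paging family keeps only the worker compiled for its maximum
bit count, and that worker is entered from every sub-mode with the actual
mode passed as bMode (hence pfnDoPE16_32 serving PE16_V86, pfnDoPE32 serving
PE32_16 and PEV86, pfnDoLM64 serving LM16 and LM32, and so on).  A hedged
sketch of the dispatch shape, with hypothetical mode constants and worker
type:

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical stand-ins for the BS3_MODE_* constants and for the
     * worker signature the patch threads bMode through. */
    enum { MODE_PE16_32 = 1, MODE_PE16_V86 = 2 };
    typedef uint8_t (*PFNWORKER32)(uint8_t bMode);

    /* One max-bitcount worker covers the whole PE16 family; the mode byte
     * tells it which environment it was actually entered from.  Each
     * sub-mode runs independently, as in the patch; returns the number of
     * failing sub-modes. */
    static unsigned run_pe16_family(PFNWORKER32 pfnDo32, bool fDo32, bool fDoV86)
    {
        unsigned cErrors = 0;
        if (fDo32)
            cErrors += pfnDo32(MODE_PE16_32) != 0;
        if (fDoV86)
            cErrors += pfnDo32(MODE_PE16_V86) != 0; /* same code, V86 entry */
        return cErrors;
    }
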
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOneStub.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByMaxStub.asm
similarity index 74%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOneStub.asm
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByMaxStub.asm
index c3ac8e3..103458e 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOneStub.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByMaxStub.asm
@@ -1,10 +1,10 @@
-; $Id: bs3-mode-TestDoModesByOneStub.asm $
+; $Id: bs3-mode-TestDoModesByMaxStub.asm $
 ;; @file
-; BS3Kit - Bs3TestDoModesByOne near stub.
+; BS3Kit - Bs3TestDoModesByMax near stub.
 ;
 
 ;
-; Copyright (C) 2007-2016 Oracle Corporation
+; Copyright (C) 2007-2017 Oracle Corporation
 ;
 ; This file is part of VirtualBox Open Source Edition (OSE), as
 ; available from http://www.virtualbox.org. This file is free software;
@@ -30,26 +30,24 @@
 %include "bs3kit-template-header.mac"
 
 ;
-; Finally near stub for the API call (16-bit only).
+; Near stub for the API call (16-bit only).
 ;
 %if TMPL_BITS == 16
  %if TMPL_MODE == BS3_MODE_RM
 BS3_BEGIN_RMTEXT16
  %endif
 BS3_BEGIN_TEXT16_NEARSTUBS
-BS3_PROC_BEGIN_MODE Bs3TestDoModesByOne, BS3_PBC_NEAR
+BS3_PROC_BEGIN_MODE Bs3TestDoModesByMax, BS3_PBC_NEAR
         pop     ax
- %if TMPL_MODE == BS3_MODE_RM
         push    cs
         push    ax
-        extern TMPL_FAR_NM(Bs3TestDoModesByOne):wrt BS3GROUPRMTEXT16
-        jmp far TMPL_FAR_NM(Bs3TestDoModesByOne)
+ %if TMPL_MODE == BS3_MODE_RM
+        extern TMPL_FAR_NM(Bs3TestDoModesByMax):wrt BS3GROUPRMTEXT16
+        jmp far TMPL_FAR_NM(Bs3TestDoModesByMax)
  %else
-        push    cs
-        push    ax
-        extern TMPL_FAR_NM(Bs3TestDoModesByOne):wrt CGROUP16
-        jmp     TMPL_NM(Bs3TestDoModesByOne)
+        extern TMPL_FAR_NM(Bs3TestDoModesByMax):wrt CGROUP16
+        jmp     TMPL_NM(Bs3TestDoModesByMax)
  %endif
-BS3_PROC_END_MODE   Bs3TestDoModesByOne
+BS3_PROC_END_MODE   Bs3TestDoModesByMax
 %endif
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOne.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOne.c
index efed6b0..9f86730 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOne.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOne.c
@@ -36,63 +36,12 @@
 # include "bs3kit-template-header.h"
 # include "bs3-cmn-test.h"
 #endif
-
-
-/*********************************************************************************************************************************
-*   Defined Constants And Macros                                                                                                 *
-*********************************************************************************************************************************/
-/** @def CONV_TO_FLAT
- * Get flat address.  In 16-bit the parameter is a real mode far address, while
- * in 32-bit and 64-bit modes it is already flat.
- */
-/** @def CONV_TO_PROT_FAR16
- * Get a 32-bit value that makes a protected mode far 16:16 address.
- */
-/** @def CONV_TO_RM_FAR16
- * Get a 32-bit value that makes a real mode far 16:16 address.  In 16-bit mode
- * this is already what we've got, except must be converted to uint32_t.
- */
-#if ARCH_BITS == 16
-# define CONV_TO_FLAT(a_fpfn)           (((uint32_t)BS3_FP_SEG(a_fpfn) << 4) + BS3_FP_OFF(a_fpfn))
-# define CONV_TO_PROT_FAR16(a_fpfn)     RT_MAKE_U32(BS3_FP_OFF(a_fpfn), Bs3SelRealModeCodeToProtMode(BS3_FP_SEG(a_fpfn)))
-# define CONV_TO_RM_FAR16(a_fpfn)       RT_MAKE_U32(BS3_FP_OFF(a_fpfn), BS3_FP_SEG(a_fpfn))
-#else
-# define CONV_TO_FLAT(a_fpfn)           ((uint32_t)(uintptr_t)(a_fpfn))
-# define CONV_TO_PROT_FAR16(a_fpfn)     Bs3SelFlatCodeToProtFar16((uint32_t)(uintptr_t)(a_fpfn))
-# define CONV_TO_RM_FAR16(a_fpfn)       Bs3SelFlatCodeToRealMode( (uint32_t)(uintptr_t)(a_fpfn))
-#endif
+#include "bs3-mode-TestDoModes.h"
 
 
 /*********************************************************************************************************************************
 *   Assembly Symbols                                                                                                             *
 *********************************************************************************************************************************/
-/* These are in the same code segment as this code, so no FAR necessary. */
-#if ARCH_BITS != 64
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInRM)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16_32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE16_V86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPE32_16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPEV86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16_32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP16_V86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPP32_16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPPV86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16_32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE16_V86)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAE32_16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInPAEV86)(uint32_t uCallbackFarPtr);
-#endif
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM16)(uint32_t uCallbackFarPtr);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM32)(uint32_t uFlatAddrCallback);
-BS3_DECL_NEAR(uint8_t) TMPL_NM(Bs3TestCallDoerInLM64)(uint32_t uFlatAddrCallback);
-
-
 /* Assembly helpers for switching to the work bitcount and calling it. */
 BS3_DECL_FAR(uint8_t) Bs3TestCallDoerTo16_f16(uint8_t bMode);
 BS3_DECL_FAR(uint8_t) Bs3TestCallDoerTo16_c32(uint8_t bMode);
@@ -111,6 +60,7 @@ BS3_DECL_FAR(uint8_t) Bs3TestCallDoerTo64_c64(uint8_t bMode);
 #endif
 extern PFNBS3TESTDOMODE g_pfnBs3TestDoModesByOneCurrent;
 
+#include <iprt/asm-amd64-x86.h>
 
 
 #undef Bs3TestDoModesByOne
@@ -149,8 +99,9 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
      */
     for (i = 0; i < cEntries; i++)
     {
-        const char *pszFmtStr = "Error #%u (%#x) in %s!\n";
-        bool        fSkipped  = true;
+        const char *pszFmtStr   = "Error #%u (%#x) in %s!\n";
+        bool        fSkipped    = true;
+        bool const  fOnlyPaging = RT_BOOL(paEntries[i].fFlags & BS3TESTMODEBYONEENTRY_F_ONLY_PAGING);
         uint8_t     bErrNo;
         Bs3TestSub(paEntries[i].pszSubTest);
 
@@ -171,7 +122,7 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
 #if ARCH_BITS != 64
 
 # if ARCH_BITS == 16
-        if (true)
+        if (!fOnlyPaging)
         {
             PRE_DO_CALL(g_szBs3ModeName_rm);
             bErrNo = TMPL_NM(Bs3TestCallDoerInRM)(CONV_TO_RM_FAR16(paEntries[i].pfnWorker));
@@ -189,7 +140,7 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
         /*
          * Unpaged prot mode.
          */
-        if (true)
+        if (!fOnlyPaging)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe16);
 # if ARCH_BITS == 16
@@ -206,18 +157,18 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
             continue;
         }
 
-        if (true)
+        if (!fOnlyPaging)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe16_32);
 # if ARCH_BITS == 32
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPE16_32)(CONV_TO_FLAT(paEntries[i].pfnWorker));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE16_32)(CONV_TO_FLAT(paEntries[i].pfnWorker), BS3_MODE_PE16_32);
 # else
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPE16_32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE16_32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)), BS3_MODE_PE16_32);
 # endif
             CHECK_RESULT(g_szBs3ModeName_pe16_32);
         }
 
-        if (fDoWeirdV86Modes)
+        if (fDoWeirdV86Modes && !fOnlyPaging)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe16_v86);
 # if ARCH_BITS == 16
@@ -228,18 +179,18 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
             CHECK_RESULT(g_szBs3ModeName_pe16_v86);
         }
 
-        if (true)
+        if (!fOnlyPaging)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe32);
 # if ARCH_BITS == 32
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32)(CONV_TO_FLAT(paEntries[i].pfnWorker));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32)(CONV_TO_FLAT(paEntries[i].pfnWorker), BS3_MODE_PE32);
 # else
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPE32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)), BS3_MODE_PE32);
 # endif
             CHECK_RESULT(g_szBs3ModeName_pe32);
         }
 
-        if (true)
+        if (!fOnlyPaging)
         {
             PRE_DO_CALL(g_szBs3ModeName_pe32_16);
 # if ARCH_BITS == 16
@@ -250,7 +201,7 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
             CHECK_RESULT(g_szBs3ModeName_pe32_16);
         }
 
-        if (fDoV86Modes)
+        if (fDoV86Modes && !fOnlyPaging)
         {
             PRE_DO_CALL(g_szBs3ModeName_pev86);
 # if ARCH_BITS == 16
@@ -279,9 +230,9 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
         {
             PRE_DO_CALL(g_szBs3ModeName_pp16_32);
 # if ARCH_BITS == 32
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_32)(CONV_TO_FLAT(paEntries[i].pfnWorker));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_32)(CONV_TO_FLAT(paEntries[i].pfnWorker), BS3_MODE_PP16_32);
 # else
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP16_32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)), BS3_MODE_PP16_32);
 # endif
             CHECK_RESULT(g_szBs3ModeName_pp16_32);
         }
@@ -301,9 +252,9 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
         {
             PRE_DO_CALL(g_szBs3ModeName_pp32);
 # if ARCH_BITS == 32
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32)(CONV_TO_FLAT(paEntries[i].pfnWorker));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32)(CONV_TO_FLAT(paEntries[i].pfnWorker), BS3_MODE_PP32);
 # else
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPP32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)), BS3_MODE_PP32);
 # endif
             CHECK_RESULT(g_szBs3ModeName_pp32);
         }
@@ -349,9 +300,9 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
         {
             PRE_DO_CALL(g_szBs3ModeName_pae16_32);
 # if ARCH_BITS == 32
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_32)(CONV_TO_FLAT(paEntries[i].pfnWorker));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_32)(CONV_TO_FLAT(paEntries[i].pfnWorker), BS3_MODE_PAE16_32);
 # else
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE16_32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)), BS3_MODE_PAE16_32);
 # endif
             CHECK_RESULT(g_szBs3ModeName_pae16_32);
         }
@@ -371,9 +322,9 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
         {
             PRE_DO_CALL(g_szBs3ModeName_pae32);
 # if ARCH_BITS == 32
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32)(CONV_TO_FLAT(paEntries[i].pfnWorker));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32)(CONV_TO_FLAT(paEntries[i].pfnWorker), BS3_MODE_PAE32);
 # else
-            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInPAE32)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c32)), BS3_MODE_PAE32);
 # endif
             CHECK_RESULT(g_szBs3ModeName_pae32);
         }
@@ -438,9 +389,9 @@ BS3_MODE_DEF(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_
         {
             PRE_DO_CALL(g_szBs3ModeName_lm64);
 #if ARCH_BITS == 64
-            bErrNo = TMPL_NM(Bs3TestCallDoerInLM64)(CONV_TO_FLAT(paEntries[i].pfnWorker));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInLM64)(CONV_TO_FLAT(paEntries[i].pfnWorker), BS3_MODE_LM64);
 #else
-            bErrNo = TMPL_NM(Bs3TestCallDoerInLM64)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c64)));
+            bErrNo = TMPL_NM(Bs3TestCallDoerInLM64)(CONV_TO_FLAT(RT_CONCAT3(Bs3TestCallDoerTo,ARCH_BITS,_c64)), BS3_MODE_LM64);
 #endif
             CHECK_RESULT(g_szBs3ModeName_lm64);
         }
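
The BS3TESTMODEBYONEENTRY_F_ONLY_PAGING flag used above lets a worker opt out
of every unpaged mode: each former `if (true)` guard in the unpaged section
becomes `if (!fOnlyPaging)`.  RT_BOOL is IPRT's boolean normalisation macro
(!!(expr)).  A small sketch of the gating pattern, with a hypothetical flag
value and entry struct:

    #include <stdbool.h>
    #include <stdint.h>

    #define F_ONLY_PAGING UINT32_C(0x00000001)      /* hypothetical value */

    typedef struct { uint32_t fFlags; } TESTENTRY;  /* hypothetical */

    static void run_entry(const TESTENTRY *pEntry)
    {
        /* Evaluated once per entry, as in the patch. */
        bool const fOnlyPaging = (pEntry->fFlags & F_ONLY_PAGING) != 0;

        if (!fOnlyPaging)
        {
            /* ...real mode and the unpaged protected modes go here... */
        }
        /* ...the paged modes (PP*, PAE*, LM*) run regardless... */
    }
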
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOneStub.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOneStub.asm
index c3ac8e3..157179d 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOneStub.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOneStub.asm
@@ -30,7 +30,7 @@
 %include "bs3kit-template-header.mac"
 
 ;
-; Finally near stub for the API call (16-bit only).
+; Near stub for the API call (16-bit only).
 ;
 %if TMPL_BITS == 16
  %if TMPL_MODE == BS3_MODE_RM
@@ -39,14 +39,12 @@ BS3_BEGIN_RMTEXT16
 BS3_BEGIN_TEXT16_NEARSTUBS
 BS3_PROC_BEGIN_MODE Bs3TestDoModesByOne, BS3_PBC_NEAR
         pop     ax
- %if TMPL_MODE == BS3_MODE_RM
         push    cs
         push    ax
+ %if TMPL_MODE == BS3_MODE_RM
         extern TMPL_FAR_NM(Bs3TestDoModesByOne):wrt BS3GROUPRMTEXT16
         jmp far TMPL_FAR_NM(Bs3TestDoModesByOne)
  %else
-        push    cs
-        push    ax
         extern TMPL_FAR_NM(Bs3TestDoModesByOne):wrt CGROUP16
         jmp     TMPL_NM(Bs3TestDoModesByOne)
  %endif
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesHlp.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesHlp.asm
index 6b57dfd..9e4eac3 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesHlp.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesHlp.asm
@@ -37,11 +37,13 @@
 ; We put most of this mess in the RMTEXT16 segment when in real mode.
 ;
 %if TMPL_MODE == BS3_MODE_RM
- %define MY_BEGIN_TEXT   BS3_BEGIN_RMTEXT16
- %define MY_BEGIN_TEXT16 BS3_BEGIN_RMTEXT16
+ %define MY_BEGIN_TEXT          BS3_BEGIN_RMTEXT16
+ %define MY_BEGIN_TEXT16        BS3_BEGIN_RMTEXT16
+ %define MY_TEXT16_WRT(a_Label) a_Label wrt BS3GROUPRMTEXT16
 %else
- %define MY_BEGIN_TEXT   TMPL_BEGIN_TEXT
- %define MY_BEGIN_TEXT16 BS3_BEGIN_TEXT16
+ %define MY_BEGIN_TEXT          TMPL_BEGIN_TEXT
+ %define MY_BEGIN_TEXT16        BS3_BEGIN_TEXT16
+ %define MY_TEXT16_WRT(a_Label) BS3_TEXT16_WRT(a_Label)
 %endif
 
 
@@ -136,7 +138,7 @@ extern RT_CONCAT3(_Bs3SwitchTo,TMPL_MODE_UNAME,_Safe_lm32):wrt BS3FLAT
 extern RT_CONCAT3(_Bs3SwitchTo,TMPL_MODE_UNAME,_Safe_lm64):wrt BS3FLAT
 
 
-
+MY_BEGIN_TEXT16                         ; need the group definition
 MY_BEGIN_TEXT
 
 ;;
@@ -276,11 +278,11 @@ BS3_GLOBAL_NAME_EX TMPL_NM(bs3TestCallDoerEpilogue), , 0
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInRM(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInRM, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -325,11 +327,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInRM
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPE16(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPE16, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -363,13 +365,14 @@ MY_BEGIN_TEXT
 BS3_PROC_END_MODE   Bs3TestCallDoerInPE16
 
 ;;
-; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPE16_32(uint16_t offBs3Text16);
+; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPE16_32(uint32_t FlatWorkerAddr, uint8_t bMode);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPE16_32, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 .doit:
         mov     eax, [xBP + xCB + cbCurRetAddr]      ; Load function pointer.
+        movzx   edx, byte [xBP + xCB + cbCurRetAddr + sCB] ; bMode
 
         ; Mode switch, make the call, switch back.
         STRICT_SAVE_REGS
@@ -381,7 +384,7 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPE16_32, BS3_PBC_NEAR
         BS3_SET_BITS 32
         STRICT_CHECK_REGS
 
-        push    BS3_MODE_PE16_32
+        push    edx                     ; bMode
         call    eax
 
         STRICT_SAVE_REGS
@@ -395,9 +398,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPE16_32
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPE16_V86(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPE16_V86, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
-.doit:
+MY_BEGIN_TEXT16
+BS3_SET_BITS TMPL_BITS
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -428,16 +433,18 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPE16_V86, BS3_PBC_NEAR
         BS3_SET_BITS TMPL_BITS
         STRICT_CHECK_REGS
         jmp     TMPL_NM(bs3TestCallDoerEpilogue)
+MY_BEGIN_TEXT
 BS3_PROC_END_MODE   Bs3TestCallDoerInPE16_V86
 
 ;;
-; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPE32(uint16_t offBs3Text16);
+; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPE32(uint32_t FlatWorkerAddr, uint8_t bMode);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPE32, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 .doit:
         mov     eax, [xBP + xCB + cbCurRetAddr]      ; Load function pointer.
+        movzx   edx, byte [xBP + xCB + cbCurRetAddr + sCB] ; bMode
 
         ; Mode switch, make the call, switch back.
         STRICT_SAVE_REGS
@@ -449,7 +456,7 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPE32, BS3_PBC_NEAR
         BS3_SET_BITS 32
         STRICT_CHECK_REGS
 
-        push    BS3_MODE_PE32
+        push    edx                     ; bMode
         call    eax
 
         STRICT_SAVE_REGS
@@ -463,11 +470,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPE32
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPE32_16(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPE32_16, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -505,11 +512,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPE32_16
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPEV86(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPEV86, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -553,11 +560,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPEV86
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPP16(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPP16, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -592,13 +599,14 @@ MY_BEGIN_TEXT
 BS3_PROC_END_MODE   Bs3TestCallDoerInPP16
 
 ;;
-; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPP16_32(uint16_t offBs3Text16);
+; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPP16_32(uint32_t uFlatWorkerAddr, uint8_t bMode);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPP16_32, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 .doit:
         mov     eax, [xBP + xCB + cbCurRetAddr]      ; Load function pointer.
+        movzx   edx, byte [xBP + xCB + cbCurRetAddr + sCB] ; bMode
 
         ; Mode switch, make the call, switch back.
         STRICT_SAVE_REGS
@@ -610,7 +618,7 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPP16_32, BS3_PBC_NEAR
         BS3_SET_BITS 32
         STRICT_CHECK_REGS
 
-        push    BS3_MODE_PP16_32
+        push    edx
         call    eax
 
         STRICT_SAVE_REGS
@@ -624,9 +632,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPP16_32
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPP16_V86(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPP16_V86, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
-.doit:
+MY_BEGIN_TEXT16
+BS3_SET_BITS TMPL_BITS
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -657,16 +667,18 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPP16_V86, BS3_PBC_NEAR
         BS3_SET_BITS TMPL_BITS
         STRICT_CHECK_REGS
         jmp     TMPL_NM(bs3TestCallDoerEpilogue)
+MY_BEGIN_TEXT
 BS3_PROC_END_MODE   Bs3TestCallDoerInPP16_V86
 
 ;;
-; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPP32(uint16_t offBs3Text16);
+; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPP32(uint32_t uFlatWorkerAddr, uint8_t bMode);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPP32, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 .doit:
         mov     eax, [xBP + xCB + cbCurRetAddr]      ; Load function pointer.
+        movzx   edx, byte [xBP + xCB + cbCurRetAddr + sCB] ; bMode
 
         ; Mode switch, make the call, switch back.
         STRICT_SAVE_REGS
@@ -678,7 +690,7 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPP32, BS3_PBC_NEAR
         BS3_SET_BITS 32
         STRICT_CHECK_REGS
 
-        push    BS3_MODE_PP32
+        push    edx                     ; bMode
         call    eax
 
         STRICT_SAVE_REGS
@@ -692,11 +704,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPP32
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPP32_16(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPP32_16, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -734,11 +746,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPP32_16
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPPV86(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPPV86, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -773,7 +785,6 @@ MY_BEGIN_TEXT
 BS3_PROC_END_MODE   Bs3TestCallDoerInPPV86
 
 
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; PAE paged protection mode.
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -782,11 +793,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPPV86
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPAE16(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPAE16, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -821,13 +832,14 @@ MY_BEGIN_TEXT
 BS3_PROC_END_MODE   Bs3TestCallDoerInPAE16
 
 ;;
-; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPAE16_32(uint16_t offBs3Text16);
+; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPAE16_32(uint32_t uFlatWorkerAddr, uint8_t bMode);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPAE16_32, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 .doit:
         mov     eax, [xBP + xCB + cbCurRetAddr]      ; Load function pointer.
+        movzx   edx, byte [xBP + xCB + cbCurRetAddr + sCB] ; bMode
 
         ; Mode switch, make the call, switch back.
         STRICT_SAVE_REGS
@@ -839,7 +851,7 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPAE16_32, BS3_PBC_NEAR
         BS3_SET_BITS 32
         STRICT_CHECK_REGS
 
-        push    BS3_MODE_PAE16_32
+        push    edx                     ; bMode
         call    eax
 
         STRICT_SAVE_REGS
@@ -853,9 +865,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPAE16_32
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPAE16_V86(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPAE16_V86, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
-.doit:
+MY_BEGIN_TEXT16
+BS3_SET_BITS TMPL_BITS
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -886,16 +900,18 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPAE16_V86, BS3_PBC_NEAR
         BS3_SET_BITS TMPL_BITS
         STRICT_CHECK_REGS
         jmp     TMPL_NM(bs3TestCallDoerEpilogue)
+MY_BEGIN_TEXT
 BS3_PROC_END_MODE   Bs3TestCallDoerInPAE16_V86
 
 ;;
-; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPAE32(uint16_t offBs3Text16);
+; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPAE32(uint32_t uFlatWorkerAddr, uint8_t bMode);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPAE32, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 .doit:
         mov     eax, [xBP + xCB + cbCurRetAddr]      ; Load function pointer.
+        movzx   edx, byte [xBP + xCB + cbCurRetAddr + sCB] ; bMode
 
         ; Mode switch, make the call, switch back.
         STRICT_SAVE_REGS
@@ -907,7 +923,7 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPAE32, BS3_PBC_NEAR
         BS3_SET_BITS 32
         STRICT_CHECK_REGS
 
-        push    BS3_MODE_PAE32
+        push    edx                     ; bMode
         call    eax
 
         STRICT_SAVE_REGS
@@ -921,11 +937,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPAE32
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPAE32_16(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPAE32_16, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -963,11 +979,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPAE32_16
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInPAEV86(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInPAEV86, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -1011,11 +1027,11 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInPAEV86
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInLM16(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInLM16, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 MY_BEGIN_TEXT16
 BS3_SET_BITS TMPL_BITS
-.doit:
+BS3_GLOBAL_LOCAL_LABEL .doit
         mov     ax, [xBP + xCB + cbCurRetAddr]      ; Load far function pointer.
         mov     dx, [xBP + xCB + cbCurRetAddr + 2]
 
@@ -1053,7 +1069,7 @@ BS3_PROC_END_MODE   Bs3TestCallDoerInLM16
 ; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInLM32(uint16_t offBs3Text16);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInLM32, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 .doit:
         mov     eax, [xBP + xCB + cbCurRetAddr]      ; Load function pointer.
@@ -1080,13 +1096,14 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInLM32, BS3_PBC_NEAR
 BS3_PROC_END_MODE   Bs3TestCallDoerInLM32
 
 ;;
-; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInLM64(uint16_t offBs3Text16);
+; @cproto   BS3_DECL(uint8_t) Bs3TestCallDoerInLM64(uint32_t uFlatWorkerAddr, uint8_t bMode);
 ; @uses     rax
 BS3_PROC_BEGIN_MODE Bs3TestCallDoerInLM64, BS3_PBC_NEAR
-        BS3_LEA_MOV_WRT_RIP(xAX, .doit)
+        BS3_LEA_MOV_WRT_RIP(xAX, MY_TEXT16_WRT(.doit))
         jmp     TMPL_NM(bs3TestCallDoerPrologue)
 .doit:
-        mov     eax, [xBP + xCB + cbCurRetAddr]      ; Load function pointer.
+        mov     eax, [xBP + xCB + cbCurRetAddr]     ; Load function pointer.
+        movzx   edx, byte [xBP + xCB + cbCurRetAddr + sCB] ; bMode
 
         ; Mode switch, make the call, switch back.
         STRICT_SAVE_REGS
@@ -1100,7 +1117,7 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInLM64, BS3_PBC_NEAR
 
         and     rsp, ~0fh
         sub     rsp, 18h
-        push    BS3_MODE_LM64
+        push    rdx                     ; bMode
         BS3_CALL rax, 1
 
         STRICT_SAVE_REGS
@@ -1110,26 +1127,3 @@ BS3_PROC_BEGIN_MODE Bs3TestCallDoerInLM64, BS3_PBC_NEAR
         jmp     TMPL_NM(bs3TestCallDoerEpilogue)
 BS3_PROC_END_MODE   Bs3TestCallDoerInLM64
 
-
-;
-; Finally near stub for the API call (16-bit only).
-;
-%if TMPL_BITS == 16
-BS3_BEGIN_TEXT16_NEARSTUBS
-BS3_PROC_BEGIN_MODE Bs3TestDoModes, BS3_PBC_NEAR
-        pop     ax
- %if TMPL_MODE == BS3_MODE_RM
-        push    cs
-        push    ax
-        extern TMPL_FAR_NM(Bs3TestDoModes):wrt BS3GROUPRMTEXT16
-        jmp far TMPL_FAR_NM(Bs3TestDoModes)
- %else
-        push    cs
-        push    ax
-        extern TMPL_FAR_NM(Bs3TestDoModes):wrt CGROUP16
-        jmp     TMPL_NM(Bs3TestDoModes)
- %endif
-BS3_PROC_END_MODE   Bs3TestDoModes
-
-%endif
-
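
The net effect of the assembly changes above is a calling convention change:
each 32-bit and 64-bit doer now takes the mode byte as a second argument
(fetched from the stack with movzx) and pushes that for the worker, instead
of pushing a hard-coded BS3_MODE_* constant.  A hypothetical C-level view of
the new contract (the real routines also switch CPU mode around the call):

    #include <stdint.h>

    typedef uint8_t (*PFNMODEWORKER)(uint8_t bMode);    /* hypothetical */

    static uint8_t doer_in_pe16_32(uint32_t uFlatWorkerAddr, uint8_t bMode)
    {
        PFNMODEWORKER pfnWorker = (PFNMODEWORKER)(uintptr_t)uFlatWorkerAddr;
        /* ...switch to PE16_32 here (STRICT_SAVE_REGS etc. in the asm)... */
        uint8_t bErr = pfnWorker(bMode);    /* bMode pushed, not a constant */
        /* ...switch back to the template mode... */
        return bErr;
    }
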
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOneStub.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesStub.asm
similarity index 76%
copy from src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOneStub.asm
copy to src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesStub.asm
index c3ac8e3..cac0afc 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesByOneStub.asm
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-mode-TestDoModesStub.asm
@@ -1,6 +1,6 @@
-; $Id: bs3-mode-TestDoModesByOneStub.asm $
+; $Id: bs3-mode-TestDoModesStub.asm $
 ;; @file
-; BS3Kit - Bs3TestDoModesByOne near stub.
+; BS3Kit - Bs3TestDoModes near stub.
 ;
 
 ;
@@ -30,26 +30,25 @@
 %include "bs3kit-template-header.mac"
 
 ;
-; Finally near stub for the API call (16-bit only).
+; Near stub for the API call (16-bit only).
 ;
 %if TMPL_BITS == 16
  %if TMPL_MODE == BS3_MODE_RM
 BS3_BEGIN_RMTEXT16
  %endif
 BS3_BEGIN_TEXT16_NEARSTUBS
-BS3_PROC_BEGIN_MODE Bs3TestDoModesByOne, BS3_PBC_NEAR
+BS3_PROC_BEGIN_MODE Bs3TestDoModes, BS3_PBC_NEAR
         pop     ax
- %if TMPL_MODE == BS3_MODE_RM
         push    cs
         push    ax
-        extern TMPL_FAR_NM(Bs3TestDoModesByOne):wrt BS3GROUPRMTEXT16
-        jmp far TMPL_FAR_NM(Bs3TestDoModesByOne)
+ %if TMPL_MODE == BS3_MODE_RM
+        extern TMPL_FAR_NM(Bs3TestDoModes):wrt BS3GROUPRMTEXT16
+        jmp far TMPL_FAR_NM(Bs3TestDoModes)
  %else
-        push    cs
-        push    ax
-        extern TMPL_FAR_NM(Bs3TestDoModesByOne):wrt CGROUP16
-        jmp     TMPL_NM(Bs3TestDoModesByOne)
+        extern TMPL_FAR_NM(Bs3TestDoModes):wrt CGROUP16
+        jmp     TMPL_NM(Bs3TestDoModes)
  %endif
-BS3_PROC_END_MODE   Bs3TestDoModesByOne
+BS3_PROC_END_MODE   Bs3TestDoModes
 %endif
 
+
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-rm-InitMemory.c b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-rm-InitMemory.c
index db7cb0e..1029a47 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-rm-InitMemory.c
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-rm-InitMemory.c
@@ -281,7 +281,7 @@ BS3_DECL(void) BS3_FAR_CODE Bs3InitMemory_rm_far(void)
     ASMBitSetRange(g_Bs3Mem4KLow.Core.bmAllocated, 0, 0x10 + cPages);
 
     /* Mark any unused pages between BS3TEXT16 and BS3SYSTEM16 as free. */
-    cPages = (Bs3Text16_Size + _4K - 1U) >> 12;
+    cPages = (Bs3Text16_Size + (uint32_t)_4K - 1U) >> 12;
     ASMBitClearRange(g_Bs3Mem4KLow.Core.bmAllocated, 0x10U + cPages, 0x20U);
 
     /* In case the system has less than 640KB of memory, check the BDA variable for it. */
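
The cast added above is an integer promotion fix: under a 16-bit compiler,
unsigned int is 16 bits wide, so `Bs3Text16_Size + _4K - 1U` is computed
modulo 0x10000 and can wrap before the shift; casting _4K to uint32_t forces
the whole expression into 32-bit arithmetic.  A small demonstration of the
hazard, with a stand-in for IPRT's _4K:

    #include <stdint.h>

    #define MY_4K 4096U     /* stand-in for _4K */

    /* Wraps on targets where unsigned int is 16 bits: for cbText above
     * 0xF000 the sum exceeds 0xFFFF and the page count comes out short. */
    static uint16_t pages_16bit_hazard(uint16_t cbText)
    {
        return (uint16_t)((cbText + MY_4K - 1U) >> 12);
    }

    /* Fixed, mirroring the patch: promote before the addition. */
    static uint32_t pages_fixed(uint16_t cbText)
    {
        return ((uint32_t)cbText + (uint32_t)MY_4K - 1U) >> 12;
    }
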
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-autostubs.kmk b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-autostubs.kmk
index fff75b2..e72377a 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-autostubs.kmk
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-autostubs.kmk
@@ -31,19 +31,30 @@ $(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,Bs3SelProtFar16DataToRealMode,
 $(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,Bs3SelRealModeCodeToFlat,4)
 $(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,Bs3SelRealModeDataToFlat,4)
 $(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,Bs3SelRealModeDataToProtFar16,4)
+$(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,Bs3ExtCtxRestore,4)
+$(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,Bs3ExtCtxSave,4)
 $(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,Bs3SelFar32ToFlat32NoClobber,6)
 $(call BS3KIT_FN_GEN_CMN_FARSTUB,bs3kit-common-16,Bs3RegCtxSaveEx,8)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestCheckRegCtxEx)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestFailed)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestFailedF)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestFailedV)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3GetCpuVendor)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3StrCpy)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3GetModeName)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3GetModeNameShortLower)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PagingAlias)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PagingInitRootForLM)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PagingInitRootForPAE)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PagingInitRootForPP)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PagingProtect)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PagingProtectPtr)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PagingQueryAddressInfo)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PagingUnalias)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SwitchFromV86To16BitAndCallC)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3ExtCtxAlloc)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3ExtCtxCopy)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3ExtCtxInit)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TrapSetHandler)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3Printf)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PrintfV)
@@ -52,6 +63,7 @@ $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3StrLen)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3StrNLen)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3StrPrintf)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3StrPrintfV)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3ExtCtxGetSize)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PicUpdateMask)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SlabFree)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestSubErrorCount)
@@ -64,13 +76,16 @@ $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3MemGuardedTestPageAlloc)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3MemGuardedTestPageAllocEx)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3MemMove)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3MemPCpy)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PagingGetPte)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PagingSetupCanonicalTraps)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SlabAlloc)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SlabAllocEx)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SlabListAlloc)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SlabListAllocEx)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3ExtCtxFree)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3MemFree)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3MemGuardedTestPageFree)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3MemPrintInfo)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PicMaskAll)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PicSetup)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3PitDisable)
@@ -84,13 +99,12 @@ $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3RegCtxSetGrpSegFromFlat)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3RegCtxSetRipCsFromCurPtr)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3RegCtxSetRipCsFromFlat)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3RegCtxSetRipCsFromLnkPtr)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SelSetup16BitCode)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SelSetup16BitData)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SlabInit)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SlabListAdd)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SlabListFree)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3SlabListInit)
-$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestFailed)
-$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestFailedF)
-$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestFailedV)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestInit)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestPrintf)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TestPrintfV)
@@ -121,5 +135,6 @@ $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3TrapUnsetJmp)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3UInt32Div)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,Bs3UInt64Div)
 $(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,bs3PagingGetLegacyPte)
-$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,bs3PagingGetPte)
+$(call BS3KIT_FN_GEN_CMN_NEARSTUB,bs3kit-common-16,bs3PagingGetPaePte)
+$(call BS3KIT_FN_GEN_MODE_NEARSTUB,bs3kit-common-16,Bs3SwitchTo32BitAndCallC)
 $(call BS3KIT_FN_GEN_MODE_NEARSTUB,bs3kit-common-16,Bs3TrapInit)
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-mangling-code-define.h b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-mangling-code-define.h
index c8b4cfc..9b54bbc 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-mangling-code-define.h
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-mangling-code-define.h
@@ -30,7 +30,16 @@
 #define Bs3A20Enable BS3_CMN_MANGLER(Bs3A20Enable)
 #define Bs3A20EnableViaKbd BS3_CMN_MANGLER(Bs3A20EnableViaKbd)
 #define Bs3A20EnableViaPortA BS3_CMN_MANGLER(Bs3A20EnableViaPortA)
+#define Bs3ExtCtxAlloc BS3_CMN_MANGLER(Bs3ExtCtxAlloc)
+#define Bs3ExtCtxCopy BS3_CMN_MANGLER(Bs3ExtCtxCopy)
+#define Bs3ExtCtxFree BS3_CMN_MANGLER(Bs3ExtCtxFree)
+#define Bs3ExtCtxGetSize BS3_CMN_MANGLER(Bs3ExtCtxGetSize)
+#define Bs3ExtCtxInit BS3_CMN_MANGLER(Bs3ExtCtxInit)
+#define Bs3ExtCtxRestore BS3_CMN_MANGLER(Bs3ExtCtxRestore)
+#define Bs3ExtCtxSave BS3_CMN_MANGLER(Bs3ExtCtxSave)
+#define Bs3GetCpuVendor BS3_CMN_MANGLER(Bs3GetCpuVendor)
 #define Bs3GetModeName BS3_CMN_MANGLER(Bs3GetModeName)
+#define Bs3GetModeNameShortLower BS3_CMN_MANGLER(Bs3GetModeNameShortLower)
 #define Bs3KbdRead BS3_CMN_MANGLER(Bs3KbdRead)
 #define Bs3KbdWait BS3_CMN_MANGLER(Bs3KbdWait)
 #define Bs3KbdWrite BS3_CMN_MANGLER(Bs3KbdWrite)
@@ -45,16 +54,19 @@
 #define Bs3MemGuardedTestPageFree BS3_CMN_MANGLER(Bs3MemGuardedTestPageFree)
 #define Bs3MemMove BS3_CMN_MANGLER(Bs3MemMove)
 #define Bs3MemPCpy BS3_CMN_MANGLER(Bs3MemPCpy)
+#define Bs3MemPrintInfo BS3_CMN_MANGLER(Bs3MemPrintInfo)
 #define Bs3MemSet BS3_CMN_MANGLER(Bs3MemSet)
 #define Bs3MemZero BS3_CMN_MANGLER(Bs3MemZero)
 #define Bs3PagingAlias BS3_CMN_MANGLER(Bs3PagingAlias)
 #define bs3PagingGetLegacyPte BS3_CMN_MANGLER(bs3PagingGetLegacyPte)
-#define bs3PagingGetPte BS3_CMN_MANGLER(bs3PagingGetPte)
+#define bs3PagingGetPaePte BS3_CMN_MANGLER(bs3PagingGetPaePte)
+#define Bs3PagingGetPte BS3_CMN_MANGLER(Bs3PagingGetPte)
 #define Bs3PagingInitRootForLM BS3_CMN_MANGLER(Bs3PagingInitRootForLM)
 #define Bs3PagingInitRootForPAE BS3_CMN_MANGLER(Bs3PagingInitRootForPAE)
 #define Bs3PagingInitRootForPP BS3_CMN_MANGLER(Bs3PagingInitRootForPP)
 #define Bs3PagingProtect BS3_CMN_MANGLER(Bs3PagingProtect)
 #define Bs3PagingProtectPtr BS3_CMN_MANGLER(Bs3PagingProtectPtr)
+#define Bs3PagingQueryAddressInfo BS3_CMN_MANGLER(Bs3PagingQueryAddressInfo)
 #define Bs3PagingSetupCanonicalTraps BS3_CMN_MANGLER(Bs3PagingSetupCanonicalTraps)
 #define Bs3PagingUnalias BS3_CMN_MANGLER(Bs3PagingUnalias)
 #define Bs3Panic BS3_CMN_MANGLER(Bs3Panic)
@@ -95,6 +107,8 @@
 #define Bs3SelRealModeCodeToProtMode BS3_CMN_MANGLER(Bs3SelRealModeCodeToProtMode)
 #define Bs3SelRealModeDataToFlat BS3_CMN_MANGLER(Bs3SelRealModeDataToFlat)
 #define Bs3SelRealModeDataToProtFar16 BS3_CMN_MANGLER(Bs3SelRealModeDataToProtFar16)
+#define Bs3SelSetup16BitCode BS3_CMN_MANGLER(Bs3SelSetup16BitCode)
+#define Bs3SelSetup16BitData BS3_CMN_MANGLER(Bs3SelSetup16BitData)
 #define Bs3Shutdown BS3_CMN_MANGLER(Bs3Shutdown)
 #define Bs3SlabAlloc BS3_CMN_MANGLER(Bs3SlabAlloc)
 #define Bs3SlabAllocEx BS3_CMN_MANGLER(Bs3SlabAllocEx)
@@ -152,7 +166,9 @@
 #define Bs3UtilSetFullIdtr BS3_CMN_MANGLER(Bs3UtilSetFullIdtr)
 #ifndef BS3_CMN_ONLY
 # define Bs3CpuDetect BS3_MODE_MANGLER(Bs3CpuDetect)
+# define Bs3SwitchTo32BitAndCallC BS3_MODE_MANGLER(Bs3SwitchTo32BitAndCallC)
 # define Bs3TestDoModes BS3_MODE_MANGLER(Bs3TestDoModes)
+# define Bs3TestDoModesByMax BS3_MODE_MANGLER(Bs3TestDoModesByMax)
 # define Bs3TestDoModesByOne BS3_MODE_MANGLER(Bs3TestDoModesByOne)
 # define Bs3TrapInit BS3_MODE_MANGLER(Bs3TrapInit)
 #endif /* !BS3_CMN_ONLY */
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-mangling-code-undef.h b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-mangling-code-undef.h
index f1cc7c8..6e20907 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-mangling-code-undef.h
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-mangling-code-undef.h
@@ -30,7 +30,16 @@
 #undef Bs3A20Enable
 #undef Bs3A20EnableViaKbd
 #undef Bs3A20EnableViaPortA
+#undef Bs3ExtCtxAlloc
+#undef Bs3ExtCtxCopy
+#undef Bs3ExtCtxFree
+#undef Bs3ExtCtxGetSize
+#undef Bs3ExtCtxInit
+#undef Bs3ExtCtxRestore
+#undef Bs3ExtCtxSave
+#undef Bs3GetCpuVendor
 #undef Bs3GetModeName
+#undef Bs3GetModeNameShortLower
 #undef Bs3KbdRead
 #undef Bs3KbdWait
 #undef Bs3KbdWrite
@@ -45,16 +54,19 @@
 #undef Bs3MemGuardedTestPageFree
 #undef Bs3MemMove
 #undef Bs3MemPCpy
+#undef Bs3MemPrintInfo
 #undef Bs3MemSet
 #undef Bs3MemZero
 #undef Bs3PagingAlias
 #undef bs3PagingGetLegacyPte
-#undef bs3PagingGetPte
+#undef bs3PagingGetPaePte
+#undef Bs3PagingGetPte
 #undef Bs3PagingInitRootForLM
 #undef Bs3PagingInitRootForPAE
 #undef Bs3PagingInitRootForPP
 #undef Bs3PagingProtect
 #undef Bs3PagingProtectPtr
+#undef Bs3PagingQueryAddressInfo
 #undef Bs3PagingSetupCanonicalTraps
 #undef Bs3PagingUnalias
 #undef Bs3Panic
@@ -95,6 +107,8 @@
 #undef Bs3SelRealModeCodeToProtMode
 #undef Bs3SelRealModeDataToFlat
 #undef Bs3SelRealModeDataToProtFar16
+#undef Bs3SelSetup16BitCode
+#undef Bs3SelSetup16BitData
 #undef Bs3Shutdown
 #undef Bs3SlabAlloc
 #undef Bs3SlabAllocEx
@@ -152,7 +166,9 @@
 #undef Bs3UtilSetFullIdtr
 #ifndef BS3_CMN_ONLY
 # undef Bs3CpuDetect
+# undef Bs3SwitchTo32BitAndCallC
 # undef Bs3TestDoModes
+# undef Bs3TestDoModesByMax
 # undef Bs3TestDoModesByOne
 # undef Bs3TrapInit
 #endif /* !BS3_CMN_ONLY */
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-template-footer.mac b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-template-footer.mac
index de6d8f6..189552e 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-template-footer.mac
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-template-footer.mac
@@ -126,5 +126,7 @@
 %unmacro TONLY16 1+
 %unmacro TONLY32 1+
 %unmacro TONLY64 1+
+%unmacro TNOT16  1+
+%unmacro TNOT32  1+
 %unmacro TNOT64  1+
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-template-header.mac b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-template-header.mac
index 6384287..54b69f8 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-template-header.mac
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit-template-header.mac
@@ -469,6 +469,30 @@
  %endmacro
 %endif
 
+;; @def TNOT16
+; Version of BNOT16 that follows the code template.
+; Like BNOT16 this normally goes in column 1.
+%if TMPL_BITS == 16
+ %macro TNOT16 1+
+ %endmacro
+%else
+ %macro TNOT16 1+
+        %1
+ %endmacro
+%endif
+
+;; @def TNOT32
+; Version of BNOT32 that follows the code template.
+; Like BNOT32 this normally goes in column 1.
+%if TMPL_BITS == 32
+ %macro TNOT32 1+
+ %endmacro
+%else
+ %macro TNOT32 1+
+        %1
+ %endmacro
+%endif
+
 ;; @def TNOT64
 ; Version of BNOT64 that follows the code template.
 ; Like BNOT64 this normally goes in column 1.
@@ -481,6 +505,7 @@
  %endmacro
 %endif
 
+
 ;
 ; Default code segment (changes BITS too).
 ;
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit.h b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit.h
index 2d31204..1667d09 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit.h
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit.h
@@ -37,6 +37,33 @@
 # undef  IN_RING0
 #endif
 
+/*
+ * Make asm.h and friend compatible with our 64-bit assembly config (ASM_CALL64_MSC).
+ */
+#if defined(__GNUC__) && ARCH_BITS == 64
+# undef DECLASM
+# ifdef __cplusplus
+#  define DECLASM(type)             extern "C" type BS3_CALL
+# else
+#  define DECLASM(type)             type BS3_CALL
+# endif
+#endif
+
+
+/*
+ * Work around ms_abi trouble in the gcc camp (gcc bugzilla #50818).
+ * ASSUMES all va_lists are in functions with
+ */
+#if defined(__GNUC__) && ARCH_BITS == 64
+# undef  va_list
+# undef  va_start
+# undef  va_end
+# undef  va_copy
+# define va_list                    __builtin_ms_va_list
+# define va_start(a_Va, a_Arg)      __builtin_ms_va_start(a_Va, a_Arg)
+# define va_end(a_Va)               __builtin_ms_va_end(a_Va)
+# define va_copy(a_DstVa, a_SrcVa)  __builtin_ms_va_copy(a_DstVa, a_SrcVa)
+#endif
 
 
 /** @def BS3_USE_ALT_16BIT_TEXT_SEG
@@ -44,7 +71,7 @@
  * Combines the BS3_USE_RM_TEXT_SEG,  BS3_USE_X0_TEXT_SEG, and
  * BS3_USE_X1_TEXT_SEG indicators into a single one.
  */
-#if defined(BS3_USE_RM_TEXT_SEG) || defined(BS3_USE_X0_TEXT_SEG) || defined(BS3_USE_X1_TEXT_SEG)
+#if defined(BS3_USE_RM_TEXT_SEG) || defined(BS3_USE_X0_TEXT_SEG) || defined(BS3_USE_X1_TEXT_SEG) || defined(DOXYGEN_RUNNING)
 # define BS3_USE_ALT_16BIT_TEXT_SEG
 #else
 # undef  BS3_USE_ALT_16BIT_TEXT_SEG
@@ -228,6 +255,10 @@ RT_C_DECLS_BEGIN
 /** Whether the system is in long mode. */
 #define BS3_MODE_IS_64BIT_SYS(a_fMode)          (((a_fMode) & BS3_MODE_SYS_MASK) == BS3_MODE_SYS_LM)
 
+/** Whether the system is in protected mode (with or without paging).
+ * @note Long mode is not included. */
+#define BS3_MODE_IS_PM_SYS(a_fMode)             ((a_fMode) >= BS3_MODE_SYS_PE16 && (a_fMode) < BS3_MODE_SYS_LM)
+
 /** @todo testcase: How would long-mode handle a 16-bit TSS loaded prior to the switch? (mainly stack switching wise) Hopefully, it will triple fault, right? */
 /** @} */
 
@@ -538,7 +569,7 @@ RT_C_DECLS_BEGIN
 # define BS3_FP_OFF(a_pv)            ((uintptr_t)(a_pv))
 #endif
 
-/** @def BS3_MAKE_PROT_PTR_FROM_FLAT
+/** @def BS3_MAKE_PROT_R0PTR_FROM_FLAT
  * Creates a protected mode pointer from a flat address.
  *
  * For sake of convenience, this macro also works in 32-bit and 64-bit mode,
@@ -694,7 +725,7 @@ RT_C_DECLS_BEGIN
  * Example: @code{.c}
  *  \#define Bs3Gdt BS3_DATA_NM(Bs3Gdt)
  *  extern X86DESC BS3_FAR_DATA Bs3Gdt
-f * @endcode
+ * @endcode
  *
  * @param   a_Name      The name of the global variable.
  * @remarks Mainly used in bs3kit-mangling.h, internal headers and templates.
@@ -706,9 +737,9 @@ f * @endcode
 //converter does this now//#endif
 
 /**
- * Template for createing a pointer union type.
+ * Template for creating a pointer union type.
  * @param   a_BaseName      The base type name.
- * @param   a_Modifier      The type modifier.
+ * @param   a_Modifiers     The type modifier.
  */
 #define BS3_PTR_UNION_TEMPLATE(a_BaseName, a_Modifiers) \
     typedef union a_BaseName \
@@ -1009,7 +1040,7 @@ extern X86XDTR64 BS3_FAR_DATA Bs3Lidt_Idt32;
 /** Structure for the LIDT instruction for loading the 64-bit IDT. */
 extern X86XDTR64 BS3_FAR_DATA Bs3Lidt_Idt64;
 /** Structure for the LIDT instruction for loading the real mode interrupt
- *  vector table.. */
+ *  vector table. */
 extern X86XDTR64 BS3_FAR_DATA Bs3Lidt_Ivt;
 /** Structure for the LGDT instruction for loading the current GDT. */
 extern X86XDTR64 BS3_FAR_DATA Bs3Lgdt_Gdt;
@@ -1189,7 +1220,7 @@ AssertCompileSize(BS3XPTR, 4);
 # error "ARCH_BITS"
 #endif
 
-/** @def BS3_XPTR_DEF_MEMBER
+/** @def BS3_XPTR_MEMBER
  * Defines a pointer member that can be shared by all CPU modes.
  *
  * @param   a_Type      The type we're pointing to.
@@ -1197,7 +1228,7 @@ AssertCompileSize(BS3XPTR, 4);
  */
 #define BS3_XPTR_MEMBER(a_Type, a_Name) BS3_XPTR_DEF_INTERNAL(RT_NOTHING, a_Type, a_Name)
 
-/** @def BS3_XPTR_DEF_AUTO
+/** @def BS3_XPTR_AUTO
  * Defines a pointer static variable for working with an XPTR.
  *
  * This is typically used to convert flat pointers into context specific
@@ -1208,7 +1239,7 @@ AssertCompileSize(BS3XPTR, 4);
  */
 #define BS3_XPTR_AUTO(a_Type, a_Name) BS3_XPTR_DEF_INTERNAL(RT_NOTHING, a_Type, a_Name)
 
-/** @def BS3_XPTR_SET
+/** @def BS3_XPTR_SET_FLAT
  * Sets a cross context pointer.
  *
  * @param   a_Type      The type we're pointing to.
@@ -1377,7 +1408,7 @@ DECLINLINE(void BS3_FAR *) Bs3XptrFlatToCurrent(RTCCUINTXREG uFlatPtr)
     BS3_DECL_NEAR(a_RetType) BS3_CMN_NM(a_Name) a_Params
 #endif
 
-/** @BS3_CMN_PROTO_STUB
+/** @def BS3_CMN_PROTO_STUB
  * Macro for prototyping all the variations of a common function with automatic
  * near -> far stub.
  *
@@ -1388,7 +1419,7 @@ DECLINLINE(void BS3_FAR *) Bs3XptrFlatToCurrent(RTCCUINTXREG uFlatPtr)
  */
 #define BS3_CMN_PROTO_STUB(a_RetType, a_Name, a_Params) BS3_CMN_PROTO_INT(a_RetType, a_Name, a_Params)
 
-/** @BS3_CMN_PROTO_NOSB
+/** @def BS3_CMN_PROTO_NOSB
  * Macro for prototyping all the variations of a common function without any
  * near > far stub.
  *
@@ -1399,7 +1430,7 @@ DECLINLINE(void BS3_FAR *) Bs3XptrFlatToCurrent(RTCCUINTXREG uFlatPtr)
  */
 #define BS3_CMN_PROTO_NOSB(a_RetType, a_Name, a_Params) BS3_CMN_PROTO_INT(a_RetType, a_Name, a_Params)
 
-/** @BS3_CMN_PROTO_FRST
+/** @def BS3_CMN_PROTO_FARSTUB
  * Macro for prototyping all the variations of a common function with automatic
  * far -> near stub.
  *
@@ -1462,6 +1493,33 @@ BS3_CMN_PROTO_NOSB(DECL_NO_RETURN(void), Bs3Panic,(void));
 BS3_CMN_PROTO_STUB(const char BS3_FAR *, Bs3GetModeName,(uint8_t bMode));
 
 /**
+ * Translates a mode into a short lower case string.
+ *
+ * @returns Pointer to read-only short mode name string.
+ * @param   bMode       The mode value (BS3_MODE_XXX).
+ */
+BS3_CMN_PROTO_STUB(const char BS3_FAR *, Bs3GetModeNameShortLower,(uint8_t bMode));
+
+/** CPU vendors. */
+typedef enum BS3CPUVENDOR
+{
+    BS3CPUVENDOR_INVALID = 0,
+    BS3CPUVENDOR_INTEL,
+    BS3CPUVENDOR_AMD,
+    BS3CPUVENDOR_VIA,
+    BS3CPUVENDOR_CYRIX,
+    BS3CPUVENDOR_UNKNOWN,
+    BS3CPUVENDOR_END
+} BS3CPUVENDOR;
+
+/**
+ * Tries to detect the CPU vendor.
+ *
+ * @returns CPU vendor.
+ */
+BS3_CMN_PROTO_STUB(BS3CPUVENDOR, Bs3GetCpuVendor,(void));
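+
+/* A minimal usage sketch (illustrative only; the message text is made up):
+ * @code{.c}
+ *  if (Bs3GetCpuVendor() == BS3CPUVENDOR_AMD)
+ *      Bs3TestPrintf("Applying AMD specific checks\n");
+ * @endcode
+ */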
+
+/**
  * Shuts down the system; never returns.
  *
  * This currently only works for VMs.  When running on real systems it will
@@ -1486,7 +1544,7 @@ BS3_CMN_PROTO_NOSB(void, Bs3PrintX32,(uint32_t uValue));
 /**
  * Formats and prints a string to the screen.
  *
- * See #Bs3StrFormatV_c16 for supported format types.
+ * See #Bs3StrFormatV for supported format types.
  *
  * @param   pszFormat       The format string.
  * @param   ...             Format arguments.
@@ -1496,7 +1554,7 @@ BS3_CMN_PROTO_STUB(size_t, Bs3Printf,(const char BS3_FAR *pszFormat, ...));
 /**
  * Formats and prints a string to the screen, va_list version.
  *
- * See #Bs3Format_c16 for supported format types.
+ * See #Bs3StrFormatV for supported format types.
  *
  * @param   pszFormat       The format string.
  * @param   va              Format arguments.
@@ -1513,7 +1571,7 @@ BS3_CMN_PROTO_STUB(void, Bs3PrintStr,(const char BS3_FAR *pszString));
 /**
  * Prints a string to the screen.
  *
- * @param   pchString       The string to print.  Any terminator charss will be printed.
+ * @param   pszString       The string to print.  Any terminator chars will be printed.
  * @param   cchString       The exact number of characters to print.
  */
 BS3_CMN_PROTO_NOSB(void, Bs3PrintStrN,(const char BS3_FAR *pszString, size_t cchString));
@@ -1563,7 +1621,7 @@ BS3_CMN_PROTO_STUB(size_t, Bs3StrFormatV,(const char BS3_FAR *pszFormat, va_list
 /**
  * Formats a string into a buffer.
  *
- * See #Bs3Format_c16 for supported format types.
+ * See #Bs3StrFormatV for supported format types.
  *
  * @returns The length of the formatted string (excluding terminator).
  *          This will be higher or equal to @c cbBuf in case of an overflow.
@@ -1577,7 +1635,7 @@ BS3_CMN_PROTO_STUB(size_t, Bs3StrPrintfV,(char BS3_FAR *pszBuf, size_t cbBuf, co
 /**
  * Formats a string into a buffer.
  *
- * See #Bs3Format_c16 for supported format types.
+ * See #Bs3StrFormatV for supported format types.
  *
  * @returns The length of the formatted string (excluding terminator).
  *          This will be higher or equal to @c cbBuf in case of an overflow.
@@ -1623,7 +1681,7 @@ BS3_CMN_PROTO_STUB(char BS3_FAR *, Bs3StrCpy,(char BS3_FAR *pszDst, const char B
  * @returns pvDst
  * @param   pvDst           The destination buffer.
  * @param   pvSrc           The source buffer.
- * @param   cbCopy          The number of bytes to copy.
+ * @param   cbToCopy        The number of bytes to copy.
  */
 BS3_CMN_PROTO_STUB(void BS3_FAR *, Bs3MemCpy,(void BS3_FAR *pvDst, const void BS3_FAR *pvSrc, size_t cbToCopy));
 
@@ -1633,7 +1691,7 @@ BS3_CMN_PROTO_STUB(void BS3_FAR *, Bs3MemCpy,(void BS3_FAR *pvDst, const void BS
  * @returns pvDst + cbCopy
  * @param   pvDst           The destination buffer.
  * @param   pvSrc           The source buffer.
- * @param   cbCopy          The number of bytes to copy.
+ * @param   cbToCopy        The number of bytes to copy.
  */
 BS3_CMN_PROTO_STUB(void BS3_FAR *, Bs3MemPCpy,(void BS3_FAR *pvDst, const void BS3_FAR *pvSrc, size_t cbToCopy));
 
@@ -1643,7 +1701,7 @@ BS3_CMN_PROTO_STUB(void BS3_FAR *, Bs3MemPCpy,(void BS3_FAR *pvDst, const void B
  * @returns pvDst
  * @param   pvDst           The destination buffer.
  * @param   pvSrc           The source buffer.
- * @param   cbCopy          The number of bytes to copy.
+ * @param   cbToCopy        The number of bytes to copy.
  */
 BS3_CMN_PROTO_STUB(void BS3_FAR *, Bs3MemMove,(void BS3_FAR *pvDst, const void BS3_FAR *pvSrc, size_t cbToCopy));
 
@@ -1680,7 +1738,7 @@ BS3_CMN_PROTO_NOSB(void BS3_FAR *, Bs3MemChr,(void const BS3_FAR *pvHaystack, ui
  *          side, and positive in the other case.
  * @param   pv1             The left hand memory.
  * @param   pv2             The right hand memory.
- * @param   bNeedle         The number of bytes to compare.
+ * @param   cb              The number of bytes to compare.
  */
 BS3_CMN_PROTO_NOSB(int, Bs3MemCmp,(void const BS3_FAR *pv1, void const BS3_FAR *pv2, size_t cb));
 
@@ -1847,6 +1905,23 @@ DECLINLINE(RTCCUINTXREG) Bs3SelPtrToFlat(void BS3_FAR *pv)
 #endif
 }
 
+/**
+ * Sets up a 16-bit read-write data selector with ring-3 access and 64KB limit.
+ *
+ * @param   pDesc       Pointer to the descriptor table entry.
+ * @param   uBaseAddr   The base address of the descriptor.
+ */
+BS3_CMN_PROTO_STUB(void, Bs3SelSetup16BitData,(X86DESC BS3_FAR *pDesc, uint32_t uBaseAddr));
+
+/**
+ * Sets up a 16-bit execute-read selector with a 64KB limit.
+ *
+ * @param   pDesc       Pointer to the descriptor table entry.
+ * @param   uBaseAddr   The base address of the descriptor.
+ * @param   bDpl        The descriptor privilege level.
+ */
+BS3_CMN_PROTO_STUB(void, Bs3SelSetup16BitCode,(X86DESC BS3_FAR *pDesc, uint32_t uBaseAddr, uint8_t bDpl));
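+
+/* Illustrative sketch, assuming paDesc points into a writable descriptor table
+ * and the uFlat* base addresses are caller supplied:
+ * @code{.c}
+ *  Bs3SelSetup16BitData(&paDesc[0], uFlatDataAddr);     // ring-3 RW data, 64KB
+ *  Bs3SelSetup16BitCode(&paDesc[1], uFlatCodeAddr, 0);  // DPL-0 execute-read
+ * @endcode
+ */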
+
 
 /**
  * Slab control structure list head.
@@ -2067,6 +2142,11 @@ BS3_CMN_PROTO_STUB(void BS3_FAR *, Bs3MemGuardedTestPageAllocEx,(BS3MEMKIND enmK
  */
 BS3_CMN_PROTO_STUB(void, Bs3MemGuardedTestPageFree,(void BS3_FAR *pvGuardedPage));
 
+/**
+ * Prints all heap info.
+ */
+BS3_CMN_PROTO_STUB(void, Bs3MemPrintInfo,(void));
+
 /** Highest RAM byte below 4G. */
 extern uint32_t  g_uBs3EndOfRamBelow4G;
 
@@ -2201,6 +2281,64 @@ BS3_CMN_PROTO_STUB(int, Bs3PagingAlias,(uint64_t uDst, uint64_t uPhysToAlias, ui
  */
 BS3_CMN_PROTO_STUB(int, Bs3PagingUnalias,(uint64_t uDst, uint32_t cbHowMuch));
 
+/**
+ * Gets the pointer to the PTE for the given address.
+ *
+ * @returns Pointer to the PTE.
+ * @param   uFlat               The flat address of the page whose PTE we want.
+ * @param   prc                 Where to return additional error info. Optional.
+ */
+BS3_CMN_PROTO_STUB(void BS3_FAR *, Bs3PagingGetPte,(uint64_t uFlat, int *prc));
+
+/**
+ * Paging information for an address.
+ */
+typedef struct BS3PAGINGINFO4ADDR
+{
+    /** The depth of the system's paging mode.
+     * This is always 2 for legacy, 3 for PAE and 4 for long mode. */
+    uint8_t             cEntries;
+    /** The size of the page structure entries. */
+    uint8_t             cbEntry;
+    /** Flags defined for future fun, currently zero. */
+    uint16_t            fFlags;
+    /** Union displaying different views of the entry pointers. */
+    union
+    {
+        /** Pointer to the page structure entries, starting with the PTE as 0.
+         * If large pages are involved, the first entry will be NULL (first two if 1GB
+         * page).  Same if the address is invalid on a higher level. */
+        uint8_t BS3_FAR    *apbEntries[4];
+        /** Alternative view for legacy mode. */
+        struct
+        {
+            X86PTE BS3_FAR *pPte;
+            X86PDE BS3_FAR *pPde;
+            void           *pvUnused2;
+            void           *pvUnused3;
+        } Legacy;
+        /** Alternative view for PAE and Long mode. */
+        struct
+        {
+            X86PTEPAE BS3_FAR *pPte;
+            X86PDEPAE BS3_FAR *pPde;
+            X86PDPE   BS3_FAR *pPdpe;
+            X86PML4E  BS3_FAR *pPml4e;
+        } Pae;
+    } u;
+} BS3PAGINGINFO4ADDR;
+/** Pointer to paging information for an address. */
+typedef BS3PAGINGINFO4ADDR BS3_FAR *PBS3PAGINGINFO4ADDR;
+
+/**
+ * Queries paging information about the given virtual address.
+ *
+ * @returns VBox status code.
+ * @param   uFlat               The flat address to query information about.
+ * @param   pPgInfo             Where to return the information.
+ */
+BS3_CMN_PROTO_STUB(int, Bs3PagingQueryAddressInfo,(uint64_t uFlat, PBS3PAGINGINFO4ADDR pPgInfo));
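+
+/* Illustrative sketch: check whether a page is present, assuming cbEntry
+ * distinguishes legacy (4 byte) from PAE/LM (8 byte) entries as described
+ * above (fPresent is a local bool; NULL checks for large pages omitted):
+ * @code{.c}
+ *  BS3PAGINGINFO4ADDR PgInfo;
+ *  if (RT_SUCCESS(Bs3PagingQueryAddressInfo(uFlat, &PgInfo)))
+ *      fPresent = PgInfo.cbEntry == sizeof(X86PTE)
+ *               ? PgInfo.u.Legacy.pPte->n.u1Present
+ *               : PgInfo.u.Pae.pPte->n.u1Present;
+ * @endcode
+ */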
+
 
 /** The physical / flat address of the buffer backing the canonical traps.
  * This buffer is spread equally on each side of the 64-bit non-canonical
@@ -2312,10 +2450,11 @@ extern uint16_t             g_cBs3PitIntervalHz;
  * @a fpfnCall with @a cbParams bytes of parameters pushed on the stack.
  * Afterwards it switches back to v8086 mode and returns a 16-bit status code.
  *
- * @returns     16-bit status code if the function returned anything.
- * @param       fpfnCall        Far real mode pointer to the function to call.
- * @param       cbParams        The size of the parameter list, in bytes.
- * @param       ...             The parameters.
+ * @returns 16-bit status code if the function returned anything.
+ * @param   fpfnCall        Far real mode pointer to the function to call.
+ * @param   cbParams        The size of the parameter list, in bytes.
+ * @param   ...             The parameters.
+ * @sa Bs3SwitchTo32BitAndCallC
  */
 BS3_CMN_PROTO_STUB(int, Bs3SwitchFromV86To16BitAndCallC,(FPFNBS3FAR fpfnCall, unsigned cbParams, ...));
 
@@ -2539,6 +2678,125 @@ BS3_CMN_PROTO_STUB(void, Bs3RegCtxSetRipCsFromCurPtr,(PBS3REGCTX pRegCtx, FPFNBS
 
 
 /**
+ * The method to be used to save and restore the extended context.
+ */
+typedef enum BS3EXTCTXMETHOD
+{
+    BS3EXTCTXMETHOD_INVALID = 0,
+    BS3EXTCTXMETHOD_ANCIENT,    /**< Ancient fnsave/frstor format. */
+    BS3EXTCTXMETHOD_FXSAVE,     /**< fxsave/fxrstor format. */
+    BS3EXTCTXMETHOD_XSAVE,      /**< xsave/xrstor format. */
+    BS3EXTCTXMETHOD_END,
+} BS3EXTCTXMETHOD;
+
+
+/**
+ * Extended CPU context (FPU, SSE, AVX, ++).
+ *
+ * @remarks Also in bs3kit.inc
+ */
+typedef struct BS3EXTCTX
+{
+    /** Dummy/magic value. */
+    uint16_t            u16Magic;
+    /** The size of the structure. */
+    uint16_t            cb;
+    /** The method used to save and restore the context (BS3EXTCTXMETHOD). */
+    uint8_t             enmMethod;
+    uint8_t             abPadding0[3];
+    /** Nominal XSAVE_C_XXX. */
+    uint64_t            fXcr0Nominal;
+    /** The saved XCR0 mask (restored after xrstor).  */
+    uint64_t            fXcr0Saved;
+
+    /** Explicit alignment padding. */
+    uint8_t             abPadding[64 - 2 - 2 - 1 - 3 - 8 - 8];
+
+    /** The context, variable size (see above).
+     * This must be aligned on a 64 byte boundary. */
+    union
+    {
+        /** fnsave/frstor. */
+        X86FPUSTATE     Ancient;
+        /** fxsave/fxrstor   */
+        X86FXSTATE      x87;
+        /** xsave/xrstor   */
+        X86XSAVEAREA    x;
+        /** Byte array view. */
+        uint8_t         ab[sizeof(X86XSAVEAREA)];
+    } Ctx;
+} BS3EXTCTX;
+AssertCompileMemberAlignment(BS3EXTCTX, Ctx, 64);
+/** Pointer to an extended CPU context. */
+typedef BS3EXTCTX BS3_FAR *PBS3EXTCTX;
+/** Pointer to a const extended CPU context. */
+typedef BS3EXTCTX const BS3_FAR *PCBS3EXTCTX;
+
+/** Magic value for BS3EXTCTX. */
+#define BS3EXTCTX_MAGIC     UINT16_C(0x1980)
+
+/**
+ * Allocates and initializes the extended CPU context structure.
+ *
+ * @returns The new extended CPU context structure.
+ * @param   enmKind         The kind of allocation to make.
+ */
+BS3_CMN_PROTO_STUB(PBS3EXTCTX, Bs3ExtCtxAlloc,(BS3MEMKIND enmKind));
+
+/**
+ * Frees an extended CPU context structure.
+ *
+ * @param   pExtCtx         The extended CPU context (returned by
+ *                          Bs3ExtCtxAlloc).
+ */
+BS3_CMN_PROTO_STUB(void,       Bs3ExtCtxFree,(PBS3EXTCTX pExtCtx));
+
+/**
+ * Gets the size required for a BS3EXTCTX structure.
+ *
+ * @returns The size of the whole structure, in bytes.
+ * @param   pfFlags         Where to return flags for Bs3ExtCtxInit.
+ * @note    Use Bs3ExtCtxAlloc when possible.
+ */
+BS3_CMN_PROTO_STUB(uint16_t,   Bs3ExtCtxGetSize,(uint64_t *pfFlags));
+
+/**
+ * Initializes the extended CPU context structure.
+ * @returns pExtCtx
+ * @param   pExtCtx         The extended CPU context.
+ * @param   cbExtCtx        The size of the @a pExtCtx allocation.
+ * @param   fFlags          XSAVE_C_XXX flags.
+ */
+BS3_CMN_PROTO_STUB(PBS3EXTCTX, Bs3ExtCtxInit,(PBS3EXTCTX pExtCtx, uint16_t cbExtCtx, uint64_t fFlags));
+
+/**
+ * Saves the extended CPU state to the given structure.
+ *
+ * @param   pExtCtx         The extended CPU context.
+ * @remarks All GPRs preserved.
+ */
+BS3_CMN_PROTO_FARSTUB(4, void, Bs3ExtCtxSave,(PBS3EXTCTX pExtCtx));
+
+/**
+ * Restores the extended CPU state from the given structure.
+ *
+ * @param   pExtCtx         The extended CPU context.
+ * @remarks All GPRs preserved.
+ */
+BS3_CMN_PROTO_FARSTUB(4, void, Bs3ExtCtxRestore,(PBS3EXTCTX pExtCtx));
+
+/**
+ * Copies the state from one context to another.
+ *
+ * @returns pDst
+ * @param   pDst            The destination extended CPU context.
+ * @param   pSrc            The source extended CPU context.
+ */
+BS3_CMN_PROTO_STUB(PBS3EXTCTX, Bs3ExtCtxCopy,(PBS3EXTCTX pDst, PCBS3EXTCTX pSrc));
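+
+/* Illustrative save/modify/restore round trip using the API above (the memory
+ * kind and the elided middle are assumptions):
+ * @code{.c}
+ *  PBS3EXTCTX pExtCtx = Bs3ExtCtxAlloc(BS3MEMKIND_TILED);
+ *  if (pExtCtx)
+ *  {
+ *      Bs3ExtCtxSave(pExtCtx);
+ *      // ... clobber FPU/SSE/AVX state here ...
+ *      Bs3ExtCtxRestore(pExtCtx);
+ *      Bs3ExtCtxFree(pExtCtx);
+ *  }
+ * @endcode
+ */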
+
+
+
+/**
  * Trap frame.
  */
 typedef struct BS3TRAPFRAME
@@ -2640,7 +2898,7 @@ BS3_CMN_PROTO_STUB(void, Bs3Trap64Init,(void));
  * Modifies the real-mode / V86 IVT entry specified by @a iIvt.
  *
  * @param   iIvt        The index of the IDT entry to set.
- * @param   uSel        The handler real-mode segment.
+ * @param   uSeg        The handler real-mode segment.
  * @param   off         The handler offset.
  */
 BS3_CMN_PROTO_STUB(void, Bs3TrapRmV86SetGate,(uint8_t iIvt, uint16_t uSeg, uint16_t off));
@@ -2879,18 +3137,21 @@ BS3_CMN_PROTO_STUB(void, Bs3TestPrintfV,(const char BS3_FAR *pszFormat, va_list
 
 /**
  * Equivalent to RTTestIFailed.
+ * @returns false.
  */
-BS3_CMN_PROTO_STUB(void, Bs3TestFailed,(const char BS3_FAR *pszMessage));
+BS3_CMN_PROTO_STUB(bool, Bs3TestFailed,(const char BS3_FAR *pszMessage));
 
 /**
  * Equivalent to RTTestIFailedF.
+ * @returns false.
  */
-BS3_CMN_PROTO_STUB(void, Bs3TestFailedF,(const char BS3_FAR *pszFormat, ...));
+BS3_CMN_PROTO_STUB(bool, Bs3TestFailedF,(const char BS3_FAR *pszFormat, ...));
 
 /**
  * Equivalent to RTTestIFailedV.
+ * @returns false.
  */
-BS3_CMN_PROTO_STUB(void, Bs3TestFailedV,(const char BS3_FAR *pszFormat, va_list va));
+BS3_CMN_PROTO_STUB(bool, Bs3TestFailedV,(const char BS3_FAR *pszFormat, va_list va));
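+
+/* The bool return lets a check helper fail and bail out in one statement
+ * (illustrative; the variables and message are made up):
+ * @code{.c}
+ *  if (uGot != uExpected)
+ *      return Bs3TestFailed("value mismatch");
+ * @endcode
+ */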
 
 /**
  * Equivalent to RTTestISkipped.
@@ -2961,6 +3222,7 @@ typedef FNBS3TESTDOMODE            *PFNBS3TESTDOMODE;
  */
 typedef struct BS3TESTMODEENTRY
 {
+    /** The sub-test name to be passed to Bs3TestSub if not NULL. */
     const char * BS3_FAR    pszSubTest;
 
     PFNBS3TESTDOMODE        pfnDoRM;
@@ -3030,6 +3292,39 @@ typedef BS3TESTMODEENTRY const *PCBS3TESTMODEENTRY;
     FNBS3TESTDOMODE /*BS3_FAR_CODE*/    RT_CONCAT(a_BaseNm, _c32); \
     FNBS3TESTDOMODE /*BS3_FAR_CODE*/    RT_CONCAT(a_BaseNm, _c64)
 
+/** @def BS3TESTMODEENTRY_CMN_64
+ * Produces a BS3TESTMODEENTRY initializer for common 64-bit test functions. */
+#define BS3TESTMODEENTRY_CMN_64(a_szTest, a_BaseNm) \
+    {   /*pszSubTest =*/ a_szTest, \
+        /*RM*/        NULL, \
+        /*PE16*/      NULL, \
+        /*PE16_32*/   NULL, \
+        /*PE16_V86*/  NULL, \
+        /*PE32*/      NULL, \
+        /*PE32_16*/   NULL, \
+        /*PEV86*/     NULL, \
+        /*PP16*/      NULL, \
+        /*PP16_32*/   NULL, \
+        /*PP16_V86*/  NULL, \
+        /*PP32*/      NULL, \
+        /*PP32_16*/   NULL, \
+        /*PPV86*/     NULL, \
+        /*PAE16*/     NULL, \
+        /*PAE16_32*/  NULL, \
+        /*PAE16_V86*/ NULL, \
+        /*PAE32*/     NULL, \
+        /*PAE32_16*/  NULL, \
+        /*PAEV86*/    NULL, \
+        /*LM16*/      NULL, \
+        /*LM32*/      NULL, \
+        /*LM64*/      RT_CONCAT(a_BaseNm, _c64), \
+    }
+
+/** @def BS3TESTMODE_PROTOTYPES_CMN_64
+ * Standard prototype to go with #BS3TESTMODEENTRY_CMN_64. */
+#define BS3TESTMODE_PROTOTYPES_CMN_64(a_BaseNm) \
+    FNBS3TESTDOMODE /*BS3_FAR_CODE*/    RT_CONCAT(a_BaseNm, _c64)
+
 /** @def BS3TESTMODEENTRY_MODE
  * Produces a BS3TESTMODEENTRY initializer for a full set of mode test
  * functions. */
@@ -3087,6 +3382,162 @@ typedef BS3TESTMODEENTRY const *PCBS3TESTMODEENTRY;
 
 
 /**
+ * Mode sub-test entry, max bit-count driven.
+ *
+ * This is an alternative to BS3TESTMODEENTRY where a few workers (test drivers)
+ * do all the work, using faster 32-bit and 64-bit code where possible.  This
+ * avoids executing workers in V8086 mode.  It allows for modifying and checking
+ * 64-bit register content when testing LM16 and LM32.
+ *
+ * The 16-bit workers are only used for real mode and 16-bit protected mode.
+ * So, the 16-bit version of the code template can be stripped of anything
+ * related to paging and/or v8086, saving code space.
+ */
+typedef struct BS3TESTMODEBYMAXENTRY
+{
+    /** The sub-test name to be passed to Bs3TestSub if not NULL. */
+    const char * BS3_FAR    pszSubTest;
+
+    PFNBS3TESTDOMODE        pfnDoRM;
+    PFNBS3TESTDOMODE        pfnDoPE16;
+    PFNBS3TESTDOMODE        pfnDoPE16_32;
+    PFNBS3TESTDOMODE        pfnDoPE32;
+    PFNBS3TESTDOMODE        pfnDoPP16_32;
+    PFNBS3TESTDOMODE        pfnDoPP32;
+    PFNBS3TESTDOMODE        pfnDoPAE16_32;
+    PFNBS3TESTDOMODE        pfnDoPAE32;
+    PFNBS3TESTDOMODE        pfnDoLM64;
+
+    bool                    fDoRM : 1;
+
+    bool                    fDoPE16 : 1;
+    bool                    fDoPE16_32 : 1;
+    bool                    fDoPE16_V86 : 1;
+    bool                    fDoPE32 : 1;
+    bool                    fDoPE32_16 : 1;
+    bool                    fDoPEV86 : 1;
+
+    bool                    fDoPP16 : 1;
+    bool                    fDoPP16_32 : 1;
+    bool                    fDoPP16_V86 : 1;
+    bool                    fDoPP32 : 1;
+    bool                    fDoPP32_16 : 1;
+    bool                    fDoPPV86 : 1;
+
+    bool                    fDoPAE16 : 1;
+    bool                    fDoPAE16_32 : 1;
+    bool                    fDoPAE16_V86 : 1;
+    bool                    fDoPAE32 : 1;
+    bool                    fDoPAE32_16 : 1;
+    bool                    fDoPAEV86 : 1;
+
+    bool                    fDoLM16 : 1;
+    bool                    fDoLM32 : 1;
+    bool                    fDoLM64 : 1;
+
+} BS3TESTMODEBYMAXENTRY;
+/** Pointer to a mode-by-max sub-test entry. */
+typedef BS3TESTMODEBYMAXENTRY const *PCBS3TESTMODEBYMAXENTRY;
+
+/** @def BS3TESTMODEBYMAXENTRY_CMN
+ * Produces a BS3TESTMODEBYMAXENTRY initializer for common (c16,c32,c64) test
+ * functions. */
+#define BS3TESTMODEBYMAXENTRY_CMN(a_szTest, a_BaseNm) \
+    {   /*pszSubTest =*/    a_szTest, \
+        /*RM*/              RT_CONCAT(a_BaseNm, _c16), \
+        /*PE16*/            RT_CONCAT(a_BaseNm, _c16), \
+        /*PE16_32*/         RT_CONCAT(a_BaseNm, _c32), \
+        /*PE32*/            RT_CONCAT(a_BaseNm, _c32), \
+        /*PP16_32*/         RT_CONCAT(a_BaseNm, _c32), \
+        /*PP32*/            RT_CONCAT(a_BaseNm, _c32), \
+        /*PAE16_32*/        RT_CONCAT(a_BaseNm, _c32), \
+        /*PAE32*/           RT_CONCAT(a_BaseNm, _c32), \
+        /*LM64*/            RT_CONCAT(a_BaseNm, _c64), \
+        /*fDoRM*/           true, \
+        /*fDoPE16*/         true, \
+        /*fDoPE16_32*/      true, \
+        /*fDoPE16_V86*/     true, \
+        /*fDoPE32*/         true, \
+        /*fDoPE32_16*/      true, \
+        /*fDoPEV86*/        true, \
+        /*fDoPP16*/         true, \
+        /*fDoPP16_32*/      true, \
+        /*fDoPP16_V86*/     true, \
+        /*fDoPP32*/         true, \
+        /*fDoPP32_16*/      true, \
+        /*fDoPPV86*/        true, \
+        /*fDoPAE16*/        true, \
+        /*fDoPAE16_32*/     true, \
+        /*fDoPAE16_V86*/    true, \
+        /*fDoPAE32*/        true, \
+        /*fDoPAE32_16*/     true, \
+        /*fDoPAEV86*/       true, \
+        /*fDoLM16*/         true, \
+        /*fDoLM32*/         true, \
+        /*fDoLM64*/         true, \
+    }
+
+/** @def BS3TESTMODEBYMAX_PROTOTYPES_CMN
+ * A set of standard prototypes to go with #BS3TESTMODEBYMAXENTRY_CMN. */
+#define BS3TESTMODEBYMAX_PROTOTYPES_CMN(a_BaseNm) \
+    FNBS3TESTDOMODE /*BS3_FAR_CODE*/    RT_CONCAT(a_BaseNm, _c16); \
+    FNBS3TESTDOMODE /*BS3_FAR_CODE*/    RT_CONCAT(a_BaseNm, _c32); \
+    FNBS3TESTDOMODE /*BS3_FAR_CODE*/    RT_CONCAT(a_BaseNm, _c64)
+
+
+/** @def BS3TESTMODEBYMAXENTRY_MODE
+ * Produces a BS3TESTMODEBYMAXENTRY initializer for a full set of mode test
+ * functions. */
+#define BS3TESTMODEBYMAXENTRY_MODE(a_szTest, a_BaseNm) \
+    {   /*pszSubTest =*/ a_szTest, \
+        /*RM*/              RT_CONCAT(a_BaseNm, _rm), \
+        /*PE16*/            RT_CONCAT(a_BaseNm, _pe16), \
+        /*PE16_32*/         RT_CONCAT(a_BaseNm, _pe16_32), \
+        /*PE32*/            RT_CONCAT(a_BaseNm, _pe32), \
+        /*PP16_32*/         RT_CONCAT(a_BaseNm, _pp16_32), \
+        /*PP32*/            RT_CONCAT(a_BaseNm, _pp32), \
+        /*PAE16_32*/        RT_CONCAT(a_BaseNm, _pae16_32), \
+        /*PAE32*/           RT_CONCAT(a_BaseNm, _pae32), \
+        /*LM64*/            RT_CONCAT(a_BaseNm, _lm64), \
+        /*fDoRM*/           true, \
+        /*fDoPE16*/         true, \
+        /*fDoPE16_32*/      true, \
+        /*fDoPE16_V86*/     true, \
+        /*fDoPE32*/         true, \
+        /*fDoPE32_16*/      true, \
+        /*fDoPEV86*/        true, \
+        /*fDoPP16*/         true, \
+        /*fDoPP16_32*/      true, \
+        /*fDoPP16_V86*/     true, \
+        /*fDoPP32*/         true, \
+        /*fDoPP32_16*/      true, \
+        /*fDoPPV86*/        true, \
+        /*fDoPAE16*/        true, \
+        /*fDoPAE16_32*/     true, \
+        /*fDoPAE16_V86*/    true, \
+        /*fDoPAE32*/        true, \
+        /*fDoPAE32_16*/     true, \
+        /*fDoPAEV86*/       true, \
+        /*fDoLM16*/         true, \
+        /*fDoLM32*/         true, \
+        /*fDoLM64*/         true, \
+    }
+
+/** @def BS3TESTMODEBYMAX_PROTOTYPES_MODE
+ * A set of standard prototypes to go with #BS3TESTMODEBYMAXENTRY_MODE. */
+#define BS3TESTMODEBYMAX_PROTOTYPES_MODE(a_BaseNm) \
+    FNBS3TESTDOMODE   RT_CONCAT(a_BaseNm, _rm); \
+    FNBS3TESTDOMODE   RT_CONCAT(a_BaseNm, _pe16); \
+    FNBS3TESTDOMODE   RT_CONCAT(a_BaseNm, _pe16_32); \
+    FNBS3TESTDOMODE   RT_CONCAT(a_BaseNm, _pe32); \
+    FNBS3TESTDOMODE   RT_CONCAT(a_BaseNm, _pp16_32); \
+    FNBS3TESTDOMODE   RT_CONCAT(a_BaseNm, _pp32); \
+    FNBS3TESTDOMODE   RT_CONCAT(a_BaseNm, _pae16_32); \
+    FNBS3TESTDOMODE   RT_CONCAT(a_BaseNm, _pae32); \
+    FNBS3TESTDOMODE   RT_CONCAT(a_BaseNm, _lm64)
+
+
+/**
  * One worker drives all modes.
  *
  * This is an alternative to BS3TESTMODEENTRY where one worker, typically
@@ -3097,11 +3548,18 @@ typedef struct BS3TESTMODEBYONEENTRY
 {
     const char * BS3_FAR    pszSubTest;
     PFNBS3TESTDOMODE        pfnWorker;
-    uint32_t                u32Reserved;
+    /** BS3TESTMODEBYONEENTRY_F_XXX. */
+    uint32_t                fFlags;
 } BS3TESTMODEBYONEENTRY;
 /** Pointer to a mode-by-one sub-test entry. */
 typedef BS3TESTMODEBYONEENTRY const *PCBS3TESTMODEBYONEENTRY;
 
+/** @name BS3TESTMODEBYONEENTRY_F_XXX - flags.
+ * @{ */
+/** Only test modes that have paging enabled. */
+#define BS3TESTMODEBYONEENTRY_F_ONLY_PAGING     RT_BIT_32(0)
+/** @} */
+
 
 /**
  * Sets the full GDTR register.
@@ -3262,6 +3720,10 @@ BS3_DECL_FAR(void) Bs3InitGdt_rm_far(void);
 /** The TMPL_MODE_STR value for each mode.
  * These are all in DATA16 so they can be accessed from any code.  */
 BS3_MODE_EXPAND_EXTERN_DATA16(const char, g_szBs3ModeName, []);
+/** The TMPL_MODE_LNAME value for each mode.
+ * These are all in DATA16 so they can be accessed from any code.  */
+BS3_MODE_EXPAND_EXTERN_DATA16(const char, g_szBs3ModeNameShortLower, []);
+
 
 /**
  * Basic CPU detection.
@@ -3297,12 +3759,35 @@ BS3_MODE_PROTO_NOSB(uint8_t, Bs3CpuDetect,(void));
 #define BS3CPU_F_PSE                UINT16_C(0x0800)
 /** Flag indicating that the CPU supports long mode. */
 #define BS3CPU_F_LONG_MODE          UINT16_C(0x1000)
+/** Flag indicating that the CPU supports NX. */
+#define BS3CPU_F_NX                 UINT16_C(0x2000)
 /** @} */
 
 /** The return value of #Bs3CpuDetect_mmm. (Initial value is BS3CPU_TYPE_MASK.) */
 extern uint16_t g_uBs3CpuDetected;
 
 /**
+ * Call 32-bit prot mode C function.
+ *
+ * This switches to 32-bit mode and calls the 32-bit @a fpfnCall C code with
+ * @a cbParams bytes of parameters on the stack, then returns in the original
+ * mode.  When called in
+ * real mode, this will switch to PE32.
+ *
+ * @returns 32-bit status code if the function returned anything.
+ * @param   fpfnCall        Address of the 32-bit C function to call.  When
+ *                          called from 16-bit code, this is a far real mode
+ *                          function pointer, i.e. as fixed up by the linker.
+ *                          In 32-bit and 64-bit code, this is a flat address.
+ * @param   cbParams        The size of the parameter list, in bytes.
+ * @param   ...             The parameters.
+ * @sa      Bs3SwitchFromV86To16BitAndCallC
+ *
+ * @remarks     WARNING! This probably doesn't work in 64-bit mode yet.
+ *                       Only tested for 16-bit real mode.
+ */
+BS3_MODE_PROTO_STUB(int32_t, Bs3SwitchTo32BitAndCallC,(FPFNBS3FAR fpfnCall, unsigned cbParams, ...));
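+
+/* Illustrative 16-bit caller sketch (worker name and argument are made up):
+ * @code{.c}
+ *  int32_t rc = Bs3SwitchTo32BitAndCallC((FPFNBS3FAR)myWorker_c32,
+ *                                        sizeof(uint32_t), UINT32_C(42));
+ * @endcode
+ */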
+
+/**
  * Initializes trap handling for the current system.
  *
  * Calls the appropriate Bs3Trap16Init, Bs3Trap32Init or Bs3Trap64Init function.
@@ -3318,7 +3803,7 @@ BS3_MODE_PROTO_STUB(void, Bs3TrapInit,(void));
 BS3_MODE_PROTO_NOSB(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t cEntries));
 
 /**
- * Executes the array of tests in every possibly mode, unitifed driver.
+ * Executes the array of tests in every possible mode, unified driver.
  *
  * This requires much less code space than Bs3TestDoModes as there is only one
  * instance of each sub-test driver code, instead of 3 (cmn) or 22 (per-mode)
@@ -3330,6 +3815,17 @@ BS3_MODE_PROTO_NOSB(void, Bs3TestDoModes,(PCBS3TESTMODEENTRY paEntries, size_t c
  */
 BS3_MODE_PROTO_NOSB(void, Bs3TestDoModesByOne,(PCBS3TESTMODEBYONEENTRY paEntries, size_t cEntries, uint32_t fFlags));
 
+/**
+ * Executes the array of tests in every possible mode, using the max bit-count
+ * worker for each.
+ *
+ * @param   paEntries       The mode sub-test entries.
+ * @param   cEntries        The number of sub-test entries.
+ */
+BS3_MODE_PROTO_NOSB(void, Bs3TestDoModesByMax,(PCBS3TESTMODEBYMAXENTRY paEntries, size_t cEntries));
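+
+/* Illustrative driver table and call (the sub-test name and worker base name
+ * are made up; assumes bs3MyWorker_c16/_c32/_c64 exist):
+ * @code{.c}
+ *  BS3TESTMODEBYMAX_PROTOTYPES_CMN(bs3MyWorker);
+ *  static const BS3TESTMODEBYMAXENTRY g_aTests[] =
+ *  {
+ *      BS3TESTMODEBYMAXENTRY_CMN("my-subtest", bs3MyWorker),
+ *  };
+ *  Bs3TestDoModesByMax(g_aTests, RT_ELEMENTS(g_aTests));
+ * @endcode
+ */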
+
+
+/** @} */
 
 /** @} */
 
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit.mac b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit.mac
index d4e31bb..c9254eb 100644
--- a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit.mac
+++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3kit.mac
@@ -212,6 +212,18 @@
   %define BS3_DATA16_WRT(a_Var)     BS3_WRT_RIP(a_Var) wrt FLAT
  %endif
 
+ ;; @def BS3_TEXT16_WRT
+ ; For accessing BS3TEXT16 correctly.
+ ; @param a_Label The BS3TEXT16 label.
+ %undef BS3_TEXT16_WRT
+ %if %1 == 16
+  %define BS3_TEXT16_WRT(a_Label)   a_Label wrt CGROUP16
+ %elif %1 == 32
+  %define BS3_TEXT16_WRT(a_Label)   a_Label wrt FLAT
+ %else
+  %define BS3_TEXT16_WRT(a_Label)   BS3_WRT_RIP(a_Label) wrt FLAT
+ %endif
+
  %undef   BS3_IF_16BIT_OTHERWISE
  %if %1 == 16
   %define BS3_IF_16BIT_OTHERWISE(a_16BitExpr, a_OtherwiseExpr) a_16BitExpr
@@ -622,6 +634,19 @@ GROUP FLAT BS3FLAT
 %macro BS3_GLOBAL_NAME_EX 3
 global %1
 %1:
+%undef  BS3_LAST_LABEL
+%xdefine BS3_LAST_LABEL %1
+%endmacro
+
+;;
+; Global local label.
+;
+; This should be used when switching segments and jumping to a local label.
+; It makes the label visible to the debugger and map file.
+;
+%macro BS3_GLOBAL_LOCAL_LABEL 1
+global RT_CONCAT(BS3_LAST_LABEL,%1)
+%1:
 %endmacro
 
 ;;
@@ -1391,6 +1416,24 @@ AssertCompileSize(BS3REGCTX, 0xd0)
 %define BS3REG_CTX_F_NO_AMD64           0x10
 ;; @}
 
+
+;;
+; BS3 extended register context (FPU, SSE, AVX, ++)
+;
+struc BS3EXTCTX
+        .u16Magic       resw 1          ; uint16_t            u16Magic;
+        .cb             resw 1          ; uint16_t            cb;
+        .enmMethod      resb 1          ; uint8_t             enmMethod;
+        alignb           8
+        .fXcr0Nominal   resq 1          ; uint64_t            fXcr0Nominal;
+        .fXcr0Saved     resq 1          ; uint64_t            fXcr0Saved;
+        alignb           64
+        .Ctx            resb 512
+endstruc
+%define BS3EXTCTXMETHOD_ANCIENT     1
+%define BS3EXTCTXMETHOD_FXSAVE      2
+%define BS3EXTCTXMETHOD_XSAVE       3
+
 ;;
 ; BS3 Trap Frame.
 ;
@@ -1677,6 +1720,9 @@ endstruc
 %define BS3CPU_F_PSE                0x0800
 %define BS3CPU_F_PSE_BIT            11
 %define BS3CPU_F_LONG_MODE          0x1000
+%define BS3CPU_F_LONG_MODE_BIT      12
+%define BS3CPU_F_NX                 0x2000
+%define BS3CPU_F_NX_BIT             13
 ;; @}
 
 
diff --git a/src/VBox/ValidationKit/common/utils.py b/src/VBox/ValidationKit/common/utils.py
index 2bd9d29..b45d66b 100755
--- a/src/VBox/ValidationKit/common/utils.py
+++ b/src/VBox/ValidationKit/common/utils.py
@@ -27,7 +27,7 @@ CDDL are applicable instead of those of the GPL.
 You may elect to license modified versions of this file under the
 terms and conditions of either the GPL or the CDDL or both.
 """
-__version__ = "$Revision: 109040 $"
+__version__ = "$Revision: 114822 $"
 
 
 # Standard Python imports.
@@ -237,6 +237,13 @@ def openNoInherit(sFile, sMode = 'r'):
     try:
         from fcntl import FD_CLOEXEC, F_GETFD, F_SETFD, fcntl; # pylint: disable=F0401
     except:
+        # On Windows, use the 'N' flag introduced in Visual C++ 7.0 or 7.1.
+        if getHostOs() == 'win':
+            offComma = sMode.find(',');
+            if offComma < 0:
+                return open(sFile, sMode + 'N');
+            return open(sFile, sMode[:offComma] + 'N' + sMode[offComma:]);
+        # Just in case.
         return open(sFile, sMode);
 
     oFile = open(sFile, sMode)
diff --git a/src/VBox/ValidationKit/common/webutils.py b/src/VBox/ValidationKit/common/webutils.py
index b46dcc6..46ea2fd 100755
--- a/src/VBox/ValidationKit/common/webutils.py
+++ b/src/VBox/ValidationKit/common/webutils.py
@@ -26,7 +26,7 @@ CDDL are applicable instead of those of the GPL.
 You may elect to license modified versions of this file under the
 terms and conditions of either the GPL or the CDDL or both.
 """
-__version__ = "$Revision: 109040 $"
+__version__ = "$Revision: 114790 $"
 
 # Standard Python imports.
 import os;
@@ -167,7 +167,7 @@ def downloadFile(sUrlFile, sDstFile, sLocalPrefix, fnLog, fnError = None, fNoPro
             oDst.write(oSrc.read());
             oDst.close();
             oSrc.close();
-        except Exception, oXcpt:
+        except Exception as oXcpt:
             fnError('Error downloading "%s" to "%s": %s' % (sUrlFile, sDstFile, oXcpt));
             return False;
     else:
@@ -176,7 +176,7 @@ def downloadFile(sUrlFile, sDstFile, sLocalPrefix, fnLog, fnError = None, fNoPro
         fnLog('Copying "%s" to "%s"...' % (sSrcPath, sDstFile));
         try:
             utils.copyFileSimple(sSrcPath, sDstFile);
-        except Exception, oXcpt:
+        except Exception as oXcpt:
             fnError('Error copying "%s" to "%s": %s' % (sSrcPath, sDstFile, oXcpt));
             return False;
 
diff --git a/src/VBox/ValidationKit/utils/cpu/cidet-core.cpp b/src/VBox/ValidationKit/utils/cpu/cidet-core.cpp
index 8d168fe..803dc19 100644
--- a/src/VBox/ValidationKit/utils/cpu/cidet-core.cpp
+++ b/src/VBox/ValidationKit/utils/cpu/cidet-core.cpp
@@ -1918,8 +1918,8 @@ bool CidetCoreAssembleLength(PCIDETCORE pThis)
     //uint8_t const *pbOpcode = pThis->pCurInstr->abOpcode;
     switch (pThis->pCurInstr->cbOpcode)
     {
-        case 3: off++;
-        case 2: off++;
+        case 3: off++; /* fall thru */
+        case 2: off++; /* fall thru */
         case 1: off++;
             break;
         default:
@@ -2036,8 +2036,8 @@ bool CidetCoreAssemble(PCIDETCORE pThis)
     uint8_t const *pbOpcode = pThis->pCurInstr->abOpcode;
     switch (pThis->pCurInstr->cbOpcode)
     {
-        case 3: pThis->abInstr[off++] = *pbOpcode++;
-        case 2: pThis->abInstr[off++] = *pbOpcode++;
+        case 3: pThis->abInstr[off++] = *pbOpcode++; /* fall thru */
+        case 2: pThis->abInstr[off++] = *pbOpcode++; /* fall thru */
         case 1: pThis->abInstr[off++] = *pbOpcode++;
             break;
         default:
@@ -2058,13 +2058,13 @@ bool CidetCoreAssemble(PCIDETCORE pThis)
             switch (pThis->aOperands[pThis->idxMrmRmOp].cbMemDisp)
             {
                 case 0: break;
-                case 8: pThis->abInstr[off + 3] = (uDispValue >> 56) & UINT8_C(0xff);
-                case 7: pThis->abInstr[off + 3] = (uDispValue >> 48) & UINT8_C(0xff);
-                case 6: pThis->abInstr[off + 3] = (uDispValue >> 40) & UINT8_C(0xff);
-                case 5: pThis->abInstr[off + 3] = (uDispValue >> 32) & UINT8_C(0xff);
-                case 4: pThis->abInstr[off + 3] = (uDispValue >> 24) & UINT8_C(0xff);
-                case 3: pThis->abInstr[off + 2] = (uDispValue >> 16) & UINT8_C(0xff);
-                case 2: pThis->abInstr[off + 1] = (uDispValue >>  8) & UINT8_C(0xff);
+                case 8: pThis->abInstr[off + 3] = (uDispValue >> 56) & UINT8_C(0xff); /* fall thru */
+                case 7: pThis->abInstr[off + 3] = (uDispValue >> 48) & UINT8_C(0xff); /* fall thru */
+                case 6: pThis->abInstr[off + 3] = (uDispValue >> 40) & UINT8_C(0xff); /* fall thru */
+                case 5: pThis->abInstr[off + 3] = (uDispValue >> 32) & UINT8_C(0xff); /* fall thru */
+                case 4: pThis->abInstr[off + 3] = (uDispValue >> 24) & UINT8_C(0xff); /* fall thru */
+                case 3: pThis->abInstr[off + 2] = (uDispValue >> 16) & UINT8_C(0xff); /* fall thru */
+                case 2: pThis->abInstr[off + 1] = (uDispValue >>  8) & UINT8_C(0xff); /* fall thru */
                 case 1: pThis->abInstr[off] = uDispValue & UINT8_C(0xff);
                     break;
                 default: AssertReleaseFailedReturn(false);
@@ -2083,13 +2083,13 @@ bool CidetCoreAssemble(PCIDETCORE pThis)
             uint64_t uImmValue = pThis->aOperands[iOp].uImmDispValue;
             switch (pThis->aOperands[iOp].cb)
             {
-                case 8: pThis->abInstr[off + 3] = (uImmValue >> 56) & UINT8_C(0xff);
-                case 7: pThis->abInstr[off + 3] = (uImmValue >> 48) & UINT8_C(0xff);
-                case 6: pThis->abInstr[off + 3] = (uImmValue >> 40) & UINT8_C(0xff);
-                case 5: pThis->abInstr[off + 3] = (uImmValue >> 32) & UINT8_C(0xff);
-                case 4: pThis->abInstr[off + 3] = (uImmValue >> 24) & UINT8_C(0xff);
-                case 3: pThis->abInstr[off + 2] = (uImmValue >> 16) & UINT8_C(0xff);
-                case 2: pThis->abInstr[off + 1] = (uImmValue >>  8) & UINT8_C(0xff);
+                case 8: pThis->abInstr[off + 3] = (uImmValue >> 56) & UINT8_C(0xff); /* fall thru */
+                case 7: pThis->abInstr[off + 3] = (uImmValue >> 48) & UINT8_C(0xff); /* fall thru */
+                case 6: pThis->abInstr[off + 3] = (uImmValue >> 40) & UINT8_C(0xff); /* fall thru */
+                case 5: pThis->abInstr[off + 3] = (uImmValue >> 32) & UINT8_C(0xff); /* fall thru */
+                case 4: pThis->abInstr[off + 3] = (uImmValue >> 24) & UINT8_C(0xff); /* fall thru */
+                case 3: pThis->abInstr[off + 2] = (uImmValue >> 16) & UINT8_C(0xff); /* fall thru */
+                case 2: pThis->abInstr[off + 1] = (uImmValue >>  8) & UINT8_C(0xff); /* fall thru */
                 case 1: pThis->abInstr[off] = uImmValue & UINT8_C(0xff);
                     break;
                 default: AssertReleaseFailedReturn(false);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-virtualbox/virtualbox.git


