Index: /trunk/src/recompiler_new/COPYING.LIB
===================================================================
--- /trunk/src/recompiler_new/COPYING.LIB	(revision 13168)
+++ /trunk/src/recompiler_new/COPYING.LIB	(revision 13168)
@@ -0,0 +1,513 @@
+		  GNU LESSER GENERAL PUBLIC LICENSE
+		       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard.  To achieve this, non-free programs must be
+allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+
+		  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+  
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+
+
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+			    NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.  It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
+
+
Index: /trunk/src/recompiler_new/Makefile.kmk
===================================================================
--- /trunk/src/recompiler_new/Makefile.kmk	(revision 13168)
+++ /trunk/src/recompiler_new/Makefile.kmk	(revision 13168)
@@ -0,0 +1,488 @@
+# $Id$
+## @file
+# The Recompiler Sub-Makefile.
+#
+# There are a few complicating factors here, esp. on AMD64 systems:
+#
+#   * op.c doesn't compile correctly with gcc 4. For this reason we've
+#     checked in op.S, which is why we don't compile op.c
+#     directly but always compile via the assembly file.
+#   * On 64-bit Windows we lack a compiler and have to resort to a
+#     Linux cross-compiler building an ELF relocatable module, which
+#     we then load using a wrapper module. Thus the REM_MOD mess.
+#   * On platforms using the 64-bit GCC ABI, we're not allowed to
+#     generate non-PIC shared objects, and op.c requires the code
+#     to be non-PIC. We apply the same trick as we developed for
+#     64-bit Windows.
+#
+
+#
+# Copyright (C) 2006-2007 Sun Microsystems, Inc.
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+# Clara, CA 95054 USA or visit http://www.sun.com if you need
+# additional information or have any questions.
+#
+
+
+SUB_DEPTH = ../..
+include $(KBUILD_PATH)/subheader.kmk
+
+BLDPROGS             += dyngen
+ifneq ($(or $(eq $(KBUILD_TARGET_ARCH),amd64) , $(VBOX_TARGET_MAC_OS_X_VERSION_10_5)),)
+ SYSMODS             += VBoxREM2
+ REM_MOD             += VBoxREM2
+else
+ REM_MOD             += VBoxREM
+endif
+DLLS                 += VBoxREM
+IMPORT_LIBS          += VBoxREMImp
+
+OTHER_CLEAN          += \
+	$(PATH_$(REM_MOD))/op.h \
+	$(PATH_$(REM_MOD))/opc.h \
+	$(PATH_$(REM_MOD))/gen-op.h
+
+#
+# Globals
+#
+VBOX_PATH_RECOMPILER_SRC := $(PATH_SUB_CURRENT)
+TEMPLATE_DUMMY = dummy template (move to kBuild)
+
+
+#
+# L4 must use the no-crt path because it seems to be lacking math stuff...
+# Darwin must use the no-crt path because it can't compile op.c natively.
+# All AMD64 targets must use the no-crt path because ELF doesn't like op.c
+# when stuffed into a shared library, and Windows doesn't have 64-bit gcc (yet).
+#
+ifeq ($(filter-out l4 darwin freebsd,$(KBUILD_TARGET)),)
+ REM_USE_NOCRT := 1
+endif
+ifeq ($(REM_MOD),VBoxREM2)
+ REM_USE_NOCRT := 1
+endif
+
+
+#
+# The dyngen build tool.
+#
+ifeq ($(KBUILD_HOST),win)
+ dyngen_TOOL          = MINGW32
+ dyngen_SDKS          = W32API
+ # On 64-bit Windows we pretend to be 32-bit.
+ dyngen_BLD_TRG_ARCH  = x86
+ dyngen_BLD_TRG_CPU   = i386
+ dyngen_CFLAGS        = -Wall -g -fno-strict-aliasing
+ dyngen_TEMPLATE      = DUMMY
+else
+ dyngen_TEMPLATE      = VBOXBLDPROG
+endif
+dyngen_DEFS          += REM_PHYS_ADDR_IN_TLB
+ifeq ($(KBUILD_TARGET_ARCH),amd64)
+ dyngen_DEFS         += HOST_X86_64=1
+endif
+dyngen_CFLAGS        += -Wno-missing-prototypes -Wno-missing-declarations
+dyngen_INCS           = \
+	Sun \
+	target-i386 \
+	fpu \
+	.
+dyngen_SOURCES        = dyngen.c
+
+
+#
+# The VBoxREM.[dll|so|..] or VBoxREM2.rel.
+#
+$(REM_MOD)_DEFS             = IN_REM_R3 REM_INCLUDE_CPU_H
+$(REM_MOD)_DEFS            += REM_PHYS_ADDR_IN_TLB
+#$(REM_MOD)_DEFS          += DEBUG_ALL_LOGGING DEBUG_DISAS DEBUG_PCALL DEBUG_EXEC DEBUG_FLUSH DEBUG_IOPORT DEBUG_SIGNAL DEBUG_TLB_CHECK DEBUG_TB_INVALIDATE DEBUG_TLB  # Enables huge amounts of debug logging.
+
+$(REM_MOD)_INCS             = \
+	Sun \
+	Sun/crt\
+	target-i386 \
+	fpu \
+	$(PATH_$(REM_MOD)) \
+	$(PATH_ROOT)/src/VBox/VMM \
+	.
+
+$(REM_MOD)_SOURCES          = \
+	VBoxRecompiler.c \
+	cpu-exec.c \
+	exec.c \
+	translate-all.c \
+	translate-op.c \
+	fpu/softfloat-native.c \
+	target-i386/helper.c \
+	target-i386/helper2.c \
+	target-i386/translate.c
+$(REM_MOD)_SOURCES.debug = \
+	Sun/testmath.c
+ifeq ($(filter-out win os2,$(KBUILD_TARGET)),)
+ $(REM_MOD)_SOURCES        += target-i386/op.c
+ FILE_OP_OBJ                = $(PATH_$(REM_MOD))/target-i386/op.o
+else # The remaining targets may be using gcc 4 and need checking.
+ $(REM_MOD)_SOURCES        += $(PATH_$(REM_MOD))/op.S
+ FILE_OP_OBJ                = $(PATH_$(REM_MOD))/gen/op.o
+ $(REM_MOD)_CLEAN           = $(FILE_OP_OBJ) $(PATH_$(REM_MOD))/op.S.dep
+endif
+$(REM_MOD)_SOURCES.win.x86  = $(REM_MOD).def
+ifneq ($(REM_MOD),VBoxREM2)
+ $(REM_MOD)_POST_CMDS       = $(VBOX_SIGN_IMAGE_CMDS)
+endif
+
+
+ifdef REM_USE_NOCRT
+ $(REM_MOD)_TEMPLATE        = VBOXNOCRTGAS
+ $(REM_MOD)_DEFS           += LOG_USE_C99 $(ARCH_BITS_DEFS)
+ $(REM_MOD)_CFLAGS.amd64    = -O2
+ $(REM_MOD)_CFLAGS.debug    = -O0
+ ifeq ($(KBUILD_TARGET_ARCH),x86)
+  $(REM_MOD)_CFLAGS.release+= -fomit-frame-pointer -fno-gcse
+ endif
+
+ # This doesn't fit in IPRT because it requires GAS and is LGPL.
+ $(REM_MOD)_SOURCES        += \
+	Sun/e_powl-$(KBUILD_TARGET_ARCH).S
+
+ ifeq ($(REM_MOD),VBoxREM)
+  $(REM_MOD)_LIBS           = \
+	$(PATH_LIB)/RuntimeR3NoCRTGCC$(VBOX_SUFF_LIB) \
+ 	$(LIB_VMM) \
+ 	$(LIB_RUNTIME)
+  ifeq ($(KBUILD_TARGET),l4)
+   $(REM_MOD)_LIBS          += \
+	$(L4_LIBDIR)/libuc.0.s.so
+  endif
+  $(REM_MOD)_LIBS.darwin    = \
+  	$(TARGET_VBoxREMImp)
+  $(REM_MOD)_LDFLAGS.darwin  = -read_only_relocs suppress -multiply_defined warning # -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/$(REM_MOD).dylib
+  $(REM_MOD)_POST_CMDS.darwin = install_name_tool -id $(VBOX_DYLD_EXECUTABLE_PATH)/$(REM_MOD).dylib $(out)
+  $(REM_MOD)_CFLAGS.darwin   = -fno-common -mdynamic-no-pic
+ else
+  $(REM_MOD)_LIBS           = \
+	$(PATH_LIB)/RuntimeR3NoCRTGCC$(VBOX_SUFF_LIB)
+  $(REM_MOD)_SYSSUFF        = .rel
+  $(REM_MOD)_LDFLAGS.darwin = -nostdlib -static
+  $(REM_MOD)_CFLAGS.darwin  = -fno-common -static -mno-dynamic-no-pic
+ endif
+
+else # !REM_USE_NOCRT
+
+ $(REM_MOD)_TOOL            = GXX3
+ $(REM_MOD)_TOOL.solaris    = GXX3PLAIN
+ $(REM_MOD)_TOOL.win.x86    = MINGW32
+ $(REM_MOD)_TOOL.win.amd64  = XGCCAMD64LINUX
+ $(REM_MOD)_TEMPLATE        = DUMMY
+ $(REM_MOD)_SDKS.win.x86    = W32API  							## @todo do we really need this now?
+ $(REM_MOD)_ASFLAGS         = -x assembler-with-cpp 			## @todo didn't I make this default already?
+ $(REM_MOD)_CFLAGS          = -Wall -g
+ $(REM_MOD)_CFLAGS.debug    = -O0
+ $(REM_MOD)_CFLAGS.release += -fomit-frame-pointer -fno-gcse
+ $(REM_MOD)_CFLAGS.profile  = $($(REM_MOD)_CFLAGS.release)
+ $(REM_MOD)_CFLAGS.kprofile = $($(REM_MOD)_CFLAGS.release)
+ $(REM_MOD)_CFLAGS.l4       = -nostdinc
+ ifeq ($(KBUILD_TARGET),l4)
+  $(REM_MOD)_INCS          += $(VBOX_L4_GCC3_INCS) $(L4_INCDIR)
+ endif
+
+ $(REM_MOD)_DEFS           += IN_RING3 LOG_USE_C99 $(ARCH_BITS_DEFS)
+ #$(REM_MOD)_DEFS          += DEBUG_DISAS DEBUG_PCALL DEBUG_EXEC DEBUG_FLUSH DEBUG_IOPORT DEBUG_SIGNAL DEBUG_TLB_CHECK DEBUG_TB_INVALIDATE DEBUG_TLB  # Enables huge amounts of debug logging.
+ # these defines are probably all irrelevant now:
+ $(REM_MOD)_DEFS           += _GNU_SOURCE _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE _REENTRANT
+
+ $(REM_MOD)_LDFLAGS.darwin  = -read_only_relocs suppress -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/$(REM_MOD).dylib -multiply_defined warning
+ $(REM_MOD)_LDFLAGS.l4      = -T$(L4_LIBDIR)/../main_rel.ld -nostdlib -Wl,--no-undefined
+ $(REM_MOD)_LDFLAGS.linux   = $(VBOX_LD_as_needed)
+ $(REM_MOD)_LDFLAGS.os2     = -Zomf
+ $(REM_MOD)_LDFLAGS.debug   = -g
+ $(REM_MOD)_LDFLAGS.solaris = -mimpure-text
+ ifdef VBOX_SOLARIS_10
+  $(REM_MOD)_DEFS.solaris    += HOST_SOLARIS=10
+ else # solaris 11
+  $(REM_MOD)_DEFS.solaris    += HOST_SOLARIS=11
+ endif
+ ifeq ($(KBUILD_TARGET_ARCH),amd64)
+  $(REM_MOD)_LIBS           = $(FILE_TOOL_GCC3_LIBGCC)
+ else # x86
+  $(REM_MOD)_LIBS           = \
+ 	$(LIB_VMM) \
+ 	$(LIB_RUNTIME)
+  $(REM_MOD)_LIBS.win.x86   = \
+ 	mingw32 \
+ 	user32 gdi32 winmm ws2_32 iphlpapi dxguid
+  $(REM_MOD)_LIBS.linux     = \
+ 	$(LIB_UUID) \
+ 	m \
+ 	util \
+ 	rt \
+ 	$(LIB_PTHREAD)
+  $(REM_MOD)_LIBS.l4        = \
+ 	gcc \
+ 	$(L4_LIBDIR)/libvboxserver.s.so \
+ 	$(L4_LIBDIR)/libdl.s.so \
+ 	$(L4_LIBDIR)/libuc.0.s.so
+ endif # x86
+
+endif # !REM_USE_NOCRT
+
+# Extra flags for these source modules.
+target-i386/op.c_CFLAGS         = -O2 -fno-strict-aliasing -fomit-frame-pointer -falign-functions=0 -fno-reorder-blocks -fno-optimize-sibling-calls
+target-i386/op.c_CFLAGS.x86     = -fno-gcse -fno-instrument-functions -mpreferred-stack-boundary=2
+target-i386/op.c_CFLAGS.darwin.x86 = -m128bit-long-double -mpreferred-stack-boundary=4
+target-i386/helper.c_CFLAGS.x86 = -O2 -fomit-frame-pointer -fno-strict-aliasing -fno-gcse
+cpu-exec.c_CFLAGS.x86           = -O2 -fomit-frame-pointer -fno-strict-aliasing -fno-gcse
+cpu-exec.c_CFLAGS.solaris.amd64 = -O2 -fomit-frame-pointer -fno-strict-aliasing
+
+
+translate-all.c_DEPS = \
+	$(PATH_$(REM_MOD))/op.h \
+	$(PATH_$(REM_MOD))/opc.h \
+	$(PATH_$(REM_MOD))/gen-op.h
+translate-op.c_DEPS = $(translate-all.c_DEPS)
+target-i386/translate.c_DEPS = $(translate-all.c_DEPS)
+
+
+#
+# The math testcase as a standalone program for testing and debugging purposes.
+#
+## @todo This is a bit messy because of MINGW32.
+#BLDPROGS += testmath
+testmath_TOOL           = GXX3
+testmath_TOOL.win.x86   = MINGW32
+testmath_SDKS.win.x86   = W32API
+ifeq ($(KBUILD_HOST).$(KBUILD_HOST_ARCH),win.amd64)
+ # 64-bit windows: Pretend to be 32-bit.
+ testmath_BLD_TRG       = win32
+ testmath_BLD_TRG_ARCH  = x86
+ testmath_BLD_TRG_CPU   = i386
+endif
+testmath_ASTOOL         = $(VBOX_ASTOOL)
+ifeq ($(filter-out win32 win64,$(KBUILD_HOST)),)
+ testmath_ASFLAGS        = -f win32 -DNASM_FORMAT_PE $(VBOX_ASFLAGS) -w+orphan-labels
+else
+ testmath_ASFLAGS        = -f elf -DNASM_FORMAT_ELF $(VBOX_ASFLAGS) -w+orphan-labels
+endif
+testmath_ASFLAGS.amd64  = -m amd64
+testmath_CFLAGS         = -Wall -g
+testmath_CFLAGS.release = -O3
+testmath_LDFLAGS        = -g
+testmath_DEFS           = MATHTEST_STANDALONE
+testmath_SOURCES        = Sun/testmath.c
+#testmath_SOURCES        += $(PATH_LIB)/RuntimeR3NoCRTGCC$(VBOX_SUFF_LIB)
+
+
+ifeq ($(REM_MOD),VBoxREM2)
+#
+# The VBoxREM2 wrapper.
+#
+VBoxREM_TEMPLATE       = VBOXR3
+VBoxREM_DEFS           = IN_REM_R3
+VBoxREM_SOURCES        = \
+	VBoxREMWrapper.cpp \
+	VBoxREMWrapperA.asm
+VBoxREM_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM.dylib
+VBoxREM_LIBS           = \
+	$(LIB_VMM) \
+	$(LIB_RUNTIME)
+endif
+
+
+#
+# The VBoxREM import library.
+#
+VBoxREMImp_TEMPLATE         = VBOXR3
+ifeq ($(KBUILD_TARGET),darwin)
+VBoxREMImp_INST             = $(INST_LIB)
+endif
+VBoxREMImp_SOURCES.win      = VBoxREM.def
+VBoxREMImp_SOURCES.os2      = $(PATH_VBoxREMImp)/VBoxREMOS2.def
+ifeq ($(filter win os2,$(KBUILD_TARGET)),)
+VBoxREMImp_SOURCES          = $(PATH_VBoxREMImp)/VBoxREMImp.c
+VBoxREMImp_CLEAN            = $(PATH_VBoxREMImp)/VBoxREMImp.c
+endif
+ifneq ($(filter-out darwin os2 win,$(KBUILD_TARGET)),)
+VBoxREMImp_SONAME           = VBoxREM$(SUFF_DLL)
+endif
+VBoxREMImp_LDFLAGS.darwin   = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM.dylib
+VBoxREMImp_LDFLAGS.l4       = -T$(L4_LIBDIR)/../main_rel.ld -nostdlib
+
+$$(PATH_VBoxREMImp)/VBoxREMImp.c: $(VBOX_PATH_RECOMPILER_SRC)/VBoxREM.def $(VBOX_PATH_RECOMPILER_SRC)/Sun/deftoimp.sed $(MAKEFILE_CURRENT) | $$(dir $$@)
+	$(call MSG_GENERATE,,$@)
+	$(QUIET)$(APPEND) -t $@ '#ifdef VBOX_HAVE_VISIBILITY_HIDDEN'
+	$(QUIET)$(APPEND)    $@ '# define EXPORT __attribute__((visibility("default")))'
+	$(QUIET)$(APPEND)    $@ '#else'
+	$(QUIET)$(APPEND)    $@ '# define EXPORT'
+	$(QUIET)$(APPEND)    $@ '#endif'
+	$(QUIET)$(APPEND)    $@ ''
+	$(QUIET)$(SED) -f $(VBOX_PATH_RECOMPILER_SRC)/Sun/deftoimp.sed --append $@ $<
+
+$$(PATH_VBoxREMImp)/VBoxREMOS2.def: $(VBOX_PATH_RECOMPILER_SRC)/VBoxREM.def $(MAKEFILE_CURRENT) | $$(dir $$@)
+	$(SED) \
+		-e 's/^[ \t][ \t]*REMR3/    _REMR3/' \
+		-e 's/\.[Dd][Ll][Ll]//' \
+		-e 's/^LIBRARY .*/LIBRARY VBoxREM INITINSTANCE TERMINSTANCE\nDATA MULTIPLE\n/' \
+		--output $@ \
+		$<
+
+include $(KBUILD_PATH)/subfooter.kmk
+
+
+#
+# Generate the op.S file somehow...
+#
+# Gathering the flags, defines and include dirs for the command is a lot
+# of work. Unfortunately, kBuild only provides a highly specialized function
+# for doing this, so we're currently left to our own devices here.
+#
+# Add something like VBOX_RECOMPILER_OP_GCC = gcc-3.4.6 to LocalConfig.kmk
+# to be 100% sure that you get a working op.S. My gcc 4.1.1 seems to work
+# fine, so feel free to try VBOX_RECOMPILER_OP_GCC = gcc.
+#
+# The op-undefined.lst is generated by finding all the undefined symbols
+# in one (or more) ELF op.o files using nm.
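+#
+# (Illustration only, not used by the build: the list can be refreshed from an
+# ELF op.o roughly like `nm -u op.o | sed -e 's/^ *U //' | sort -u`; the exact
+# invocation is an assumption, not necessarily what was originally used.)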
+#
+ifndef VBOX_RECOMPILER_OP_GCC
+ ifeq ($(KBUILD_TARGET).$(KBUILD_TARGET_ARCH),darwin.x86)
+  VBOX_RECOMPILER_OP_GCC ?= $(notdir $(firstword $(which i386-elf-gcc-3.4 i386-elf-gcc-3.4.6 i386-elf-gcc-3.4.3 i386-elf-gcc) i386-elf-gcc)) # (port install i386-gcc-elf)
+  VBOX_RECOMPILER_OP_GCC_OK := yes
+  VBOX_RECOMPILER_OP_GCC_INCS ?= $(abspath $(dir $(shell LC_ALL=C $(VBOX_RECOMPILER_OP_GCC) -print-libgcc-file-name)))/include
+ endif
+ ifndef VBOX_RECOMPILER_OP_GCC
+  VBOX_RECOMPILER_OP_GCC := $(TOOL_$(VBOX_GCC_TOOL)_CC)
+  VBOX_RECOMPILER_OP_GCC_OK := dunno
+ endif
+else
+ # If set, assume it's an OK compiler.
+ VBOX_RECOMPILER_OP_GCC_OK := yes
+endif
+
+
+# The command sans -o op.S.tmp.
+COMPILE_OP_CMDS_3 = $(VBOX_RECOMPILER_OP_GCC) \
+	-S -s \
+	$(filter-out -g -O0, \
+		$($(REM_MOD)_CFLAGS) $($(REM_MOD)_CFLAGS.$(KBUILD_TYPE)) $($(REM_MOD)_CFLAGS.$(KBUILD_TARGET)) $($(REM_MOD)_CFLAGS.$(KBUILD_TARGET_ARCH)) $($(REM_MOD)_CFLAGS.$(KBUILD_TARGET).$(KBUILD_TARGET_ARCH)) \
+		$(target-i386/op.c_CFLAGS) $(target-i386/op.c_CFLAGS.$(KBUILD_TARGET)) $(target-i386/op.c_CFLAGS.$(KBUILD_TARGET_ARCH)) $(target-i386/op.c_CFLAGS.$(KBUILD_TARGET).$(KBUILD_TARGET_ARCH)) \
+		) \
+	$(addprefix -I, $(abspathex \
+		$($(REM_MOD)_CINCS.$(KBUILD_TARGET_ARCH)) $($(REM_MOD)_CINCS.$(KBUILD_TARGET)) $($(REM_MOD)_CINCS) $(CINCS) \
+		$($(REM_MOD)_INCS.$(KBUILD_TARGET_ARCH))  $($(REM_MOD)_INCS.$(KBUILD_TARGET))  $($(REM_MOD)_INCS) $(INCS) \
+		, $($(REM_MOD)_PATH))) \
+	$(addprefix -D, \
+		$($(REM_MOD)_CDEFS.$(KBUILD_TARGET_ARCH)) $($(REM_MOD)_CDEFS.$(KBUILD_TARGET)) $($(REM_MOD)_CDEFS) $(CDEFS.$(KBUILD_TARGET)) $(CDEFS.$(KBUILD_TARGET_ARCH)) $(CDEFS.$(KBUILD_TYPE)) $(CDEFS) \
+		$($(REM_MOD)_DEFS.$(KBUILD_TARGET_ARCH))  $($(REM_MOD)_DEFS.$(KBUILD_TARGET))  $($(REM_MOD)_DEFS)  $(DEFS.$(KBUILD_TARGET))  $(DEFS.$(KBUILD_TARGET_ARCH))  $(DEFS.$(KBUILD_TYPE))  $(DEFS) \
+		) \
+	-Wp,-MD,$(PATH_$(REM_MOD))/op.S.dep \
+	-Wp,-MT,$(PATH_$(REM_MOD))/op.S \
+	-Wp,-MP \
+	$(VBOX_PATH_RECOMPILER_SRC)/target-i386/op.c
+
+# Use the right GCC includes.
+ifdef VBOX_RECOMPILER_OP_GCC_INCS
+COMPILE_OP_CMDS_2 = $(subst $(VBOX_PATH_GCC_INCS),$(VBOX_RECOMPILER_OP_GCC_INCS),$(COMPILE_OP_CMDS_3))
+else
+COMPILE_OP_CMDS_2 = $(COMPILE_OP_CMDS_3)
+endif
+
+# Drop incompatible options when using the cross-compiler on darwin.
+ifeq ($(KBUILD_TARGET),darwin)
+ ifeq ($(filter-out i386-elf-gcc%, $(VBOX_RECOMPILER_OP_GCC)),)
+  COMPILE_OP_CMDS = $(filter-out -mdynamic-no-pic -mno-dynamic-no-pic -fno-stack-protector -Wno-missing-field-initializers, $(COMPILE_OP_CMDS_2))
+ endif
+else if1of ($(KBUILD_TARGET),linux)
+ ifneq ($(TOOL_$(VBOX_GCC_TOOL)_CC),$(VBOX_RECOMPILER_OP_GCC))
+  VBOX_RECOMPILER_OP_CHECK_CC_GCC = $(shell \
+    if $(VBOX_RECOMPILER_OP_GCC) $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \
+    then echo "$(1)"; \
+    else echo "$(2)"; fi; )
+  COMPILE_OP_CMDS = \
+    $(filter-out -fno-stack-protector -Wextra -Wno-missing-field-initializers, $(COMPILE_OP_CMDS_2)) \
+    $(call VBOX_RECOMPILER_OP_CHECK_CC_GCC,-fno-stack-protector)
+ endif
+endif
+COMPILE_OP_CMDS ?= $(COMPILE_OP_CMDS_2)
+
+# include the dependencies
+-include $(PATH_$(REM_MOD))/op.S.dep
+
+# The rule.
+$(PATH_$(REM_MOD))/op.S: \
+		$(VBOX_PATH_RECOMPILER_SRC)/target-i386/op.c \
+		$(VBOX_PATH_RECOMPILER_SRC)/Sun/staged-op-elf-$(KBUILD_TARGET_ARCH).S \
+		$(VBOX_PATH_RECOMPILER_SRC)/Sun/op-validate.sed \
+		$(VBOX_PATH_RECOMPILER_SRC)/Sun/op-darwin.sed \
+		$(VBOX_PATH_RECOMPILER_SRC)/Sun/op-undefined.lst \
+		$(VBOX_PATH_RECOMPILER_SRC)/Makefile.kmk \
+		$$(comp-cmds COMPILE_OP_CMDS,COMPILE_OP_CMDS_PREV,FORCE) \
+		| $(call DIRDEP,$(PATH_$(REM_MOD)))
+	$(RM) -f $@ $@.tmp $@.tmp2 $@.dep
+ifeq ($(VBOX_RECOMPILER_OP_GCC_OK),yes)
+	$(call MSG_COMPILE,VBoxREM,$<,$@,AS)
+	$(addsuffix $(SP)\$(NL)$(TAB)  ,$(COMPILE_OP_CMDS)) -o $@.tmp
+else ifeq ($(VBOX_RECOMPILER_OP_GCC_OK),dunno) # (permit 3.x.x and 4.1.x+ for now)
+	major_ver=`$(VBOX_RECOMPILER_OP_GCC) -dumpversion | $(SED) -e 's/^\([2-9]\)\..*$$/\1/'`; \
+	minor_ver=`$(VBOX_RECOMPILER_OP_GCC) -dumpversion | $(SED) -e 's/^[2-9]\.\([0-9]\)\..*$$/\1/'`; \
+	bugfix_ver=`$(VBOX_RECOMPILER_OP_GCC) -dumpversion | $(SED) -e 's/^[2-9]\.[0-9]\.\([0-9]\).*$$/\1/'`; \
+	if test "$$major_ver" = "3" -o "(" "$$major_ver" = "4" -a "$$minor_ver" != "0" ")"; then \
+		$(ECHO_EXT) "Compiling $< => $@ [gcc v$${major_ver}.$${minor_ver}.$${bugfix_ver}]" && \
+		$(addsuffix $(SP)\$(NL)$(TAB)$(TAB)  ,$(COMPILE_OP_CMDS)) -o $@.tmp; \
+	else \
+		$(ECHO_EXT) "Using staged op.S [gcc v$${major_ver}.$${minor_ver}.$${bugfix_ver}]" && \
+		$(CP_EXT) -f $(VBOX_PATH_RECOMPILER_SRC)/Sun/staged-op-elf-$(KBUILD_TARGET_ARCH).S $@.tmp; \
+	fi
+else
+	$(CP) $(VBOX_PATH_RECOMPILER_SRC)/Sun/staged-op-elf-$(KBUILD_TARGET_ARCH).S $@.tmp
+endif
+	$(SED) -f $(VBOX_PATH_RECOMPILER_SRC)/Sun/op-validate.sed $@.tmp
+ifeq ($(KBUILD_TARGET),darwin)
+	$(SED) -f $(VBOX_PATH_RECOMPILER_SRC)/Sun/op-darwin.sed $@.tmp > $@.tmp2
+	$(SED) -e 's/^\(.*\)$$/#define \1 _\1/' $(VBOX_PATH_RECOMPILER_SRC)/Sun/op-undefined.lst > $@.tmp
+	$(CAT_EXT) $@.tmp2 >> $@.tmp
+endif
+	$(MV) -f $@.tmp $@
+	$(QUIET2)$(APPEND) "$@.dep"
+	$(QUIET2)$(APPEND) "$@.dep" 'define COMPILE_OP_CMDS_PREV'
+	$(QUIET2)$(APPEND) "$@.dep" '$(subst $(NL),'$(NL)$(TAB)@$(APPEND) "$@.dep" ',$(COMPILE_OP_CMDS))'
+	$(QUIET2)$(APPEND) "$@.dep" 'endef'
+
+
+# Hack for crosscompiling.
+DYNGEN = $(PATH_dyngen)/dyngen$(HOSTSUFF_EXE)
+DYNGEN_EXEC = $(DYNGEN)
+ifneq ($(KBUILD_HOST),$(KBUILD_TARGET)) # hack for crosscompiling.
+ ifeq ($(KBUILD_TARGET),win)
+  DYNGEN       = $(PATH_dyngen)/dyngen.exe
+  DYNGEN_EXEC := $(EXEC_X86_WIN32) $(DYNGEN_EXEC)
+ endif
+endif
+
+# The dyngen rules.
+$(PATH_$(REM_MOD))/op.h:     $(FILE_OP_OBJ) $(DYNGEN)
+	$(call MSG_TOOL,dyngen,VBoxREM,$<,$@)
+	$(QUIET)$(DYNGEN_EXEC) -o $@ $<
+
+$(PATH_$(REM_MOD))/opc.h:    $(FILE_OP_OBJ) $(DYNGEN)
+	$(call MSG_TOOL,dyngen,VBoxREM,$<,$@)
+	$(QUIET)$(DYNGEN_EXEC) -c -o $@ $<
+
+$(PATH_$(REM_MOD))/gen-op.h: $(FILE_OP_OBJ) $(DYNGEN)
+	$(call MSG_TOOL,dyngen,VBoxREM,$<,$@)
+	$(QUIET)$(DYNGEN_EXEC) -g -o $@ $<
+
+
+# Some aliases
+do_dyngen: $(PATH_$(REM_MOD))/gen-op.h $(PATH_$(REM_MOD))/opc.h $(PATH_$(REM_MOD))/op.h
+importlib: $(LIB_REM)
+op.S: $(PATH_$(REM_MOD))/op.S
+
Index: /trunk/src/recompiler_new/Sun/config-host.h
===================================================================
--- /trunk/src/recompiler_new/Sun/config-host.h	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/config-host.h	(revision 13168)
@@ -0,0 +1,46 @@
+/* $Id$ */
+/** @file
+ * Sun host config - maintained by hand
+ */
+
+/*
+ * Copyright (C) 2006-2007 Sun Microsystems, Inc.
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+ * Clara, CA 95054 USA or visit http://www.sun.com if you need
+ * additional information or have any questions.
+ */
+
+
+#if defined(RT_ARCH_AMD64) || defined(HOST_X86_64) /* The latter, for dyngen when cross compiling (windows, l4, etc). */
+# define HOST_X86_64 1
+# define HOST_LONG_BITS 64
+#else
+# define HOST_I386 1
+# define HOST_LONG_BITS 32
+# ifdef RT_OS_WINDOWS
+#  define CONFIG_WIN32 1
+# elif defined(RT_OS_OS2)
+#  define CONFIG_OS2
+# elif defined(RT_OS_DARWIN)
+#  define CONFIG_DARWIN
+# elif defined(RT_OS_FREEBSD) || defined(RT_OS_NETBSD) || defined(RT_OS_OPENBSD)
+/*#  define CONFIG_BSD*/
+# elif defined(RT_OS_SOLARIS)
+#  define CONFIG_SOLARIS
+# elif !defined(IPRT_NO_CRT)
+#  define HAVE_BYTESWAP_H 1
+# endif
+#endif
+#define QEMU_VERSION "0.8.1"
+#define CONFIG_UNAME_RELEASE ""
+#define CONFIG_QEMU_SHAREDIR "."
+
Index: /trunk/src/recompiler_new/Sun/config.h
===================================================================
--- /trunk/src/recompiler_new/Sun/config.h	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/config.h	(revision 13168)
@@ -0,0 +1,30 @@
+/* $Id$ */
+/** @file
+ * Sun config - Maintained by hand
+ */
+
+/*
+ * Copyright (C) 2006-2007 Sun Microsystems, Inc.
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+ * Clara, CA 95054 USA or visit http://www.sun.com if you need
+ * additional information or have any questions.
+ */
+
+#include "config-host.h"
+#define CONFIG_QEMU_PREFIX "/usr/gnemul/qemu-i386"
+#define TARGET_ARCH "i386"
+#define TARGET_I386 1
+#define CONFIG_SOFTMMU 1
+
+#ifdef VBOX_WITH_64_BITS_GUESTS
+#define TARGET_X86_64
+#endif
Index: /trunk/src/recompiler_new/Sun/crt/stdio.h
===================================================================
--- /trunk/src/recompiler_new/Sun/crt/stdio.h	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/crt/stdio.h	(revision 13168)
@@ -0,0 +1,74 @@
+/* $Id$ */
+/** @file
+ * Our minimal stdio
+ */
+
+/*
+ * Copyright (C) 2006-2007 Sun Microsystems, Inc.
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+ * Clara, CA 95054 USA or visit http://www.sun.com if you need
+ * additional information or have any questions.
+ */
+
+#ifndef ___Sun_stdio_h
+#define ___Sun_stdio_h
+
+#ifndef LOG_GROUP
+# define UNDO_LOG_GROUP
+#endif
+
+#include <VBox/log.h>
+
+#ifdef UNDO_LOG_GROUP
+# undef UNDO_LOG_GROUP
+# undef LOG_GROUP
+#endif
+
+#ifndef LOG_USE_C99
+# error "LOG_USE_C99 isn't defined."
+#endif
+
+__BEGIN_DECLS
+
+typedef struct FILE FILE;
+
+#if defined(RT_OS_SOLARIS)
+/** @todo Check solaris' floatingpoint.h as to why we do this */
+# define _FILEDEFED
+#endif
+
+DECLINLINE(int) fprintf(FILE *ignored, const char *pszFormat, ...)
+{
+/** @todo We don't support wrapping calls taking a va_list yet. It's not worth it yet,
+ * since there are only a couple of cases where this fprintf implementation is used.
+ * (The macro below will deal with the majority of the fprintf calls.) */
+#if 0 /*def LOG_ENABLED*/
+    if (LogIsItEnabled(NULL, 0, LOG_GROUP_REM_PRINTF))
+    {
+        va_list va;
+        va_start(va, pszFormat);
+        RTLogLoggerExV(NULL, 0, LOG_GROUP_REM_PRINTF, pszFormat, va);
+        va_end(va);
+    }
+#endif
+    return 0;
+}
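+
+/* Illustrative sketch only (an assumption, not part of the original patch): if
+ * wrapping calls that take a va_list ever becomes worth it, a forwarding
+ * variant mirroring the disabled block above could look roughly like this:
+ *
+ *   DECLINLINE(int) vfprintf(FILE *ignored, const char *pszFormat, va_list va)
+ *   {
+ *       RTLogLoggerExV(NULL, 0, LOG_GROUP_REM_PRINTF, pszFormat, va);
+ *       return 0;
+ *   }
+ */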
+
+#define fflush(file)            RTLogFlush(NULL)
+#define printf(...)             LogIt(LOG_INSTANCE, 0, LOG_GROUP_REM_PRINTF, (__VA_ARGS__))
+#define fprintf(logfile, ...)   LogIt(LOG_INSTANCE, 0, LOG_GROUP_REM_PRINTF, (__VA_ARGS__))
+
+
+__END_DECLS
+
+#endif
+
Index: /trunk/src/recompiler_new/Sun/deftoimp.sed
===================================================================
--- /trunk/src/recompiler_new/Sun/deftoimp.sed	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/deftoimp.sed	(revision 13168)
@@ -0,0 +1,42 @@
+# $Id$
+## @file
+# SED script for generating a dummy .so from a windows .def file.
+#
+
+#
+#
+# Copyright (C) 2006-2007 Sun Microsystems, Inc.
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+# Clara, CA 95054 USA or visit http://www.sun.com if you need
+# additional information or have any questions.
+#
+
+s/;.*$//g
+s/^[[:space:]][[:space:]]*//g
+s/[[:space:]][[:space:]]*$//g
+/^$/d
+
+# Handle text after EXPORTS
+/EXPORTS/,//{
+s/^EXPORTS$//
+/^$/b end
+
+s/^\(.*\)$/EXPORT\nvoid \1(void);\nvoid \1(void){}/
+b end
+}
+d
+b end
+
+
+# next expression
+:end
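+
+# Illustration only (assumed example, not consumed by sed): a .def line such as
+# "    REMR3Init" under EXPORTS becomes
+#   EXPORT
+#   void REMR3Init(void);
+#   void REMR3Init(void){}
+# which, together with the EXPORT prelude appended by Makefile.kmk, makes up
+# the generated VBoxREMImp.c dummy library.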
+
Index: /trunk/src/recompiler_new/Sun/e_powl-amd64.S
===================================================================
--- /trunk/src/recompiler_new/Sun/e_powl-amd64.S	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/e_powl-amd64.S	(revision 13168)
@@ -0,0 +1,362 @@
+/* ix87 specific implementation of pow function.
+   Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/*#include <machine/asm.h>*/
+#include <iprt/cdefs.h>
+
+#define ALIGNARG(log2) 1<<log2
+#define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg;
+#define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
+#define ASM_GLOBAL_DIRECTIVE .global
+
+#define C_LABEL(name)		name:
+#define C_SYMBOL_NAME(name) name
+
+#define	ENTRY(name)							      \
+  ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name);				      \
+  ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function)			      \
+  .align ALIGNARG(4);							      \
+  C_LABEL(name)
+
+#undef	END
+#define END(name)							      \
+  ASM_SIZE_DIRECTIVE(name)
+
+
+#ifdef __ELF__
+	.section .rodata
+#else
+	.text
+#endif
+
+	.align ALIGNARG(4)
+	ASM_TYPE_DIRECTIVE(infinity,@object)
+inf_zero:
+infinity:
+	.byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f
+	ASM_SIZE_DIRECTIVE(infinity)
+	ASM_TYPE_DIRECTIVE(zero,@object)
+zero:	.double 0.0
+	ASM_SIZE_DIRECTIVE(zero)
+	ASM_TYPE_DIRECTIVE(minf_mzero,@object)
+minf_mzero:
+minfinity:
+	.byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff
+mzero:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0x80
+	ASM_SIZE_DIRECTIVE(minf_mzero)
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29
+	ASM_SIZE_DIRECTIVE(limit)
+	ASM_TYPE_DIRECTIVE(p63,@object)
+p63:
+	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+	ASM_SIZE_DIRECTIVE(p63)
+
+//#ifdef PIC
+//#define MO(op) op##(%rip)
+//#else
+#define MO(op) op
+//#endif
+
+	.text
+/*ENTRY(__ieee754_powl)*/
+ENTRY(RT_NOCRT(powl))
+
+	fldt	24(%rsp)	// y
+	fxam
+
+
+	fnstsw
+	movb	%ah, %dl
+	andb	$0x45, %ah
+	cmpb	$0x40, %ah	// is y == 0 ?
+	je	11f
+
+	cmpb	$0x05, %ah	// is y == ±inf ?
+	je	12f
+
+	cmpb	$0x01, %ah	// is y == NaN ?
+	je	30f
+
+	fldt	8(%rsp)		// x : y
+
+	fxam
+	fnstsw
+	movb	%ah, %dh
+	andb	$0x45, %ah
+	cmpb	$0x40, %ah
+	je	20f		// x is ±0
+
+	cmpb	$0x05, %ah
+	je	15f		// x is ±inf
+
+	fxch			// y : x
+
+	/* fistpll raises invalid exception for |y| >= 1L<<63.  */
+	fldl	MO(p63)		// 1L<<63 : y : x
+	fld	%st(1)		// y : 1L<<63 : y : x
+	fabs			// |y| : 1L<<63 : y : x
+	fcomip	%st(1), %st	// 1L<<63 : y : x
+	fstp	%st(0)		// y : x
+	jnc	2f
+
+	/* First see whether `y' is a natural number.  In this case we
+	   can use a more precise algorithm.  */
+	fld	%st		// y : y : x
+	fistpll	-8(%rsp)	// y : x
+	fildll	-8(%rsp)	// int(y) : y : x
+	fucomip	%st(1),%st	// y : x
+	jne	2f
+
+	/* OK, we have an integer value for y.  */
+	mov	-8(%rsp),%eax
+	mov	-4(%rsp),%edx
+	orl	$0, %edx
+	fstp	%st(0)		// x
+	jns	4f		// y >= 0, jump
+	fdivrl	MO(one)		// 1/x		(now referred to as x)
+	negl	%eax
+	adcl	$0, %edx
+	negl	%edx
+4:	fldl	MO(one)		// 1 : x
+	fxch
+
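+	// The loop below computes the integer power by binary exponentiation:
+	// each pass shifts the exponent in %edx:%eax right by one bit; if the
+	// bit shifted out is set, the current power of x (st0) is multiplied
+	// into the accumulator (st1), and x is then squared for the next bit.
+	// E.g. an exponent of 5 (binary 101) multiplies in x and x^4 -> x^5.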
+6:	shrdl	$1, %edx, %eax
+	jnc	5f
+	fxch
+	fmul	%st(1)		// x : ST*x
+	fxch
+5:	fmul	%st(0), %st	// x*x : ST*x
+	shrl	$1, %edx
+	movl	%eax, %ecx
+	orl	%edx, %ecx
+	jnz	6b
+	fstp	%st(0)		// ST*x
+	ret
+
+	/* y is ±NAN */
+30:	fldt	8(%rsp)		// x : y
+	fldl	MO(one)		// 1.0 : x : y
+	fucomip	%st(1),%st	// x : y
+	je	31f
+	fxch			// y : x
+31:	fstp	%st(1)
+	ret
+
+	.align ALIGNARG(4)
+2:	/* y is a real number.  */
+	fxch			// x : y
+	fldl	MO(one)		// 1.0 : x : y
+	fld	%st(1)		// x : 1.0 : x : y
+	fsub	%st(1)		// x-1 : 1.0 : x : y
+	fabs			// |x-1| : 1.0 : x : y
+	fcompl	MO(limit)	// 1.0 : x : y
+	fnstsw
+	fxch			// x : 1.0 : y
+	test	$4500,%eax
+	jz	7f
+	fsub	%st(1)		// x-1 : 1.0 : y
+	fyl2xp1			// log2(x) : y
+	jmp	8f
+
+7:	fyl2x			// log2(x) : y
+8:	fmul	%st(1)		// y*log2(x) : y
+	fxam
+	fnstsw
+	andb	$0x45, %ah
+	cmpb	$0x05, %ah      // is y*log2(x) == ±inf ?
+	je	28f
+	fst	%st(1)		// y*log2(x) : y*log2(x)
+	frndint			// int(y*log2(x)) : y*log2(x)
+	fsubr	%st, %st(1)	// int(y*log2(x)) : fract(y*log2(x))
+	fxch			// fract(y*log2(x)) : int(y*log2(x))
+	f2xm1			// 2^fract(y*log2(x))-1 : int(y*log2(x))
+	faddl	MO(one)		// 2^fract(y*log2(x)) : int(y*log2(x))
+	fscale			// 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
+	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
+	ret
+
+28:	fstp	%st(1)		// y*log2(x)
+	fldl	MO(one)		// 1 : y*log2(x)
+	fscale			// 2^(y*log2(x)) : y*log2(x)
+	fstp	%st(1)		// 2^(y*log2(x))
+	ret
+
+	// pow(x,±0) = 1
+	.align ALIGNARG(4)
+11:	fstp	%st(0)		// pop y
+	fldl	MO(one)
+	ret
+
+	// y == ±inf
+	.align ALIGNARG(4)
+12:	fstp	%st(0)		// pop y
+	fldt	8(%rsp)		// x
+	fabs
+	fcompl	MO(one)		// < 1, == 1, or > 1
+	fnstsw
+	andb	$0x45, %ah
+	cmpb	$0x45, %ah
+	je	13f		// jump if x is NaN
+
+	cmpb	$0x40, %ah
+	je	14f		// jump if |x| == 1
+
+	shlb	$1, %ah
+	xorb	%ah, %dl
+	andl	$2, %edx
+#ifdef PIC
+	lea	inf_zero(%rip),%rcx
+	fldl	(%rcx, %rdx, 4)
+#else
+	fldl	inf_zero(,%rdx, 4)
+#endif
+	ret
+
+	.align ALIGNARG(4)
+14:	fldl	MO(one)
+	ret
+
+	.align ALIGNARG(4)
+13:	fldt	8(%rsp)		// load x == NaN
+	ret
+
+	.align ALIGNARG(4)
+	// x is ±inf
+15:	fstp	%st(0)		// y
+	testb	$2, %dh
+	jz	16f		// jump if x == +inf
+
+	// We must find out whether y is an odd integer.
+	fld	%st		// y : y
+	fistpll	-8(%rsp)	// y
+	fildll	-8(%rsp)	// int(y) : y
+	fucomip %st(1),%st
+	ffreep	%st		// <empty>
+	jne	17f
+
+	// OK, the value is an integer, but is it odd?
+	mov	-8(%rsp), %eax
+	mov	-4(%rsp), %edx
+	andb	$1, %al
+	jz	18f		// jump if not odd
+	// It's an odd integer.
+	shrl	$31, %edx
+#ifdef PIC
+	lea	minf_mzero(%rip),%rcx
+	fldl	(%rcx, %rdx, 8)
+#else
+	fldl	minf_mzero(,%rdx, 8)
+#endif
+	ret
+
+	.align ALIGNARG(4)
+16:	fcompl	MO(zero)
+	fnstsw
+	shrl	$5, %eax
+	andl	$8, %eax
+#ifdef PIC
+	lea	inf_zero(%rip),%rcx
+	fldl	(%rcx, %rax, 1)
+#else
+	fldl	inf_zero(,%rax, 1)
+#endif
+	ret
+
+	.align ALIGNARG(4)
+17:	shll	$30, %edx	// sign bit for y in right position
+18:	shrl	$31, %edx
+#ifdef PIC
+	lea	inf_zero(%rip),%rcx
+	fldl	(%rcx, %rdx, 8)
+#else
+	fldl	inf_zero(,%rdx, 8)
+#endif
+	ret
+
+	.align ALIGNARG(4)
+	// x is ±0
+20:	fstp	%st(0)		// y
+	testb	$2, %dl
+	jz	21f		// y > 0
+
+	// x is ±0 and y is < 0.  We must find out whether y is an odd integer.
+	testb	$2, %dh
+	jz	25f
+
+	fld	%st		// y : y
+	fistpll	-8(%rsp)	// y
+	fildll	-8(%rsp)	// int(y) : y
+	fucomip	%st(1),%st
+	ffreep	%st		// <empty>
+	jne	26f
+
+	// OK, the value is an integer, but is it odd?
+	mov	-8(%rsp),%eax
+	mov	-4(%rsp),%edx
+	andb	$1, %al
+	jz	27f		// jump if not odd
+	// It's an odd integer.
+	// Raise divide-by-zero exception and get minus infinity value.
+	fldl	MO(one)
+	fdivl	MO(zero)
+	fchs
+	ret
+
+25:	fstp	%st(0)
+26:
+27:	// Raise divide-by-zero exception and get infinity value.
+	fldl	MO(one)
+	fdivl	MO(zero)
+	ret
+
+	.align ALIGNARG(4)
+	// x is ±0 and y is > 0.  We must find out whether y is an odd integer.
+21:	testb	$2, %dh
+	jz	22f
+
+	fld	%st		// y : y
+	fistpll	-8(%rsp)	// y
+	fildll	-8(%rsp)	// int(y) : y
+	fucomip %st(1),%st
+	ffreep	%st		// <empty>
+	jne	23f
+
+	// OK, the value is an integer, but is it odd?
+	mov	-8(%rsp),%eax
+	mov	-4(%rsp),%edx
+	andb	$1, %al
+	jz	24f		// jump if not odd
+	// It's an odd integer.
+	fldl	MO(mzero)
+	ret
+
+22:	fstp	%st(0)
+23:
+24:	fldl	MO(zero)
+	ret
+
+/*END(__ieee754_powl)*/
+END(RT_NOCRT(powl))
+
Index: /trunk/src/recompiler_new/Sun/e_powl-x86.S
===================================================================
--- /trunk/src/recompiler_new/Sun/e_powl-x86.S	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/e_powl-x86.S	(revision 13168)
@@ -0,0 +1,404 @@
+/* ix87 specific implementation of pow function.
+   Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004, 2005
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/*#include <machine/asm.h>*/
+#include <iprt/cdefs.h>
+
+#ifdef __MINGW32__
+# define ASM_TYPE_DIRECTIVE(name,typearg) 
+# define ASM_SIZE_DIRECTIVE(name) 
+# define cfi_adjust_cfa_offset(a)
+# define C_LABEL(name)       _ ## name:
+# define C_SYMBOL_NAME(name) _ ## name
+# define ASM_GLOBAL_DIRECTIVE .global
+# define ALIGNARG(log2) 1<<log2
+#elif __APPLE__
+# define ASM_TYPE_DIRECTIVE(name,typearg) 
+# define ASM_SIZE_DIRECTIVE(name) 
+# define cfi_adjust_cfa_offset(a)
+# define C_LABEL(name)       _ ## name:
+# define C_SYMBOL_NAME(name) _ ## name
+# define ASM_GLOBAL_DIRECTIVE .globl
+# define ALIGNARG(log2) log2
+#else
+# define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg;
+# define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
+# define C_LABEL(name)		name:
+# define C_SYMBOL_NAME(name) name
+# /* figure this one out. */
+# define cfi_adjust_cfa_offset(a)
+# define ASM_GLOBAL_DIRECTIVE .global
+# define ALIGNARG(log2) 1<<log2
+#endif
+
+#define	ENTRY(name)							      \
+  ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name);				      \
+  ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function)			      \
+  .align ALIGNARG(4);							      \
+  C_LABEL(name)
+
+#undef	END
+#define END(name)							      \
+  ASM_SIZE_DIRECTIVE(name)
+
+#ifdef __ELF__
+	.section .rodata
+#else
+	.text
+#endif
+
+	.align ALIGNARG(4)
+	ASM_TYPE_DIRECTIVE(infinity,@object)
+inf_zero:
+infinity:
+	.byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f
+	ASM_SIZE_DIRECTIVE(infinity)
+	ASM_TYPE_DIRECTIVE(zero,@object)
+zero:	.double 0.0
+	ASM_SIZE_DIRECTIVE(zero)
+	ASM_TYPE_DIRECTIVE(minf_mzero,@object)
+minf_mzero:
+minfinity:
+	.byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff
+mzero:
+	.byte 0, 0, 0, 0, 0, 0, 0, 0x80
+	ASM_SIZE_DIRECTIVE(minf_mzero)
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29
+	ASM_SIZE_DIRECTIVE(limit)
+	ASM_TYPE_DIRECTIVE(p63,@object)
+p63:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+	ASM_SIZE_DIRECTIVE(p63)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%ecx)
+#define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
+#else
+#define MO(op) op
+#define MOX(op,x,f) op(,x,f)
+#endif
+
+	.text
+//ENTRY(__ieee754_powl)
+ENTRY(RT_NOCRT(powl))
+#ifdef RT_OS_DARWIN /* 16-byte long double with 8 byte alignment requirements */
+	fldt	20(%esp)	// y
+#else
+	fldt	16(%esp)	// y
+#endif
+	fxam
+
+#ifdef	PIC
+	LOAD_PIC_REG (cx)
+#endif
+
+	fnstsw
+	movb	%ah, %dl
+	andb	$0x45, %ah
+	cmpb	$0x40, %ah	// is y == 0 ?
+	je	.L11
+
+	cmpb	$0x05, %ah	// is y == ±inf ?
+	je	.L12
+
+	cmpb	$0x01, %ah	// is y == NaN ?
+	je	.L30
+
+	fldt	4(%esp)		// x : y
+
+	subl	$8,%esp
+	cfi_adjust_cfa_offset (8)
+
+	fxam
+	fnstsw
+	movb	%ah, %dh
+	andb	$0x45, %ah
+	cmpb	$0x40, %ah
+	je	.L20		// x is ±0
+
+	cmpb	$0x05, %ah
+	je	.L15		// x is ±inf
+
+	fxch			// y : x
+
+	/* fistpll raises invalid exception for |y| >= 1L<<63.  */
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(p63)		// y : x
+	fnstsw
+	sahf
+	jnc	.L2
+
+	/* First see whether `y' is a natural number.  In this case we
+	   can use a more precise algorithm.  */
+	fld	%st		// y : y : x
+	fistpll	(%esp)		// y : x
+	fildll	(%esp)		// int(y) : y : x
+	fucomp	%st(1)		// y : x
+	fnstsw
+	sahf
+	jne	.L2
+
+	/* OK, we have an integer value for y.  */
+	popl	%eax
+	cfi_adjust_cfa_offset (-4)
+	popl	%edx
+	cfi_adjust_cfa_offset (-4)
+	orl	$0, %edx
+	fstp	%st(0)		// x
+	jns	.L4		// y >= 0, jump
+	fdivrl	MO(one)		// 1/x		(now referred to as x)
+	negl	%eax
+	adcl	$0, %edx
+	negl	%edx
+.L4:	fldl	MO(one)		// 1 : x
+	fxch
+
+.L6:	shrdl	$1, %edx, %eax
+	jnc	.L5
+	fxch
+	fmul	%st(1)		// x : ST*x
+	fxch
+.L5:	fmul	%st(0), %st	// x*x : ST*x
+	shrl	$1, %edx
+	movl	%eax, %ecx
+	orl	%edx, %ecx
+	jnz	.L6
+	fstp	%st(0)		// ST*x
+	ret
+
+	/* y is ±NAN */
+.L30:	fldt	4(%esp)		// x : y
+	fldl	MO(one)		// 1.0 : x : y
+	fucomp	%st(1)		// x : y
+	fnstsw
+	sahf
+	je	.L31
+	fxch			// y : x
+.L31:	fstp	%st(1)
+	ret
+
+	cfi_adjust_cfa_offset (8)
+	.align ALIGNARG(4)
+.L2:	/* y is a real number.  */
+	fxch			// x : y
+	fldl	MO(one)		// 1.0 : x : y
+	fld	%st(1)		// x : 1.0 : x : y
+	fsub	%st(1)		// x-1 : 1.0 : x : y
+	fabs			// |x-1| : 1.0 : x : y
+	fcompl	MO(limit)	// 1.0 : x : y
+	fnstsw
+	fxch			// x : 1.0 : y
+	sahf
+	ja	.L7
+	fsub	%st(1)		// x-1 : 1.0 : y
+	fyl2xp1			// log2(x) : y
+	jmp	.L8
+
+.L7:	fyl2x			// log2(x) : y
+.L8:	fmul	%st(1)		// y*log2(x) : y
+	fxam
+	fnstsw
+	andb	$0x45, %ah
+	cmpb	$0x05, %ah	// is y*log2(x) == ±inf ?
+	je	.L28
+	fst	%st(1)		// y*log2(x) : y*log2(x)
+	frndint			// int(y*log2(x)) : y*log2(x)
+	fsubr	%st, %st(1)	// int(y*log2(x)) : fract(y*log2(x))
+	fxch			// fract(y*log2(x)) : int(y*log2(x))
+	f2xm1			// 2^fract(y*log2(x))-1 : int(y*log2(x))
+	faddl	MO(one)		// 2^fract(y*log2(x)) : int(y*log2(x))
+	fscale			// 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
+	addl	$8, %esp
+	cfi_adjust_cfa_offset (-8)
+	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
+	ret
+
+	cfi_adjust_cfa_offset (8)
+.L28:	fstp	%st(1)		// y*log2(x)
+	fldl	MO(one)		// 1 : y*log2(x)
+	fscale			// 2^(y*log2(x)) : y*log2(x)
+	addl	$8, %esp
+	cfi_adjust_cfa_offset (-8)
+	fstp	%st(1)		// 2^(y*log2(x))
+	ret
+
+	// pow(x,±0) = 1
+	.align ALIGNARG(4)
+.L11:	fstp	%st(0)		// pop y
+	fldl	MO(one)
+	ret
+
+	// y == ±inf
+	.align ALIGNARG(4)
+.L12:	fstp	%st(0)		// pop y
+	fldt	4(%esp)		// x
+	fabs
+	fcompl	MO(one)		// < 1, == 1, or > 1
+	fnstsw
+	andb	$0x45, %ah
+	cmpb	$0x45, %ah
+	je	.L13		// jump if x is NaN
+
+	cmpb	$0x40, %ah
+	je	.L14		// jump if |x| == 1
+
+	shlb	$1, %ah
+	xorb	%ah, %dl
+	andl	$2, %edx
+	fldl	MOX(inf_zero, %edx, 4)
+	ret
+
+	.align ALIGNARG(4)
+.L14:	fldl	MO(one)
+	ret
+
+	.align ALIGNARG(4)
+.L13:	fldt	4(%esp)		// load x == NaN
+	ret
+
+	cfi_adjust_cfa_offset (8)
+	.align ALIGNARG(4)
+	// x is ±inf
+.L15:	fstp	%st(0)		// y
+	testb	$2, %dh
+	jz	.L16		// jump if x == +inf
+
+	// We must find out whether y is an odd integer.
+	fld	%st		// y : y
+	fistpll	(%esp)		// y
+	fildll	(%esp)		// int(y) : y
+	fucompp			// <empty>
+	fnstsw
+	sahf
+	jne	.L17
+
+	// OK, the value is an integer, but is it odd?
+	popl	%eax
+	cfi_adjust_cfa_offset (-4)
+	popl	%edx
+	cfi_adjust_cfa_offset (-4)
+	andb	$1, %al
+	jz	.L18		// jump if not odd
+	// It's an odd integer.
+	shrl	$31, %edx
+	fldl	MOX(minf_mzero, %edx, 8)
+	ret
+
+	cfi_adjust_cfa_offset (8)
+	.align ALIGNARG(4)
+.L16:	fcompl	MO(zero)
+	addl	$8, %esp
+	cfi_adjust_cfa_offset (-8)
+	fnstsw
+	shrl	$5, %eax
+	andl	$8, %eax
+	fldl	MOX(inf_zero, %eax, 1)
+	ret
+
+	cfi_adjust_cfa_offset (8)
+	.align ALIGNARG(4)
+.L17:	shll	$30, %edx	// sign bit for y in right position
+	addl	$8, %esp
+	cfi_adjust_cfa_offset (-8)
+.L18:	shrl	$31, %edx
+	fldl	MOX(inf_zero, %edx, 8)
+	ret
+
+	cfi_adjust_cfa_offset (8)
+	.align ALIGNARG(4)
+	// x is ±0
+.L20:	fstp	%st(0)		// y
+	testb	$2, %dl
+	jz	.L21		// y > 0
+
+	// x is ±0 and y is < 0.  We must find out whether y is an odd integer.
+	testb	$2, %dh
+	jz	.L25
+
+	fld	%st		// y : y
+	fistpll	(%esp)		// y
+	fildll	(%esp)		// int(y) : y
+	fucompp			// <empty>
+	fnstsw
+	sahf
+	jne	.L26
+
+	// OK, the value is an integer, but is it odd?
+	popl	%eax
+	cfi_adjust_cfa_offset (-4)
+	popl	%edx
+	cfi_adjust_cfa_offset (-4)
+	andb	$1, %al
+	jz	.L27		// jump if not odd
+	// It's an odd integer.
+	// Raise divide-by-zero exception and get minus infinity value.
+	fldl	MO(one)
+	fdivl	MO(zero)
+	fchs
+	ret
+
+	cfi_adjust_cfa_offset (8)
+.L25:	fstp	%st(0)
+.L26:	addl	$8, %esp
+	cfi_adjust_cfa_offset (-8)
+.L27:	// Raise divide-by-zero exception and get infinity value.
+	fldl	MO(one)
+	fdivl	MO(zero)
+	ret
+
+	cfi_adjust_cfa_offset (8)
+	.align ALIGNARG(4)
+	// x is ±0 and y is > 0.  We must find out whether y is an odd integer.
+.L21:	testb	$2, %dh
+	jz	.L22
+
+	fld	%st		// y : y
+	fistpll	(%esp)		// y
+	fildll	(%esp)		// int(y) : y
+	fucompp			// <empty>
+	fnstsw
+	sahf
+	jne	.L23
+
+	// OK, the value is an integer, but is it odd?
+	popl	%eax
+	cfi_adjust_cfa_offset (-4)
+	popl	%edx
+	cfi_adjust_cfa_offset (-4)
+	andb	$1, %al
+	jz	.L24		// jump if not odd
+	// It's an odd integer.
+	fldl	MO(mzero)
+	ret
+
+	cfi_adjust_cfa_offset (8)
+.L22:	fstp	%st(0)
+.L23:	addl	$8, %esp	// Don't use 2 x pop
+	cfi_adjust_cfa_offset (-8)
+.L24:	fldl	MO(zero)
+	ret
+
+END(RT_NOCRT(powl))
+//END(__ieee754_powl)
Index: /trunk/src/recompiler_new/Sun/op-darwin.sed
===================================================================
--- /trunk/src/recompiler_new/Sun/op-darwin.sed	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/op-darwin.sed	(revision 13168)
@@ -0,0 +1,57 @@
+# $Id$
+## @file
+#
+# SED script for transforming op.S (i386 ELF from GNU/linux) into
+# something that the Darwin (Mac OS X) assembler can make use of.
+#
+
+#
+# Copyright (C) 2006-2007 Sun Microsystems, Inc.
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+# Clara, CA 95054 USA or visit http://www.sun.com if you need
+# additional information or have any questions.
+#
+
+# This is a generic transformation, only encountered with i386-elf-gcc-3.4.3 so far.
+s/^\([[:blank:]]*\)\//\1#/
+
+# Darwin doesn't grok .type and .size, remove those lines.
+/^[[:blank:]]*\.type[[:blank:]]/d
+/^[[:blank:]]*\.size[[:blank:]]/d
+
+# Darwin names the .rodata section '.const'.
+s/^[[:blank:]]*\.section[[:blank:]][[:blank:]]*\.rodata[[:blank:]]*$/\t.const/
+
+# Darwin doesn't grok the .note.GNU-stack section, remove that line.
+/^[[:blank:]]*\.section[[:blank:]][[:blank:]]*\.note\.GNU-stack,/d
+
+# .zero seems to be similar to .space...
+s/^\([[:blank:]]*\)\.zero[[:blank:]][[:blank:]]*\([0-9][0-9]*\)/\1.space \2/
+
+# It looks like .align takes a byte count on linux and a power of
+# two on Darwin, so translate to a power of two.
+s/\.align 128/\.align 7/
+s/\.align 64/\.align 6/
+s/\.align 32/\.align 5/
+s/\.align 16/\.align 4/
+s/\.align 8/\.align 3/
+s/\.align 4/\.align 2/
+s/\.align 2/\.align 1/
+
+
+# Darwin uses underscore prefixed names like the DOS based i386 OSes,
+# unlike linux. So, all global symbols need to be translated.
+s/^[[:blank:]]*\.globl[[:blank:]][[:blank:]]*\([^\t\n ]*\)[[:blank:]]*$/#define \1 _\1\n.globl \1/
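+
+# For example (illustrative symbol name): the line ".globl op_foo" becomes:
+#   #define op_foo _op_foo
+#   .globl op_foo
+# so that, assuming the result is then fed through the C preprocessor, every
+# later reference to op_foo picks up the underscored Mach-O name.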
+
+# special hack for __op_labelN
+s/__op_label\([0-9]\)/___op_label\1/g
+
Index: /trunk/src/recompiler_new/Sun/op-undefined.lst
===================================================================
--- /trunk/src/recompiler_new/Sun/op-undefined.lst	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/op-undefined.lst	(revision 13168)
@@ -0,0 +1,155 @@
+__ldb_mmu
+__ldl_mmu
+__ldq_mmu
+__ldw_mmu
+__op_gen_label1
+__op_param1
+__op_param2
+__op_param3
+__stb_mmu
+__stl_mmu
+__stq_mmu
+__stw_mmu
+approx_rcp
+approx_rsqrt
+check_iob_DX
+check_iob_T0
+check_iol_DX
+check_iol_T0
+check_iow_DX
+check_iow_T0
+cpu_get_apic_tpr
+cpu_inb
+cpu_inl
+cpu_inw
+cpu_lock
+cpu_loop_exit
+cpu_outb
+cpu_outl
+cpu_outw
+cpu_unlock
+f15rk
+float32_compare
+float32_compare_quiet
+float32_sqrt
+float32_to_float64
+float32_to_int32
+float32_to_int32_round_to_zero
+float32_to_int64
+float32_to_int64_round_to_zero
+float64_compare
+float64_compare_quiet
+float64_sqrt
+float64_to_float32
+float64_to_int32
+float64_to_int32_round_to_zero
+float64_to_int64
+float64_to_int64_round_to_zero
+floatx80_compare
+floatx80_compare_quiet
+floatx80_to_int32
+floatx80_to_int32_round_to_zero
+floatx80_to_int64
+floatx80_to_int64_round_to_zero
+fpu_raise_exception
+helper_bswapq_T0
+helper_cmpxchg8b
+helper_single_step
+helper_cpuid
+helper_divl_EAX_T0
+helper_divq_EAX_T0
+helper_enter_level
+helper_enter64_level
+helper_external_event
+helper_f2xm1
+helper_fbld_ST0_A0
+helper_fbst_ST0_A0
+helper_fcos
+helper_fdiv
+helper_fldenv
+helper_fldt_ST0_A0
+helper_fpatan
+helper_fprem
+helper_fprem1
+helper_fptan
+helper_frndint
+helper_frstor
+helper_fsave
+helper_fscale
+helper_fsin
+helper_fsincos
+helper_fsqrt
+helper_fstenv
+helper_fstt_ST0_A0
+helper_fxam_ST0
+helper_fxrstor
+helper_fxsave
+helper_fxtract
+helper_fyl2x
+helper_fyl2xp1
+helper_hlt
+helper_idivl_EAX_T0
+helper_idivq_EAX_T0
+helper_imulq_EAX_T0
+helper_imulq_T0_T1
+helper_invlpg
+helper_iret_protected
+helper_iret_real
+helper_lar
+helper_lcall_protected_T0_T1
+helper_lcall_real_T0_T1
+helper_ljmp_protected_T0_T1
+helper_lldt_T0
+helper_lret_protected
+helper_lsl
+helper_ltr_T0
+helper_monitor
+helper_movl_crN_T0
+helper_movl_drN_T0
+helper_mulq_EAX_T0
+helper_mwait
+helper_record_call
+helper_rdmsr
+helper_rdtsc
+helper_rsm
+helper_syscall
+helper_sysenter
+helper_sysexit
+helper_sysret
+helper_verr
+helper_verw
+helper_wrmsr
+int32_to_float32
+int32_to_float64
+int64_to_float32
+int64_to_float64
+load_seg
+parity_table
+raise_exception
+raise_interrupt
+rclb_table
+rclw_table
+remR3PhysReadS16
+remR3PhysReadS8
+remR3PhysReadU16
+remR3PhysReadU32
+remR3PhysReadU64
+remR3PhysReadU8
+remR3PhysWriteU16
+remR3PhysWriteU32
+remR3PhysWriteU64
+remR3PhysWriteU8
+remR3PhysReadHCPtrS16
+remR3PhysReadHCPtrS8
+remR3PhysReadHCPtrU16
+remR3PhysReadHCPtrU32
+remR3PhysReadHCPtrU64
+remR3PhysReadHCPtrU8
+remR3PhysWriteHCPtrU16
+remR3PhysWriteHCPtrU32
+remR3PhysWriteHCPtrU64
+remR3PhysWriteHCPtrU8
+sync_seg
+update_fp_status
+fabsl
+nocrt_fabsl
Index: /trunk/src/recompiler_new/Sun/op-validate.sed
===================================================================
--- /trunk/src/recompiler_new/Sun/op-validate.sed	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/op-validate.sed	(revision 13168)
@@ -0,0 +1,92 @@
+# $Id$
+## @file
+#
+# Just some quick sed hacks for validating an op.S assembly file.
+# Will try this with gcc 4.x later to see if we can permit gcc 4
+# to build op.c by using this script as a guard against bad code.
+#
+
+## @todo need to check that we've got the two __op_label[0-1].op_goto_tb[0-1] symbols!
+
+# if (ret) goto return
+/^[[:space:]]*ret[[:space:]]*$/b return
+#/^[[:space:]]*retn[[:space:]]*$/b bad
+
+# if (jmp) goto jump
+/^[[:space:]]*[^j]/b skip_jump_checks
+/^[[:space:]]*jmp[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*ja[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jae[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jb[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jbe[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jc[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*je[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jg[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jge[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jl[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jle[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jnae[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jnb[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jnbe[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jnc[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jne[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jng[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jnge[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jnl[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jnle[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jno[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jnp[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jns[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jnz[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jo[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jp[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jpe[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jpo[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*js[[:space:]][[:space:]]*/b jump
+/^[[:space:]]*jz[[:space:]][[:space:]]*/b jump
+:skip_jump_checks
+
+# Everything else is discarded!
+d
+b end
+
+
+#
+# Verify that all ret statements are at the end of a function by
+# inspecting what's on the following line. It must be either a
+# .size statement, a .LfeXXXX label, a .LFEXXXX label or a #NO_APP comment.
+#
+# @todo figure out how to discard the first line in a simpler fashion.
+:return
+N
+s/^[[:blank:]]*ret[[:blank:]]*\n*[[:blank:]]*//
+/\.Lfe[0-9][0-9]*:/d
+/\.LFE[0-9][0-9]*:/d
+/size[[:space:]]/d
+/^[/#]NO_APP[[:space:]]*$/d
+/^$/!b bad
+b end
+
+#
+# Verify that all jumps are to internal labels or to a few select
+# external labels.
+#
+#/^[[:blank:]]*jmp/
+:jump
+s/^[[:space:]]*j[a-z]*[[:space:]][[:space:]]*//
+/^\.L/d
+/^[1-9][fb]$/d
+/^__op_gen_label1$/d
+# two very special cases.
+/^\*__op_param1+48[[:space:]][[:space:]]*#[[:space:]]*<variable>.tb_next[[:space:]]*$/d
+/^\*__op_param1+52[[:space:]][[:space:]]*#[[:space:]]*<variable>.tb_next[[:space:]]*$/d
+/^$/!b bad
+b end
+
+# An error was found
+:bad
+q 1
+
+# next expression
+:end
+
Index: /trunk/src/recompiler_new/Sun/staged-op-elf-amd64.S
===================================================================
--- /trunk/src/recompiler_new/Sun/staged-op-elf-amd64.S	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/staged-op-elf-amd64.S	(revision 13168)
@@ -0,0 +1,2 @@
+This file is just a placeholder. If you see this message, it means that your GCC compiler is not generating the right code. We have yet to see this happen, but when it does, please let us know. (Note that we do not intend to support GCC 4.0.x compilers.)
+
Index: /trunk/src/recompiler_new/Sun/staged-op-elf-x86.S
===================================================================
--- /trunk/src/recompiler_new/Sun/staged-op-elf-x86.S	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/staged-op-elf-x86.S	(revision 13168)
@@ -0,0 +1,2 @@
+This file is just a placeholder. If you see this message, it means that your GCC compiler is not generating the right code. We have yet to see this happen, but when it does, please let us know. (Note that we do not intend to support GCC 4.0.x compilers.)
+
Index: /trunk/src/recompiler_new/Sun/structs.h
===================================================================
--- /trunk/src/recompiler_new/Sun/structs.h	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/structs.h	(revision 13168)
@@ -0,0 +1,290 @@
+/* $Id$ */
+/** @file
+ * VBox Recompiler - structure offset tables.
+ *
+ * Used by op.c and VBoxRecompiler.c to verify they have the
+ * same understanding of the internal structures when using
+ * different compilers (GCC 4.x vs. 3.x/ELF).
+ */
+
+/*
+ * Copyright (C) 2006-2007 Sun Microsystems, Inc.
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+ * Clara, CA 95054 USA or visit http://www.sun.com if you need
+ * additional information or have any questions.
+ */
+
+
+#if REM_STRUCT_OP
+
+/*
+ * We're in op.c.
+ */
+# define REM_STRUCT_TABLE(strct)        const int g_aiOpStruct_ ## strct []
+
+# define REM_THE_END(strct)             sizeof(strct) | 0x42000000
+# define REM_SIZEOF(strct)              sizeof(strct)
+# define REM_OFFSETOF(strct, memb)      RT_OFFSETOF(strct, memb)
+# define REM_SIZEOFMEMB(strct, memb)    RT_SIZEOFMEMB(strct, memb)
+
+#else /* !REM_STRUCT_OP */
+/*
+ * We're in VBoxRecompiler.c. 
+ */
+# define REM_STRUCT_TABLE(strct)  \
+    extern const int g_aiOpStruct_ ## strct []; \
+    \
+    const REMSTRUCTENTRY g_aMyStruct_ ## strct []
+
+# define REM_THE_END(strct)             { sizeof(strct) | 0x42000000, #strct " - the end" }
+# define REM_SIZEOF(strct)              { sizeof(strct)             , #strct " - size of" }
+# define REM_OFFSETOF(strct, memb)      { RT_OFFSETOF(strct, memb)  , #strct "::" #memb " - offsetof" }
+# define REM_SIZEOFMEMB(strct, memb)    { RT_SIZEOFMEMB(strct, memb), #strct "::" #memb " - sizeof" }
+
+/** Matches the My and Op tables for a strct. */
+# define ASSERT_STRUCT_TABLE(strct) \
+    for (i = 0; i < RT_ELEMENTS(g_aMyStruct_ ## strct); i++) \
+        AssertReleaseMsg(g_aMyStruct_ ## strct [i].iValue == g_aiOpStruct_ ## strct [i], \
+                         (#strct "[%d] - %d != %d - %s\n", \
+                          i, \
+                          g_aMyStruct_ ## strct [i].iValue,\
+                          g_aiOpStruct_ ## strct [i], \
+                          g_aMyStruct_ ## strct [i].pszExpr))
+
+/** Struct check entry. */
+typedef struct REMSTRUCTENTRY
+{
+    int iValue;
+    const char *pszExpr;
+} REMSTRUCTENTRY;
+
+#endif /* !REM_STRUCT_OP */
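+
+/* Illustrative usage sketch (not part of this changeset): VBoxRecompiler.c
+ * would compare its tables against the ones compiled into op.c roughly like
+ * this, with 'i' being a local loop counter in the calling function:
+ *
+ *     unsigned i;
+ *     ASSERT_STRUCT_TABLE(Misc);
+ *     ASSERT_STRUCT_TABLE(TLB);
+ *     ASSERT_STRUCT_TABLE(SegmentCache);
+ *     ASSERT_STRUCT_TABLE(CPUState);
+ */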
+
+
+REM_STRUCT_TABLE(Misc) =
+{
+    REM_SIZEOF(char),
+    REM_SIZEOF(short),
+    REM_SIZEOF(int),
+    REM_SIZEOF(long),
+    REM_SIZEOF(float),
+    REM_SIZEOF(double),
+    REM_SIZEOF(long double),
+    REM_SIZEOF(void *),
+    REM_SIZEOF(char *),
+    REM_SIZEOF(short *),
+    REM_SIZEOF(long *),
+    REM_SIZEOF(size_t),
+    REM_SIZEOF(uint8_t),
+    REM_SIZEOF(uint16_t),
+    REM_SIZEOF(uint32_t),
+    REM_SIZEOF(uint64_t),
+    REM_SIZEOF(uintptr_t),
+    REM_SIZEOF(RTGCUINTPTR),
+    REM_SIZEOF(RTHCUINTPTR),
+    REM_SIZEOF(RTR3UINTPTR),
+    REM_SIZEOF(RTR0UINTPTR),
+    REM_THE_END(char)
+};
+
+REM_STRUCT_TABLE(TLB) =
+{
+    REM_SIZEOF(CPUTLBEntry),
+    REM_OFFSETOF(CPUTLBEntry, addr_read),
+    REM_OFFSETOF(CPUTLBEntry, addr_write),
+    REM_OFFSETOF(CPUTLBEntry, addr_code),
+    REM_OFFSETOF(CPUTLBEntry, addend),
+    REM_THE_END(CPUTLBEntry)
+};
+
+REM_STRUCT_TABLE(SegmentCache) =
+{
+    REM_SIZEOF(SegmentCache),
+    REM_OFFSETOF(SegmentCache, selector),
+    REM_OFFSETOF(SegmentCache, base),
+    REM_OFFSETOF(SegmentCache, limit),
+    REM_OFFSETOF(SegmentCache, flags),
+    REM_OFFSETOF(SegmentCache, newselector),
+    REM_THE_END(SegmentCache)
+};
+
+REM_STRUCT_TABLE(XMMReg) =
+{
+    REM_SIZEOF(XMMReg),
+    REM_OFFSETOF(XMMReg, _b[1]),
+    REM_OFFSETOF(XMMReg, _w[1]),
+    REM_OFFSETOF(XMMReg, _l[1]),
+    REM_OFFSETOF(XMMReg, _q[1]),
+    REM_OFFSETOF(XMMReg, _s[1]),
+    REM_OFFSETOF(XMMReg, _d[1]),
+    REM_THE_END(XMMReg)
+};
+
+REM_STRUCT_TABLE(MMXReg) =
+{
+    REM_SIZEOF(MMXReg),
+    REM_OFFSETOF(MMXReg, _b[1]),
+    REM_OFFSETOF(MMXReg, _w[1]),
+    REM_OFFSETOF(MMXReg, _l[1]),
+    REM_OFFSETOF(MMXReg, q),
+    REM_THE_END(MMXReg)
+};
+
+REM_STRUCT_TABLE(float_status) =
+{
+    REM_SIZEOF(float_status),
+    //REM_OFFSETOF(float_status, float_detect_tininess),
+    REM_OFFSETOF(float_status, float_rounding_mode),
+    //REM_OFFSETOF(float_status, float_exception_flags),
+    REM_OFFSETOF(float_status, floatx80_rounding_precision),
+    REM_THE_END(float_status)
+};
+
+REM_STRUCT_TABLE(float32u) =
+{
+    REM_SIZEOF(float32u),
+    REM_OFFSETOF(float32u, f),
+    REM_SIZEOFMEMB(float32u, f),
+    REM_OFFSETOF(float32u, i),
+    REM_SIZEOFMEMB(float32u, i),
+    REM_THE_END(float32u)
+};
+
+REM_STRUCT_TABLE(float64u) =
+{
+    REM_SIZEOF(float64u),
+    REM_OFFSETOF(float64u, f),
+    REM_SIZEOFMEMB(float64u, f),
+    REM_OFFSETOF(float64u, i),
+    REM_SIZEOFMEMB(float64u, i),
+    REM_THE_END(float64u)
+};
+
+REM_STRUCT_TABLE(floatx80u) =
+{
+    REM_SIZEOF(floatx80u),
+    REM_OFFSETOF(floatx80u, f),
+    REM_SIZEOFMEMB(floatx80u, f),
+    REM_OFFSETOF(floatx80u, i),
+    REM_SIZEOFMEMB(floatx80u, i),
+    REM_OFFSETOF(floatx80u, i.low),
+    REM_SIZEOFMEMB(floatx80u, i.low),
+    REM_OFFSETOF(floatx80u, i.high),
+    REM_SIZEOFMEMB(floatx80u, i.high),
+    REM_THE_END(floatx80u)
+};
+
+REM_STRUCT_TABLE(CPUState) =
+{
+    REM_SIZEOF(CPUState),
+    REM_OFFSETOF(CPUState, regs),
+    REM_OFFSETOF(CPUState, regs[R_EAX]),
+    REM_OFFSETOF(CPUState, regs[R_EBX]),
+    REM_OFFSETOF(CPUState, regs[R_ECX]),
+    REM_OFFSETOF(CPUState, regs[R_EDX]),
+    REM_OFFSETOF(CPUState, regs[R_ESI]),
+    REM_OFFSETOF(CPUState, regs[R_EDI]),
+    REM_OFFSETOF(CPUState, regs[R_EBP]),
+    REM_OFFSETOF(CPUState, regs[R_ESP]),
+    REM_OFFSETOF(CPUState, eip),
+    REM_OFFSETOF(CPUState, eflags),
+    REM_OFFSETOF(CPUState, cc_src),
+    REM_OFFSETOF(CPUState, cc_dst),
+    REM_OFFSETOF(CPUState, cc_op),
+    REM_OFFSETOF(CPUState, df),
+    REM_OFFSETOF(CPUState, hflags),
+    REM_OFFSETOF(CPUState, segs),
+    REM_OFFSETOF(CPUState, segs[R_SS]),
+    REM_OFFSETOF(CPUState, segs[R_CS]),
+    REM_OFFSETOF(CPUState, segs[R_DS]),
+    REM_OFFSETOF(CPUState, segs[R_ES]),
+    REM_OFFSETOF(CPUState, segs[R_FS]),
+    REM_OFFSETOF(CPUState, segs[R_GS]),
+    REM_OFFSETOF(CPUState, ldt),
+    REM_OFFSETOF(CPUState, tr),
+    REM_OFFSETOF(CPUState, gdt),
+    REM_OFFSETOF(CPUState, idt),
+    REM_OFFSETOF(CPUState, cr),
+    REM_SIZEOFMEMB(CPUState, cr),
+    REM_OFFSETOF(CPUState, cr[1]),
+    REM_OFFSETOF(CPUState, a20_mask),
+    REM_OFFSETOF(CPUState, fpstt),
+    REM_OFFSETOF(CPUState, fpus),
+    REM_OFFSETOF(CPUState, fpuc),
+    REM_OFFSETOF(CPUState, fptags),
+    REM_SIZEOFMEMB(CPUState, fptags),
+    REM_OFFSETOF(CPUState, fptags[1]),
+    REM_OFFSETOF(CPUState, fpregs),
+    REM_SIZEOFMEMB(CPUState, fpregs),
+    REM_OFFSETOF(CPUState, fpregs[1]),
+    REM_OFFSETOF(CPUState, fp_status),
+    REM_OFFSETOF(CPUState, ft0),
+    REM_SIZEOFMEMB(CPUState, ft0),
+    REM_OFFSETOF(CPUState, fp_convert),
+    REM_SIZEOFMEMB(CPUState, fp_convert),
+    REM_OFFSETOF(CPUState, sse_status),
+    REM_OFFSETOF(CPUState, mxcsr),
+    REM_OFFSETOF(CPUState, xmm_regs),
+    REM_SIZEOFMEMB(CPUState, xmm_regs),
+    REM_OFFSETOF(CPUState, xmm_regs[1]),
+    REM_OFFSETOF(CPUState, mmx_t0),
+    REM_OFFSETOF(CPUState, sysenter_cs),
+    REM_OFFSETOF(CPUState, sysenter_esp),
+    REM_OFFSETOF(CPUState, sysenter_eip),
+    REM_OFFSETOF(CPUState, efer),
+    REM_OFFSETOF(CPUState, star),
+    REM_OFFSETOF(CPUState, pat),
+    REM_OFFSETOF(CPUState, jmp_env),
+    REM_SIZEOFMEMB(CPUState, jmp_env),
+    REM_OFFSETOF(CPUState, exception_index),
+    REM_OFFSETOF(CPUState, error_code),
+    REM_OFFSETOF(CPUState, exception_is_int),
+    REM_OFFSETOF(CPUState, exception_next_eip),
+    REM_OFFSETOF(CPUState, dr),
+    REM_SIZEOFMEMB(CPUState, dr),
+    REM_OFFSETOF(CPUState, dr[1]),
+    REM_OFFSETOF(CPUState, smbase),
+    REM_OFFSETOF(CPUState, interrupt_request),
+    REM_OFFSETOF(CPUState, user_mode_only),
+    REM_OFFSETOF(CPUState, state),
+    REM_OFFSETOF(CPUState, pVM),
+    REM_OFFSETOF(CPUState, pvCodeBuffer),
+    REM_OFFSETOF(CPUState, cbCodeBuffer),
+    REM_OFFSETOF(CPUState, cpuid_features),
+    REM_OFFSETOF(CPUState, cpuid_ext_features),
+    REM_OFFSETOF(CPUState, cpuid_ext2_features),
+
+    /* cpu-defs.h */
+    REM_OFFSETOF(CPUState, current_tb),
+    REM_OFFSETOF(CPUState, mem_write_pc),
+    REM_OFFSETOF(CPUState, mem_write_vaddr),
+    REM_OFFSETOF(CPUState, tlb_table),
+    REM_SIZEOFMEMB(CPUState, tlb_table),
+    REM_OFFSETOF(CPUState, tb_jmp_cache),
+    REM_SIZEOFMEMB(CPUState, tb_jmp_cache),
+    REM_OFFSETOF(CPUState, breakpoints),
+    REM_SIZEOFMEMB(CPUState, breakpoints),
+    REM_OFFSETOF(CPUState, nb_breakpoints),
+    REM_OFFSETOF(CPUState, singlestep_enabled),
+    REM_OFFSETOF(CPUState, next_cpu),
+    REM_OFFSETOF(CPUState, cpu_index),
+    REM_OFFSETOF(CPUState, opaque),
+
+    REM_THE_END(CPUState)
+};
+
+#undef REM_THE_END
+#undef REM_OFFSETOF
+#undef REM_SIZEOF
+#undef REM_SIZEOFMEMB
+#undef REM_STRUCT_TABLE
+
Index: /trunk/src/recompiler_new/Sun/testmath.c
===================================================================
--- /trunk/src/recompiler_new/Sun/testmath.c	(revision 13168)
+++ /trunk/src/recompiler_new/Sun/testmath.c	(revision 13168)
@@ -0,0 +1,798 @@
+/* $Id$ */
+/** @file
+ * Testcase for the no-crt math stuff.
+ */
+
+
+/*******************************************************************************
+*   Header Files                                                               *
+*******************************************************************************/
+#ifndef MATHTEST_STANDALONE
+# include <iprt/assert.h>
+# include <math.h>
+# undef printf
+# define printf AssertMsg2
+#else
+# include <stdio.h>
+# include <math.h>
+#endif
+
+/* gcc starting with version 4.3 uses the MPFR library which results in more accurate results. gcc-4.3.1 seems to emit the less accurate result. So just allow both results. */
+#define SIN180a -0.8011526357338304777463731115L
+#define SIN180b -0.801152635733830477871L
+
+static void bitch(const char *pszWhat, const long double *plrdResult, const long double *plrdExpected)
+{
+    const unsigned char *pach1 = (const unsigned char *)plrdResult;
+    const unsigned char *pach2 = (const unsigned char *)plrdExpected;
+#ifndef MATHTEST_STANDALONE
+    printf("error: %s - %d instead of %d\n", pszWhat, (int)(*plrdResult * 100000), (int)(*plrdExpected * 100000));
+#else
+    printf("error: %s - %.25f instead of %.25f\n", pszWhat, (double)*plrdResult, (double)*plrdExpected);
+#endif
+    printf("   %02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x\n", pach1[0], pach1[1], pach1[2], pach1[3], pach1[4], pach1[5], pach1[6], pach1[7], pach1[8], pach1[9]);
+    printf("   %02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x\n", pach2[0], pach2[1], pach2[2], pach2[3], pach2[4], pach2[5], pach2[6], pach2[7], pach2[8], pach2[9]);
+}
+
+static void bitchll(const char *pszWhat, long long llResult, long long llExpected)
+{
+#if defined(__MINGW32__) && !defined(Assert)
+    printf("error: %s - %I64d instead of %I64d\n", pszWhat, llResult, llExpected);
+#else
+    printf("error: %s - %lld instead of %lld\n", pszWhat, llResult, llExpected);
+#endif
+}
+
+static void bitchl(const char *pszWhat, long lResult, long lExpected)
+{
+    printf("error: %s - %ld instead of %ld\n", pszWhat, lResult, lExpected);
+}
+
+extern int testsin(void)
+{
+    return sinl(180.0L) == SIN180a || sinl(180.0L) == SIN180b;
+}
+
+extern int testremainder(void)
+{
+    static double s_rd1 = 2.5;
+    static double s_rd2 = 2.0;
+    static double s_rd3 = 0.5;
+    return remainder(s_rd1, s_rd2) == s_rd3;
+}
+
+static __inline__ void set_cw(unsigned cw)
+{
+    __asm __volatile("fldcw %0" : : "m" (cw));
+}
+
+static __inline__ unsigned get_cw(void)
+{
+    unsigned cw;
+    __asm __volatile("fstcw %0" : : "m" (cw));
+    return cw & 0xffff;
+}
+
+static long double check_lrd(const long double lrd, const unsigned long long ull, const unsigned short us)
+{
+    static volatile long double lrd2;
+    lrd2 = lrd;
+    if (    *(unsigned long long *)&lrd2 != ull
+        ||  ((unsigned short *)&lrd2)[4] != us)
+    {
+#if defined(__MINGW32__) && !defined(Assert)
+        printf("%I64x:%04x instead of %I64x:%04x\n", *(unsigned long long *)&lrd2, ((unsigned short *)&lrd2)[4], ull, us);
+#else
+        printf("%llx:%04x instead of %llx:%04x\n", *(unsigned long long *)&lrd2, ((unsigned short *)&lrd2)[4], ull, us);
+#endif
+        __asm__("int3\n");
+    }
+    return lrd;
+}
+
+
+static long double make_lrd(const unsigned long long ull, const unsigned short us)
+{
+    union
+    {
+        long double lrd;
+        struct
+        {
+            unsigned long long ull;
+            unsigned short us;
+        } i;
+    } u;
+
+    u.i.ull = ull;
+    u.i.us = us;
+    return u.lrd;
+}
+
+static long double check_lrd_cw(const long double lrd, const unsigned long long ull, const unsigned short us, const unsigned cw)
+{
+    set_cw(cw);
+    if (cw != get_cw())
+    {
+        printf("get_cw() -> %#x expected %#x\n", get_cw(), cw);
+        __asm__("int3\n");
+    }
+    return check_lrd(lrd, ull, us);
+}
+
+static long double make_lrd_cw(unsigned long long ull, unsigned short us, unsigned cw)
+{
+    set_cw(cw);
+    return check_lrd_cw(make_lrd(ull, us), ull, us, cw);
+}
+
+extern int testmath(void)
+{
+    unsigned cErrors = 0;
+    long double lrdResult;
+    long double lrdExpect;
+    long double lrd;
+#define CHECK(operation, expect) \
+    do { \
+        lrdExpect = expect; \
+        lrdResult = operation; \
+        if (lrdResult != lrdExpect) \
+        {  \
+            bitch(#operation,  &lrdResult, &lrdExpect); \
+            cErrors++; \
+        } \
+    } while (0)
+
+    long long llResult;
+    long long llExpect;
+#define CHECKLL(operation, expect) \
+    do { \
+        llExpect = expect; \
+        llResult = operation; \
+        if (llResult != llExpect) \
+        {  \
+            bitchll(#operation,  llResult, llExpect); \
+            cErrors++; \
+        } \
+    } while (0)
+
+    long lResult;
+    long lExpect;
+#define CHECKL(operation, expect) \
+    do { \
+        lExpect = expect; \
+        lResult = operation; \
+        if (lResult != lExpect) \
+        {  \
+            bitchl(#operation,  lResult, lExpect); \
+            cErrors++; \
+        } \
+    } while (0)
+
+    CHECK(atan2l(1.0L, 1.0L), 0.785398163397448309603L);
+    CHECK(atan2l(2.3L, 3.3L), 0.608689307327411694890L);
+
+    CHECK(ceill(1.9L), 2.0L);
+    CHECK(ceill(4.5L), 5.0L);
+    CHECK(ceill(3.3L), 4.0L);
+    CHECK(ceill(6.1L), 7.0L);
+
+    CHECK(floorl(1.9L), 1.0L);
+    CHECK(floorl(4.5L), 4.0L);
+    CHECK(floorl(7.3L), 7.0L);
+    CHECK(floorl(1234.1L), 1234.0L);
+    CHECK(floor(1233.1), 1233.0);
+    CHECK(floor(1239.98989898), 1239.0);
+    CHECK(floorf(9999.999), 9999.0);
+
+    CHECK(ldexpl(1.0L, 1), 2.0L);
+    CHECK(ldexpl(1.0L, 10), 1024.0L);
+    CHECK(ldexpl(2.25L, 10), 2304.0L);
+
+    CHECKLL(llrintl(1.0L), 1);
+    CHECKLL(llrintl(1.3L), 1);
+    CHECKLL(llrintl(1.5L), 2);
+    CHECKLL(llrintl(1.9L), 2);
+    CHECKLL(llrintf(123.34), 123);
+    CHECKLL(llrintf(-123.50), -124);
+    CHECKLL(llrint(42.42), 42);
+    CHECKLL(llrint(-2147483648.12343), -2147483648LL);
+#if !defined(RT_ARCH_AMD64)
+    CHECKLL(lrint(-21474836499.12343), -2147483648LL);
+    CHECKLL(lrint(-2147483649932412.12343), -2147483648LL);
+#else
+    CHECKLL(lrint(-21474836499.12343), -21474836499L);
+    CHECKLL(lrint(-2147483649932412.12343), -2147483649932412L);
+#endif
+
+//    __asm__("int3");
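+    /* Note: the control words used below differ only in the x87 rounding
+       control bits (bits 10-11): 0x027f selects round-to-nearest, 0x067f
+       round-down, 0x0a7f round-up and 0x0e7f truncation toward zero. */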
+    CHECKL(lrintl(make_lrd_cw(000000000000000000ULL,000000,0x027f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x027f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x027f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x067f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x067f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0a7f)), 1L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0a7f)), 1L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0e7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0e7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x027f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x027f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x067f)), -1L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x067f)), -1L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0a7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0a7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0e7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0e7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x027f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x027f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x067f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x067f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0a7f)), 1L);
+    CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0a7f)), 1L);
+    CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0e7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0e7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x027f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x027f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x067f)), -1L);
+    CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x067f)), -1L);
+    CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0a7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0a7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0e7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0e7f)), 0L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x027f)), 32768L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x027f)), 32768L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x067f)), 32768L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x067f)), 32768L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0a7f)), 32768L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0a7f)), 32768L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0e7f)), 32768L);
+    CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0e7f)), 32768L);
+#if !defined(RT_ARCH_AMD64)
+    /* c90 says that the constant is 2147483648 (which is not representable as a signed 32-bit
+     * value).  To that constant you've then applied the negation operation. c90 doesn't have
+     * negative constants, only positive ones that have been negated.  */
+    CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x027f)), (long)(-2147483647L - 1));
+    CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x027f)), (long)(-2147483647L - 1));
+    CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x067f)), (long)(-2147483647L - 1));
+    CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x067f)), (long)(-2147483647L - 1));
+    CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0a7f)), (long)(-2147483647L - 1));
+    CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0a7f)), (long)(-2147483647L - 1));
+    CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0e7f)), (long)(-2147483647L - 1));
+    CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0e7f)), (long)(-2147483647L - 1));
+#endif
+    set_cw(0x27f);
+
+    CHECK(logl(2.7182818284590452353602874713526625L), 1.0);
+
+    CHECK(remainderl(1.0L, 1.0L), 0.0);
+    CHECK(remainderl(1.0L, 1.5L), -0.5);
+    CHECK(remainderl(42.0L, 34.25L), 7.75);
+    CHECK(remainderf(43.0, 34.25), 8.75);
+    CHECK(remainder(44.25, 34.25), 10.00);
+    double rd1 = 44.25;
+    double rd2 = 34.25;
+    CHECK(remainder(rd1, rd2), 10.00);
+    CHECK(remainder(2.5, 2.0), 0.5);
+    CHECK(remainder(2.5, 2.0), 0.5);
+    CHECK(remainder(2.5, 2.0), 0.5);
+    CHECKLL(testremainder(), 1);
+
+
+    CHECK(rintl(1.0L), 1.0);
+    CHECK(rintl(1.4L), 1.0);
+    CHECK(rintl(1.3L), 1.0);
+    CHECK(rintl(0.9L), 1.0);
+    CHECK(rintl(3123.1232L), 3123.0);
+    CHECK(rint(3985.13454), 3985.0);
+    CHECK(rintf(9999.999), 10000.0);
+
+    CHECK(sinl(1.0L),  0.84147098480789650664L);
+    lrd = 180.0L;
+    CHECK(sinl(lrd), -0.801152635733830477871L);
+#if 0
+    CHECK(sinl(180.0L), SIN180);
+#else
+    lrdExpect = SIN180a;
+    lrdResult = sinl(180.0L);
+    if (lrdResult != lrdExpect)
+    {
+        lrdExpect = SIN180b;
+        if (lrdResult != lrdExpect)
+        {
+            bitch("sinl(180.0L)",  &lrdResult, &lrdExpect);
+            cErrors++;
+        }
+    }
+#endif
+    CHECKLL(testsin(), 1);
+
+    CHECK(sqrtl(1.0L), 1.0);
+    CHECK(sqrtl(4.0L), 2.0);
+    CHECK(sqrtl(1525225.0L), 1235.0);
+
+    CHECK(tanl(0.0L), 0.0);
+    CHECK(tanl(0.7853981633974483096156608458198757L), 1.0);
+
+    CHECK(powl(0.0, 0.0), 1.0);
+    CHECK(powl(2.0, 2.0), 4.0);
+    CHECK(powl(3.0, 3.0), 27.0);
+
+    return cErrors;
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
+#if 0
+
+#define floatx_to_int32 floatx80_to_int32
+#define floatx_to_int64 floatx80_to_int64
+#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero
+#define floatx_abs floatx80_abs
+#define floatx_chs floatx80_chs
+#define floatx_round_to_int(foo, bar) floatx80_round_to_int(foo, NULL)
+#define floatx_compare floatx80_compare
+#define floatx_compare_quiet floatx80_compare_quiet
+#undef sin
+#undef cos
+#undef sqrt
+#undef pow
+#undef log
+#undef tan
+#undef atan2
+#undef floor
+#undef ceil
+#undef ldexp
+#define sin sinl
+#define cos cosl
+#define sqrt sqrtl
+#define pow powl
+#define log logl
+#define tan tanl
+#define atan2 atan2l
+#define floor floorl
+#define ceil ceill
+#define ldexp ldexpl
+
+
+typedef long double CPU86_LDouble;
+
+typedef union {
+    long double d;
+    struct {
+        unsigned long long lower;
+        unsigned short upper;
+    } l;
+} CPU86_LDoubleU;
+
+/* the following deal with x86 long double-precision numbers */
+#define MAXEXPD 0x7fff
+#define EXPBIAS 16383
+#define EXPD(fp)	(fp.l.upper & 0x7fff)
+#define SIGND(fp)	((fp.l.upper) & 0x8000)
+#define MANTD(fp)       (fp.l.lower)
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
+
+typedef long double floatx80;
+#define STATUS_PARAM , void *pv
+
+static floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM)
+{
+    return rintl(a);
+}
+
+
+
+struct myenv
+{
+    unsigned int fpstt; /* top of stack index */
+    unsigned int fpus;
+    unsigned int fpuc;
+    unsigned char fptags[8];   /* 0 = valid, 1 = empty */
+    union {
+#ifdef USE_X86LDOUBLE
+        CPU86_LDouble d __attribute__((aligned(16)));
+#else
+        CPU86_LDouble d;
+#endif
+    } fpregs[8];
+
+} my_env, env_org, env_res, *env = &my_env;
+
+
+#define ST0    (env->fpregs[env->fpstt].d)
+#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
+#define ST1    ST(1)
+#define MAXTAN 9223372036854775808.0
+
+
+static inline void fpush(void)
+{
+    env->fpstt = (env->fpstt - 1) & 7;
+    env->fptags[env->fpstt] = 0; /* validate stack entry */
+}
+
+static inline void fpop(void)
+{
+    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
+    env->fpstt = (env->fpstt + 1) & 7;
+}
+
+static void helper_f2xm1(void)
+{
+    ST0 = pow(2.0,ST0) - 1.0;
+}
+
+static void helper_fyl2x(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if (fptemp>0.0){
+        fptemp = log(fptemp)/log(2.0);	 /* log2(ST) */
+        ST1 *= fptemp;
+        fpop();
+    } else {
+        env->fpus &= (~0x4700);
+        env->fpus |= 0x400;
+    }
+}
+
+static void helper_fptan(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = tan(fptemp);
+        fpush();
+        ST0 = 1.0;
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**52 only */
+    }
+}
+
+static void helper_fpatan(void)
+{
+    CPU86_LDouble fptemp, fpsrcop;
+
+    fpsrcop = ST1;
+    fptemp = ST0;
+    ST1 = atan2(fpsrcop,fptemp);
+    fpop();
+}
+
+static void helper_fxtract(void)
+{
+    CPU86_LDoubleU temp;
+    unsigned int expdif;
+
+    temp.d = ST0;
+    expdif = EXPD(temp) - EXPBIAS;
+    /*DP exponent bias*/
+    ST0 = expdif;
+    fpush();
+    BIASEXPONENT(temp);
+    ST0 = temp.d;
+}
+
+static void helper_fprem1(void)
+{
+    CPU86_LDouble dblq, fpsrcop, fptemp;
+    CPU86_LDoubleU fpsrcop1, fptemp1;
+    int expdif;
+    int q;
+
+    fpsrcop = ST0;
+    fptemp = ST1;
+    fpsrcop1.d = fpsrcop;
+    fptemp1.d = fptemp;
+    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+    if (expdif < 53) {
+        dblq = fpsrcop / fptemp;
+        dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
+        ST0 = fpsrcop - fptemp*dblq;
+        q = (int)dblq; /* cutting off top bits is assumed here */
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+				/* (C0,C1,C3) <-- (q2,q1,q0) */
+        env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */
+        env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */
+        env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */
+    } else {
+        env->fpus |= 0x400;  /* C2 <-- 1 */
+        fptemp = pow(2.0, expdif-50);
+        fpsrcop = (ST0 / ST1) / fptemp;
+        /* fpsrcop = integer obtained by rounding to the nearest */
+        fpsrcop = (fpsrcop-floor(fpsrcop) < ceil(fpsrcop)-fpsrcop)?
+            floor(fpsrcop): ceil(fpsrcop);
+        ST0 -= (ST1 * fpsrcop * fptemp);
+    }
+}
+
+static void helper_fprem(void)
+{
+#if 0
+LogFlow(("helper_fprem: ST0=%.*Vhxs ST1=%.*Vhxs fpus=%#x\n", sizeof(ST0), &ST0, sizeof(ST1), &ST1, env->fpus));
+
+    __asm__ __volatile__("fldt (%2)\n"
+                         "fldt (%1)\n"
+                         "fprem \n"
+                         "fnstsw (%0)\n"
+                         "fstpt (%1)\n"
+                         "fstpt (%2)\n"
+                         : : "r" (&env->fpus), "r" (&ST0), "r" (&ST1) : "memory");
+
+LogFlow(("helper_fprem: -> ST0=%.*Vhxs fpus=%#x c\n", sizeof(ST0), &ST0, env->fpus));
+#else
+    CPU86_LDouble dblq, fpsrcop, fptemp;
+    CPU86_LDoubleU fpsrcop1, fptemp1;
+    int expdif;
+    int q;
+
+    fpsrcop = ST0;
+    fptemp = ST1;
+    fpsrcop1.d = fpsrcop;
+    fptemp1.d = fptemp;
+
+    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+    if ( expdif < 53 ) {
+        dblq = fpsrcop / fptemp;
+        dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
+        ST0 = fpsrcop - fptemp*dblq;
+        q = (int)dblq; /* cutting off top bits is assumed here */
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+				/* (C0,C1,C3) <-- (q2,q1,q0) */
+        env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */
+        env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */
+        env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */
+    } else {
+        env->fpus |= 0x400;  /* C2 <-- 1 */
+        fptemp = pow(2.0, expdif-50);
+        fpsrcop = (ST0 / ST1) / fptemp;
+        /* fpsrcop = integer obtained by chopping */
+        fpsrcop = (fpsrcop < 0.0)?
+            -(floor(fabs(fpsrcop))): floor(fpsrcop);
+        ST0 -= (ST1 * fpsrcop * fptemp);
+    }
+#endif
+}
+
+static void helper_fyl2xp1(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if ((fptemp+1.0)>0.0) {
+        fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
+        ST1 *= fptemp;
+        fpop();
+    } else {
+        env->fpus &= (~0x4700);
+        env->fpus |= 0x400;
+    }
+}
+
+static void helper_fsqrt(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if (fptemp<0.0) {
+        env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
+        env->fpus |= 0x400;
+    }
+    ST0 = sqrt(fptemp);
+}
+
+static void helper_fsincos(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = sin(fptemp);
+        fpush();
+        ST0 = cos(fptemp);
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**63 only */
+    }
+}
+
+static void helper_frndint(void)
+{
+    ST0 = floatx_round_to_int(ST0, &env->fp_status);
+}
+
+static void helper_fscale(void)
+{
+    ST0 = ldexp (ST0, (int)(ST1));
+}
+
+static void helper_fsin(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = sin(fptemp);
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**53 only */
+    }
+}
+
+static void helper_fcos(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = cos(fptemp);
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**63 only */
+    }
+}
+
+static void helper_fxam_ST0(void)
+{
+    CPU86_LDoubleU temp;
+    int expdif;
+
+    temp.d = ST0;
+
+    env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
+    if (SIGND(temp))
+        env->fpus |= 0x200; /* C1 <-- 1 */
+
+    /* XXX: test fptags too */
+    expdif = EXPD(temp);
+    if (expdif == MAXEXPD) {
+#ifdef USE_X86LDOUBLE
+        if (MANTD(temp) == 0x8000000000000000ULL)
+#else
+        if (MANTD(temp) == 0)
+#endif
+            env->fpus |=  0x500 /*Infinity*/;
+        else
+            env->fpus |=  0x100 /*NaN*/;
+    } else if (expdif == 0) {
+        if (MANTD(temp) == 0)
+            env->fpus |=  0x4000 /*Zero*/;
+        else
+            env->fpus |= 0x4400 /*Denormal*/;
+    } else {
+        env->fpus |= 0x400;
+    }
+}
+
+
+void check_env(void)
+{
+    int i;
+    for (i = 0; i < 8; i++)
+    {
+        CPU86_LDoubleU my, res;
+        my.d = env->fpregs[i].d;
+        res.d = env_res.fpregs[i].d;
+
+        if (    my.l.lower != res.l.lower
+            ||  my.l.upper != res.l.upper)
+            printf("register %i: %#018llx:%#06x\n"
+                   "    expected %#018llx:%#06x\n",
+                   i,
+                   my.l.lower, my.l.upper,
+                   res.l.lower, res.l.upper);
+    }
+    for (i = 0; i < 8; i++)
+        if (env->fptags[i] != env_res.fptags[i])
+            printf("tag %i: %d != %d\n", i, env->fptags[i], env_res.fptags[i]);
+    if (env->fpstt != env_res.fpstt)
+        printf("fpstt: %#06x != %#06x\n", env->fpstt, env_res.fpstt);
+    if (env->fpuc != env_res.fpuc)
+        printf("fpuc:  %#06x != %#06x\n", env->fpuc, env_res.fpuc);
+    if (env->fpus != env_res.fpus)
+        printf("fpus:  %#06x != %#06x\n", env->fpus, env_res.fpus);
+}
+#endif /* not used. */
+
+#if 0 /* insert this into helper.c */
+/* FPU helpers */
+CPU86_LDoubleU  my_st[8];
+unsigned int    my_fpstt;
+unsigned int    my_fpus;
+unsigned int    my_fpuc;
+unsigned char my_fptags[8];
+
+void hlp_fpu_enter(void)
+{
+    int i;
+    for (i = 0; i < 8; i++)
+        my_st[i].d = env->fpregs[i].d;
+    my_fpstt = env->fpstt;
+    my_fpus = env->fpus;
+    my_fpuc = env->fpuc;
+    memcpy(&my_fptags, &env->fptags, sizeof(my_fptags));
+}
+
+void hlp_fpu_leave(const char *psz)
+{
+    int i;
+    Log(("/*code*/ \n"));
+    for (i = 0; i < 8; i++)
+        Log(("/*code*/ *(unsigned long long *)&env_org.fpregs[%d] = %#018llxULL; ((unsigned short *)&env_org.fpregs[%d])[4] = %#06x; env_org.fptags[%d]=%d;\n",
+             i, my_st[i].l.lower, i, my_st[i].l.upper, i, my_fptags[i]));
+    Log(("/*code*/ env_org.fpstt=%#x;\n", my_fpstt));
+    Log(("/*code*/ env_org.fpus=%#x;\n", my_fpus));
+    Log(("/*code*/ env_org.fpuc=%#x;\n", my_fpuc));
+    for (i = 0; i < 8; i++)
+    {
+        CPU86_LDoubleU u;
+        u.d = env->fpregs[i].d;
+        Log(("/*code*/ *(unsigned long long *)&env_res.fpregs[%d] = %#018llxULL; ((unsigned short *)&env_res.fpregs[%d])[4] = %#06x; env_res.fptags[%d]=%d;\n",
+             i, u.l.lower, i, u.l.upper, i, env->fptags[i]));
+    }
+    Log(("/*code*/ env_res.fpstt=%#x;\n", env->fpstt));
+    Log(("/*code*/ env_res.fpus=%#x;\n", env->fpus));
+    Log(("/*code*/ env_res.fpuc=%#x;\n", env->fpuc));
+
+    Log(("/*code*/ my_env = env_org;\n"));
+    Log(("/*code*/ %s();\n", psz));
+    Log(("/*code*/ check_env();\n"));
+}
+#endif /* helper.c */
+
+extern void testmath2(void)
+{
+#if 0
+#include "/tmp/code.h"
+#endif
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
+
+#ifdef MATHTEST_STANDALONE
+
+void test_fops(double a, double b)
+{
+    printf("a=%f b=%f a+b=%f\n", a, b, a + b);
+    printf("a=%f b=%f a-b=%f\n", a, b, a - b);
+    printf("a=%f b=%f a*b=%f\n", a, b, a * b);
+    printf("a=%f b=%f a/b=%f\n", a, b, a / b);
+    printf("a=%f b=%f fmod(a, b)=%f\n", a, b, (double)fmod(a, b));
+    printf("a=%f sqrt(a)=%f\n", a, (double)sqrtl(a));
+    printf("a=%f sin(a)=%f\n", a, (double)sinl(a));
+    printf("a=%f cos(a)=%f\n", a, (double)cos(a));
+    printf("a=%f tan(a)=%f\n", a, (double)tanl(a));
+    printf("a=%f log(a)=%f\n", a, (double)log(a));
+    printf("a=%f exp(a)=%f\n", a, (double)exp(a));
+    printf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
+    /* just to test some op combining */
+    printf("a=%f asin(sinl(a))=%f\n", a, (double)asin(sinl(a)));
+    printf("a=%f acos(cos(a))=%f\n", a, (double)acos(cos(a)));
+    printf("a=%f atan(tanl(a))=%f\n", a, (double)atan(tanl(a)));
+}
+
+int main()
+{
+    unsigned cErrors = testmath();
+
+    testmath2();
+    test_fops(2, 3);
+    test_fops(1.4, -5);
+
+    printf("cErrors=%d\n", cErrors);
+    return cErrors;
+}
+#endif
+
Index: /trunk/src/recompiler_new/VBoxREM.def
===================================================================
--- /trunk/src/recompiler_new/VBoxREM.def	(revision 13168)
+++ /trunk/src/recompiler_new/VBoxREM.def	(revision 13168)
@@ -0,0 +1,56 @@
+; $Id$
+;; @file
+;
+; VBoxREM Definition File.
+;
+; Copyright (C) 2006-2007 Sun Microsystems, Inc.
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+; Clara, CA 95054 USA or visit http://www.sun.com if you need
+; additional information or have any questions.
+;
+
+LIBRARY VBoxREM.dll
+
+EXPORTS
+    REMR3Init
+    REMR3Term
+    REMR3Reset
+    REMR3Step
+    REMR3BreakpointSet
+    REMR3BreakpointClear
+    REMR3Run
+    REMR3EmulateInstruction
+    REMR3State
+    REMR3StateBack
+    REMR3StateUpdate
+    REMR3A20Set
+    REMR3DisasEnableStepping
+    REMR3ReplayInvalidatedPages
+    REMR3ReplayHandlerNotifications
+    REMR3NotifyPhysRamRegister
+    REMR3NotifyPhysRamChunkRegister
+    REMR3NotifyPhysReserve
+    REMR3NotifyPhysRomRegister
+    REMR3NotifyHandlerPhysicalModify
+    REMR3NotifyHandlerPhysicalRegister
+    REMR3NotifyHandlerPhysicalDeregister
+    REMR3NotifyInterruptSet
+    REMR3NotifyInterruptClear
+    REMR3NotifyTimerPending
+    REMR3NotifyDmaPending
+    REMR3NotifyQueuePending
+    REMR3NotifyFF
+    REMR3NotifyCodePageChanged
+    REMR3IsPageAccessHandled
+    REMR3NotifyPendingInterrupt
+    REMR3QueryPendingInterrupt
+
Index: /trunk/src/recompiler_new/VBoxREMWrapper.cpp
===================================================================
--- /trunk/src/recompiler_new/VBoxREMWrapper.cpp	(revision 13168)
+++ /trunk/src/recompiler_new/VBoxREMWrapper.cpp	(revision 13168)
@@ -0,0 +1,2189 @@
+/* $Id$ */
+/** @file
+ *
+ * VBoxREM Win64 DLL Wrapper.
+ */
+/*
+ * Copyright (C) 2006-2007 Sun Microsystems, Inc.
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+ * Clara, CA 95054 USA or visit http://www.sun.com if you need
+ * additional information or have any questions.
+ */
+
+
+/** @page pg_vboxrem_amd64      VBoxREM Hacks on AMD64
+ *
+ * There are problems with building VBoxREM on both WIN64 and 64-bit linux.
+ *
+ * On linux binutils refuses to link shared objects without -fPIC compiled code
+ * (bitches about some fixup types). But when trying to build with -fPIC dyngen
+ * doesn't like the code anymore. Sweet. The current solution is to build the
+ * VBoxREM code as a relocatable module and use our ELF loader to load it.
+ *
+ * On WIN64 we're not aware of any GCC port which can emit code using the MSC
+ * calling convention. So, we're in for some real fun here. The choice is between
+ * porting GCC to AMD64 WIN64 and coming up with some kind of wrapper around
+ * either the win32 build or the 64-bit linux build.
+ *
+ *  -# Porting GCC will be a lot of work. For one thing the calling convention differs
+ *     and messing with such stuff can easily create ugly bugs. We would also have to
+ *     do some binutils changes, but I think those are rather small compared to GCC.
+ *     (That said, the MSC calling convention is far simpler than the linux one, it
+ *     reminds me of _Optlink which we have working already.)
+ *  -# Wrapping win32 code will work, but addresses outside the first 4GB are
+ *     inaccessible and we will have to create 32-64 thunks for all imported functions.
+ *     (To switch between 32-bit and 64-bit mode, load the right CS using far jmps (32->64)
+ *     or far returns (both).)
+ *  -# Wrapping 64-bit linux code might be the easier solution. The requirements here
+ *     are:
+ *       - Remove all CRT references we possibly can, either by using intrinsics or using
+ *         IPRT. Part of IPRT will be linked into VBoxREM2.rel; this will be yet another
+ *         IPRT mode which I've dubbed 'no-crt'. The no-crt mode provides basic non-system
+ *         dependent stuff.
+ *       - Compile and link it into a relocatable object (include the gcc intrinsics
+ *         in libgcc). Call this VBoxREM2.rel.
+ *       - Write a wrapper dll, VBoxREM.dll, which during REMR3Init() will load
+ *         VBoxREM2.rel (using IPRT) and generate calling convention wrappers
+ *         for all IPRT functions and VBoxVMM functions that it uses. All exports
+ *         will be wrapped vice versa.
+ *       - For building on windows hosts, we will use a mingw32 hosted cross compiler
+ *         and add a 'no-crt' mode to IPRT where it provides the necessary CRT headers
+ *         and function implementations.
+ *
+ * The 3rd solution will be tried out first since it requires the least effort and
+ * will let us make use of the full 64-bit register set.
+ *
+ *
+ *
+ * @section sec_vboxrem_amd64_compare   Comparing the GCC and MSC calling conventions
+ *
+ * GCC expects the following (cut & paste from page 20 in the ABI draft 0.96):
+ *
+ * @verbatim
+    %rax     temporary register; with variable arguments passes information about the
+             number of SSE registers used; 1st return register.
+             [Not preserved]
+    %rbx     callee-saved register; optionally used as base pointer.
+             [Preserved]
+    %rcx     used to pass 4th integer argument to functions.
+             [Not preserved]
+    %rdx     used to pass 3rd argument to functions; 2nd return register
+             [Not preserved]
+    %rsp     stack pointer
+             [Preserved]
+    %rbp     callee-saved register; optionally used as frame pointer
+             [Preserved]
+    %rsi     used to pass 2nd argument to functions
+             [Not preserved]
+    %rdi     used to pass 1st argument to functions
+             [Not preserved]
+    %r8      used to pass 5th argument to functions
+             [Not preserved]
+    %r9      used to pass 6th argument to functions
+             [Not preserved]
+    %r10     temporary register, used for passing a function's static chain
+             pointer [Not preserved]
+    %r11     temporary register
+             [Not preserved]
+    %r12-r15 callee-saved registers
+             [Preserved]
+    %xmm0-%xmm1  used to pass and return floating point arguments
+             [Not preserved]
+    %xmm2-%xmm7  used to pass floating point arguments
+             [Not preserved]
+    %xmm8-%xmm15 temporary registers
+             [Not preserved]
+    %mmx0-%mmx7  temporary registers
+             [Not preserved]
+    %st0     temporary register; used to return long double arguments
+             [Not preserved]
+    %st1     temporary register; used to return long double arguments
+             [Not preserved]
+    %st2-%st7 temporary registers
+             [Not preserved]
+    %fs      Reserved for system use (as thread specific data register)
+             [Not preserved]
+   @endverbatim
+ *
+ * Direction flag is preserved as cleared.
+ * The stack must be aligned on a 16-byte boundary before the 'call/jmp' instruction.
+ *
+ *
+ *
+ * MSC expects the following:
+ * @verbatim
+    rax      return value, not preserved.
+    rbx      preserved.
+    rcx      1st argument, integer, not preserved.
+    rdx      2nd argument, integer, not preserved.
+    rbp      preserved.
+    rsp      preserved.
+    rsi      preserved.
+    rdi      preserved.
+    r8       3rd argument, integer, not preserved.
+    r9       4th argument, integer, not preserved.
+    r10      scratch register, not preserved.
+    r11      scratch register, not preserved.
+    r12-r15  preserved.
+    xmm0     1st argument, fp, return value, not preserved.
+    xmm1     2nd argument, fp, not preserved.
+    xmm2     3rd argument, fp, not preserved.
+    xmm3     4th argument, fp, not preserved.
+    xmm4-xmm5    scratch, not preserved.
+    xmm6-xmm15   preserved.
+   @endverbatim
+ *
+ * Dunno what the direction flag is...
+ * The stack must be aligned on a 16-byte boundary before the 'call/jmp' instruction.
+ *
+ *
+ * Thus, when GCC code is calling MSC code we don't really have to preserve
+ * anything. But when MSC code is calling GCC code, we'll have to save esi and edi.
+ *
+ */
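+
+/* Purely illustrative (not from the original sources): the integer argument
+ * register remapping a GCC -> MSC call wrapper has to perform for the first
+ * four arguments, according to the two conventions quoted above.  The table
+ * name is made up for this illustration. */
+#if 0
+static const struct { const char *pszGccReg; const char *pszMscReg; } g_aIntArgRegMap[] =
+{
+    { "rdi", "rcx" },   /* 1st integer argument */
+    { "rsi", "rdx" },   /* 2nd integer argument */
+    { "rdx", "r8"  },   /* 3rd integer argument */
+    { "rcx", "r9"  },   /* 4th integer argument */
+};
+#endif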
+
+
+/*******************************************************************************
+*   Defined Constants And Macros                                               *
+*******************************************************************************/
+/** @def USE_REM_STUBS
+ * Define USE_REM_STUBS to stub the entire REM stuff. This is useful during
+ * early porting (before we start running stuff).
+ */
+#if defined(__DOXYGEN__)
+# define USE_REM_STUBS
+#endif
+
+/** @def USE_REM_CALLING_CONVENTION_GLUE
+ * Define USE_REM_CALLING_CONVENTION_GLUE for platforms where it's necessary to
+ * use calling convention wrappers.
+ */
+#if (defined(RT_ARCH_AMD64) && defined(RT_OS_WINDOWS)) || defined(__DOXYGEN__)
+# define USE_REM_CALLING_CONVENTION_GLUE
+#endif
+
+/** @def USE_REM_IMPORT_JUMP_GLUE
+ * Define USE_REM_IMPORT_JUMP_GLUE for platforms where we need to
+ * emit some jump glue to deal with big addresses.
+ */
+#if (defined(RT_ARCH_AMD64) && !defined(USE_REM_CALLING_CONVENTION_GLUE) && !defined(RT_OS_DARWIN)) || defined(__DOXYGEN__)
+# define USE_REM_IMPORT_JUMP_GLUE
+#endif
+
+
+/*******************************************************************************
+*   Header Files                                                               *
+*******************************************************************************/
+#define LOG_GROUP LOG_GROUP_REM
+#include <VBox/rem.h>
+#include <VBox/vmm.h>
+#include <VBox/dbgf.h>
+#include <VBox/dbg.h>
+#include <VBox/csam.h>
+#include <VBox/mm.h>
+#include <VBox/em.h>
+#include <VBox/ssm.h>
+#include <VBox/hwaccm.h>
+#include <VBox/patm.h>
+#include <VBox/pdm.h>
+#include <VBox/pgm.h>
+#include <VBox/iom.h>
+#include <VBox/vm.h>
+#include <VBox/err.h>
+#include <VBox/log.h>
+#include <VBox/dis.h>
+
+#include <iprt/alloc.h>
+#include <iprt/assert.h>
+#include <iprt/ldr.h>
+#include <iprt/param.h>
+#include <iprt/path.h>
+#include <iprt/string.h>
+#include <iprt/stream.h>
+
+
+/*******************************************************************************
+*   Structures and Typedefs                                                    *
+*******************************************************************************/
+/**
+ * Parameter descriptor.
+ */
+typedef struct REMPARMDESC
+{
+    /** Parameter flags (REMPARMDESC_FLAGS_*). */
+    uint8_t     fFlags;
+    /** The parameter size if REMPARMDESC_FLAGS_SIZE is set. */
+    uint8_t     cb;
+    /** Pointer to additional data.
+     * For REMPARMDESC_FLAGS_PFN this is a PREMFNDESC. */
+    void       *pvExtra;
+
+} REMPARMDESC, *PREMPARMDESC;
+/** Pointer to a constant parameter descriptor. */
+typedef const REMPARMDESC *PCREMPARMDESC;
+
+/** @name Parameter descriptor flags.
+ * @{ */
+/** The parameter type is a kind of integer which could fit in a register. This includes pointers. */
+#define REMPARMDESC_FLAGS_INT           0
+/** The parameter is a GC pointer. */
+#define REMPARMDESC_FLAGS_GCPTR         1
+/** The parameter is a GC physical address. */
+#define REMPARMDESC_FLAGS_GCPHYS        2
+/** The parameter is a HC physical address. */
+#define REMPARMDESC_FLAGS_HCPHYS        3
+/** The parameter type is a kind of floating point. */
+#define REMPARMDESC_FLAGS_FLOAT         4
+/** The parameter value is a struct. This type takes a size. */
+#define REMPARMDESC_FLAGS_STRUCT        5
+/** The parameter is an ellipsis. */
+#define REMPARMDESC_FLAGS_ELLIPSIS      6
+/** The parameter is a va_list. */
+#define REMPARMDESC_FLAGS_VALIST        7
+/** The parameter is a function pointer. pvExtra is a PREMFNDESC. */
+#define REMPARMDESC_FLAGS_PFN           8
+/** The parameter type mask. */
+#define REMPARMDESC_FLAGS_TYPE_MASK     15
+/** The parameter size field is valid. */
+#define REMPARMDESC_FLAGS_SIZE          RT_BIT(7)
+/** @} */
+
+/**
+ * Function descriptor.
+ */
+typedef struct REMFNDESC
+{
+    /** The function name. */
+    const char         *pszName;
+    /** Exports: Pointer to the function pointer.
+     * Imports: Pointer to the function. */
+    void               *pv;
+    /** Array of parameter descriptors. */
+    PCREMPARMDESC       paParams;
+    /** The number of parameter descriptors pointed to by paParams. */
+    uint8_t             cParams;
+    /** Function flags (REMFNDESC_FLAGS_*). */
+    uint8_t             fFlags;
+    /** The size of the return value. */
+    uint8_t             cbReturn;
+    /** Pointer to the wrapper code for imports. */
+    void               *pvWrapper;
+} REMFNDESC, *PREMFNDESC;
+/** Pointer to a constant function descriptor. */
+typedef const REMFNDESC *PCREMFNDESC;
+
+/** @name Function descriptor flags.
+ * @{ */
+/** The return type is void. */
+#define REMFNDESC_FLAGS_RET_VOID        0
+/** The return type is a kind of integer passed in rax/eax. This includes pointers. */
+#define REMFNDESC_FLAGS_RET_INT         1
+/** The return type is a kind of floating point. */
+#define REMFNDESC_FLAGS_RET_FLOAT       2
+/** The return value is a struct. This type takes a size. */
+#define REMFNDESC_FLAGS_RET_STRUCT      3
+/** The return type mask. */
+#define REMFNDESC_FLAGS_RET_TYPE_MASK   7
+/** The argument list contains one or more va_list arguments (i.e. problems). */
+#define REMFNDESC_FLAGS_VALIST          RT_BIT(6)
+/** The function has an ellipsis (i.e. a problem). */
+#define REMFNDESC_FLAGS_ELLIPSIS        RT_BIT(7)
+/** @} */
+
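+/* A hedged example (not one of the descriptors actually used below): how a
+ * hypothetical import taking a PVM and a 32-bit value and returning an int
+ * would be described with the structures above.  Both identifiers are made
+ * up for illustration only. */
+#if 0
+static const REMPARMDESC g_aArgsExampleImport[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL }
+};
+static REMFNDESC g_ExampleImport =
+{
+    "ExampleImport", NULL, &g_aArgsExampleImport[0], RT_ELEMENTS(g_aArgsExampleImport),
+    REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL
+};
+#endif
+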
+/**
+ * Chunk of read-write-executable memory.
+ */
+typedef struct REMEXECMEM
+{
+    /** Pointer to the next chunk. */
+    struct REMEXECMEM  *pNext;
+    /** The size of this chunk. */
+    uint32_t            cb;
+    /** The offset of the next code block. */
+    uint32_t            off;
+#if ARCH_BITS == 32
+    uint32_t            padding;
+#endif
+} REMEXECMEM, *PREMEXECMEM;
+
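+/* A minimal sketch, not the original allocator: how a block of wrapper code
+ * could be carved out of such a chunk using the cb/off bookkeeping above.
+ * The function name and the 32-byte alignment are assumptions made for this
+ * illustration only. */
+#if 0
+static void *remExecMemSketchAlloc(PREMEXECMEM pChunk, uint32_t cbCode)
+{
+    uint32_t cbAligned = (cbCode + 31) & ~(uint32_t)31; /* keep blocks 32-byte aligned */
+    if (pChunk->off + cbAligned > pChunk->cb)
+        return NULL;                                    /* chunk exhausted */
+    void *pv = (uint8_t *)pChunk + pChunk->off;
+    pChunk->off += cbAligned;
+    return pv;
+}
+#endif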
+
+/*******************************************************************************
+*   Global Variables                                                           *
+*******************************************************************************/
+#ifndef USE_REM_STUBS
+/** Loader handle of the REM object/DLL. */
+static RTLDRMOD g_ModREM2;
+/** Pointer to the memory containing the loaded REM2 object/DLL. */
+static void    *g_pvREM2;
+
+/** Linux object export addresses.
+ * These are references from the assembly wrapper code.
+ * @{ */
+static DECLCALLBACKPTR(int, pfnREMR3Init)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3Term)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3Reset)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3Step)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3BreakpointSet)(PVM, RTGCUINTPTR);
+static DECLCALLBACKPTR(int, pfnREMR3BreakpointClear)(PVM, RTGCUINTPTR);
+static DECLCALLBACKPTR(int, pfnREMR3EmulateInstruction)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3Run)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3State)(PVM, bool fFlushTBs);
+static DECLCALLBACKPTR(int, pfnREMR3StateBack)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3StateUpdate)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3A20Set)(PVM, bool);
+static DECLCALLBACKPTR(void, pfnREMR3ReplayInvalidatedPages)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3ReplayHandlerNotifications)(PVM pVM);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyPhysRamRegister)(PVM, RTGCPHYS, RTUINT, unsigned);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyPhysRamChunkRegister)(PVM, RTGCPHYS, RTUINT, RTHCUINTPTR, unsigned);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyPhysReserve)(PVM, RTGCPHYS, RTUINT);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyPhysRomRegister)(PVM, RTGCPHYS, RTUINT, void *, bool);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyHandlerPhysicalModify)(PVM, PGMPHYSHANDLERTYPE, RTGCPHYS, RTGCPHYS, RTGCPHYS, bool, bool);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyHandlerPhysicalRegister)(PVM, PGMPHYSHANDLERTYPE, RTGCPHYS, RTGCPHYS, bool);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyHandlerPhysicalDeregister)(PVM, PGMPHYSHANDLERTYPE, RTGCPHYS, RTGCPHYS, bool, bool);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyInterruptSet)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyInterruptClear)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyTimerPending)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyDmaPending)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyQueuePending)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyFF)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3NotifyCodePageChanged)(PVM, RTGCPTR);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyPendingInterrupt)(PVM, uint8_t);
+static DECLCALLBACKPTR(uint32_t, pfnREMR3QueryPendingInterrupt)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3DisasEnableStepping)(PVM, bool);
+static DECLCALLBACKPTR(bool, pfnREMR3IsPageAccessHandled)(PVM, RTGCPHYS);
+/** @} */
+
+/** Export and import parameter descriptors.
+ * @{
+ */
+/* Common args. */
+static const REMPARMDESC g_aArgsSIZE_T[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t) }
+};
+static const REMPARMDESC g_aArgsPTR[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *) }
+};
+static const REMPARMDESC g_aArgsVM[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM) }
+};
+
+/* REM args */
+static const REMPARMDESC g_aArgsBreakpoint[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPTR,      sizeof(RTGCUINTPTR), NULL }
+};
+static const REMPARMDESC g_aArgsA20Set[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyPhysRamRegister[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTUINT), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyPhysRamChunkRegister[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTUINT), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTHCUINTPTR), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyPhysReserve[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTUINT), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyPhysRomRegister[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTUINT), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyHandlerPhysicalModify[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PGMPHYSHANDLERTYPE), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(bool), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyHandlerPhysicalRegister[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PGMPHYSHANDLERTYPE), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyHandlerPhysicalDeregister[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PGMPHYSHANDLERTYPE), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(bool), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyCodePageChanged[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPTR,      sizeof(RTGCUINTPTR), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyPendingInterrupt[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t), NULL }
+};
+static const REMPARMDESC g_aArgsDisasEnableStepping[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsIsPageAccessHandled[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL }
+};
+
+
+/* VMM args */
+static const REMPARMDESC g_aArgsCPUMGetGuestCpl[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PCPUMCTXCORE), NULL },
+};
+
+static const REMPARMDESC g_aArgsCPUMGetGuestCpuId[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t *), NULL }
+};
+
+static const REMPARMDESC g_aArgsCPUMSetChangedFlags[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL }
+};
+
+static const REMPARMDESC g_aArgsCPUMQueryGuestCtxPtr[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PCPUMCTX *), NULL }
+};
+static const REMPARMDESC g_aArgsCSAMR3MonitorPage[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTRCPTR), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(CSAMTAG), NULL }
+};
+static const REMPARMDESC g_aArgsCSAMR3UnmonitorPage[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTRCPTR), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(CSAMTAG), NULL }
+};
+
+static const REMPARMDESC g_aArgsCSAMR3RecordCallAddress[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTRCPTR), NULL }
+};
+
+#if defined(VBOX_WITH_DEBUGGER) && !(defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)) /* the callbacks are problematic */
+static const REMPARMDESC g_aArgsDBGCRegisterCommands[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PCDBGCCMD), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL }
+};
+#endif
+static const REMPARMDESC g_aArgsDBGFR3DisasInstrEx[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTSEL), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTGCPTR), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(char *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t *), NULL }
+};
+static const REMPARMDESC g_aArgsDBGFR3Info[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PCDBGFINFOHLP), NULL }
+};
+static const REMPARMDESC g_aArgsDBGFR3SymbolByAddr[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPTR,      sizeof(RTGCUINTPTR), NULL },
+    { REMPARMDESC_FLAGS_GCPTR,      sizeof(RTGCINTPTR), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PDBGFSYMBOL), NULL }
+};
+static const REMPARMDESC g_aArgsDISInstr[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTUINTPTR), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(char *), NULL }
+};
+static const REMPARMDESC g_aArgsEMR3FatalError[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(int), NULL }
+};
+static const REMPARMDESC g_aArgsHWACCMR3CanExecuteGuest[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsIOMIOPortRead[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTIOPORT), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsIOMIOPortWrite[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTIOPORT), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsIOMMMIORead[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsIOMMMIOWrite[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsMMR3HeapAlloc[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(MMTAG), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsMMR3HeapAllocZ[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(MMTAG), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsPATMIsPatchGCAddr[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTRCPTR), NULL }
+};
+static const REMPARMDESC g_aArgsPATMR3QueryOpcode[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTRCPTR), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t *), NULL }
+};
+static const REMPARMDESC g_aArgsPATMR3QueryPatchMem[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t *), NULL }
+};
+static const REMPARMDESC g_aArgsPDMApicGetBase[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t *), NULL }
+};
+static const REMPARMDESC g_aArgsPDMApicGetTPR[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t *), NULL }
+};
+static const REMPARMDESC g_aArgsPDMApicSetBase[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t), NULL }
+};
+static const REMPARMDESC g_aArgsPDMApicSetTPR[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t), NULL }
+};
+static const REMPARMDESC g_aArgsPDMApicWriteMSR[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(VMCPUID), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t), NULL }
+};
+static const REMPARMDESC g_aArgsPDMApicReadMSR[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(VMCPUID), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t *), NULL }
+};
+static const REMPARMDESC g_aArgsPDMGetInterrupt[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t *), NULL }
+};
+static const REMPARMDESC g_aArgsPDMIsaSetIrq[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t), NULL }
+};
+static const REMPARMDESC g_aArgsPGMGetGuestMode[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+};
+static const REMPARMDESC g_aArgsPGMGstGetPage[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPTR,      sizeof(RTGCPTR), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PRTGCPHYS), NULL }
+};
+static const REMPARMDESC g_aArgsPGMInvalidatePage[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPTR,      sizeof(RTGCPTR), NULL }
+};
+static const REMPARMDESC g_aArgsPGMPhysGCPhys2HCPtr[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTUINT), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PRTHCPTR), NULL }
+};
+static const REMPARMDESC g_aArgsPGMPhysGCPtr2HCPtrByGstCR3[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PRTHCPTR), NULL }
+};
+static const REMPARMDESC g_aArgsPGM3PhysGrowRange[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PCRTGCPHYS), NULL }
+};
+static const REMPARMDESC g_aArgsPGMPhysIsGCPhysValid[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL }
+};
+static const REMPARMDESC g_aArgsPGMPhysRead[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsPGMPhysSimpleReadGCPtr[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *), NULL },
+    { REMPARMDESC_FLAGS_GCPTR,      sizeof(RTGCPTR), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsPGMPhysWrite[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsPGMChangeMode[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t), NULL }
+};
+static const REMPARMDESC g_aArgsPGMFlushTLB[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysReadUxx[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysWriteU8[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysWriteU16[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint16_t), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysWriteU32[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysWriteU64[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPHYS,     sizeof(RTGCPHYS), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint64_t), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3GetGCPtr[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PSSMHANDLE), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PRTGCPTR), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3GetMem[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PSSMHANDLE), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3GetU32[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PSSMHANDLE), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t *), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3GetUInt[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PSSMHANDLE), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PRTUINT), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3PutGCPtr[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PSSMHANDLE), NULL },
+    { REMPARMDESC_FLAGS_GCPTR,      sizeof(RTGCPTR), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3PutMem[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PSSMHANDLE), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3PutU32[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PSSMHANDLE), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t), NULL },
+};
+static const REMPARMDESC g_aArgsSSMR3PutUInt[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PSSMHANDLE), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTUINT), NULL },
+};
+
+static const REMPARMDESC g_aArgsSSMIntCallback[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PSSMHANDLE), NULL },
+};
+static REMFNDESC g_SSMIntCallback =
+{
+    "SSMIntCallback", NULL, &g_aArgsSSMIntCallback[0], RT_ELEMENTS(g_aArgsSSMIntCallback), REMFNDESC_FLAGS_RET_INT, sizeof(int),  NULL
+};
+
+static const REMPARMDESC g_aArgsSSMIntLoadExecCallback[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM),                NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PSSMHANDLE),         NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t),           NULL },
+};
+static REMFNDESC g_SSMIntLoadExecCallback =
+{
+    "SSMIntLoadExecCallback", NULL, &g_aArgsSSMIntLoadExecCallback[0], RT_ELEMENTS(g_aArgsSSMIntLoadExecCallback), REMFNDESC_FLAGS_RET_INT, sizeof(int),  NULL
+};
+static const REMPARMDESC g_aArgsSSMR3RegisterInternal[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM),                NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *),       NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t),           NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint32_t),           NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t),             NULL },
+    { REMPARMDESC_FLAGS_PFN,        sizeof(PFNSSMINTSAVEPREP),  &g_SSMIntCallback },
+    { REMPARMDESC_FLAGS_PFN,        sizeof(PFNSSMINTSAVEEXEC),  &g_SSMIntCallback },
+    { REMPARMDESC_FLAGS_PFN,        sizeof(PFNSSMINTSAVEDONE),  &g_SSMIntCallback },
+    { REMPARMDESC_FLAGS_PFN,        sizeof(PFNSSMINTLOADPREP),  &g_SSMIntCallback },
+    { REMPARMDESC_FLAGS_PFN,        sizeof(PFNSSMINTLOADEXEC),  &g_SSMIntLoadExecCallback },
+    { REMPARMDESC_FLAGS_PFN,        sizeof(PFNSSMINTLOADDONE),  &g_SSMIntCallback },
+};
+
+static const REMPARMDESC g_aArgsSTAMR3Register[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(STAMTYPE), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(STAMVISIBILITY), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(STAMUNIT), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsTRPMAssertTrap[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(TRPMEVENT), NULL }
+};
+static const REMPARMDESC g_aArgsTRPMQueryTrap[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(uint8_t *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(TRPMEVENT *), NULL }
+};
+static const REMPARMDESC g_aArgsTRPMSetErrorCode[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPTR,      sizeof(RTGCUINT), NULL }
+};
+static const REMPARMDESC g_aArgsTRPMSetFaultAddress[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_GCPTR,      sizeof(RTGCUINT), NULL }
+};
+static const REMPARMDESC g_aArgsVMR3ReqCall[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVMREQ *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL },
+    { REMPARMDESC_FLAGS_ELLIPSIS,   0 }
+};
+static const REMPARMDESC g_aArgsVMR3ReqFree[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVMREQ), NULL }
+};
+
+
+/* IPRT args */
+static const REMPARMDESC g_aArgsAssertMsg1[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsAssertMsg2[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_ELLIPSIS,   0 }
+};
+static const REMPARMDESC g_aArgsRTLogFlags[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PRTLOGGER), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsRTLogFlush[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PRTLOGGER), NULL }
+};
+static const REMPARMDESC g_aArgsRTLogLoggerEx[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PRTLOGGER), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_ELLIPSIS,   0 }
+};
+static const REMPARMDESC g_aArgsRTLogLoggerExV[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PRTLOGGER), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_VALIST,     0 }
+};
+static const REMPARMDESC g_aArgsRTLogPrintf[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_ELLIPSIS,   0 }
+};
+static const REMPARMDESC g_aArgsRTMemProtect[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(unsigned), NULL }
+};
+static const REMPARMDESC g_aArgsRTStrPrintf[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(char *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_ELLIPSIS,   0 }
+};
+static const REMPARMDESC g_aArgsRTStrPrintfV[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(char *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const char *), NULL },
+    { REMPARMDESC_FLAGS_VALIST,     0 }
+};
+static const REMPARMDESC g_aArgsThread[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(RTTHREAD), NULL }
+};
+
+
+/* CRT args */
+static const REMPARMDESC g_aArgsmemcpy[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(const void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsmemset[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(void *), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(int), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsState[] =
+{
+    { REMPARMDESC_FLAGS_INT,        sizeof(PVM), NULL },
+    { REMPARMDESC_FLAGS_INT,        sizeof(bool), NULL }
+};
+
+/** @} */
+
+/**
+ * Descriptors for the exported functions.
+ */
+static const REMFNDESC g_aExports[] =
+{  /* pszName,                                  (void *)pv,                                         pParams,                                    cParams,                                            fFlags,                     cb,             pvWrapper. */
+    { "REMR3Init",                              (void *)&pfnREMR3Init,                              &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3Term",                              (void *)&pfnREMR3Term,                              &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3Reset",                             (void *)&pfnREMR3Reset,                             &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3Step",                              (void *)&pfnREMR3Step,                              &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3BreakpointSet",                     (void *)&pfnREMR3BreakpointSet,                     &g_aArgsBreakpoint[0],                      RT_ELEMENTS(g_aArgsBreakpoint),                        REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3BreakpointClear",                   (void *)&pfnREMR3BreakpointClear,                   &g_aArgsBreakpoint[0],                      RT_ELEMENTS(g_aArgsBreakpoint),                        REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3EmulateInstruction",                (void *)&pfnREMR3EmulateInstruction,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3Run",                               (void *)&pfnREMR3Run,                               &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3State",                             (void *)&pfnREMR3State,                             &g_aArgsState[0],                           RT_ELEMENTS(g_aArgsState),                             REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3StateBack",                         (void *)&pfnREMR3StateBack,                         &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3StateUpdate",                       (void *)&pfnREMR3StateUpdate,                       &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3A20Set",                            (void *)&pfnREMR3A20Set,                            &g_aArgsA20Set[0],                          RT_ELEMENTS(g_aArgsA20Set),                            REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3ReplayInvalidatedPages",            (void *)&pfnREMR3ReplayInvalidatedPages,            &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3ReplayHandlerNotifications",        (void *)&pfnREMR3ReplayHandlerNotifications,        &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyPhysRamRegister",             (void *)&pfnREMR3NotifyPhysRamRegister,             &g_aArgsNotifyPhysRamRegister[0],           RT_ELEMENTS(g_aArgsNotifyPhysRamRegister),             REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyPhysRamChunkRegister",        (void *)&pfnREMR3NotifyPhysRamChunkRegister,        &g_aArgsNotifyPhysRamChunkRegister[0],      RT_ELEMENTS(g_aArgsNotifyPhysRamChunkRegister),        REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyPhysReserve",                 (void *)&pfnREMR3NotifyPhysReserve,                 &g_aArgsNotifyPhysReserve[0],               RT_ELEMENTS(g_aArgsNotifyPhysReserve),                 REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyPhysRomRegister",             (void *)&pfnREMR3NotifyPhysRomRegister,             &g_aArgsNotifyPhysRomRegister[0],           RT_ELEMENTS(g_aArgsNotifyPhysRomRegister),             REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyHandlerPhysicalModify",       (void *)&pfnREMR3NotifyHandlerPhysicalModify,       &g_aArgsNotifyHandlerPhysicalModify[0],     RT_ELEMENTS(g_aArgsNotifyHandlerPhysicalModify),       REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyHandlerPhysicalRegister",     (void *)&pfnREMR3NotifyHandlerPhysicalRegister,     &g_aArgsNotifyHandlerPhysicalRegister[0],   RT_ELEMENTS(g_aArgsNotifyHandlerPhysicalRegister),     REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyHandlerPhysicalDeregister",   (void *)&pfnREMR3NotifyHandlerPhysicalDeregister,   &g_aArgsNotifyHandlerPhysicalDeregister[0], RT_ELEMENTS(g_aArgsNotifyHandlerPhysicalDeregister),   REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyInterruptSet",                (void *)&pfnREMR3NotifyInterruptSet,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyInterruptClear",              (void *)&pfnREMR3NotifyInterruptClear,              &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyTimerPending",                (void *)&pfnREMR3NotifyTimerPending,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyDmaPending",                  (void *)&pfnREMR3NotifyDmaPending,                  &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyQueuePending",                (void *)&pfnREMR3NotifyQueuePending,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyFF",                          (void *)&pfnREMR3NotifyFF,                          &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3NotifyCodePageChanged",             (void *)&pfnREMR3NotifyCodePageChanged,             &g_aArgsNotifyCodePageChanged[0],           RT_ELEMENTS(g_aArgsNotifyCodePageChanged),             REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3NotifyPendingInterrupt",            (void *)&pfnREMR3NotifyPendingInterrupt,            &g_aArgsNotifyPendingInterrupt[0],          RT_ELEMENTS(g_aArgsNotifyPendingInterrupt),            REMFNDESC_FLAGS_RET_VOID,   0,              NULL },
+    { "REMR3QueryPendingInterrupt",             (void *)&pfnREMR3QueryPendingInterrupt,             &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t), NULL },
+    { "REMR3DisasEnableStepping",               (void *)&pfnREMR3DisasEnableStepping,               &g_aArgsDisasEnableStepping[0],             RT_ELEMENTS(g_aArgsDisasEnableStepping),               REMFNDESC_FLAGS_RET_INT,    sizeof(int),    NULL },
+    { "REMR3IsPageAccessHandled",               (void *)&pfnREMR3IsPageAccessHandled,               &g_aArgsIsPageAccessHandled[0],             RT_ELEMENTS(g_aArgsIsPageAccessHandled),               REMFNDESC_FLAGS_RET_INT,    sizeof(bool),   NULL }
+};
+
+
+/**
+ * Descriptors for the functions imported from VBoxVMM.
+ */
+static REMFNDESC g_aVMMImports[] =
+{
+    { "CPUMAreHiddenSelRegsValid",              (void *)(uintptr_t)&CPUMAreHiddenSelRegsValid,      &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(bool),       NULL },
+    { "CPUMGetAndClearChangedFlagsREM",         (void *)(uintptr_t)&CPUMGetAndClearChangedFlagsREM, &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(unsigned),   NULL },
+    { "CPUMSetChangedFlags",                    (void *)(uintptr_t)&CPUMSetChangedFlags,            &g_aArgsCPUMSetChangedFlags[0],             RT_ELEMENTS(g_aArgsCPUMSetChangedFlags),               REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "CPUMGetGuestCPL",                        (void *)(uintptr_t)&CPUMGetGuestCPL,                &g_aArgsCPUMGetGuestCpl[0],                 RT_ELEMENTS(g_aArgsCPUMGetGuestCpl),                   REMFNDESC_FLAGS_RET_INT,    sizeof(unsigned),   NULL },
+    { "CPUMGetGuestCpuId",                      (void *)(uintptr_t)&CPUMGetGuestCpuId,              &g_aArgsCPUMGetGuestCpuId[0],               RT_ELEMENTS(g_aArgsCPUMGetGuestCpuId),                 REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "CPUMGetGuestEAX",                        (void *)(uintptr_t)&CPUMGetGuestEAX,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t),   NULL },
+    { "CPUMGetGuestEBP",                        (void *)(uintptr_t)&CPUMGetGuestEBP,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t),   NULL },
+    { "CPUMGetGuestEBX",                        (void *)(uintptr_t)&CPUMGetGuestEBX,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t),   NULL },
+    { "CPUMGetGuestECX",                        (void *)(uintptr_t)&CPUMGetGuestECX,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t),   NULL },
+    { "CPUMGetGuestEDI",                        (void *)(uintptr_t)&CPUMGetGuestEDI,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t),   NULL },
+    { "CPUMGetGuestEDX",                        (void *)(uintptr_t)&CPUMGetGuestEDX,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t),   NULL },
+    { "CPUMGetGuestEIP",                        (void *)(uintptr_t)&CPUMGetGuestEIP,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t),   NULL },
+    { "CPUMGetGuestESI",                        (void *)(uintptr_t)&CPUMGetGuestESI,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t),   NULL },
+    { "CPUMGetGuestESP",                        (void *)(uintptr_t)&CPUMGetGuestESP,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t),   NULL },
+    { "CPUMGetGuestCS",                         (void *)(uintptr_t)&CPUMGetGuestCS,                 &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(RTSEL),      NULL },
+    { "CPUMGetGuestSS",                         (void *)(uintptr_t)&CPUMGetGuestSS,                 &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(RTSEL),      NULL },
+    { "CPUMQueryGuestCtxPtr",                   (void *)(uintptr_t)&CPUMQueryGuestCtxPtr,           &g_aArgsCPUMQueryGuestCtxPtr[0],            RT_ELEMENTS(g_aArgsCPUMQueryGuestCtxPtr),              REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "CSAMR3MonitorPage",                      (void *)(uintptr_t)&CSAMR3MonitorPage,              &g_aArgsCSAMR3MonitorPage[0],               RT_ELEMENTS(g_aArgsCSAMR3MonitorPage),                 REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "CSAMR3UnmonitorPage",                    (void *)(uintptr_t)&CSAMR3UnmonitorPage,            &g_aArgsCSAMR3UnmonitorPage[0],             RT_ELEMENTS(g_aArgsCSAMR3UnmonitorPage),               REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "CSAMR3RecordCallAddress",                (void *)(uintptr_t)&CSAMR3RecordCallAddress,        &g_aArgsCSAMR3RecordCallAddress[0],         RT_ELEMENTS(g_aArgsCSAMR3RecordCallAddress),           REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+#if defined(VBOX_WITH_DEBUGGER) && !(defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)) /* the callbacks are problematic */
+    { "DBGCRegisterCommands",                   (void *)(uintptr_t)&DBGCRegisterCommands,           &g_aArgsDBGCRegisterCommands[0],            RT_ELEMENTS(g_aArgsDBGCRegisterCommands),              REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+#endif
+    { "DBGFR3DisasInstrEx",                     (void *)(uintptr_t)&DBGFR3DisasInstrEx,             &g_aArgsDBGFR3DisasInstrEx[0],              RT_ELEMENTS(g_aArgsDBGFR3DisasInstrEx),                REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "DBGFR3Info",                             (void *)(uintptr_t)&DBGFR3Info,                     &g_aArgsDBGFR3Info[0],                      RT_ELEMENTS(g_aArgsDBGFR3Info),                        REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "DBGFR3InfoLogRelHlp",                    (void *)(uintptr_t)&DBGFR3InfoLogRelHlp,            NULL,                                       0,                                                     REMFNDESC_FLAGS_RET_INT,    sizeof(void *),     NULL },
+    { "DBGFR3SymbolByAddr",                     (void *)(uintptr_t)&DBGFR3SymbolByAddr,             &g_aArgsDBGFR3SymbolByAddr[0],              RT_ELEMENTS(g_aArgsDBGFR3SymbolByAddr),                REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "DISInstr",                               (void *)(uintptr_t)&DISInstr,                       &g_aArgsDISInstr[0],                        RT_ELEMENTS(g_aArgsDISInstr),                          REMFNDESC_FLAGS_RET_INT,    sizeof(bool),       NULL },
+    { "EMR3FatalError",                         (void *)(uintptr_t)&EMR3FatalError,                 &g_aArgsEMR3FatalError[0],                  RT_ELEMENTS(g_aArgsEMR3FatalError),                    REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "HWACCMR3CanExecuteGuest",                (void *)(uintptr_t)&HWACCMR3CanExecuteGuest,        &g_aArgsHWACCMR3CanExecuteGuest[0],         RT_ELEMENTS(g_aArgsHWACCMR3CanExecuteGuest),           REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "IOMIOPortRead",                          (void *)(uintptr_t)&IOMIOPortRead,                  &g_aArgsIOMIOPortRead[0],                   RT_ELEMENTS(g_aArgsIOMIOPortRead),                     REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "IOMIOPortWrite",                         (void *)(uintptr_t)&IOMIOPortWrite,                 &g_aArgsIOMIOPortWrite[0],                  RT_ELEMENTS(g_aArgsIOMIOPortWrite),                    REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "IOMMMIORead",                            (void *)(uintptr_t)&IOMMMIORead,                    &g_aArgsIOMMMIORead[0],                     RT_ELEMENTS(g_aArgsIOMMMIORead),                       REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "IOMMMIOWrite",                           (void *)(uintptr_t)&IOMMMIOWrite,                   &g_aArgsIOMMMIOWrite[0],                    RT_ELEMENTS(g_aArgsIOMMMIOWrite),                      REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "MMR3HeapAlloc",                          (void *)(uintptr_t)&MMR3HeapAlloc,                  &g_aArgsMMR3HeapAlloc[0],                   RT_ELEMENTS(g_aArgsMMR3HeapAlloc),                     REMFNDESC_FLAGS_RET_INT,    sizeof(void *),     NULL },
+    { "MMR3HeapAllocZ",                         (void *)(uintptr_t)&MMR3HeapAllocZ,                 &g_aArgsMMR3HeapAllocZ[0],                  RT_ELEMENTS(g_aArgsMMR3HeapAllocZ),                    REMFNDESC_FLAGS_RET_INT,    sizeof(void *),     NULL },
+    { "MMR3PhysGetRamSize",                     (void *)(uintptr_t)&MMR3PhysGetRamSize,             &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint64_t),   NULL },
+    { "PATMIsPatchGCAddr",                      (void *)(uintptr_t)&PATMIsPatchGCAddr,              &g_aArgsPATMIsPatchGCAddr[0],               RT_ELEMENTS(g_aArgsPATMIsPatchGCAddr),                 REMFNDESC_FLAGS_RET_INT,    sizeof(bool),       NULL },
+    { "PATMR3QueryOpcode",                      (void *)(uintptr_t)&PATMR3QueryOpcode,              &g_aArgsPATMR3QueryOpcode[0],               RT_ELEMENTS(g_aArgsPATMR3QueryOpcode),                 REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PATMR3QueryPatchMemGC",                  (void *)(uintptr_t)&PATMR3QueryPatchMemGC,          &g_aArgsPATMR3QueryPatchMem[0],             RT_ELEMENTS(g_aArgsPATMR3QueryPatchMem),               REMFNDESC_FLAGS_RET_INT,    sizeof(RTGCPTR),    NULL },
+    { "PATMR3QueryPatchMemHC",                  (void *)(uintptr_t)&PATMR3QueryPatchMemHC,          &g_aArgsPATMR3QueryPatchMem[0],             RT_ELEMENTS(g_aArgsPATMR3QueryPatchMem),               REMFNDESC_FLAGS_RET_INT,    sizeof(void *),     NULL },
+    { "PDMApicGetBase",                         (void *)(uintptr_t)&PDMApicGetBase,                 &g_aArgsPDMApicGetBase[0],                  RT_ELEMENTS(g_aArgsPDMApicGetBase),                    REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PDMApicGetTPR",                          (void *)(uintptr_t)&PDMApicGetTPR,                  &g_aArgsPDMApicGetTPR[0],                   RT_ELEMENTS(g_aArgsPDMApicGetTPR),                     REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PDMApicSetBase",                         (void *)(uintptr_t)&PDMApicSetBase,                 &g_aArgsPDMApicSetBase[0],                  RT_ELEMENTS(g_aArgsPDMApicSetBase),                    REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PDMApicSetTPR",                          (void *)(uintptr_t)&PDMApicSetTPR,                  &g_aArgsPDMApicSetTPR[0],                   RT_ELEMENTS(g_aArgsPDMApicSetTPR),                     REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PDMApicWriteMSR",                        (void *)(uintptr_t)&PDMApicWriteMSR,                &g_aArgsPDMApicWriteMSR[0],                 RT_ELEMENTS(g_aArgsPDMApicWriteMSR),                   REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PDMApicReadMSR",                         (void *)(uintptr_t)&PDMApicReadMSR,                 &g_aArgsPDMApicReadMSR[0],                  RT_ELEMENTS(g_aArgsPDMApicReadMSR),                    REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PDMR3DmaRun",                            (void *)(uintptr_t)&PDMR3DmaRun,                    &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "PDMGetInterrupt",                        (void *)(uintptr_t)&PDMGetInterrupt,                &g_aArgsPDMGetInterrupt[0],                 RT_ELEMENTS(g_aArgsPDMGetInterrupt),                   REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PDMIsaSetIrq",                           (void *)(uintptr_t)&PDMIsaSetIrq,                   &g_aArgsPDMIsaSetIrq[0],                    RT_ELEMENTS(g_aArgsPDMIsaSetIrq),                      REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PGMGetGuestMode",                        (void *)(uintptr_t)&PGMGetGuestMode,                &g_aArgsPGMGetGuestMode[0],                 RT_ELEMENTS(g_aArgsPGMGetGuestMode),                   REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PGMGstGetPage",                          (void *)(uintptr_t)&PGMGstGetPage,                  &g_aArgsPGMGstGetPage[0],                   RT_ELEMENTS(g_aArgsPGMGstGetPage),                     REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PGMInvalidatePage",                      (void *)(uintptr_t)&PGMInvalidatePage,              &g_aArgsPGMInvalidatePage[0],               RT_ELEMENTS(g_aArgsPGMInvalidatePage),                 REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PGMPhysGCPhys2HCPtr",                    (void *)(uintptr_t)&PGMPhysGCPhys2HCPtr,            &g_aArgsPGMPhysGCPhys2HCPtr[0],             RT_ELEMENTS(g_aArgsPGMPhysGCPhys2HCPtr),               REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PGMPhysGCPtr2HCPtrByGstCR3",             (void *)(uintptr_t)&PGMPhysGCPtr2HCPtrByGstCR3,     &g_aArgsPGMPhysGCPtr2HCPtrByGstCR3[0],      RT_ELEMENTS(g_aArgsPGMPhysGCPtr2HCPtrByGstCR3),        REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+#ifndef VBOX_WITH_NEW_PHYS_CODE
+    { "PGM3PhysGrowRange",                      (void *)(uintptr_t)&PGM3PhysGrowRange,              &g_aArgsPGM3PhysGrowRange[0],               RT_ELEMENTS(g_aArgsPGM3PhysGrowRange),                 REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+#endif
+    { "PGMPhysIsGCPhysValid",                   (void *)(uintptr_t)&PGMPhysIsGCPhysValid,           &g_aArgsPGMPhysIsGCPhysValid[0],            RT_ELEMENTS(g_aArgsPGMPhysIsGCPhysValid),              REMFNDESC_FLAGS_RET_INT,    sizeof(bool),       NULL },
+    { "PGMPhysIsA20Enabled",                    (void *)(uintptr_t)&PGMPhysIsA20Enabled,            &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(bool),       NULL },
+    { "PGMPhysRead",                            (void *)(uintptr_t)&PGMPhysRead,                    &g_aArgsPGMPhysRead[0],                     RT_ELEMENTS(g_aArgsPGMPhysRead),                       REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PGMPhysSimpleReadGCPtr",                 (void *)(uintptr_t)&PGMPhysSimpleReadGCPtr,         &g_aArgsPGMPhysSimpleReadGCPtr[0],          RT_ELEMENTS(g_aArgsPGMPhysSimpleReadGCPtr),            REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PGMPhysWrite",                           (void *)(uintptr_t)&PGMPhysWrite,                   &g_aArgsPGMPhysWrite[0],                    RT_ELEMENTS(g_aArgsPGMPhysWrite),                      REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "PGMChangeMode",                          (void *)(uintptr_t)&PGMChangeMode,                  &g_aArgsPGMChangeMode[0],                   RT_ELEMENTS(g_aArgsPGMChangeMode),                     REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PGMFlushTLB",                            (void *)(uintptr_t)&PGMFlushTLB,                    &g_aArgsPGMFlushTLB[0],                     RT_ELEMENTS(g_aArgsPGMFlushTLB),                       REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "PGMR3PhysReadU8",                        (void *)(uintptr_t)&PGMR3PhysReadU8,                &g_aArgsPGMR3PhysReadUxx[0],                RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx),                  REMFNDESC_FLAGS_RET_INT,    sizeof(uint8_t),    NULL },
+    { "PGMR3PhysReadU16",                       (void *)(uintptr_t)&PGMR3PhysReadU16,               &g_aArgsPGMR3PhysReadUxx[0],                RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx),                  REMFNDESC_FLAGS_RET_INT,    sizeof(uint16_t),   NULL },
+    { "PGMR3PhysReadU32",                       (void *)(uintptr_t)&PGMR3PhysReadU32,               &g_aArgsPGMR3PhysReadUxx[0],                RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx),                  REMFNDESC_FLAGS_RET_INT,    sizeof(uint32_t),   NULL },
+    { "PGMR3PhysReadU64",                       (void *)(uintptr_t)&PGMR3PhysReadU64,               &g_aArgsPGMR3PhysReadUxx[0],                RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx),                  REMFNDESC_FLAGS_RET_INT,    sizeof(uint64_t),   NULL },
+    { "PGMR3PhysWriteU8",                       (void *)(uintptr_t)&PGMR3PhysWriteU8,               &g_aArgsPGMR3PhysWriteU8[0],                RT_ELEMENTS(g_aArgsPGMR3PhysWriteU8),                  REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "PGMR3PhysWriteU16",                      (void *)(uintptr_t)&PGMR3PhysWriteU16,              &g_aArgsPGMR3PhysWriteU16[0],               RT_ELEMENTS(g_aArgsPGMR3PhysWriteU16),                 REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "PGMR3PhysWriteU32",                      (void *)(uintptr_t)&PGMR3PhysWriteU32,              &g_aArgsPGMR3PhysWriteU32[0],               RT_ELEMENTS(g_aArgsPGMR3PhysWriteU32),                 REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "PGMR3PhysWriteU64",                      (void *)(uintptr_t)&PGMR3PhysWriteU64,              &g_aArgsPGMR3PhysWriteU64[0],               RT_ELEMENTS(g_aArgsPGMR3PhysWriteU32),                 REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "SSMR3GetGCPtr",                          (void *)(uintptr_t)&SSMR3GetGCPtr,                  &g_aArgsSSMR3GetGCPtr[0],                   RT_ELEMENTS(g_aArgsSSMR3GetGCPtr),                     REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "SSMR3GetMem",                            (void *)(uintptr_t)&SSMR3GetMem,                    &g_aArgsSSMR3GetMem[0],                     RT_ELEMENTS(g_aArgsSSMR3GetMem),                       REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "SSMR3GetU32",                            (void *)(uintptr_t)&SSMR3GetU32,                    &g_aArgsSSMR3GetU32[0],                     RT_ELEMENTS(g_aArgsSSMR3GetU32),                       REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "SSMR3GetUInt",                           (void *)(uintptr_t)&SSMR3GetUInt,                   &g_aArgsSSMR3GetUInt[0],                    RT_ELEMENTS(g_aArgsSSMR3GetUInt),                      REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "SSMR3PutGCPtr",                          (void *)(uintptr_t)&SSMR3PutGCPtr,                  &g_aArgsSSMR3PutGCPtr[0],                   RT_ELEMENTS(g_aArgsSSMR3PutGCPtr),                     REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "SSMR3PutMem",                            (void *)(uintptr_t)&SSMR3PutMem,                    &g_aArgsSSMR3PutMem[0],                     RT_ELEMENTS(g_aArgsSSMR3PutMem),                       REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "SSMR3PutU32",                            (void *)(uintptr_t)&SSMR3PutU32,                    &g_aArgsSSMR3PutU32[0],                     RT_ELEMENTS(g_aArgsSSMR3PutU32),                       REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "SSMR3PutUInt",                           (void *)(uintptr_t)&SSMR3PutUInt,                   &g_aArgsSSMR3PutUInt[0],                    RT_ELEMENTS(g_aArgsSSMR3PutUInt),                      REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "SSMR3RegisterInternal",                  (void *)(uintptr_t)&SSMR3RegisterInternal,          &g_aArgsSSMR3RegisterInternal[0],           RT_ELEMENTS(g_aArgsSSMR3RegisterInternal),             REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "STAMR3Register",                         (void *)(uintptr_t)&STAMR3Register,                 &g_aArgsSTAMR3Register[0],                  RT_ELEMENTS(g_aArgsSTAMR3Register),                    REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "TMCpuTickGet",                           (void *)(uintptr_t)&TMCpuTickGet,                   &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint64_t),   NULL },
+    { "TMCpuTickPause",                         (void *)(uintptr_t)&TMCpuTickPause,                 &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "TMCpuTickResume",                        (void *)(uintptr_t)&TMCpuTickResume,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "TMNotifyEndOfExecution",                 (void *)(uintptr_t)&TMNotifyEndOfExecution,         &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "TMNotifyStartOfExecution",               (void *)(uintptr_t)&TMNotifyStartOfExecution,       &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "TMTimerPoll",                            (void *)(uintptr_t)&TMTimerPoll,                    &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(uint64_t),   NULL },
+    { "TMR3TimerQueuesDo",                      (void *)(uintptr_t)&TMR3TimerQueuesDo,              &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "TMVirtualPause",                         (void *)(uintptr_t)&TMVirtualPause,                 &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "TMVirtualResume",                        (void *)(uintptr_t)&TMVirtualResume,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "TRPMAssertTrap",                         (void *)(uintptr_t)&TRPMAssertTrap,                 &g_aArgsTRPMAssertTrap[0],                  RT_ELEMENTS(g_aArgsTRPMAssertTrap),                    REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "TRPMGetErrorCode",                       (void *)(uintptr_t)&TRPMGetErrorCode,               &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(RTGCUINT),   NULL },
+    { "TRPMGetFaultAddress",                    (void *)(uintptr_t)&TRPMGetFaultAddress,            &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(RTGCUINTPTR),NULL },
+    { "TRPMQueryTrap",                          (void *)(uintptr_t)&TRPMQueryTrap,                  &g_aArgsTRPMQueryTrap[0],                   RT_ELEMENTS(g_aArgsTRPMQueryTrap),                     REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "TRPMResetTrap",                          (void *)(uintptr_t)&TRPMResetTrap,                  &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "TRPMSetErrorCode",                       (void *)(uintptr_t)&TRPMSetErrorCode,               &g_aArgsTRPMSetErrorCode[0],                RT_ELEMENTS(g_aArgsTRPMSetErrorCode),                  REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "TRPMSetFaultAddress",                    (void *)(uintptr_t)&TRPMSetFaultAddress,            &g_aArgsTRPMSetFaultAddress[0],             RT_ELEMENTS(g_aArgsTRPMSetFaultAddress),               REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "VMMR3Lock",                              (void *)(uintptr_t)&VMMR3Lock,                      &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "VMMR3Unlock",                            (void *)(uintptr_t)&VMMR3Unlock,                    &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "VMR3ReqCall",                            (void *)(uintptr_t)&VMR3ReqCall,                    &g_aArgsVMR3ReqCall[0],                     RT_ELEMENTS(g_aArgsVMR3ReqCall),                       REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "VMR3ReqFree",                            (void *)(uintptr_t)&VMR3ReqFree,                    &g_aArgsVMR3ReqFree[0],                     RT_ELEMENTS(g_aArgsVMR3ReqFree),                       REMFNDESC_FLAGS_RET_INT | REMFNDESC_FLAGS_ELLIPSIS, sizeof(int), NULL },
+//    { "",                        (void *)(uintptr_t)&,                &g_aArgsVM[0],                              RT_ELEMENTS(g_aArgsVM),                                REMFNDESC_FLAGS_RET_INT,    sizeof(int),   NULL },
+};
+
+
+/**
+ * Descriptors for the functions imported from VBoxRT.
+ */
+static REMFNDESC g_aRTImports[] =
+{
+    { "AssertMsg1",                             (void *)(uintptr_t)&AssertMsg1,                     &g_aArgsAssertMsg1[0],                      RT_ELEMENTS(g_aArgsAssertMsg1),                        REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "AssertMsg2",                             (void *)(uintptr_t)&AssertMsg2,                     &g_aArgsAssertMsg2[0],                      RT_ELEMENTS(g_aArgsAssertMsg2),                        REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_ELLIPSIS, 0, NULL },
+    { "RTAssertDoBreakpoint",                   (void *)(uintptr_t)&RTAssertDoBreakpoint,           NULL,                                       0,                                                     REMFNDESC_FLAGS_RET_INT,    sizeof(bool),       NULL },
+    { "RTLogDefaultInstance",                   (void *)(uintptr_t)&RTLogDefaultInstance,           NULL,                                       0,                                                     REMFNDESC_FLAGS_RET_INT,    sizeof(PRTLOGGER),  NULL },
+    { "RTLogRelDefaultInstance",                (void *)(uintptr_t)&RTLogRelDefaultInstance,        NULL,                                       0,                                                     REMFNDESC_FLAGS_RET_INT,    sizeof(PRTLOGGER),  NULL },
+    { "RTLogFlags",                             (void *)(uintptr_t)&RTLogFlags,                     &g_aArgsRTLogFlags[0],                      RT_ELEMENTS(g_aArgsRTLogFlags),                        REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "RTLogFlush",                             (void *)(uintptr_t)&RTLogFlush,                     &g_aArgsRTLogFlush[0],                      RT_ELEMENTS(g_aArgsRTLogFlush),                        REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "RTLogLoggerEx",                          (void *)(uintptr_t)&RTLogLoggerEx,                  &g_aArgsRTLogLoggerEx[0],                   RT_ELEMENTS(g_aArgsRTLogLoggerEx),                     REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_ELLIPSIS, 0, NULL },
+    { "RTLogLoggerExV",                         (void *)(uintptr_t)&RTLogLoggerExV,                 &g_aArgsRTLogLoggerExV[0],                  RT_ELEMENTS(g_aArgsRTLogLoggerExV),                    REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_VALIST, 0, NULL },
+    { "RTLogPrintf",                            (void *)(uintptr_t)&RTLogPrintf,                    &g_aArgsRTLogPrintf[0],                     RT_ELEMENTS(g_aArgsRTLogPrintf),                       REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "RTMemAlloc",                             (void *)(uintptr_t)&RTMemAlloc,                     &g_aArgsSIZE_T[0],                          RT_ELEMENTS(g_aArgsSIZE_T),                            REMFNDESC_FLAGS_RET_INT,    sizeof(void *),     NULL },
+    { "RTMemExecAlloc",                         (void *)(uintptr_t)&RTMemExecAlloc,                 &g_aArgsSIZE_T[0],                          RT_ELEMENTS(g_aArgsSIZE_T),                            REMFNDESC_FLAGS_RET_INT,    sizeof(void *),     NULL },
+    { "RTMemExecFree",                          (void *)(uintptr_t)&RTMemExecFree,                  &g_aArgsPTR[0],                             RT_ELEMENTS(g_aArgsPTR),                               REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "RTMemFree",                              (void *)(uintptr_t)&RTMemFree,                      &g_aArgsPTR[0],                             RT_ELEMENTS(g_aArgsPTR),                               REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "RTMemPageAlloc",                         (void *)(uintptr_t)&RTMemPageAlloc,                 &g_aArgsSIZE_T[0],                          RT_ELEMENTS(g_aArgsSIZE_T),                            REMFNDESC_FLAGS_RET_INT,    sizeof(void *),     NULL },
+    { "RTMemPageFree",                          (void *)(uintptr_t)&RTMemPageFree,                  &g_aArgsPTR[0],                             RT_ELEMENTS(g_aArgsPTR),                               REMFNDESC_FLAGS_RET_VOID,   0,                  NULL },
+    { "RTMemProtect",                           (void *)(uintptr_t)&RTMemProtect,                   &g_aArgsRTMemProtect[0],                    RT_ELEMENTS(g_aArgsRTMemProtect),                      REMFNDESC_FLAGS_RET_INT,    sizeof(int),        NULL },
+    { "RTStrPrintf",                            (void *)(uintptr_t)&RTStrPrintf,                    &g_aArgsRTStrPrintf[0],                     RT_ELEMENTS(g_aArgsRTStrPrintf),                       REMFNDESC_FLAGS_RET_INT | REMFNDESC_FLAGS_ELLIPSIS, sizeof(size_t), NULL },
+    { "RTStrPrintfV",                           (void *)(uintptr_t)&RTStrPrintfV,                   &g_aArgsRTStrPrintfV[0],                    RT_ELEMENTS(g_aArgsRTStrPrintfV),                      REMFNDESC_FLAGS_RET_INT | REMFNDESC_FLAGS_VALIST, sizeof(size_t), NULL },
+    { "RTThreadSelf",                           (void *)(uintptr_t)&RTThreadSelf,                   NULL,                                       0,                                                     REMFNDESC_FLAGS_RET_INT,    sizeof(RTTHREAD),    NULL },
+    { "RTThreadNativeSelf",                     (void *)(uintptr_t)&RTThreadNativeSelf,             NULL,                                       0,                                                     REMFNDESC_FLAGS_RET_INT, sizeof(RTNATIVETHREAD), NULL },
+    { "RTThreadGetWriteLockCount",              (void *)(uintptr_t)&RTThreadGetWriteLockCount,      &g_aArgsThread[0],                          0,                                                     REMFNDESC_FLAGS_RET_INT,    sizeof(int32_t),     NULL },
+};
+
+
+/**
+ * Descriptors for the functions imported from the C runtime (CRT).
+ */
+static REMFNDESC g_aCRTImports[] =
+{
+    { "memcpy",                                (void *)(uintptr_t)&memcpy,                          &g_aArgsmemcpy[0],                          RT_ELEMENTS(g_aArgsmemcpy),                            REMFNDESC_FLAGS_RET_INT,    sizeof(void *), NULL },
+    { "memset",                                (void *)(uintptr_t)&memset,                          &g_aArgsmemset[0],                          RT_ELEMENTS(g_aArgsmemset),                            REMFNDESC_FLAGS_RET_INT,    sizeof(void *), NULL }
+/*
+floor               floor
+memcpy              memcpy
+sqrt                sqrt
+sqrtf               sqrtf
+*/
+};
+
+
+# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE)
+/** LIFO of read-write-executable memory chunks used for wrappers. */
+static PREMEXECMEM g_pExecMemHead;
+# endif
+
+
+/*******************************************************************************
+*   Internal Functions                                                         *
+*******************************************************************************/
+static int remGenerateExportGlue(PRTUINTPTR pValue, PCREMFNDESC pDesc);
+
+# ifdef USE_REM_CALLING_CONVENTION_GLUE
+DECLASM(int) WrapGCC2MSC0Int(void);  DECLASM(int) WrapGCC2MSC0Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC1Int(void);  DECLASM(int) WrapGCC2MSC1Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC2Int(void);  DECLASM(int) WrapGCC2MSC2Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC3Int(void);  DECLASM(int) WrapGCC2MSC3Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC4Int(void);  DECLASM(int) WrapGCC2MSC4Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC5Int(void);  DECLASM(int) WrapGCC2MSC5Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC6Int(void);  DECLASM(int) WrapGCC2MSC6Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC7Int(void);  DECLASM(int) WrapGCC2MSC7Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC8Int(void);  DECLASM(int) WrapGCC2MSC8Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC9Int(void);  DECLASM(int) WrapGCC2MSC9Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC10Int(void); DECLASM(int) WrapGCC2MSC10Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC11Int(void); DECLASM(int) WrapGCC2MSC11Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC12Int(void); DECLASM(int) WrapGCC2MSC12Int_EndProc(void);
+DECLASM(int) WrapGCC2MSCVariadictInt(void); DECLASM(int) WrapGCC2MSCVariadictInt_EndProc(void);
+DECLASM(int) WrapGCC2MSC_SSMR3RegisterInternal(void); DECLASM(int) WrapGCC2MSC_SSMR3RegisterInternal_EndProc(void);
+
+DECLASM(int) WrapMSC2GCC0Int(void);  DECLASM(int) WrapMSC2GCC0Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC1Int(void);  DECLASM(int) WrapMSC2GCC1Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC2Int(void);  DECLASM(int) WrapMSC2GCC2Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC3Int(void);  DECLASM(int) WrapMSC2GCC3Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC4Int(void);  DECLASM(int) WrapMSC2GCC4Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC5Int(void);  DECLASM(int) WrapMSC2GCC5Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC6Int(void);  DECLASM(int) WrapMSC2GCC6Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC7Int(void);  DECLASM(int) WrapMSC2GCC7Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC8Int(void);  DECLASM(int) WrapMSC2GCC8Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC9Int(void);  DECLASM(int) WrapMSC2GCC9Int_EndProc(void);
+# endif
+
+
+# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE)
+/**
+ * Allocates a block of memory for glue code.
+ *
+ * The returned memory is padded with INT3s.
+ *
+ * @returns Pointer to the allocated memory, NULL on failure.
+ * @param   cb          The amount of memory to allocate.
+ */
+static void *remAllocGlue(size_t cb)
+{
+    PREMEXECMEM pCur = g_pExecMemHead;
+    uint32_t cbAligned = (uint32_t)RT_ALIGN_32(cb, 32);
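+    /* First-fit scan of the existing executable chunks; requests are rounded
+       up to 32 bytes so each wrapper starts on a 32-byte boundary. */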
+    while (pCur)
+    {
+        if (pCur->cb - pCur->off >= cbAligned)
+        {
+            void *pv = (uint8_t *)pCur + pCur->off;
+            pCur->off += cbAligned;
+            return memset(pv, 0xcc, cbAligned);
+        }
+        pCur = pCur->pNext;
+    }
+
+    /* add a new chunk */
+    AssertReturn(_64K - RT_ALIGN_Z(sizeof(*pCur), 32) > cbAligned, NULL);
+    pCur = (PREMEXECMEM)RTMemExecAlloc(_64K);
+    AssertReturn(pCur, NULL);
+    pCur->cb = _64K;
+    pCur->off = RT_ALIGN_32(sizeof(*pCur), 32) + cbAligned;
+    pCur->pNext = g_pExecMemHead;
+    g_pExecMemHead = pCur;
+    return memset((uint8_t *)pCur + RT_ALIGN_Z(sizeof(*pCur), 32), 0xcc, cbAligned);
+}
+# endif /* USE_REM_CALLING_CONVENTION_GLUE || USE_REM_IMPORT_JUMP_GLUE */
+
+
+# ifdef USE_REM_CALLING_CONVENTION_GLUE
+/**
+ * Checks whether a function deals exclusively with straightforward integer arguments and return values.
+ *
+ * @returns True if it's simple, false if it's bothersome.
+ * @param   pDesc       The function descriptor.
+ */
+static bool remIsFunctionAllInts(PCREMFNDESC pDesc)
+{
+    if (    (   (pDesc->fFlags & REMFNDESC_FLAGS_RET_TYPE_MASK) != REMFNDESC_FLAGS_RET_INT
+             || pDesc->cbReturn > sizeof(uint64_t))
+        &&  (pDesc->fFlags & REMFNDESC_FLAGS_RET_TYPE_MASK) != REMFNDESC_FLAGS_RET_VOID)
+        return false;
+    unsigned i = pDesc->cParams;
+    while (i-- > 0)
+        switch (pDesc->paParams[i].fFlags & REMPARMDESC_FLAGS_TYPE_MASK)
+        {
+            case REMPARMDESC_FLAGS_INT:
+            case REMPARMDESC_FLAGS_GCPTR:
+            case REMPARMDESC_FLAGS_GCPHYS:
+            case REMPARMDESC_FLAGS_HCPHYS:
+                break;
+
+            default:
+                AssertReleaseMsgFailed(("Invalid param flags %#x for #%d of %s!\n", pDesc->paParams[i].fFlags, i, pDesc->pszName));
+            case REMPARMDESC_FLAGS_VALIST:
+            case REMPARMDESC_FLAGS_ELLIPSIS:
+            case REMPARMDESC_FLAGS_FLOAT:
+            case REMPARMDESC_FLAGS_STRUCT:
+            case REMPARMDESC_FLAGS_PFN:
+                return false;
+        }
+    return true;
+}
+
+
+/**
+ * Checks if the function has an ellipsis (...) argument.
+ *
+ * @returns true if it has an ellipsis, otherwise false.
+ * @param   pDesc       The function descriptor.
+ */
+static bool remHasFunctionEllipsis(PCREMFNDESC pDesc)
+{
+    unsigned i = pDesc->cParams;
+    while (i-- > 0)
+        if ((pDesc->paParams[i].fFlags & REMPARMDESC_FLAGS_TYPE_MASK) == REMPARMDESC_FLAGS_ELLIPSIS)
+            return true;
+    return false;
+}
+
+
+/**
+ * Checks if the function uses floating point (FP) arguments or return value.
+ *
+ * @returns true if it uses floating point, otherwise false.
+ * @param   pDesc       The function descriptor.
+ */
+static bool remIsFunctionUsingFP(PCREMFNDESC pDesc)
+{
+    if ((pDesc->fFlags & REMFNDESC_FLAGS_RET_TYPE_MASK) == REMFNDESC_FLAGS_RET_FLOAT)
+        return true;
+    unsigned i = pDesc->cParams;
+    while (i-- > 0)
+        if ((pDesc->paParams[i].fFlags & REMPARMDESC_FLAGS_TYPE_MASK) == REMPARMDESC_FLAGS_FLOAT)
+            return true;
+    return false;
+}
+
+
+/** @name The export and import fixups.
+ * @{ */
+#define REM_FIXUP_32_REAL_STUFF    UINT32_C(0xdeadbeef)
+#define REM_FIXUP_64_REAL_STUFF    UINT64_C(0xdeadf00df00ddead)
+#define REM_FIXUP_64_DESC          UINT64_C(0xdead00010001dead)
+#define REM_FIXUP_64_LOG_ENTRY     UINT64_C(0xdead00020002dead)
+#define REM_FIXUP_64_LOG_EXIT      UINT64_C(0xdead00030003dead)
+#define REM_FIXUP_64_WRAP_GCC_CB   UINT64_C(0xdead00040004dead)
+/** @} */
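+
+/* These magic values are embedded as placeholders in the assembly glue
+   templates (the WrapGCC2MSC* / WrapMSC2GCC* procedures declared above).
+   remGenerateExportGlueFixup() and remGenerateImportGlueFixup() scan a freshly
+   copied template and patch each marker with the real target, descriptor or
+   logger address. */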
+
+
+/**
+ * Entry logger function.
+ *
+ * @param   pDesc       The description.
+ */
+DECLASM(void) remLogEntry(PCREMFNDESC pDesc)
+{
+    RTPrintf("calling %s\n", pDesc->pszName);
+}
+
+
+/**
+ * Exit logger function.
+ *
+ * @param   pDesc       The description.
+ * @param   pvRet       The return value.
+ */
+DECLASM(void) remLogExit(PCREMFNDESC pDesc, void *pvRet)
+{
+    RTPrintf("returning %p from %s\n", pvRet, pDesc->pszName);
+}
+
+
+/**
+ * Creates a wrapper for the specified callback function at run time.
+ *
+ * @param   pDesc       The function descriptor.
+ * @param   pValue      Upon entry *pValue contains the address of the function to be wrapped.
+ *                      Upon return *pValue contains the address of the wrapper glue function.
+ * @param   iParam      The parameter index in the function descriptor (0 based).
+ *                      If UINT32_MAX, pDesc is the descriptor for *pValue.
+ */
+DECLASM(void) remWrapGCCCallback(PCREMFNDESC pDesc, PRTUINTPTR pValue, uint32_t iParam)
+{
+    AssertPtr(pDesc);
+    AssertPtr(pValue);
+
+    /*
+     * Nothing to wrap if the callback address is NULL.
+     */
+    if (!*pValue)
+        return;
+
+    /*
+     * Locate the right function descriptor.
+     */
+    if (iParam != UINT32_MAX)
+    {
+        AssertRelease(iParam < pDesc->cParams);
+        pDesc = (PCREMFNDESC)pDesc->paParams[iParam].pvExtra;
+        AssertPtr(pDesc);
+    }
+
+    /*
+     * When we get serious, here is where to insert the hash table lookup.
+     */
+
+    /*
+     * Create a new glue patch.
+     */
+#ifdef RT_OS_WINDOWS
+    int rc = remGenerateExportGlue(pValue, pDesc);
+#else
+#error "port me"
+#endif
+    AssertReleaseRC(rc);
+
+    /*
+     * Add it to the hash (later)
+     */
+}
+
+
+/**
+ * Fixes export glue.
+ *
+ * @param   pvGlue      The glue code.
+ * @param   cb          The size of the glue code.
+ * @param   uExport     The address of the export we're wrapping.
+ * @param   pDesc       The export descriptor.
+ */
+static void remGenerateExportGlueFixup(void *pvGlue, size_t cb, uintptr_t uExport, PCREMFNDESC pDesc)
+{
+    union
+    {
+        uint8_t  *pu8;
+        int32_t  *pi32;
+        uint32_t *pu32;
+        uint64_t *pu64;
+        void     *pv;
+    } u;
+    u.pv = pvGlue;
+
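+    /* Scan the glue code byte by byte, replacing each recognized fixup marker
+       in place and skipping everything else. */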
+    while (cb >= 4)
+    {
+        /** @todo add defines for the fixup constants... */
+        if (*u.pu32 == REM_FIXUP_32_REAL_STUFF)
+        {
+            /* 32-bit rel jmp/call to real export. */
+            *u.pi32 = uExport - (uintptr_t)(u.pi32 + 1);
+            Assert((uintptr_t)(u.pi32 + 1) + *u.pi32 == uExport);
+            u.pi32++;
+            cb -= 4;
+            continue;
+        }
+        if (cb >= 8 && *u.pu64 == REM_FIXUP_64_REAL_STUFF)
+        {
+            /* 64-bit address to the real export. */
+            *u.pu64++ = uExport;
+            cb -= 8;
+            continue;
+        }
+        if (cb >= 8 && *u.pu64 == REM_FIXUP_64_DESC)
+        {
+            /* 64-bit address to the descriptor. */
+            *u.pu64++ = (uintptr_t)pDesc;
+            cb -= 8;
+            continue;
+        }
+        if (cb >= 8 && *u.pu64 == REM_FIXUP_64_WRAP_GCC_CB)
+        {
+            /* 64-bit address to the callback wrapper function. */
+            *u.pu64++ = (uintptr_t)remWrapGCCCallback;
+            cb -= 8;
+            continue;
+        }
+        if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_ENTRY)
+        {
+            /* 64-bit address to the entry logger function. */
+            *u.pu64++ = (uintptr_t)remLogEntry;
+            cb -= 8;
+            continue;
+        }
+        if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_EXIT)
+        {
+            /* 64-bit address to the exit logger function. */
+            *u.pu64++ = (uintptr_t)remLogExit;
+            cb -= 8;
+            continue;
+        }
+
+        /* move on. */
+        u.pu8++;
+        cb--;
+    }
+}
+
+
+/**
+ * Fixes import glue.
+ *
+ * @param   pvGlue  The glue code.
+ * @param   cb      The size of the glue code.
+ * @param   pDesc   The import descriptor.
+ */
+static void remGenerateImportGlueFixup(void *pvGlue, size_t cb, PCREMFNDESC pDesc)
+{
+    union
+    {
+        uint8_t  *pu8;
+        int32_t  *pi32;
+        uint32_t *pu32;
+        uint64_t *pu64;
+        void     *pv;
+    } u;
+    u.pv = pvGlue;
+
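+    /* Same marker scan as remGenerateExportGlueFixup(), except the call/jmp
+       targets come from pDesc->pv (the real imported function). */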
+    while (cb >= 4)
+    {
+        if (*u.pu32 == REM_FIXUP_32_REAL_STUFF)
+        {
+            /* 32-bit rel jmp/call to real function. */
+            *u.pi32 = (uintptr_t)pDesc->pv - (uintptr_t)(u.pi32 + 1);
+            Assert((uintptr_t)(u.pi32 + 1) + *u.pi32 == (uintptr_t)pDesc->pv);
+            u.pi32++;
+            cb -= 4;
+            continue;
+        }
+        if (cb >= 8 && *u.pu64 == REM_FIXUP_64_REAL_STUFF)
+        {
+            /* 64-bit address to the real function. */
+            *u.pu64++ = (uintptr_t)pDesc->pv;
+            cb -= 8;
+            continue;
+        }
+        if (cb >= 8 && *u.pu64 == REM_FIXUP_64_DESC)
+        {
+            /* 64-bit address to the descriptor. */
+            *u.pu64++ = (uintptr_t)pDesc;
+            cb -= 8;
+            continue;
+        }
+        if (cb >= 8 && *u.pu64 == REM_FIXUP_64_WRAP_GCC_CB)
+        {
+            /* 64-bit address to the callback wrapper function. */
+            *u.pu64++ = (uintptr_t)remWrapGCCCallback;
+            cb -= 8;
+            continue;
+        }
+        if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_ENTRY)
+        {
+            /* 64-bit address to the entry logger function. */
+            *u.pu64++ = (uintptr_t)remLogEntry;
+            cb -= 8;
+            continue;
+        }
+        if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_EXIT)
+        {
+            /* 64-bit address to the exit logger function. */
+            *u.pu64++ = (uintptr_t)remLogExit;
+            cb -= 8;
+            continue;
+        }
+
+        /* move on. */
+        u.pu8++;
+        cb--;
+    }
+}
+
+# endif /* USE_REM_CALLING_CONVENTION_GLUE */
+
+
+/**
+ * Generate wrapper glue code for an export.
+ *
+ * This is only used on Win64 when loading a 64-bit Linux module.  On other
+ * platforms it does nothing.
+ *
+ * @returns VBox status code.
+ * @param   pValue      IN: Where to get the address of the function to wrap.
+ *                      OUT: Where to store the glue address.
+ * @param   pDesc       The export descriptor.
+ */
+static int remGenerateExportGlue(PRTUINTPTR pValue, PCREMFNDESC pDesc)
+{
+# ifdef USE_REM_CALLING_CONVENTION_GLUE
+    uintptr_t *ppfn = (uintptr_t *)pDesc->pv;
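+    /* For a real export, pDesc->pv points at the pfnREMR3Xxx variable we call
+       the module through and the glue address is cached there; for callback
+       descriptors pDesc->pv is NULL and the local pfn below stands in. */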
+
+    uintptr_t pfn = 0; /* a little hack for the callback glue */
+    if (!ppfn)
+        ppfn = &pfn;
+
+    if (!*ppfn)
+    {
+        if (remIsFunctionAllInts(pDesc))
+        {
+            static const struct { void *pvStart, *pvEnd; } s_aTemplates[] =
+            {
+                { (void *)&WrapMSC2GCC0Int,  (void *)&WrapMSC2GCC0Int_EndProc },
+                { (void *)&WrapMSC2GCC1Int,  (void *)&WrapMSC2GCC1Int_EndProc },
+                { (void *)&WrapMSC2GCC2Int,  (void *)&WrapMSC2GCC2Int_EndProc },
+                { (void *)&WrapMSC2GCC3Int,  (void *)&WrapMSC2GCC3Int_EndProc },
+                { (void *)&WrapMSC2GCC4Int,  (void *)&WrapMSC2GCC4Int_EndProc },
+                { (void *)&WrapMSC2GCC5Int,  (void *)&WrapMSC2GCC5Int_EndProc },
+                { (void *)&WrapMSC2GCC6Int,  (void *)&WrapMSC2GCC6Int_EndProc },
+                { (void *)&WrapMSC2GCC7Int,  (void *)&WrapMSC2GCC7Int_EndProc },
+                { (void *)&WrapMSC2GCC8Int,  (void *)&WrapMSC2GCC8Int_EndProc },
+                { (void *)&WrapMSC2GCC9Int,  (void *)&WrapMSC2GCC9Int_EndProc },
+            };
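+            /* One template per integer-argument count, selected by cParams. */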
+            const unsigned i = pDesc->cParams;
+            AssertReleaseMsg(i < RT_ELEMENTS(s_aTemplates), ("%d (%s)\n", i, pDesc->pszName));
+
+            /* duplicate the patch. */
+            const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart;
+            uint8_t *pb = (uint8_t *)remAllocGlue(cb);
+            AssertReturn(pb, VERR_NO_MEMORY);
+            memcpy(pb, s_aTemplates[i].pvStart, cb);
+
+            /* fix it up. */
+            remGenerateExportGlueFixup(pb, cb, *pValue, pDesc);
+            *ppfn = (uintptr_t)pb;
+        }
+        else
+        {
+            /* custom hacks - it's simpler to make assembly templates than to write a more generic code generator... */
+            static const struct { const char *pszName; PFNRT pvStart, pvEnd; } s_aTemplates[] =
+            {
+                { "somefunction",  (PFNRT)&WrapMSC2GCC9Int,  (PFNRT)&WrapMSC2GCC9Int_EndProc },
+            };
+            unsigned i;
+            for (i = 0; i < RT_ELEMENTS(s_aTemplates); i++)
+                if (!strcmp(pDesc->pszName, s_aTemplates[i].pszName))
+                    break;
+            AssertReleaseMsgReturn(i < RT_ELEMENTS(s_aTemplates), ("Not implemented! %s\n", pDesc->pszName), VERR_NOT_IMPLEMENTED);
+
+            /* duplicate the patch. */
+            const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart;
+            uint8_t *pb = (uint8_t *)remAllocGlue(cb);
+            AssertReturn(pb, VERR_NO_MEMORY);
+            memcpy(pb, s_aTemplates[i].pvStart, cb);
+
+            /* fix it up. */
+            remGenerateExportGlueFixup(pb, cb, *pValue, pDesc);
+            *ppfn = (uintptr_t)pb;
+        }
+    }
+    *pValue = *ppfn;
+    return VINF_SUCCESS;
+# else  /* !USE_REM_CALLING_CONVENTION_GLUE */
+    return VINF_SUCCESS;
+# endif /* !USE_REM_CALLING_CONVENTION_GLUE */
+}
+
+
+/**
+ * Generate wrapper glue code for an import.
+ *
+ * This is only used on Win64 when loading a 64-bit Linux module.  On other
+ * platforms it simply returns the address of the imported function without
+ * generating any glue code.
+ *
+ * @returns VBox status code.
+ * @param   pValue      Where to store the glue address.
+ * @param   pDesc       The import descriptor.
+ */
+static int remGenerateImportGlue(PRTUINTPTR pValue, PREMFNDESC pDesc)
+{
+# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE)
+    if (!pDesc->pvWrapper)
+    {
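+        /* The wrapper is generated once and cached in the descriptor so later
+           resolutions of the same import reuse it. */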
+#  ifdef USE_REM_CALLING_CONVENTION_GLUE
+        if (remIsFunctionAllInts(pDesc))
+        {
+            static const struct { void *pvStart, *pvEnd; } s_aTemplates[] =
+            {
+                { (void *)&WrapGCC2MSC0Int,  (void *)&WrapGCC2MSC0Int_EndProc },
+                { (void *)&WrapGCC2MSC1Int,  (void *)&WrapGCC2MSC1Int_EndProc },
+                { (void *)&WrapGCC2MSC2Int,  (void *)&WrapGCC2MSC2Int_EndProc },
+                { (void *)&WrapGCC2MSC3Int,  (void *)&WrapGCC2MSC3Int_EndProc },
+                { (void *)&WrapGCC2MSC4Int,  (void *)&WrapGCC2MSC4Int_EndProc },
+                { (void *)&WrapGCC2MSC5Int,  (void *)&WrapGCC2MSC5Int_EndProc },
+                { (void *)&WrapGCC2MSC6Int,  (void *)&WrapGCC2MSC6Int_EndProc },
+                { (void *)&WrapGCC2MSC7Int,  (void *)&WrapGCC2MSC7Int_EndProc },
+                { (void *)&WrapGCC2MSC8Int,  (void *)&WrapGCC2MSC8Int_EndProc },
+                { (void *)&WrapGCC2MSC9Int,  (void *)&WrapGCC2MSC9Int_EndProc },
+                { (void *)&WrapGCC2MSC10Int, (void *)&WrapGCC2MSC10Int_EndProc },
+                { (void *)&WrapGCC2MSC11Int, (void *)&WrapGCC2MSC11Int_EndProc },
+                { (void *)&WrapGCC2MSC12Int, (void *)&WrapGCC2MSC12Int_EndProc }
+            };
+            const unsigned i = pDesc->cParams;
+            AssertReleaseMsg(i < RT_ELEMENTS(s_aTemplates), ("%d (%s)\n", i, pDesc->pszName));
+
+            /* duplicate the patch. */
+            const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart;
+            pDesc->pvWrapper = remAllocGlue(cb);
+            AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY);
+            memcpy(pDesc->pvWrapper, s_aTemplates[i].pvStart, cb);
+
+            /* fix it up. */
+            remGenerateImportGlueFixup((uint8_t *)pDesc->pvWrapper, cb, pDesc);
+        }
+        else if (   remHasFunctionEllipsis(pDesc)
+                 && !remIsFunctionUsingFP(pDesc))
+        {
+            /* duplicate the patch. */
+            const size_t cb = (uintptr_t)&WrapGCC2MSCVariadictInt_EndProc - (uintptr_t)&WrapGCC2MSCVariadictInt;
+            pDesc->pvWrapper = remAllocGlue(cb);
+            AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY);
+            memcpy(pDesc->pvWrapper, (void *)&WrapGCC2MSCVariadictInt, cb);
+
+            /* fix it up. */
+            remGenerateImportGlueFixup((uint8_t *)pDesc->pvWrapper, cb, pDesc);
+        }
+        else
+        {
+            /* custom hacks - it's simpler to make assembly templates than to write a more generic code generator... */
+            static const struct { const char *pszName; PFNRT pvStart, pvEnd; } s_aTemplates[] =
+            {
+                { "SSMR3RegisterInternal",  (PFNRT)&WrapGCC2MSC_SSMR3RegisterInternal,  (PFNRT)&WrapGCC2MSC_SSMR3RegisterInternal_EndProc },
+            };
+            unsigned i;
+            for (i = 0; i < RT_ELEMENTS(s_aTemplates); i++)
+                if (!strcmp(pDesc->pszName, s_aTemplates[i].pszName))
+                    break;
+            AssertReleaseMsgReturn(i < RT_ELEMENTS(s_aTemplates), ("Not implemented! %s\n", pDesc->pszName), VERR_NOT_IMPLEMENTED);
+
+            /* duplicate the patch. */
+            const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart;
+            pDesc->pvWrapper = remAllocGlue(cb);
+            AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY);
+            memcpy(pDesc->pvWrapper, s_aTemplates[i].pvStart, cb);
+
+            /* fix it up. */
+            remGenerateImportGlueFixup((uint8_t *)pDesc->pvWrapper, cb, pDesc);
+        }
+#  else  /* !USE_REM_CALLING_CONVENTION_GLUE */
+
+        /*
+         * Generate a jump patch.
+         */
+        uint8_t *pb;
+#   ifdef RT_ARCH_AMD64
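+        /* Emit "jmp qword [disp32]" (FF 24 25 imm32): the 32-bit absolute
+           operand points at the 64-bit import address stored just past the
+           instruction, while the rest of the 32-byte block keeps its 0xCC
+           fill.  This assumes the glue lives at an address that fits in the
+           sign-extended 32-bit displacement. */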
+        pDesc->pvWrapper = pb = (uint8_t *)remAllocGlue(32);
+        AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY);
+        /**pb++ = 0xcc;*/
+        *pb++ = 0xff;
+        *pb++ = 0x24;
+        *pb++ = 0x25;
+        *(uint32_t *)pb = (uintptr_t)pb + 5;
+        pb += 5;
+        *(uint64_t *)pb = (uint64_t)pDesc->pv;
+#   else
+        pDesc->pvWrapper = pb = (uint8_t *)remAllocGlue(8);
+        AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY);
+        *pb++ = 0xea;
+        *(uint32_t *)pb = (uint32_t)pDesc->pv;
+#   endif
+#  endif /* !USE_REM_CALLING_CONVENTION_GLUE */
+    }
+    *pValue = (uintptr_t)pDesc->pvWrapper;
+# else  /* !USE_REM_CALLING_CONVENTION_GLUE */
+    *pValue = (uintptr_t)pDesc->pv;
+# endif /* !USE_REM_CALLING_CONVENTION_GLUE */
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Resolve an external symbol during RTLdrGetBits().
+ *
+ * @returns IPRT status code.
+ * @param   hLdrMod         The loader module handle.
+ * @param   pszModule       Module name.
+ * @param   pszSymbol       Symbol name, NULL if uSymbol should be used.
+ * @param   uSymbol         Symbol ordinal, ~0 if pszSymbol should be used.
+ * @param   pValue          Where to store the symbol value (address).
+ * @param   pvUser          User argument.
+ */
+static DECLCALLBACK(int) remGetImport(RTLDRMOD hLdrMod, const char *pszModule, const char *pszSymbol, unsigned uSymbol, RTUINTPTR *pValue, void *pvUser)
+{
+    unsigned i;
+    for (i = 0; i < RT_ELEMENTS(g_aVMMImports); i++)
+        if (!strcmp(g_aVMMImports[i].pszName, pszSymbol))
+            return remGenerateImportGlue(pValue, &g_aVMMImports[i]);
+    for (i = 0; i < RT_ELEMENTS(g_aRTImports); i++)
+        if (!strcmp(g_aRTImports[i].pszName, pszSymbol))
+            return remGenerateImportGlue(pValue, &g_aRTImports[i]);
+    for (i = 0; i < RT_ELEMENTS(g_aCRTImports); i++)
+        if (!strcmp(g_aCRTImports[i].pszName, pszSymbol))
+            return remGenerateImportGlue(pValue, &g_aCRTImports[i]);
+    LogRel(("Missing REM Import: %s\n", pszSymbol));
+#if 1
+    *pValue = 0;
+    AssertMsgFailed(("%s.%s\n", pszModule, pszSymbol));
+    return VERR_SYMBOL_NOT_FOUND;
+#else
+    return remGenerateImportGlue(pValue, &g_aCRTImports[0]);
+#endif
+}
+
+/**
+ * Loads the Linux object and resolves all imports and exports.
+ *
+ * @returns VBox status code.
+ */
+static int remLoadLinuxObj(void)
+{
+    size_t  offFilename;
+    char    szPath[RTPATH_MAX];
+    int rc = RTPathAppPrivateArch(szPath, sizeof(szPath) - 32);
+    AssertRCReturn(rc, rc);
+    offFilename = strlen(szPath);
+
+    /*
+     * Load the VBoxREM2.rel object/DLL.
+     */
+    strcpy(&szPath[offFilename], "/VBoxREM2.rel");
+    rc = RTLdrOpen(szPath, &g_ModREM2);
+    if (VBOX_SUCCESS(rc))
+    {
+        g_pvREM2 = RTMemExecAlloc(RTLdrSize(g_ModREM2));
+        if (g_pvREM2)
+        {
+#ifdef DEBUG /* How to load the VBoxREM2.rel symbols into the GNU debugger. */
+            RTPrintf("VBoxREMWrapper: (gdb) add-symbol-file %s 0x%p\n", szPath, g_pvREM2);
+#endif
+            LogRel(("REM: Loading %s at 0x%p (%d bytes)\n"
+                    "REM: (gdb) add-symbol-file %s 0x%p\n",
+                    szPath, g_pvREM2, RTLdrSize(g_ModREM2), szPath, g_pvREM2));
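+            /* Relocate the image; each import is resolved through remGetImport(),
+               which also sets up any calling convention / jump glue needed. */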
+            rc = RTLdrGetBits(g_ModREM2, g_pvREM2, (RTUINTPTR)g_pvREM2, remGetImport, NULL);
+            if (VBOX_SUCCESS(rc))
+            {
+                /*
+                 * Resolve exports.
+                 */
+                unsigned i;
+                for (i = 0; i < RT_ELEMENTS(g_aExports); i++)
+                {
+                    RTUINTPTR Value;
+                    rc = RTLdrGetSymbolEx(g_ModREM2, g_pvREM2, (RTUINTPTR)g_pvREM2, g_aExports[i].pszName, &Value);
+                    AssertMsgRC(rc, ("%s rc=%Vrc\n", g_aExports[i].pszName, rc));
+                    if (VBOX_FAILURE(rc))
+                        break;
+                    rc = remGenerateExportGlue(&Value, &g_aExports[i]);
+                    if (VBOX_FAILURE(rc))
+                        break;
+                    *(void **)g_aExports[i].pv = (void *)(uintptr_t)Value;
+                }
+                return rc;
+            }
+            RTMemExecFree(g_pvREM2);
+        }
+        RTLdrClose(g_ModREM2);
+        g_ModREM2 = NIL_RTLDRMOD;
+    }
+    LogRel(("REM: failed loading '%s', rc=%Vrc\n", szPath, rc));
+    return rc;
+}
+
+
+/**
+ * Unloads the Linux object, freeing up all resources (DLLs and
+ * import glue) we allocated during remLoadLinuxObj().
+ */
+static void remUnloadLinuxObj(void)
+{
+    unsigned i;
+
+    /* close modules. */
+    RTLdrClose(g_ModREM2);
+    g_ModREM2 = NIL_RTLDRMOD;
+    RTMemExecFree(g_pvREM2);
+    g_pvREM2 = NULL;
+
+    /* clear the pointers. */
+    for (i = 0; i < RT_ELEMENTS(g_aExports); i++)
+        *(void **)g_aExports[i].pv = NULL;
+# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE)
+    for (i = 0; i < RT_ELEMENTS(g_aVMMImports); i++)
+        g_aVMMImports[i].pvWrapper = NULL;
+    for (i = 0; i < RT_ELEMENTS(g_aRTImports); i++)
+        g_aRTImports[i].pvWrapper = NULL;
+    for (i = 0; i < RT_ELEMENTS(g_aCRTImports); i++)
+        g_aCRTImports[i].pvWrapper = NULL;
+
+    /* free wrapper memory. */
+    while (g_pExecMemHead)
+    {
+        PREMEXECMEM pCur = g_pExecMemHead;
+        g_pExecMemHead = pCur->pNext;
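+        /* Fill the block with int3 (0xcc) before freeing it so any stray call
+           into stale glue code traps immediately. */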
+        memset(pCur, 0xcc, pCur->cb);
+        RTMemExecFree(pCur);
+    }
+# endif
+}
+#endif
+
+
+REMR3DECL(int) REMR3Init(PVM pVM)
+{
+#ifdef USE_REM_STUBS
+    return VINF_SUCCESS;
+#else
+    if (!pfnREMR3Init)
+    {
+        int rc = remLoadLinuxObj();
+        if (VBOX_FAILURE(rc))
+            return rc;
+    }
+    return pfnREMR3Init(pVM);
+#endif
+}
+
+REMR3DECL(int) REMR3Term(PVM pVM)
+{
+#ifdef USE_REM_STUBS
+    return VINF_SUCCESS;
+#else
+    int rc;
+    Assert(VALID_PTR(pfnREMR3Term));
+    rc = pfnREMR3Term(pVM);
+    remUnloadLinuxObj();
+    return rc;
+#endif
+}
+
+REMR3DECL(void) REMR3Reset(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3Reset));
+    pfnREMR3Reset(pVM);
+#endif
+}
+
+REMR3DECL(int) REMR3Step(PVM pVM)
+{
+#ifdef USE_REM_STUBS
+    return VERR_NOT_IMPLEMENTED;
+#else
+    Assert(VALID_PTR(pfnREMR3Step));
+    return pfnREMR3Step(pVM);
+#endif
+}
+
+REMR3DECL(int) REMR3BreakpointSet(PVM pVM, RTGCUINTPTR Address)
+{
+#ifdef USE_REM_STUBS
+    return VERR_REM_NO_MORE_BP_SLOTS;
+#else
+    Assert(VALID_PTR(pfnREMR3BreakpointSet));
+    return pfnREMR3BreakpointSet(pVM, Address);
+#endif
+}
+
+REMR3DECL(int) REMR3BreakpointClear(PVM pVM, RTGCUINTPTR Address)
+{
+#ifdef USE_REM_STUBS
+    return VERR_NOT_IMPLEMENTED;
+#else
+    Assert(VALID_PTR(pfnREMR3BreakpointClear));
+    return pfnREMR3BreakpointClear(pVM, Address);
+#endif
+}
+
+REMR3DECL(int) REMR3EmulateInstruction(PVM pVM)
+{
+#ifdef USE_REM_STUBS
+    return VERR_NOT_IMPLEMENTED;
+#else
+    Assert(VALID_PTR(pfnREMR3EmulateInstruction));
+    return pfnREMR3EmulateInstruction(pVM);
+#endif
+}
+
+REMR3DECL(int) REMR3Run(PVM pVM)
+{
+#ifdef USE_REM_STUBS
+    return VERR_NOT_IMPLEMENTED;
+#else
+    Assert(VALID_PTR(pfnREMR3Run));
+    return pfnREMR3Run(pVM);
+#endif
+}
+
+REMR3DECL(int) REMR3State(PVM pVM, bool fFlushTBs)
+{
+#ifdef USE_REM_STUBS
+    return VERR_NOT_IMPLEMENTED;
+#else
+    Assert(VALID_PTR(pfnREMR3State));
+    return pfnREMR3State(pVM, fFlushTBs);
+#endif
+}
+
+REMR3DECL(int) REMR3StateBack(PVM pVM)
+{
+#ifdef USE_REM_STUBS
+    return VERR_NOT_IMPLEMENTED;
+#else
+    Assert(VALID_PTR(pfnREMR3StateBack));
+    return pfnREMR3StateBack(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3StateUpdate(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3StateUpdate));
+    pfnREMR3StateUpdate(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3A20Set(PVM pVM, bool fEnable)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3A20Set));
+    pfnREMR3A20Set(pVM, fEnable);
+#endif
+}
+
+REMR3DECL(void) REMR3ReplayInvalidatedPages(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3ReplayInvalidatedPages));
+    pfnREMR3ReplayInvalidatedPages(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3ReplayHandlerNotifications(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3ReplayHandlerNotifications));
+    pfnREMR3ReplayHandlerNotifications(pVM);
+#endif
+}
+
+REMR3DECL(int) REMR3NotifyCodePageChanged(PVM pVM, RTGCPTR pvCodePage)
+{
+#ifdef USE_REM_STUBS
+    return VINF_SUCCESS;
+#else
+    Assert(VALID_PTR(pfnREMR3NotifyCodePageChanged));
+    return pfnREMR3NotifyCodePageChanged(pVM, pvCodePage);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb, unsigned fFlags)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyPhysRamRegister));
+    pfnREMR3NotifyPhysRamRegister(pVM, GCPhys, cb, fFlags);
+#endif
+}
+
+#ifndef VBOX_WITH_NEW_PHYS_CODE
+REMR3DECL(void) REMR3NotifyPhysRamChunkRegister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb, RTHCUINTPTR pvRam, unsigned fFlags)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyPhysRamChunkRegister));
+    pfnREMR3NotifyPhysRamChunkRegister(pVM, GCPhys, cb, pvRam, fFlags);
+#endif
+}
+#endif
+
+REMR3DECL(void) REMR3NotifyPhysRomRegister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb, void *pvCopy, bool fShadow)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyPhysRomRegister));
+    pfnREMR3NotifyPhysRomRegister(pVM, GCPhys, cb, pvCopy, fShadow);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyPhysReserve(PVM pVM, RTGCPHYS GCPhys, RTUINT cb)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyPhysReserve));
+    pfnREMR3NotifyPhysReserve(pVM, GCPhys, cb);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyHandlerPhysicalRegister(PVM pVM, PGMPHYSHANDLERTYPE enmType, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyHandlerPhysicalRegister));
+    pfnREMR3NotifyHandlerPhysicalRegister(pVM, enmType, GCPhys, cb, fHasHCHandler);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyHandlerPhysicalDeregister(PVM pVM, PGMPHYSHANDLERTYPE enmType, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyHandlerPhysicalDeregister));
+    pfnREMR3NotifyHandlerPhysicalDeregister(pVM, enmType, GCPhys, cb, fHasHCHandler, fRestoreAsRAM);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyHandlerPhysicalModify(PVM pVM, PGMPHYSHANDLERTYPE enmType, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyHandlerPhysicalModify));
+    pfnREMR3NotifyHandlerPhysicalModify(pVM, enmType, GCPhysOld, GCPhysNew, cb, fHasHCHandler, fRestoreAsRAM);
+#endif
+}
+
+REMR3DECL(bool) REMR3IsPageAccessHandled(PVM pVM, RTGCPHYS GCPhys)
+{
+#ifdef USE_REM_STUBS
+    return false;
+#else
+    Assert(VALID_PTR(pfnREMR3IsPageAccessHandled));
+    return pfnREMR3IsPageAccessHandled(pVM, GCPhys);
+#endif
+}
+
+REMR3DECL(int) REMR3DisasEnableStepping(PVM pVM, bool fEnable)
+{
+#ifdef USE_REM_STUBS
+    return VERR_NOT_IMPLEMENTED;
+#else
+    Assert(VALID_PTR(pfnREMR3DisasEnableStepping));
+    return pfnREMR3DisasEnableStepping(pVM, fEnable);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyPendingInterrupt(PVM pVM, uint8_t u8Interrupt)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyPendingInterrupt));
+    pfnREMR3NotifyPendingInterrupt(pVM, u8Interrupt);
+#endif
+}
+
+REMR3DECL(uint32_t) REMR3QueryPendingInterrupt(PVM pVM)
+{
+#ifdef USE_REM_STUBS
+    return REM_NO_PENDING_IRQ;
+#else
+    Assert(VALID_PTR(pfnREMR3QueryPendingInterrupt));
+    return pfnREMR3QueryPendingInterrupt(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyInterruptSet(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyInterruptSet));
+    pfnREMR3NotifyInterruptSet(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyInterruptClear(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyInterruptClear));
+    pfnREMR3NotifyInterruptClear(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyTimerPending(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyTimerPending));
+    pfnREMR3NotifyTimerPending(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyDmaPending(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyDmaPending));
+    pfnREMR3NotifyDmaPending(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyQueuePending(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+    Assert(VALID_PTR(pfnREMR3NotifyQueuePending));
+    pfnREMR3NotifyQueuePending(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyFF(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+    /* the timer can call this early on, so don't be picky. */
+    if (pfnREMR3NotifyFF)
+        pfnREMR3NotifyFF(pVM);
+#endif
+}
+
Index: /trunk/src/recompiler_new/VBoxREMWrapperA.asm
===================================================================
--- /trunk/src/recompiler_new/VBoxREMWrapperA.asm	(revision 13168)
+++ /trunk/src/recompiler_new/VBoxREMWrapperA.asm	(revision 13168)
@@ -0,0 +1,887 @@
+; $Id$
+;; @file
+;
+; VBoxREM Wrapper, Assembly routines and wrapper Templates.
+;
+; Copyright (C) 2006-2007 Sun Microsystems, Inc.
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+; Clara, CA 95054 USA or visit http://www.sun.com if you need
+; additional information or have any questions.
+;
+
+
+
+
+;*******************************************************************************
+;*  Header Files                                                               *
+;*******************************************************************************
+%include "iprt/asmdefs.mac"
+
+%define REM_FIXUP_32_REAL_STUFF    0deadbeefh
+%define REM_FIXUP_64_REAL_STUFF    0deadf00df00ddeadh
+%define REM_FIXUP_64_DESC          0dead00010001deadh
+%define REM_FIXUP_64_LOG_ENTRY     0dead00020002deadh
+%define REM_FIXUP_64_LOG_EXIT      0dead00030003deadh
+%define REM_FIXUP_64_WRAP_GCC_CB   0dead00040004deadh
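+; The REM_FIXUP_* values above are placeholder patterns: the wrapper C code
+; copies a template, locates these magic constants in the copy and patches in
+; the real addresses at runtime (cf. remGenerateImportGlueFixup).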
+
+;%define ENTRY_LOGGING   1
+;%define EXIT_LOGGING    1
+
+
+%ifdef RT_ARCH_AMD64
+ ;;
+ ; 64-bit pushad
+ %macro MY_PUSHAQ 0
+    push    rax
+    push    rbx
+    push    rcx
+    push    rdx
+    push    rsi
+    push    rdi
+    push    rbp
+    push    r8
+    push    r9
+    push    r10
+    push    r11
+    push    r12
+    push    r13
+    push    r14
+    push    r15
+ %endmacro
+
+ ;;
+ ; 64-bit popad
+ %macro MY_POPAQ 0
+    pop     r15
+    pop     r14
+    pop     r13
+    pop     r12
+    pop     r11
+    pop     r10
+    pop     r9
+    pop     r8
+    pop     rbp
+    pop     rdi
+    pop     rsi
+    pop     rdx
+    pop     rcx
+    pop     rbx
+    pop     rax
+ %endmacro
+
+ ;;
+ ; Entry logging
+ %ifdef ENTRY_LOGGING
+  %macro LOG_ENTRY 0
+    MY_PUSHAQ
+    push    rbp
+    mov     rbp, rsp
+    and     rsp, ~0fh
+    sub     rsp, 20h                    ; shadow space
+
+   %ifdef RT_OS_WINDOWS
+    mov     rcx, REM_FIXUP_64_DESC
+   %else
+    mov     rdi, REM_FIXUP_64_DESC
+   %endif
+    mov     rax, REM_FIXUP_64_LOG_ENTRY
+    call    rax
+
+    leave
+    MY_POPAQ
+  %endmacro
+ %else
+  %define LOG_ENTRY
+ %endif
+
+ ;;
+ ; Exit logging
+ %ifdef EXIT_LOGGING
+  %macro LOG_EXIT 0
+    MY_PUSHAQ
+    push    rbp
+    mov     rbp, rsp
+    and     rsp, ~0fh
+    sub     rsp, 20h                    ; shadow space
+
+   %ifdef RT_OS_WINDOWS
+    mov     rdx, rax
+    mov     rcx, REM_FIXUP_64_DESC
+   %else
+    mov     rsi, rax
+    mov     rdi, REM_FIXUP_64_DESC
+   %endif
+    mov     rax, REM_FIXUP_64_LOG_EXIT
+    call    rax
+
+    leave
+    MY_POPAQ
+  %endmacro
+ %else
+  %define LOG_EXIT
+ %endif
+
+%else
+ %define LOG_ENTRY
+ %define LOG_EXIT
+%endif
+
+
+BEGINCODE
+
+%ifdef RT_OS_WINDOWS
+ %ifdef RT_ARCH_AMD64
+
+
+BEGINPROC WrapGCC2MSC0Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC0Int
+
+
+BEGINPROC WrapGCC2MSC1Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC1Int
+
+
+BEGINPROC WrapGCC2MSC2Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC2Int
+
+
+BEGINPROC WrapGCC2MSC3Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+
+    mov     r8, rdx
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC3Int
+
+
+BEGINPROC WrapGCC2MSC4Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+
+    mov     r9, rcx
+    mov     r8, rdx
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC4Int
+
+
+BEGINPROC WrapGCC2MSC5Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 30h
+
+    mov     [rsp + 20h], r8
+    mov     r9, rcx
+    mov     r8, rdx
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC5Int
+
+
+BEGINPROC WrapGCC2MSC6Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 30h
+
+    mov     [rsp + 28h], r9
+    mov     [rsp + 20h], r8
+    mov     r9, rcx
+    mov     r8, rdx
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC6Int
+
+
+BEGINPROC WrapGCC2MSC7Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 40h
+
+    mov     r11, [ebp + 10h]
+    mov     [rsp + 30h], r11
+    mov     [rsp + 28h], r9
+    mov     [rsp + 20h], r8
+    mov     r9, rcx
+    mov     r8, rdx
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC7Int
+
+
+BEGINPROC WrapGCC2MSC8Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 40h
+
+    mov     r10, [ebp + 18h]
+    mov     [rsp + 38h], r10
+    mov     r11, [ebp + 10h]
+    mov     [rsp + 30h], r11
+    mov     [rsp + 28h], r9
+    mov     [rsp + 20h], r8
+    mov     r9, rcx
+    mov     r8, rdx
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC8Int
+
+
+BEGINPROC WrapGCC2MSC9Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 50h
+
+    mov     rax, [ebp + 20h]
+    mov     [rsp + 40h], rax
+    mov     r10, [ebp + 18h]
+    mov     [rsp + 38h], r10
+    mov     r11, [ebp + 10h]
+    mov     [rsp + 30h], r11
+    mov     [rsp + 28h], r9
+    mov     [rsp + 20h], r8
+    mov     r9, rcx
+    mov     r8, rdx
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC9Int
+
+
+BEGINPROC WrapGCC2MSC10Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 50h
+
+    mov     r11, [ebp + 28h]
+    mov     [rsp + 48h], r11
+    mov     rax, [ebp + 20h]
+    mov     [rsp + 40h], rax
+    mov     r10, [ebp + 18h]
+    mov     [rsp + 38h], r10
+    mov     r11, [ebp + 10h]
+    mov     [rsp + 30h], r11
+    mov     [rsp + 28h], r9
+    mov     [rsp + 20h], r8
+    mov     r9, rcx
+    mov     r8, rdx
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC10Int
+
+
+BEGINPROC WrapGCC2MSC11Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 60h
+
+    mov     r10, [ebp + 30h]
+    mov     [rsp + 50h], r10
+    mov     r11, [ebp + 28h]
+    mov     [rsp + 48h], r11
+    mov     rax, [ebp + 20h]
+    mov     [rsp + 40h], rax
+    mov     r10, [ebp + 18h]
+    mov     [rsp + 38h], r10
+    mov     r11, [ebp + 10h]
+    mov     [rsp + 30h], r11
+    mov     [rsp + 28h], r9
+    mov     [rsp + 20h], r8
+    mov     r9, rcx
+    mov     r8, rdx
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC11Int
+
+
+BEGINPROC WrapGCC2MSC12Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 60h
+
+    mov     rax, [ebp + 28h]
+    mov     [rsp + 48h], rax
+    mov     r10, [ebp + 30h]
+    mov     [rsp + 50h], r10
+    mov     r11, [ebp + 28h]
+    mov     [rsp + 48h], r11
+    mov     rax, [ebp + 20h]
+    mov     [rsp + 40h], rax
+    mov     r10, [ebp + 18h]
+    mov     [rsp + 38h], r10
+    mov     r11, [ebp + 10h]
+    mov     [rsp + 30h], r11
+    mov     [rsp + 28h], r9
+    mov     [rsp + 20h], r8
+    mov     r9, rcx
+    mov     r8, rdx
+    mov     rdx, rsi
+    mov     rcx, rdi
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC12Int
+
+
+
+BEGINPROC WrapGCC2MSCVariadictInt
+    LOG_ENTRY
+%ifdef DEBUG
+    ; check that there are NO floating point arguments in XMM registers!
+    or      rax, rax
+    jz      .ok
+    int3
+.ok:
+%endif
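+    ; (In the SysV AMD64 ABI, AL carries the number of vector registers used by
+    ;  a variadic call, so RAX must be zero here or we would be dropping
+    ;  floating point arguments on the conversion.)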
+    sub     rsp, 28h
+    mov     r11, [rsp + 28h]            ; r11 = return address.
+    mov     [rsp + 28h], r9
+    mov     [rsp + 20h], r8
+    mov     r9, rcx
+    mov     [rsp + 18h], r9             ; (*)
+    mov     r8, rdx
+    mov     [rsp + 14h], r8             ; (*)
+    mov     rdx, rsi
+    mov     [rsp + 8h], rdx             ; (*)
+    mov     rcx, rdi
+    mov     [rsp], rcx                  ; (*)
+    mov     rsi, r11                    ; rsi is preserved by the callee.
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    add     rsp, 30h
+    LOG_EXIT
+    jmp     rsi
+    ; (*) unconditionally spill the registers, just in case '...' implies weird stuff on MSC. Check this out!
+ENDPROC WrapGCC2MSCVariadictInt
+
+
+;;
+; Custom template for SSMR3RegisterInternal.
+;
+; (This is based on the WrapGCC2MSC11Int template.)
+;
+; @cproto
+;
+; SSMR3DECL(int) SSMR3RegisterInternal(PVM pVM, const char *pszName, uint32_t u32Instance, uint32_t u32Version, size_t cbGuess,
+;    PFNSSMINTSAVEPREP pfnSavePrep, PFNSSMINTSAVEEXEC pfnSaveExec, PFNSSMINTSAVEDONE pfnSaveDone,
+;    PFNSSMINTLOADPREP pfnLoadPrep, PFNSSMINTLOADEXEC pfnLoadExec, PFNSSMINTLOADDONE pfnLoadDone);
+;
+; @param    pVM             rdi              0
+; @param    pszName         rsi              1
+; @param    u32Instance     rdx              2
+; @param    u32Version      rcx              3
+; @param    cbGuess         r8               4
+; @param    pfnSavePrep     r9               5
+; @param    pfnSaveExec     rbp + 10h        6
+; @param    pfnSaveDone     rbp + 18h        7
+; @param    pfnLoadPrep     rbp + 20h        8
+; @param    pfnLoadExec     rbp + 28h        9
+; @param    pfnLoadDone     rbp + 30h       10
+;
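+; The function pointer arguments (parameters 5..10) point into the GCC-built
+; REM module; before the real call each of them is passed through the
+; wrap-GCC-callback fixup (REM_FIXUP_64_WRAP_GCC_CB) so the MSC-convention
+; SSM code can invoke them later.
+;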
+BEGINPROC WrapGCC2MSC_SSMR3RegisterInternal
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+
+    sub     rsp, 60h
+
+    mov     r10, [ebp + 30h]
+    mov     [rsp + 50h], r10            ; pfnLoadDone
+    mov     r11, [ebp + 28h]
+    mov     [rsp + 48h], r11            ; pfnLoadExec
+    mov     rax, [ebp + 20h]
+    mov     [rsp + 40h], rax            ; pfnLoadPrep
+    mov     r10, [ebp + 18h]
+    mov     [rsp + 38h], r10            ; pfnSaveDone
+    mov     r11, [ebp + 10h]
+    mov     [rsp + 30h], r11            ; pfnSaveExec
+    mov     [rsp + 28h], r9             ; pfnSavePrep
+    mov     [rsp + 20h], r8
+    mov     [rsp + 18h], rcx            ; -> r9
+    mov     [rsp + 10h], rdx            ; -> r8
+    mov     [rsp + 08h], rsi            ; -> rdx
+    mov     [rsp], rdi                  ; -> rcx
+
+    ; Now convert the function pointers. We have to set up a new shadow
+    ; space here since the SSMR3RegisterInternal one is already in use.
+    sub     rsp, 20h
+
+    mov     rcx, REM_FIXUP_64_DESC      ; pDesc
+    lea     rdx, [rsp + 28h + 20h]      ; pValue
+    mov     r8d, 5                      ; iParam
+    mov     rax, REM_FIXUP_64_WRAP_GCC_CB
+    call    rax
+
+    mov     rcx, REM_FIXUP_64_DESC      ; pDesc
+    lea     rdx, [rsp + 30h + 20h]      ; pValue
+    mov     r8d, 6                      ; iParam
+    mov     rax, REM_FIXUP_64_WRAP_GCC_CB
+    call    rax
+
+    mov     rcx, REM_FIXUP_64_DESC      ; pDesc
+    lea     rdx, [rsp + 38h + 20h]      ; pValue
+    mov     r8d, 7                      ; iParam
+    mov     rax, REM_FIXUP_64_WRAP_GCC_CB
+    call    rax
+
+    mov     rcx, REM_FIXUP_64_DESC      ; pDesc
+    lea     rdx, [rsp + 40h + 20h]      ; pValue
+    mov     r8d, 8                      ; iParam
+    mov     rax, REM_FIXUP_64_WRAP_GCC_CB
+    call    rax
+
+    mov     rcx, REM_FIXUP_64_DESC      ; pDesc
+    lea     rdx, [rsp + 48h + 20h]      ; pValue
+    mov     r8d, 9                      ; iParam
+    mov     rax, REM_FIXUP_64_WRAP_GCC_CB
+    call    rax
+
+    mov     rcx, REM_FIXUP_64_DESC      ; pDesc
+    lea     rdx, [rsp + 50h + 20h]      ; pValue
+    mov     r8d, 10                     ; iParam
+    mov     rax, REM_FIXUP_64_WRAP_GCC_CB
+    call    rax
+
+    add     rsp, 20h
+
+    ; finally do the call.
+    mov     r9,  [rsp + 18h]
+    mov     r8,  [rsp + 10h]
+    mov     rdx, [rsp + 08h]
+    mov     rcx, [rsp]
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapGCC2MSC_SSMR3RegisterInternal
+
+
+;
+; The other way around:
+;
+
+
+BEGINPROC WrapMSC2GCC0Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 10h
+    mov     [ebp - 10h], rsi
+    mov     [ebp - 18h], rdi
+
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    mov     rdi, [ebp - 18h]
+    mov     rsi, [ebp - 10h]
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapMSC2GCC0Int
+
+
+BEGINPROC WrapMSC2GCC1Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+    mov     [ebp - 10h], rsi
+    mov     [ebp - 18h], rdi
+
+    mov     rdi, rcx
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    mov     rdi, [ebp - 18h]
+    mov     rsi, [ebp - 10h]
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapMSC2GCC1Int
+
+
+BEGINPROC WrapMSC2GCC2Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+    mov     [ebp - 10h], rsi
+    mov     [ebp - 18h], rdi
+
+    mov     rdi, rcx
+    mov     rsi, rdx
+%ifdef USE_DIRECT_CALLS
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+%else
+    mov     rax, REM_FIXUP_64_REAL_STUFF
+    call    rax
+%endif
+
+    mov     rdi, [ebp - 18h]
+    mov     rsi, [ebp - 10h]
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapMSC2GCC2Int
+
+
+BEGINPROC WrapMSC2GCC3Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+    mov     [ebp - 10h], rsi
+    mov     [ebp - 18h], rdi
+
+    mov     rdi, rcx
+    mov     rsi, rdx
+    mov     rdx, r8
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+
+    mov     rdi, [ebp - 18h]
+    mov     rsi, [ebp - 10h]
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapMSC2GCC3Int
+
+
+BEGINPROC WrapMSC2GCC4Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+    mov     [ebp - 10h], rsi
+    mov     [ebp - 18h], rdi
+
+    mov     rdi, rcx
+    mov     rsi, rdx
+    mov     rdx, r8
+    mov     rcx, r9
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+
+    mov     rdi, [ebp - 18h]
+    mov     rsi, [ebp - 10h]
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapMSC2GCC4Int
+
+
+BEGINPROC WrapMSC2GCC5Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+    mov     [ebp - 10h], rsi
+    mov     [ebp - 18h], rdi
+
+    mov     rdi, rcx
+    mov     rsi, rdx
+    mov     rdx, r8
+    mov     rcx, r9
+    mov     r8, [ebp + 30h]
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+
+    mov     rdi, [ebp - 18h]
+    mov     rsi, [ebp - 10h]
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapMSC2GCC5Int
+
+
+BEGINPROC WrapMSC2GCC6Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 20h
+    mov     [ebp - 10h], rsi
+    mov     [ebp - 18h], rdi
+
+    mov     rdi, rcx
+    mov     rsi, rdx
+    mov     rdx, r8
+    mov     rcx, r9
+    mov     r8, [ebp + 30h]
+    mov     r9, [ebp + 38h]
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+
+    mov     rdi, [ebp - 18h]
+    mov     rsi, [ebp - 10h]
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapMSC2GCC6Int
+
+
+BEGINPROC WrapMSC2GCC7Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 30h
+    mov     [ebp - 10h], rsi ;; @todo wtf is this using ebp instead of rbp?
+    mov     [ebp - 18h], rdi
+
+    mov     rdi, rcx
+    mov     rsi, rdx
+    mov     rdx, r8
+    mov     rcx, r9
+    mov     r8, [ebp + 30h]
+    mov     r9, [ebp + 38h]
+    mov     r10, [ebp + 40h]
+    mov     [esp], r10
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+
+    mov     rdi, [ebp - 18h]
+    mov     rsi, [ebp - 10h]
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapMSC2GCC7Int
+
+
+BEGINPROC WrapMSC2GCC8Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 30h
+    mov     [ebp - 10h], rsi
+    mov     [ebp - 18h], rdi
+
+    mov     rdi, rcx
+    mov     rsi, rdx
+    mov     rdx, r8
+    mov     rcx, r9
+    mov     r8, [ebp + 30h]
+    mov     r9, [ebp + 38h]
+    mov     r10, [ebp + 40h]
+    mov     [esp], r10
+    mov     r11, [ebp + 48h]
+    mov     [esp + 8], r11
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+
+    mov     rdi, [ebp - 18h]
+    mov     rsi, [ebp - 10h]
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapMSC2GCC8Int
+
+
+BEGINPROC WrapMSC2GCC9Int
+    LOG_ENTRY
+    push    rbp
+    mov     rbp, rsp
+    sub     rsp, 40h
+    mov     [ebp - 10h], rsi
+    mov     [ebp - 18h], rdi
+
+    mov     rdi, rcx
+    mov     rsi, rdx
+    mov     rdx, r8
+    mov     rcx, r9
+    mov     r8, [ebp + 30h]
+    mov     r9, [ebp + 38h]
+    mov     r10, [ebp + 40h]
+    mov     [esp], r10
+    mov     r11, [ebp + 48h]
+    mov     [esp + 8], r11
+    mov     rax, [ebp + 50h]
+    mov     [esp + 10h], rax
+    call    $+5+REM_FIXUP_32_REAL_STUFF
+
+    mov     rdi, [ebp - 18h]
+    mov     rsi, [ebp - 10h]
+    leave
+    LOG_EXIT
+    ret
+ENDPROC WrapMSC2GCC9Int
+
+ %endif ; RT_ARCH_AMD64
+%endif ; RT_OS_WINDOWS
+
Index: /trunk/src/recompiler_new/VBoxRecompiler.c
===================================================================
--- /trunk/src/recompiler_new/VBoxRecompiler.c	(revision 13168)
+++ /trunk/src/recompiler_new/VBoxRecompiler.c	(revision 13168)
@@ -0,0 +1,4981 @@
+/* $Id$ */
+/** @file
+ * VBox Recompiler - QEMU.
+ */
+
+/*
+ * Copyright (C) 2006-2007 Sun Microsystems, Inc.
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
+ * Clara, CA 95054 USA or visit http://www.sun.com if you need
+ * additional information or have any questions.
+ */
+
+
+/*******************************************************************************
+*   Header Files                                                               *
+*******************************************************************************/
+#define LOG_GROUP LOG_GROUP_REM
+#include "vl.h"
+#include "exec-all.h"
+
+#include <VBox/rem.h>
+#include <VBox/vmapi.h>
+#include <VBox/tm.h>
+#include <VBox/ssm.h>
+#include <VBox/em.h>
+#include <VBox/trpm.h>
+#include <VBox/iom.h>
+#include <VBox/mm.h>
+#include <VBox/pgm.h>
+#include <VBox/pdm.h>
+#include <VBox/dbgf.h>
+#include <VBox/dbg.h>
+#include <VBox/hwaccm.h>
+#include <VBox/patm.h>
+#include <VBox/csam.h>
+#include "REMInternal.h"
+#include <VBox/vm.h>
+#include <VBox/param.h>
+#include <VBox/err.h>
+
+#include <VBox/log.h>
+#include <iprt/semaphore.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/thread.h>
+#include <iprt/string.h>
+
+/* Don't wanna include everything. */
+extern void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3);
+extern void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0);
+extern void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);
+extern void tlb_flush_page(CPUX86State *env, target_ulong addr);
+extern void tlb_flush(CPUState *env, int flush_global);
+extern void sync_seg(CPUX86State *env1, int seg_reg, int selector);
+extern void sync_ldtr(CPUX86State *env1, int selector);
+extern int  sync_tr(CPUX86State *env1, int selector);
+
+#ifdef VBOX_STRICT
+unsigned long get_phys_page_offset(target_ulong addr);
+#endif
+
+
+/*******************************************************************************
+*   Defined Constants And Macros                                               *
+*******************************************************************************/
+
+/** Copy 80-bit fpu register at pSrc to pDst.
+ * This is probably faster than *calling* memcpy.
+ */
+#define REM_COPY_FPU_REG(pDst, pSrc) \
+    do { *(PX86FPUMMX)(pDst) = *(const X86FPUMMX *)(pSrc); } while (0)
+
+
+/*******************************************************************************
+*   Internal Functions                                                         *
+*******************************************************************************/
+static DECLCALLBACK(int) remR3Save(PVM pVM, PSSMHANDLE pSSM);
+static DECLCALLBACK(int) remR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t u32Version);
+static void     remR3StateUpdate(PVM pVM);
+
+static uint32_t remR3MMIOReadU8(void *pvVM, target_phys_addr_t GCPhys);
+static uint32_t remR3MMIOReadU16(void *pvVM, target_phys_addr_t GCPhys);
+static uint32_t remR3MMIOReadU32(void *pvVM, target_phys_addr_t GCPhys);
+static void     remR3MMIOWriteU8(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32);
+static void     remR3MMIOWriteU16(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32);
+static void     remR3MMIOWriteU32(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32);
+
+static uint32_t remR3HandlerReadU8(void *pvVM, target_phys_addr_t GCPhys);
+static uint32_t remR3HandlerReadU16(void *pvVM, target_phys_addr_t GCPhys);
+static uint32_t remR3HandlerReadU32(void *pvVM, target_phys_addr_t GCPhys);
+static void     remR3HandlerWriteU8(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32);
+static void     remR3HandlerWriteU16(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32);
+static void     remR3HandlerWriteU32(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32);
+
+
+/*******************************************************************************
+*   Global Variables                                                           *
+*******************************************************************************/
+
+/** @todo Move stats to REM::s some rainy day when we have nothing to do. */
+#ifdef VBOX_WITH_STATISTICS
+static STAMPROFILEADV gStatExecuteSingleInstr;
+static STAMPROFILEADV gStatCompilationQEmu;
+static STAMPROFILEADV gStatRunCodeQEmu;
+static STAMPROFILEADV gStatTotalTimeQEmu;
+static STAMPROFILEADV gStatTimers;
+static STAMPROFILEADV gStatTBLookup;
+static STAMPROFILEADV gStatIRQ;
+static STAMPROFILEADV gStatRawCheck;
+static STAMPROFILEADV gStatMemRead;
+static STAMPROFILEADV gStatMemWrite;
+static STAMPROFILE    gStatGCPhys2HCVirt;
+static STAMPROFILE    gStatHCVirt2GCPhys;
+static STAMCOUNTER    gStatCpuGetTSC;
+static STAMCOUNTER    gStatRefuseTFInhibit;
+static STAMCOUNTER    gStatRefuseVM86;
+static STAMCOUNTER    gStatRefusePaging;
+static STAMCOUNTER    gStatRefusePAE;
+static STAMCOUNTER    gStatRefuseIOPLNot0;
+static STAMCOUNTER    gStatRefuseIF0;
+static STAMCOUNTER    gStatRefuseCode16;
+static STAMCOUNTER    gStatRefuseWP0;
+static STAMCOUNTER    gStatRefuseRing1or2;
+static STAMCOUNTER    gStatRefuseCanExecute;
+static STAMCOUNTER    gStatREMGDTChange;
+static STAMCOUNTER    gStatREMIDTChange;
+static STAMCOUNTER    gStatREMLDTRChange;
+static STAMCOUNTER    gStatREMTRChange;
+static STAMCOUNTER    gStatSelOutOfSync[6];
+static STAMCOUNTER    gStatSelOutOfSyncStateBack[6];
+static STAMCOUNTER    gStatFlushTBs;
+#endif
+
+/*
+ * Global stuff.
+ */
+
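+/* Each of the callback tables below holds the 1-, 2- and 4-byte access
+   handlers (indices 0..2), in the order cpu_register_io_memory() expects. */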
+/** MMIO read callbacks. */
+CPUReadMemoryFunc  *g_apfnMMIORead[3] =
+{
+    remR3MMIOReadU8,
+    remR3MMIOReadU16,
+    remR3MMIOReadU32
+};
+
+/** MMIO write callbacks. */
+CPUWriteMemoryFunc *g_apfnMMIOWrite[3] =
+{
+    remR3MMIOWriteU8,
+    remR3MMIOWriteU16,
+    remR3MMIOWriteU32
+};
+
+/** Handler read callbacks. */
+CPUReadMemoryFunc  *g_apfnHandlerRead[3] =
+{
+    remR3HandlerReadU8,
+    remR3HandlerReadU16,
+    remR3HandlerReadU32
+};
+
+/** Handler write callbacks. */
+CPUWriteMemoryFunc *g_apfnHandlerWrite[3] =
+{
+    remR3HandlerWriteU8,
+    remR3HandlerWriteU16,
+    remR3HandlerWriteU32
+};
+
+
+#if defined(VBOX_WITH_DEBUGGER) && !(defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64))
+/*
+ * Debugger commands.
+ */
+static DECLCALLBACK(int) remR3CmdDisasEnableStepping(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR paArgs, unsigned cArgs, PDBGCVAR pResult);
+
+/** '.remstep' arguments. */
+static const DBGCVARDESC    g_aArgRemStep[] =
+{
+    /* cTimesMin,   cTimesMax,  enmCategory,            fFlags,                         pszName,        pszDescription */
+    {  0,           ~0,         DBGCVAR_CAT_NUMBER,     0,                              "on/off",       "Boolean value/mnemonic indicating the new state." },
+};
+
+/** Command descriptors. */
+static const DBGCCMD    g_aCmds[] =
+{
+    {
+        .pszCmd ="remstep",
+        .cArgsMin = 0,
+        .cArgsMax = 1,
+        .paArgDescs = &g_aArgRemStep[0],
+        .cArgDescs = ELEMENTS(g_aArgRemStep),
+        .pResultDesc = NULL,
+        .fFlags = 0,
+        .pfnHandler = remR3CmdDisasEnableStepping,
+        .pszSyntax = "[on/off]",
+        .pszDescription = "Enable or disable the single stepping with logged disassembly. "
+                          "If no arguments show the current state."
+    }
+};
+#endif
+
+
+/* Instantiate the structure signatures. */
+#define REM_STRUCT_OP 0
+#include "Sun/structs.h"
+
+
+
+/*******************************************************************************
+*   Internal Functions                                                         *
+*******************************************************************************/
+static void remAbort(int rc, const char *pszTip);
+extern int testmath(void);
+
+/* Put them here to avoid unused variable warning. */
+AssertCompile(RT_SIZEOFMEMB(VM, rem.padding) >= RT_SIZEOFMEMB(VM, rem.s));
+#if !defined(IPRT_NO_CRT) && (defined(RT_OS_LINUX) || defined(RT_OS_DARWIN) || defined(RT_OS_WINDOWS))
+//AssertCompileMemberSize(REM, Env, REM_ENV_SIZE);
+/* Why did this have to be identical?? */
+AssertCompile(RT_SIZEOFMEMB(REM, Env) <= REM_ENV_SIZE);
+#else
+AssertCompile(RT_SIZEOFMEMB(REM, Env) <= REM_ENV_SIZE);
+#endif
+
+
+/**
+ * Initializes the REM.
+ *
+ * @returns VBox status code.
+ * @param   pVM         The VM to operate on.
+ */
+REMR3DECL(int) REMR3Init(PVM pVM)
+{
+    uint32_t u32Dummy;
+    unsigned i;
+
+    /*
+     * Assert sanity.
+     */
+    AssertReleaseMsg(sizeof(pVM->rem.padding) >= sizeof(pVM->rem.s), ("%#x >= %#x; sizeof(Env)=%#x\n", sizeof(pVM->rem.padding), sizeof(pVM->rem.s), sizeof(pVM->rem.s.Env)));
+    AssertReleaseMsg(sizeof(pVM->rem.s.Env) <= REM_ENV_SIZE, ("%#x == %#x\n", sizeof(pVM->rem.s.Env), REM_ENV_SIZE));
+    AssertReleaseMsg(!(RT_OFFSETOF(VM, rem) & 31), ("off=%#x\n", RT_OFFSETOF(VM, rem)));
+#if defined(DEBUG) && !defined(RT_OS_SOLARIS) /// @todo fix the solaris math stuff.
+    Assert(!testmath());
+#endif
+    ASSERT_STRUCT_TABLE(Misc);
+    ASSERT_STRUCT_TABLE(TLB);
+    ASSERT_STRUCT_TABLE(SegmentCache);
+    ASSERT_STRUCT_TABLE(XMMReg);
+    ASSERT_STRUCT_TABLE(MMXReg);
+    ASSERT_STRUCT_TABLE(float_status);
+    ASSERT_STRUCT_TABLE(float32u);
+    ASSERT_STRUCT_TABLE(float64u);
+    ASSERT_STRUCT_TABLE(floatx80u);
+    ASSERT_STRUCT_TABLE(CPUState);
+
+    /*
+     * Init some internal data members.
+     */
+    pVM->rem.s.offVM = RT_OFFSETOF(VM, rem.s);
+    pVM->rem.s.Env.pVM = pVM;
+#ifdef CPU_RAW_MODE_INIT
+    pVM->rem.s.state |= CPU_RAW_MODE_INIT;
+#endif
+
+    /* ctx. */
+    int rc = CPUMQueryGuestCtxPtr(pVM, &pVM->rem.s.pCtx);
+    if (VBOX_FAILURE(rc))
+    {
+        AssertMsgFailed(("Failed to obtain guest ctx pointer. rc=%Vrc\n", rc));
+        return rc;
+    }
+    AssertMsg(MMR3PhysGetRamSize(pVM) == 0, ("Init order has changed! REM depends on notification about ALL physical memory registrations\n"));
+
+    /* ignore all notifications */
+    pVM->rem.s.fIgnoreAll = true;
+
+    /*
+     * Init the recompiler.
+     */
+    if (!cpu_x86_init(&pVM->rem.s.Env))
+    {
+        AssertMsgFailed(("cpu_x86_init failed - impossible!\n"));
+        return VERR_GENERAL_FAILURE;
+    }
+    CPUMGetGuestCpuId(pVM,          1, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext_features, &pVM->rem.s.Env.cpuid_features);
+    CPUMGetGuestCpuId(pVM, 0x80000001, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext3_features, &pVM->rem.s.Env.cpuid_ext2_features);
+
+    /* allocate code buffer for single instruction emulation. */
+    pVM->rem.s.Env.cbCodeBuffer = 4096;
+    pVM->rem.s.Env.pvCodeBuffer = RTMemExecAlloc(pVM->rem.s.Env.cbCodeBuffer);
+    AssertMsgReturn(pVM->rem.s.Env.pvCodeBuffer, ("Failed to allocate code buffer!\n"), VERR_NO_MEMORY);
+
+    /* finally, set the cpu_single_env global. */
+    cpu_single_env = &pVM->rem.s.Env;
+
+    /* Nothing is pending by default */
+    pVM->rem.s.u32PendingInterrupt = REM_NO_PENDING_IRQ;
+
+    /*
+     * Register ram types.
+     */
+    pVM->rem.s.iMMIOMemType    = cpu_register_io_memory(-1, g_apfnMMIORead, g_apfnMMIOWrite, pVM);
+    AssertReleaseMsg(pVM->rem.s.iMMIOMemType >= 0, ("pVM->rem.s.iMMIOMemType=%d\n", pVM->rem.s.iMMIOMemType));
+    pVM->rem.s.iHandlerMemType = cpu_register_io_memory(-1, g_apfnHandlerRead, g_apfnHandlerWrite, pVM);
+    AssertReleaseMsg(pVM->rem.s.iHandlerMemType >= 0, ("pVM->rem.s.iHandlerMemType=%d\n", pVM->rem.s.iHandlerMemType));
+    Log2(("REM: iMMIOMemType=%d iHandlerMemType=%d\n", pVM->rem.s.iMMIOMemType, pVM->rem.s.iHandlerMemType));
+
+    /* stop ignoring. */
+    pVM->rem.s.fIgnoreAll = false;
+
+    /*
+     * Register the saved state data unit.
+     */
+    rc = SSMR3RegisterInternal(pVM, "rem", 1, REM_SAVED_STATE_VERSION, sizeof(uint32_t) * 10,
+                               NULL, remR3Save, NULL,
+                               NULL, remR3Load, NULL);
+    if (VBOX_FAILURE(rc))
+        return rc;
+
+#if defined(VBOX_WITH_DEBUGGER) && !(defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64))
+    /*
+     * Debugger commands.
+     */
+    static bool fRegisteredCmds = false;
+    if (!fRegisteredCmds)
+    {
+        int rc = DBGCRegisterCommands(&g_aCmds[0], ELEMENTS(g_aCmds));
+        if (VBOX_SUCCESS(rc))
+            fRegisteredCmds = true;
+    }
+#endif
+
+#ifdef VBOX_WITH_STATISTICS
+    /*
+     * Statistics.
+     */
+    STAM_REG(pVM, &gStatExecuteSingleInstr, STAMTYPE_PROFILE, "/PROF/REM/SingleInstr",STAMUNIT_TICKS_PER_CALL, "Profiling single instruction emulation.");
+    STAM_REG(pVM, &gStatCompilationQEmu,    STAMTYPE_PROFILE, "/PROF/REM/Compile",    STAMUNIT_TICKS_PER_CALL, "Profiling QEmu compilation.");
+    STAM_REG(pVM, &gStatRunCodeQEmu,        STAMTYPE_PROFILE, "/PROF/REM/Runcode",    STAMUNIT_TICKS_PER_CALL, "Profiling QEmu code execution.");
+    STAM_REG(pVM, &gStatTotalTimeQEmu,      STAMTYPE_PROFILE, "/PROF/REM/Emulate",    STAMUNIT_TICKS_PER_CALL, "Profiling code emulation.");
+    STAM_REG(pVM, &gStatTimers,             STAMTYPE_PROFILE, "/PROF/REM/Timers",     STAMUNIT_TICKS_PER_CALL, "Profiling timer scheduling.");
+    STAM_REG(pVM, &gStatTBLookup,           STAMTYPE_PROFILE, "/PROF/REM/TBLookup",   STAMUNIT_TICKS_PER_CALL, "Profiling timer scheduling.");
+    STAM_REG(pVM, &gStatIRQ,                STAMTYPE_PROFILE, "/PROF/REM/IRQ",        STAMUNIT_TICKS_PER_CALL, "Profiling timer scheduling.");
+    STAM_REG(pVM, &gStatRawCheck,           STAMTYPE_PROFILE, "/PROF/REM/RawCheck",   STAMUNIT_TICKS_PER_CALL, "Profiling timer scheduling.");
+    STAM_REG(pVM, &gStatMemRead,            STAMTYPE_PROFILE, "/PROF/REM/MemRead",    STAMUNIT_TICKS_PER_CALL, "Profiling memory access.");
+    STAM_REG(pVM, &gStatMemWrite,           STAMTYPE_PROFILE, "/PROF/REM/MemWrite",   STAMUNIT_TICKS_PER_CALL, "Profiling memory access.");
+    STAM_REG(pVM, &gStatHCVirt2GCPhys,      STAMTYPE_PROFILE, "/PROF/REM/HCVirt2GCPhys", STAMUNIT_TICKS_PER_CALL, "Profiling memory convertion.");
+    STAM_REG(pVM, &gStatGCPhys2HCVirt,      STAMTYPE_PROFILE, "/PROF/REM/GCPhys2HCVirt", STAMUNIT_TICKS_PER_CALL, "Profiling memory convertion.");
+
+    STAM_REG(pVM, &gStatCpuGetTSC,          STAMTYPE_COUNTER, "/REM/CpuGetTSC",         STAMUNIT_OCCURENCES,     "cpu_get_tsc calls");
+
+    STAM_REG(pVM, &gStatRefuseTFInhibit,    STAMTYPE_COUNTER, "/REM/Refuse/TFInibit", STAMUNIT_OCCURENCES,     "Raw mode refused because of TF or irq inhibit");
+    STAM_REG(pVM, &gStatRefuseVM86,         STAMTYPE_COUNTER, "/REM/Refuse/VM86",     STAMUNIT_OCCURENCES,     "Raw mode refused because of VM86");
+    STAM_REG(pVM, &gStatRefusePaging,       STAMTYPE_COUNTER, "/REM/Refuse/Paging",   STAMUNIT_OCCURENCES,     "Raw mode refused because of disabled paging/pm");
+    STAM_REG(pVM, &gStatRefusePAE,          STAMTYPE_COUNTER, "/REM/Refuse/PAE",      STAMUNIT_OCCURENCES,     "Raw mode refused because of PAE");
+    STAM_REG(pVM, &gStatRefuseIOPLNot0,     STAMTYPE_COUNTER, "/REM/Refuse/IOPLNot0", STAMUNIT_OCCURENCES,     "Raw mode refused because of IOPL != 0");
+    STAM_REG(pVM, &gStatRefuseIF0,          STAMTYPE_COUNTER, "/REM/Refuse/IF0",      STAMUNIT_OCCURENCES,     "Raw mode refused because of IF=0");
+    STAM_REG(pVM, &gStatRefuseCode16,       STAMTYPE_COUNTER, "/REM/Refuse/Code16",   STAMUNIT_OCCURENCES,     "Raw mode refused because of 16 bit code");
+    STAM_REG(pVM, &gStatRefuseWP0,          STAMTYPE_COUNTER, "/REM/Refuse/WP0",      STAMUNIT_OCCURENCES,     "Raw mode refused because of WP=0");
+    STAM_REG(pVM, &gStatRefuseRing1or2,     STAMTYPE_COUNTER, "/REM/Refuse/Ring1or2", STAMUNIT_OCCURENCES,     "Raw mode refused because of ring 1/2 execution");
+    STAM_REG(pVM, &gStatRefuseCanExecute,   STAMTYPE_COUNTER, "/REM/Refuse/CanExecuteRaw", STAMUNIT_OCCURENCES,     "Raw mode refused because of cCanExecuteRaw");
+    STAM_REG(pVM, &gStatFlushTBs,           STAMTYPE_COUNTER, "/REM/FlushTB",         STAMUNIT_OCCURENCES,     "Number of TB flushes");
+
+    STAM_REG(pVM, &gStatREMGDTChange,       STAMTYPE_COUNTER, "/REM/Change/GDTBase",   STAMUNIT_OCCURENCES,     "GDT base changes");
+    STAM_REG(pVM, &gStatREMLDTRChange,      STAMTYPE_COUNTER, "/REM/Change/LDTR",      STAMUNIT_OCCURENCES,     "LDTR changes");
+    STAM_REG(pVM, &gStatREMIDTChange,       STAMTYPE_COUNTER, "/REM/Change/IDTBase",   STAMUNIT_OCCURENCES,     "IDT base changes");
+    STAM_REG(pVM, &gStatREMTRChange,        STAMTYPE_COUNTER, "/REM/Change/TR",        STAMUNIT_OCCURENCES,     "TR selector changes");
+
+    STAM_REG(pVM, &gStatSelOutOfSync[0],    STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/ES",        STAMUNIT_OCCURENCES,     "ES out of sync");
+    STAM_REG(pVM, &gStatSelOutOfSync[1],    STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/CS",        STAMUNIT_OCCURENCES,     "CS out of sync");
+    STAM_REG(pVM, &gStatSelOutOfSync[2],    STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/SS",        STAMUNIT_OCCURENCES,     "SS out of sync");
+    STAM_REG(pVM, &gStatSelOutOfSync[3],    STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/DS",        STAMUNIT_OCCURENCES,     "DS out of sync");
+    STAM_REG(pVM, &gStatSelOutOfSync[4],    STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/FS",        STAMUNIT_OCCURENCES,     "FS out of sync");
+    STAM_REG(pVM, &gStatSelOutOfSync[5],    STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/GS",        STAMUNIT_OCCURENCES,     "GS out of sync");
+
+    STAM_REG(pVM, &gStatSelOutOfSyncStateBack[0],    STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/ES",        STAMUNIT_OCCURENCES,     "ES out of sync");
+    STAM_REG(pVM, &gStatSelOutOfSyncStateBack[1],    STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/CS",        STAMUNIT_OCCURENCES,     "CS out of sync");
+    STAM_REG(pVM, &gStatSelOutOfSyncStateBack[2],    STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/SS",        STAMUNIT_OCCURENCES,     "SS out of sync");
+    STAM_REG(pVM, &gStatSelOutOfSyncStateBack[3],    STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/DS",        STAMUNIT_OCCURENCES,     "DS out of sync");
+    STAM_REG(pVM, &gStatSelOutOfSyncStateBack[4],    STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/FS",        STAMUNIT_OCCURENCES,     "FS out of sync");
+    STAM_REG(pVM, &gStatSelOutOfSyncStateBack[5],    STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/GS",        STAMUNIT_OCCURENCES,     "GS out of sync");
+
+
+#endif
+
+#ifdef DEBUG_ALL_LOGGING
+    loglevel = ~0;
+#endif
+
+    return rc;
+}
+
+
+/**
+ * Terminates the REM.
+ *
+ * Termination means cleaning up and freeing all resources;
+ * the VM itself is at this point powered off or suspended.
+ *
+ * @returns VBox status code.
+ * @param   pVM         The VM to operate on.
+ */
+REMR3DECL(int) REMR3Term(PVM pVM)
+{
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * The VM is being reset.
+ *
+ * For the REM component this means calling cpu_reset() and
+ * reinitializing some state variables.
+ *
+ * @param   pVM     VM handle.
+ */
+REMR3DECL(void) REMR3Reset(PVM pVM)
+{
+    /*
+     * Reset the REM cpu.
+     */
+    pVM->rem.s.fIgnoreAll = true;
+    cpu_reset(&pVM->rem.s.Env);
+    pVM->rem.s.cInvalidatedPages = 0;
+    pVM->rem.s.fIgnoreAll = false;
+
+    /* Clear raw ring 0 init state */
+    pVM->rem.s.Env.state &= ~CPU_RAW_RING0;
+}
+
+
+/**
+ * Execute state save operation.
+ *
+ * @returns VBox status code.
+ * @param   pVM             VM Handle.
+ * @param   pSSM            SSM operation handle.
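+ *
+ * Unit layout (current version): Env.hflags, a ~0 separator, the raw ring-0
+ * flag, u32PendingInterrupt and a ~0 terminator; remR3Load reads them back in
+ * the same order.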
+ */
+static DECLCALLBACK(int) remR3Save(PVM pVM, PSSMHANDLE pSSM)
+{
+    LogFlow(("remR3Save:\n"));
+
+    /*
+     * Save the required CPU Env bits.
+     * (Not much because we're never in REM when doing the save.)
+     */
+    PREM pRem = &pVM->rem.s;
+    Assert(!pRem->fInREM);
+    SSMR3PutU32(pSSM,   pRem->Env.hflags);
+    SSMR3PutU32(pSSM,   ~0);            /* separator */
+
+    /* Remember if we've entered raw mode (vital for ring 1 checks in e.g. iret emulation). */
+    SSMR3PutU32(pSSM, !!(pRem->Env.state & CPU_RAW_RING0));
+    SSMR3PutUInt(pSSM, pVM->rem.s.u32PendingInterrupt);
+
+    return SSMR3PutU32(pSSM, ~0);       /* terminator */
+}
+
+
+/**
+ * Execute state load operation.
+ *
+ * @returns VBox status code.
+ * @param   pVM             VM Handle.
+ * @param   pSSM            SSM operation handle.
+ * @param   u32Version      Data layout version.
+ */
+static DECLCALLBACK(int) remR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t u32Version)
+{
+    uint32_t u32Dummy;
+    uint32_t fRawRing0 = false;
+    LogFlow(("remR3Load:\n"));
+
+    /*
+     * Validate version.
+     */
+    if (    u32Version != REM_SAVED_STATE_VERSION
+        &&  u32Version != REM_SAVED_STATE_VERSION_VER1_6)
+    {
+        AssertMsgFailed(("remR3Load: Invalid version u32Version=%d!\n", u32Version));
+        return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
+    }
+
+    /*
+     * Do a reset to be on the safe side...
+     */
+    REMR3Reset(pVM);
+
+    /*
+     * Ignore all ignorable notifications.
+     * (Not doing this will cause serious trouble.)
+     */
+    pVM->rem.s.fIgnoreAll = true;
+
+    /*
+     * Load the required CPU Env bits.
     * (Not much because we're never in REM when doing the load.)
+     */
+    PREM pRem = &pVM->rem.s;
+    Assert(!pRem->fInREM);
+    SSMR3GetU32(pSSM,   &pRem->Env.hflags);
+    if (u32Version == REM_SAVED_STATE_VERSION_VER1_6)
+    {
+        /* Redundant REM CPU state has to be loaded, but can be ignored. */
+        CPUX86State_Ver16 temp;
+        SSMR3GetMem(pSSM,   &temp, RT_OFFSETOF(CPUX86State_Ver16, jmp_env));
+    }
+
+    uint32_t u32Sep;
+    int rc = SSMR3GetU32(pSSM, &u32Sep);            /* separator */
+    if (VBOX_FAILURE(rc))
+        return rc;
+    if (u32Sep != ~0U)
+    {
+        AssertMsgFailed(("u32Sep=%#x\n", u32Sep));
+        return VERR_SSM_DATA_UNIT_FORMAT_CHANGED;
+    }
+
+    /* Remember if we've entered raw mode (vital for ring 1 checks in e.g. iret emulation). */
+    SSMR3GetUInt(pSSM, &fRawRing0);
+    if (fRawRing0)
+        pRem->Env.state |= CPU_RAW_RING0;
+
+    if (u32Version == REM_SAVED_STATE_VERSION_VER1_6)
+    {
+        /*
+         * Load the REM stuff.
+         */
+        rc = SSMR3GetUInt(pSSM, &pRem->cInvalidatedPages);
+        if (VBOX_FAILURE(rc))
+            return rc;
+        if (pRem->cInvalidatedPages > ELEMENTS(pRem->aGCPtrInvalidatedPages))
+        {
+            AssertMsgFailed(("cInvalidatedPages=%#x\n", pRem->cInvalidatedPages));
+            return VERR_SSM_DATA_UNIT_FORMAT_CHANGED;
+        }
+        unsigned i;
+        for (i = 0; i < pRem->cInvalidatedPages; i++)
+            SSMR3GetGCPtr(pSSM, &pRem->aGCPtrInvalidatedPages[i]);
+    }
+
+    rc = SSMR3GetUInt(pSSM, &pVM->rem.s.u32PendingInterrupt);
+    if (VBOX_FAILURE(rc))
+        return rc;
+
+    /* check the terminator. */
+    rc = SSMR3GetU32(pSSM, &u32Sep);
+    if (VBOX_FAILURE(rc))
+        return rc;
+    if (u32Sep != ~0U)
+    {
+        AssertMsgFailed(("u32Sep=%#x (term)\n", u32Sep));
+        return VERR_SSM_DATA_UNIT_FORMAT_CHANGED;
+    }
+
+    /*
+     * Get the CPUID features.
+     */
+    CPUMGetGuestCpuId(pVM,          1, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext_features, &pVM->rem.s.Env.cpuid_features);
+    CPUMGetGuestCpuId(pVM, 0x80000001, &u32Dummy, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext2_features);
+
+    /*
+     * The state is loaded; flush the TLB before resuming execution.
+     */
+    tlb_flush(&pRem->Env, 1);
+
+    /*
+     * Stop ignoring ignorable notifications.
+     */
+    pVM->rem.s.fIgnoreAll = false;
+
+    /*
+     * Sync the whole CPU state when executing code in the recompiler.
+     */
+    CPUMSetChangedFlags(pVM, CPUM_CHANGED_ALL);
+    return VINF_SUCCESS;
+}
+
+
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM_RUN
+
+/**
+ * Single steps an instruction in recompiled mode.
+ *
+ * Before calling this function the REM state needs to be in sync with
+ * the VM. Call REMR3State() to perform the sync. It's only necessary
+ * (and permitted) to sync at the first call to REMR3Step()/REMR3Run()
+ * and after calling REMR3StateBack().
+ *
+ * @returns VBox status code.
+ *
+ * @param   pVM         VM Handle.
+ */
+REMR3DECL(int) REMR3Step(PVM pVM)
+{
+    /*
+     * Lock the REM - we don't want anyone interrupting us
+     * while stepping - and enable single stepping. We also ignore
+     * pending interrupts and suchlike.
+     */
+    int interrupt_request = pVM->rem.s.Env.interrupt_request;
+    Assert(!(interrupt_request & ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXIT | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER  | CPU_INTERRUPT_EXTERNAL_HARD | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_TIMER)));
+    pVM->rem.s.Env.interrupt_request = 0;
+    cpu_single_step(&pVM->rem.s.Env, 1);
+
+    /*
+     * If we're standing at a breakpoint, it has to be disabled before we start stepping.
+     */
+    RTGCPTR     GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base;
+    bool        fBp = !cpu_breakpoint_remove(&pVM->rem.s.Env, GCPtrPC);
+
+    /*
+     * Execute and handle the return code.
+     * We execute without enabling the cpu tick, so on success we'll
+     * just flip it on and off to make sure it moves.
+     */
+    int rc = cpu_exec(&pVM->rem.s.Env);
+    if (rc == EXCP_DEBUG)
+    {
+        TMCpuTickResume(pVM);
+        TMCpuTickPause(pVM);
+        TMVirtualResume(pVM);
+        TMVirtualPause(pVM);
+        rc = VINF_EM_DBG_STEPPED;
+    }
+    else
+    {
+        AssertMsgFailed(("Damn, this shouldn't happen! cpu_exec returned %d while singlestepping\n", rc));
+        switch (rc)
+        {
+            case EXCP_INTERRUPT:    rc = VINF_SUCCESS; break;
+            case EXCP_HLT:
+            case EXCP_HALTED:       rc = VINF_EM_HALT; break;
+            case EXCP_RC:
+                rc = pVM->rem.s.rc;
+                pVM->rem.s.rc = VERR_INTERNAL_ERROR;
+                break;
+            default:
+                AssertReleaseMsgFailed(("This really shouldn't happen, rc=%d!\n", rc));
+                rc = VERR_INTERNAL_ERROR;
+                break;
+        }
+    }
+
+    /*
+     * Restore the stuff we changed to prevent interruption.
+     * Unlock the REM.
+     */
+    if (fBp)
+    {
+        int rc2 = cpu_breakpoint_insert(&pVM->rem.s.Env, GCPtrPC);
+        Assert(rc2 == 0); NOREF(rc2);
+    }
+    cpu_single_step(&pVM->rem.s.Env, 0);
+    pVM->rem.s.Env.interrupt_request = interrupt_request;
+
+    return rc;
+}
+
+
+/**
+ * Set a breakpoint using the REM facilities.
+ *
+ * @returns VBox status code.
+ * @param   pVM             The VM handle.
+ * @param   Address         The breakpoint address.
+ * @thread  The emulation thread.
+ */
+REMR3DECL(int) REMR3BreakpointSet(PVM pVM, RTGCUINTPTR Address)
+{
+    VM_ASSERT_EMT(pVM);
+    if (!cpu_breakpoint_insert(&pVM->rem.s.Env, Address))
+    {
+        LogFlow(("REMR3BreakpointSet: Address=%VGv\n", Address));
+        return VINF_SUCCESS;
+    }
+    LogFlow(("REMR3BreakpointSet: Address=%VGv - failed!\n", Address));
+    return VERR_REM_NO_MORE_BP_SLOTS;
+}
+
+
+/**
+ * Clears a breakpoint set by REMR3BreakpointSet().
+ *
+ * @returns VBox status code.
+ * @param   pVM             The VM handle.
+ * @param   Address         The breakpoint address.
+ * @thread  The emulation thread.
+ */
+REMR3DECL(int) REMR3BreakpointClear(PVM pVM, RTGCUINTPTR Address)
+{
+    VM_ASSERT_EMT(pVM);
+    if (!cpu_breakpoint_remove(&pVM->rem.s.Env, Address))
+    {
+        LogFlow(("REMR3BreakpointClear: Address=%VGv\n", Address));
+        return VINF_SUCCESS;
+    }
+    LogFlow(("REMR3BreakpointClear: Address=%VGv - not found!\n", Address));
+    return VERR_REM_BP_NOT_FOUND;
+}
+
+
+/**
+ * Emulate an instruction.
+ *
+ * This function executes one instruction without letting anyone
+ * interrupt it. This is intended for being called while being in
+ * raw mode and thus will take care of all the state syncing between
+ * REM and the rest.
+ *
+ * @returns VBox status code.
+ * @param   pVM         VM handle.
+ */
+REMR3DECL(int) REMR3EmulateInstruction(PVM pVM)
+{
+    Log2(("REMR3EmulateInstruction: (cs:eip=%04x:%08x)\n", CPUMGetGuestCS(pVM), CPUMGetGuestEIP(pVM)));
+
+    /* Make sure this flag is set; we might never execute remR3CanExecuteRaw in the AMD-V case.
+     * CPU_RAW_HWACC makes sure we never execute interrupt handlers in the recompiler.
+     */
+    if (HWACCMIsEnabled(pVM))
+        pVM->rem.s.Env.state |= CPU_RAW_HWACC;
+
+    /*
+     * Sync the state and enable single instruction / single stepping.
+     */
+    int rc = REMR3State(pVM, false /* no need to flush the TBs; we always compile. */);
+    if (VBOX_SUCCESS(rc))
+    {
+        int interrupt_request = pVM->rem.s.Env.interrupt_request;
+        Assert(!(interrupt_request & ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXIT | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER | CPU_INTERRUPT_EXTERNAL_HARD | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_TIMER)));
+        Assert(!pVM->rem.s.Env.singlestep_enabled);
+#if 1
+
+        /*
+         * Now we set the execute single instruction flag and enter the cpu_exec loop.
+         */
+        TMNotifyStartOfExecution(pVM);
+        pVM->rem.s.Env.interrupt_request = CPU_INTERRUPT_SINGLE_INSTR;
+        rc = cpu_exec(&pVM->rem.s.Env);
+        TMNotifyEndOfExecution(pVM);
+        switch (rc)
+        {
+            /*
+             * Executed without anything out of the way happening.
+             */
+            case EXCP_SINGLE_INSTR:
+                rc = VINF_EM_RESCHEDULE;
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_SINGLE_INSTR\n"));
+                break;
+
+            /*
+             * If we take a trap or start servicing a pending interrupt, we might end up here.
+             * (Timer thread or some other thread wishing EMT's attention.)
+             */
+            case EXCP_INTERRUPT:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_INTERRUPT\n"));
+                rc = VINF_EM_RESCHEDULE;
+                break;
+
+            /*
+             * Single step, we assume!
+             * If there was a breakpoint there we're fucked now.
+             */
+            case EXCP_DEBUG:
+            {
+                /* breakpoint or single step? */
+                RTGCPTR     GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base;
+                int         iBP;
+                rc = VINF_EM_DBG_STEPPED;
+                for (iBP = 0; iBP < pVM->rem.s.Env.nb_breakpoints; iBP++)
+                    if (pVM->rem.s.Env.breakpoints[iBP] == GCPtrPC)
+                    {
+                        rc = VINF_EM_DBG_BREAKPOINT;
+                        break;
+                    }
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_DEBUG rc=%Vrc iBP=%d GCPtrPC=%VGv\n", rc, iBP, GCPtrPC));
+                break;
+            }
+
+            /*
+             * hlt instruction.
+             */
+            case EXCP_HLT:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_HLT\n"));
+                rc = VINF_EM_HALT;
+                break;
+
+            /*
+             * The VM has halted.
+             */
+            case EXCP_HALTED:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_HALTED\n"));
+                rc = VINF_EM_HALT;
+                break;
+
+            /*
+             * Switch to RAW-mode.
+             */
+            case EXCP_EXECUTE_RAW:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_EXECUTE_RAW\n"));
+                rc = VINF_EM_RESCHEDULE_RAW;
+                break;
+
+            /*
+             * Switch to hardware accelerated RAW-mode.
+             */
+            case EXCP_EXECUTE_HWACC:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_EXECUTE_HWACC\n"));
+                rc = VINF_EM_RESCHEDULE_HWACC;
+                break;
+
+            /*
+             * An EM RC was raised (VMR3Reset/Suspend/PowerOff/some-fatal-error).
+             */
+            case EXCP_RC:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_RC\n"));
+                rc = pVM->rem.s.rc;
+                pVM->rem.s.rc = VERR_INTERNAL_ERROR;
+                break;
+
+            /*
+             * Figure out the rest when they arrive....
+             */
+            default:
+                AssertMsgFailed(("rc=%d\n", rc));
+                Log2(("REMR3EmulateInstruction: cpu_exec -> %d\n", rc));
+                rc = VINF_EM_RESCHEDULE;
+                break;
+        }
+
+        /*
+         * Switch back the state.
+         */
+#else
+        pVM->rem.s.Env.interrupt_request = 0;
+        cpu_single_step(&pVM->rem.s.Env, 1);
+
+        /*
+         * Execute and handle the return code.
+         * We execute without enabling the cpu tick, so on success we'll
+         * just flip it on and off to make sure it moves.
+         *
+         * (We do not use emulate_single_instr() because that doesn't enter in the
+         * right way and will cause serious trouble if a longjmp is attempted.)
+         */
+# ifdef DEBUG_bird
+        remR3DisasInstr(&pVM->rem.s.Env, 1, "REMR3EmulateInstruction");
+# endif
+        TMNotifyStartOfExecution(pVM);
+        int cTimesMax = 16384;
+        uint32_t eip = pVM->rem.s.Env.eip;
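+        /* Keep retrying while EIP hasn't advanced and cpu_exec keeps returning
+         * EXCP_DEBUG or EXCP_EXECUTE_RAW, giving up after cTimesMax attempts. */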
+        do
+        {
+            rc = cpu_exec(&pVM->rem.s.Env);
+
+        } while (   eip == pVM->rem.s.Env.eip
+                 && (rc == EXCP_DEBUG || rc == EXCP_EXECUTE_RAW)
+                 && --cTimesMax > 0);
+        TMNotifyEndOfExecution(pVM);
+        switch (rc)
+        {
+            /*
+             * Single step, we assume!
+             * If there was a breakpoint there we're fucked now.
+             */
+            case EXCP_DEBUG:
+            {
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_DEBUG\n"));
+                rc = VINF_EM_RESCHEDULE;
+                break;
+            }
+
+            /*
+             * We cannot be interrupted!
+             */
+            case EXCP_INTERRUPT:
+                AssertMsgFailed(("Shouldn't happen! Everything was locked!\n"));
+                rc = VERR_INTERNAL_ERROR;
+                break;
+
+            /*
+             * hlt instruction.
+             */
+            case EXCP_HLT:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_HLT\n"));
+                rc = VINF_EM_HALT;
+                break;
+
+            /*
+             * The VM has halted.
+             */
+            case EXCP_HALTED:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_HALTED\n"));
+                rc = VINF_EM_HALT;
+                break;
+
+            /*
+             * Switch to RAW-mode.
+             */
+            case EXCP_EXECUTE_RAW:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_EXECUTE_RAW\n"));
+                rc = VINF_EM_RESCHEDULE_RAW;
+                break;
+
+            /*
+             * Switch to hardware accelerated RAW-mode.
+             */
+            case EXCP_EXECUTE_HWACC:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_EXECUTE_HWACC\n"));
+                rc = VINF_EM_RESCHEDULE_HWACC;
+                break;
+
+            /*
+             * An EM RC was raised (VMR3Reset/Suspend/PowerOff/some-fatal-error).
+             */
+            case EXCP_RC:
+                Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_RC rc=%Vrc\n", pVM->rem.s.rc));
+                rc = pVM->rem.s.rc;
+                pVM->rem.s.rc = VERR_INTERNAL_ERROR;
+                break;
+
+            /*
+             * Figure out the rest when they arrive....
+             */
+            default:
+                AssertMsgFailed(("rc=%d\n", rc));
+                Log2(("REMR3EmulateInstruction: cpu_exec -> %d\n", rc));
+                rc = VINF_SUCCESS;
+                break;
+        }
+
+        /*
+         * Switch back the state.
+         */
+        cpu_single_step(&pVM->rem.s.Env, 0);
+#endif
+        pVM->rem.s.Env.interrupt_request = interrupt_request;
+        int rc2 = REMR3StateBack(pVM);
+        AssertRC(rc2);
+    }
+
+    Log2(("REMR3EmulateInstruction: returns %Vrc (cs:eip=%04x:%VGv)\n",
+          rc, pVM->rem.s.Env.segs[R_CS].selector, pVM->rem.s.Env.eip));
+    return rc;
+}
+
+
+/**
+ * Runs code in recompiled mode.
+ *
+ * Before calling this function the REM state needs to be in sync with
+ * the VM. Call REMR3State() to perform the sync. It's only necessary
+ * (and permitted) to sync at the first call to REMR3Step()/REMR3Run()
+ * and after calling REMR3StateBack().
+ *
+ * @returns VBox status code.
+ *
+ * @param   pVM         VM Handle.
+ */
+REMR3DECL(int) REMR3Run(PVM pVM)
+{
+    Log2(("REMR3Run: (cs:eip=%04x:%VGv)\n", pVM->rem.s.Env.segs[R_CS].selector, pVM->rem.s.Env.eip));
+    Assert(pVM->rem.s.fInREM);
+
+    TMNotifyStartOfExecution(pVM);
+    int rc = cpu_exec(&pVM->rem.s.Env);
+    TMNotifyEndOfExecution(pVM);
+    switch (rc)
+    {
+        /*
+         * This happens when the execution was interrupted
+         * by an external event, like pending timers.
+         */
+        case EXCP_INTERRUPT:
+            Log2(("REMR3Run: cpu_exec -> EXCP_INTERRUPT\n"));
+            rc = VINF_SUCCESS;
+            break;
+
+        /*
+         * hlt instruction.
+         */
+        case EXCP_HLT:
+            Log2(("REMR3Run: cpu_exec -> EXCP_HLT\n"));
+            rc = VINF_EM_HALT;
+            break;
+
+        /*
+         * The VM has halted.
+         */
+        case EXCP_HALTED:
+            Log2(("REMR3Run: cpu_exec -> EXCP_HALTED\n"));
+            rc = VINF_EM_HALT;
+            break;
+
+        /*
+         * Breakpoint/single step.
+         */
+        case EXCP_DEBUG:
+        {
+#if 0//def DEBUG_bird
+            static int iBP = 0;
+            printf("howdy, breakpoint! iBP=%d\n", iBP);
+            switch (iBP)
+            {
+                case 0:
+                    cpu_breakpoint_remove(&pVM->rem.s.Env, pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base);
+                    pVM->rem.s.Env.state |= CPU_EMULATE_SINGLE_STEP;
+                    //pVM->rem.s.Env.interrupt_request = 0;
+                    //pVM->rem.s.Env.exception_index = -1;
+                    //g_fInterruptDisabled = 1;
+                    rc = VINF_SUCCESS;
+                    asm("int3");
+                    break;
+                default:
+                    asm("int3");
+                    break;
+            }
+            iBP++;
+#else
+            /* breakpoint or single step? */
+            RTGCPTR     GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base;
+            int         iBP;
+            rc = VINF_EM_DBG_STEPPED;
+            for (iBP = 0; iBP < pVM->rem.s.Env.nb_breakpoints; iBP++)
+                if (pVM->rem.s.Env.breakpoints[iBP] == GCPtrPC)
+                {
+                    rc = VINF_EM_DBG_BREAKPOINT;
+                    break;
+                }
+            Log2(("REMR3Run: cpu_exec -> EXCP_DEBUG rc=%Vrc iBP=%d GCPtrPC=%VGv\n", rc, iBP, GCPtrPC));
+#endif
+            break;
+        }
+
+        /*
+         * Switch to RAW-mode.
+         */
+        case EXCP_EXECUTE_RAW:
+            Log2(("REMR3Run: cpu_exec -> EXCP_EXECUTE_RAW\n"));
+            rc = VINF_EM_RESCHEDULE_RAW;
+            break;
+
+        /*
+         * Switch to hardware accelerated RAW-mode.
+         */
+        case EXCP_EXECUTE_HWACC:
+            Log2(("REMR3Run: cpu_exec -> EXCP_EXECUTE_HWACC\n"));
+            rc = VINF_EM_RESCHEDULE_HWACC;
+            break;
+
+        /*
+         * An EM RC was raised (VMR3Reset/Suspend/PowerOff/some-fatal-error).
+         */
+        case EXCP_RC:
+            Log2(("REMR3Run: cpu_exec -> EXCP_RC rc=%Vrc\n", pVM->rem.s.rc));
+            rc = pVM->rem.s.rc;
+            pVM->rem.s.rc = VERR_INTERNAL_ERROR;
+            break;
+
+        /*
+         * Figure out the rest when they arrive....
+         */
+        default:
+            AssertMsgFailed(("rc=%d\n", rc));
+            Log2(("REMR3Run: cpu_exec -> %d\n", rc));
+            rc = VINF_SUCCESS;
+            break;
+    }
+
+    Log2(("REMR3Run: returns %Vrc (cs:eip=%04x:%VGv)\n", rc, pVM->rem.s.Env.segs[R_CS].selector, pVM->rem.s.Env.eip));
+    return rc;
+}
+
+
+/**
+ * Check if the cpu state is suitable for Raw execution.
+ *
+ * @returns boolean
+ * @param   env         The CPU env struct.
+ * @param   eip         The EIP to check this for (might differ from env->eip).
+ * @param   fFlags      hflags OR'ed with IOPL, TF and VM from eflags.
+ * @param   piException Stores EXCP_EXECUTE_RAW/HWACC in case raw mode is supported in this context
+ *
+ * @remark  This function must be kept in perfect sync with the scheduler in EM.cpp!
+ */
+bool remR3CanExecuteRaw(CPUState *env, RTGCPTR eip, unsigned fFlags, int *piException)
+{
+    /* !!! THIS MUST BE IN SYNC WITH emR3Reschedule !!! */
+    /* !!! THIS MUST BE IN SYNC WITH emR3Reschedule !!! */
+    /* !!! THIS MUST BE IN SYNC WITH emR3Reschedule !!! */
+
+    /* Update counter. */
+    env->pVM->rem.s.cCanExecuteRaw++;
+
+    if (HWACCMIsEnabled(env->pVM))
+    {
+        env->state |= CPU_RAW_HWACC;
+
+        /*
+         * Create partial context for HWACCMR3CanExecuteGuest
+         */
+        CPUMCTX Ctx;
+        Ctx.cr0            = env->cr[0];
+        Ctx.cr3            = env->cr[3];
+        Ctx.cr4            = env->cr[4];
+
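+        /* Note: QEMU keeps the descriptor's second dword in the 'flags' field, so shifting
+         * right by 8 and masking with 0xF0FF recovers the VBox hidden attribute format
+         * (the same conversion as used in REMR3StateBack). */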
+        Ctx.tr             = env->tr.selector;
+        Ctx.trHid.u64Base  = env->tr.base;
+        Ctx.trHid.u32Limit = env->tr.limit;
+        Ctx.trHid.Attr.u   = (env->tr.flags >> 8) & 0xF0FF;
+
+        Ctx.idtr.cbIdt     = env->idt.limit;
+        Ctx.idtr.pIdt      = env->idt.base;
+
+        Ctx.eflags.u32     = env->eflags;
+
+        Ctx.cs             = env->segs[R_CS].selector;
+        Ctx.csHid.u64Base  = env->segs[R_CS].base;
+        Ctx.csHid.u32Limit = env->segs[R_CS].limit;
+        Ctx.csHid.Attr.u   = (env->segs[R_CS].flags >> 8) & 0xF0FF;
+
+        Ctx.ds             = env->segs[R_DS].selector;
+        Ctx.dsHid.u64Base  = env->segs[R_DS].base;
+        Ctx.dsHid.u32Limit = env->segs[R_DS].limit;
+        Ctx.dsHid.Attr.u   = (env->segs[R_DS].flags >> 8) & 0xF0FF;
+
+        Ctx.es             = env->segs[R_ES].selector;
+        Ctx.esHid.u64Base  = env->segs[R_ES].base;
+        Ctx.esHid.u32Limit = env->segs[R_ES].limit;
+        Ctx.esHid.Attr.u   = (env->segs[R_ES].flags >> 8) & 0xF0FF;
+
+        Ctx.fs             = env->segs[R_FS].selector;
+        Ctx.fsHid.u64Base  = env->segs[R_FS].base;
+        Ctx.fsHid.u32Limit = env->segs[R_FS].limit;
+        Ctx.fsHid.Attr.u   = (env->segs[R_FS].flags >> 8) & 0xF0FF;
+
+        Ctx.gs             = env->segs[R_GS].selector;
+        Ctx.gsHid.u64Base  = env->segs[R_GS].base;
+        Ctx.gsHid.u32Limit = env->segs[R_GS].limit;
+        Ctx.gsHid.Attr.u   = (env->segs[R_GS].flags >> 8) & 0xF0FF;
+
+        Ctx.ss             = env->segs[R_SS].selector;
+        Ctx.ssHid.u64Base  = env->segs[R_SS].base;
+        Ctx.ssHid.u32Limit = env->segs[R_SS].limit;
+        Ctx.ssHid.Attr.u   = (env->segs[R_SS].flags >> 8) & 0xF0FF;
+
+        Ctx.msrEFER        = env->efer;
+
+        /* Hardware accelerated raw-mode:
+         *
+         * Typically only 32-bit protected mode code, with paging enabled, is allowed here.
+         */
+        if (HWACCMR3CanExecuteGuest(env->pVM, &Ctx) == true)
+        {
+            *piException = EXCP_EXECUTE_HWACC;
+            return true;
+        }
+        return false;
+    }
+
+    /*
+     * Here we only support 16 & 32 bit protected mode ring-3 code that has no IO privileges,
+     * or 32 bit protected mode ring-0 code.
+     *
+     * The tests are ordered by the likelihood of being true during normal execution.
+     */
+    if (fFlags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK))
+    {
+        STAM_COUNTER_INC(&gStatRefuseTFInhibit);
+        Log2(("raw mode refused: fFlags=%#x\n", fFlags));
+        return false;
+    }
+
+#ifndef VBOX_RAW_V86
+    if (fFlags & VM_MASK) {
+        STAM_COUNTER_INC(&gStatRefuseVM86);
+        Log2(("raw mode refused: VM_MASK\n"));
+        return false;
+    }
+#endif
+
+    if (env->state & CPU_EMULATE_SINGLE_INSTR)
+    {
+#ifndef DEBUG_bird
+        Log2(("raw mode refused: CPU_EMULATE_SINGLE_INSTR\n"));
+#endif
+        return false;
+    }
+
+    if (env->singlestep_enabled)
+    {
+        //Log2(("raw mode refused: Single step\n"));
+        return false;
+    }
+
+    if (env->nb_breakpoints > 0)
+    {
+        //Log2(("raw mode refused: Breakpoints\n"));
+        return false;
+    }
+
+    uint32_t u32CR0 = env->cr[0];
+    if ((u32CR0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
+    {
+        STAM_COUNTER_INC(&gStatRefusePaging);
+        //Log2(("raw mode refused: %s%s%s\n", (u32CR0 & X86_CR0_PG) ? "" : " !PG", (u32CR0 & X86_CR0_PE) ? "" : " !PE", (u32CR0 & X86_CR0_AM) ? "" : " !AM"));
+        return false;
+    }
+
+    if (env->cr[4] & CR4_PAE_MASK)
+    {
+        if (!(env->cpuid_features & X86_CPUID_FEATURE_EDX_PAE))
+        {
+            STAM_COUNTER_INC(&gStatRefusePAE);
+            return false;
+        }
+    }
+
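+    /* CPL 3 - check whether raw ring-3 execution is possible. */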
+    if (((fFlags >> HF_CPL_SHIFT) & 3) == 3)
+    {
+        if (!EMIsRawRing3Enabled(env->pVM))
+            return false;
+
+        if (!(env->eflags & IF_MASK))
+        {
+            STAM_COUNTER_INC(&gStatRefuseIF0);
+            Log2(("raw mode refused: IF (RawR3)\n"));
+            return false;
+        }
+
+        if (!(u32CR0 & CR0_WP_MASK) && EMIsRawRing0Enabled(env->pVM))
+        {
+            STAM_COUNTER_INC(&gStatRefuseWP0);
+            Log2(("raw mode refused: CR0.WP + RawR0\n"));
+            return false;
+        }
+    }
+    else
+    {
+        if (!EMIsRawRing0Enabled(env->pVM))
+            return false;
+
+        // Let's start with pure 32 bits ring 0 code first
+        if ((fFlags & (HF_SS32_MASK | HF_CS32_MASK)) != (HF_SS32_MASK | HF_CS32_MASK))
+        {
+            STAM_COUNTER_INC(&gStatRefuseCode16);
+            Log2(("raw r0 mode refused: HF_[S|C]S32_MASK fFlags=%#x\n", fFlags));
+            return false;
+        }
+
+        // Only R0
+        if (((fFlags >> HF_CPL_SHIFT) & 3) != 0)
+        {
+            STAM_COUNTER_INC(&gStatRefuseRing1or2);
+            Log2(("raw r0 mode refused: CPL %d\n", ((fFlags >> HF_CPL_SHIFT) & 3) ));
+            return false;
+        }
+
+        if (!(u32CR0 & CR0_WP_MASK))
+        {
+            STAM_COUNTER_INC(&gStatRefuseWP0);
+            Log2(("raw r0 mode refused: CR0.WP=0!\n"));
+            return false;
+        }
+
+        if (PATMIsPatchGCAddr(env->pVM, eip))
+        {
+            Log2(("raw r0 mode forced: patch code\n"));
+            *piException = EXCP_EXECUTE_RAW;
+            return true;
+        }
+
+#if !defined(VBOX_ALLOW_IF0) && !defined(VBOX_RUN_INTERRUPT_GATE_HANDLERS)
+        if (!(env->eflags & IF_MASK))
+        {
+            STAM_COUNTER_INC(&gStatRefuseIF0);
+            ////Log2(("R0: IF=0 VIF=%d %08X\n", eip, *env->pVMeflags));
+            //Log2(("RR0: Interrupts turned off; fall back to emulation\n"));
+            return false;
+        }
+#endif
+
+        env->state |= CPU_RAW_RING0;
+    }
+
+    /*
+     * Don't reschedule the first time we're called, because there might be
+     * special reasons why we're here that are not covered by the above checks.
+     */
+    if (env->pVM->rem.s.cCanExecuteRaw == 1)
+    {
+        Log2(("raw mode refused: first scheduling\n"));
+        STAM_COUNTER_INC(&gStatRefuseCanExecute);
+        return false;
+    }
+
+    Assert(PGMPhysIsA20Enabled(env->pVM));
+    *piException = EXCP_EXECUTE_RAW;
+    return true;
+}
+
+
+/**
+ * Fetches a code byte.
+ *
+ * @returns Success indicator (bool) for ease of use.
+ * @param   env         The CPU environment structure.
+ * @param   GCPtrInstr  Where to fetch code.
+ * @param   pu8Byte     Where to store the byte on success
+ */
+bool remR3GetOpcode(CPUState *env, RTGCPTR GCPtrInstr, uint8_t *pu8Byte)
+{
+    int rc = PATMR3QueryOpcode(env->pVM, GCPtrInstr, pu8Byte);
+    if (VBOX_SUCCESS(rc))
+        return true;
+    return false;
+}
+
+
+/**
+ * Flush (or invalidate if you like) page table/dir entry.
+ *
+ * (invlpg instruction; tlb_flush_page)
+ *
+ * @param   env         Pointer to cpu environment.
+ * @param   GCPtr       The virtual address which page table/dir entry should be invalidated.
+ */
+void remR3FlushPage(CPUState *env, RTGCPTR GCPtr)
+{
+    PVM pVM = env->pVM;
+
+    /*
+     * When we're replaying invlpg instructions or restoring a saved
+     * state we disable this path.
+     */
+    if (pVM->rem.s.fIgnoreInvlPg || pVM->rem.s.fIgnoreAll)
+        return;
+    Log(("remR3FlushPage: GCPtr=%VGv\n", GCPtr));
+    Assert(pVM->rem.s.fInREM || pVM->rem.s.fInStateSync);
+
+    //RAWEx_ProfileStop(env, STATS_QEMU_TOTAL);
+
+    /*
+     * Update the control registers before calling PGMFlushPage.
+     */
+    PCPUMCTX pCtx = (PCPUMCTX)pVM->rem.s.pCtx;
+    pCtx->cr0 = env->cr[0];
+    pCtx->cr3 = env->cr[3];
+    pCtx->cr4 = env->cr[4];
+
+    /*
+     * Let PGM do the rest.
+     */
+    int rc = PGMInvalidatePage(pVM, GCPtr);
+    if (VBOX_FAILURE(rc))
+    {
+        AssertMsgFailed(("remR3FlushPage %VGv failed with %d!!\n", GCPtr, rc));
+        VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
+    }
+    //RAWEx_ProfileStart(env, STATS_QEMU_TOTAL);
+}
+
+
+/**
+ * Called from tlb_protect_code in order to write monitor a code page.
+ *
+ * @param   env             Pointer to the CPU environment.
+ * @param   GCPtr           Code page to monitor
+ */
+void remR3ProtectCode(CPUState *env, RTGCPTR GCPtr)
+{
+#ifdef VBOX_REM_PROTECT_PAGES_FROM_SMC
+    Assert(env->pVM->rem.s.fInREM);
+    if (     (env->cr[0] & X86_CR0_PG)                      /* paging must be enabled */
+        &&  !(env->state & CPU_EMULATE_SINGLE_INSTR)        /* ignore during single instruction execution */
+        &&   (((env->hflags >> HF_CPL_SHIFT) & 3) == 0)     /* supervisor mode only */
+        &&  !(env->eflags & VM_MASK)                        /* no V86 mode */
+        &&  !HWACCMIsEnabled(env->pVM))
+        CSAMR3MonitorPage(env->pVM, GCPtr, CSAM_TAG_REM);
+#endif
+}
+
+/**
+ * Called from tlb_unprotect_code in order to clear write monitoring for a code page.
+ *
+ * @param   env             Pointer to the CPU environment.
+ * @param   GCPtr           Code page to monitor
+ */
+void remR3UnprotectCode(CPUState *env, RTGCPTR GCPtr)
+{
+    Assert(env->pVM->rem.s.fInREM);
+#ifdef VBOX_REM_PROTECT_PAGES_FROM_SMC
+    if (     (env->cr[0] & X86_CR0_PG)                      /* paging must be enabled */
+        &&  !(env->state & CPU_EMULATE_SINGLE_INSTR)        /* ignore during single instruction execution */
+        &&   (((env->hflags >> HF_CPL_SHIFT) & 3) == 0)     /* supervisor mode only */
+        &&  !(env->eflags & VM_MASK)                        /* no V86 mode */
+        &&  !HWACCMIsEnabled(env->pVM))
+        CSAMR3UnmonitorPage(env->pVM, GCPtr, CSAM_TAG_REM);
+#endif
+}
+
+
+/**
+ * Called when the CPU is initialized, when any of the CRx registers is changed,
+ * or when the A20 line is modified.
+ *
+ * @param   env             Pointer to the CPU environment.
+ * @param   fGlobal         Set if the flush is global.
+ */
+void remR3FlushTLB(CPUState *env, bool fGlobal)
+{
+    PVM pVM = env->pVM;
+
+    /*
+     * When we're replaying invlpg instructions or restoring a saved
+     * state we disable this path.
+     */
+    if (pVM->rem.s.fIgnoreCR3Load || pVM->rem.s.fIgnoreAll)
+        return;
+    Assert(pVM->rem.s.fInREM);
+
+    /*
+     * The caller doesn't check cr4, so we have to do that for ourselves.
+     */
+    if (!fGlobal && !(env->cr[4] & X86_CR4_PGE))
+        fGlobal = true;
+    Log(("remR3FlushTLB: CR0=%RGr CR3=%RGr CR4=%RGr %s\n", env->cr[0], env->cr[3], env->cr[4], fGlobal ? " global" : ""));
+
+    /*
+     * Update the control registers before calling PGMR3FlushTLB.
+     */
+    PCPUMCTX pCtx = (PCPUMCTX)pVM->rem.s.pCtx;
+    pCtx->cr0 = env->cr[0];
+    pCtx->cr3 = env->cr[3];
+    pCtx->cr4 = env->cr[4];
+
+    /*
+     * Let PGM do the rest.
+     */
+    PGMFlushTLB(pVM, env->cr[3], fGlobal);
+}
+
+
+/**
+ * Called when any of the cr0, cr4 or efer registers is updated.
+ *
+ * @param   env     Pointer to the CPU environment.
+ */
+void remR3ChangeCpuMode(CPUState *env)
+{
+    int rc;
+    PVM pVM = env->pVM;
+
+    /*
+     * When we're replaying loads or restoring a saved
+     * state this path is disabled.
+     */
+    if (pVM->rem.s.fIgnoreCpuMode || pVM->rem.s.fIgnoreAll)
+        return;
+    Assert(pVM->rem.s.fInREM);
+
+    /*
+     * Update the control registers before calling PGMChangeMode()
+     * as it may need to map whatever cr3 is pointing to.
+     */
+    PCPUMCTX pCtx = (PCPUMCTX)pVM->rem.s.pCtx;
+    pCtx->cr0 = env->cr[0];
+    pCtx->cr3 = env->cr[3];
+    pCtx->cr4 = env->cr[4];
+
+#ifdef TARGET_X86_64
+    rc = PGMChangeMode(pVM, env->cr[0], env->cr[4], env->efer);
+    if (rc != VINF_SUCCESS)
+        cpu_abort(env, "PGMChangeMode(, %RX64, %RX64, %RX64) -> %Vrc\n", env->cr[0], env->cr[4], env->efer, rc);
+#else
+    rc = PGMChangeMode(pVM, env->cr[0], env->cr[4], 0);
+    if (rc != VINF_SUCCESS)
+        cpu_abort(env, "PGMChangeMode(, %RX64, %RX64, %RX64) -> %Vrc\n", env->cr[0], env->cr[4], 0LL, rc);
+#endif
+}
+
+
+/**
+ * Called from compiled code to run dma.
+ *
+ * @param   env             Pointer to the CPU environment.
+ */
+void remR3DmaRun(CPUState *env)
+{
+    remR3ProfileStop(STATS_QEMU_RUN_EMULATED_CODE);
+    PDMR3DmaRun(env->pVM);
+    remR3ProfileStart(STATS_QEMU_RUN_EMULATED_CODE);
+}
+
+
+/**
+ * Called from compiled code to schedule pending timers in VMM
+ *
+ * @param   env             Pointer to the CPU environment.
+ */
+void remR3TimersRun(CPUState *env)
+{
+    remR3ProfileStop(STATS_QEMU_RUN_EMULATED_CODE);
+    remR3ProfileStart(STATS_QEMU_RUN_TIMERS);
+    TMR3TimerQueuesDo(env->pVM);
+    remR3ProfileStop(STATS_QEMU_RUN_TIMERS);
+    remR3ProfileStart(STATS_QEMU_RUN_EMULATED_CODE);
+}
+
+
+/**
+ * Record trap occurrence.
+ *
+ * @returns VBox status code
+ * @param   env             Pointer to the CPU environment.
+ * @param   uTrap           Trap nr
+ * @param   uErrorCode      Error code
+ * @param   pvNextEIP       Next EIP
+ */
+int remR3NotifyTrap(CPUState *env, uint32_t uTrap, uint32_t uErrorCode, uint32_t pvNextEIP)
+{
+    PVM pVM = env->pVM;
+#ifdef VBOX_WITH_STATISTICS
+    static STAMCOUNTER s_aStatTrap[255];
+    static bool        s_aRegisters[RT_ELEMENTS(s_aStatTrap)];
+#endif
+
+#ifdef VBOX_WITH_STATISTICS
+    if (uTrap < 255)
+    {
+        if (!s_aRegisters[uTrap])
+        {
+            s_aRegisters[uTrap] = true;
+            char szStatName[64];
+            RTStrPrintf(szStatName, sizeof(szStatName), "/REM/Trap/0x%02X", uTrap);
+            STAM_REG(env->pVM, &s_aStatTrap[uTrap], STAMTYPE_COUNTER, szStatName, STAMUNIT_OCCURENCES, "Trap stats.");
+        }
+        STAM_COUNTER_INC(&s_aStatTrap[uTrap]);
+    }
+#endif
+    Log(("remR3NotifyTrap: uTrap=%x error=%x next_eip=%VGv eip=%VGv cr2=%VGv\n", uTrap, uErrorCode, pvNextEIP, env->eip, env->cr[2]));
+    if(   uTrap < 0x20
+       && (env->cr[0] & X86_CR0_PE)
+       && !(env->eflags & X86_EFL_VM))
+    {
+#ifdef DEBUG
+        remR3DisasInstr(env, 1, "remR3NotifyTrap: ");
+#endif
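+        /* If the very same trap keeps being raised at the same EIP/CR2 (more than 512 times),
+         * assume the guest is stuck and bail out to ring-3 with VERR_REM_TOO_MANY_TRAPS. */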
+        if(pVM->rem.s.uPendingException == uTrap && ++pVM->rem.s.cPendingExceptions > 512)
+        {
+            LogRel(("VERR_REM_TOO_MANY_TRAPS -> uTrap=%x error=%x next_eip=%VGv eip=%VGv cr2=%VGv\n", uTrap, uErrorCode, pvNextEIP, env->eip, env->cr[2]));
+            remR3RaiseRC(env->pVM, VERR_REM_TOO_MANY_TRAPS);
+            return VERR_REM_TOO_MANY_TRAPS;
+        }
+        if(pVM->rem.s.uPendingException != uTrap || pVM->rem.s.uPendingExcptEIP != env->eip || pVM->rem.s.uPendingExcptCR2 != env->cr[2])
+            pVM->rem.s.cPendingExceptions = 1;
+        pVM->rem.s.uPendingException = uTrap;
+        pVM->rem.s.uPendingExcptEIP  = env->eip;
+        pVM->rem.s.uPendingExcptCR2  = env->cr[2];
+    }
+    else
+    {
+        pVM->rem.s.cPendingExceptions = 0;
+        pVM->rem.s.uPendingException = uTrap;
+        pVM->rem.s.uPendingExcptEIP  = env->eip;
+        pVM->rem.s.uPendingExcptCR2  = env->cr[2];
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Clear the current active trap.
+ *
+ * @param   pVM         VM Handle.
+ */
+void remR3TrapClear(PVM pVM)
+{
+    pVM->rem.s.cPendingExceptions = 0;
+    pVM->rem.s.uPendingException  = 0;
+    pVM->rem.s.uPendingExcptEIP   = 0;
+    pVM->rem.s.uPendingExcptCR2   = 0;
+}
+
+
+/**
+ * Record previous call instruction addresses.
+ *
+ * @param   env             Pointer to the CPU environment.
+ */
+void remR3RecordCall(CPUState *env)
+{
+    CSAMR3RecordCallAddress(env->pVM, env->eip);
+}
+
+
+/**
+ * Syncs the internal REM state with the VM.
+ *
+ * This must be called before REMR3Run() is invoked whenever the REM
+ * state is not up to date. Calling it several times in a row is not
+ * permitted.
+ *
+ * @returns VBox status code.
+ *
+ * @param   pVM         VM Handle.
+ * @param   fFlushTBs   Flush all translation blocks before executing code
+ *
+ * @remark  The caller has to check for important FFs before calling REMR3Run. REMR3State will
+ *          not do this since the majority of the callers don't want any unnecessary events
+ *          pending that would immediately interrupt execution.
+ */
+REMR3DECL(int)  REMR3State(PVM pVM, bool fFlushTBs)
+{
+    Log2(("REMR3State:\n"));
+    STAM_PROFILE_START(&pVM->rem.s.StatsState, a);
+    register const CPUMCTX *pCtx = pVM->rem.s.pCtx;
+    register unsigned       fFlags;
+    bool                    fHiddenSelRegsValid = CPUMAreHiddenSelRegsValid(pVM);
+    unsigned                i;
+
+    Assert(!pVM->rem.s.fInREM);
+    pVM->rem.s.fInStateSync = true;
+
+    if (fFlushTBs)
+    {
+        STAM_COUNTER_INC(&gStatFlushTBs);
+        tb_flush(&pVM->rem.s.Env);
+    }
+
+    /*
+     * Copy the registers which require no special handling.
+     */
+#ifdef TARGET_X86_64
+    /* Note that the high dwords of the 64-bit registers are undefined in 32-bit mode and after a mode change. */
+    Assert(R_EAX == 0);
+    pVM->rem.s.Env.regs[R_EAX]  = pCtx->rax;
+    Assert(R_ECX == 1);
+    pVM->rem.s.Env.regs[R_ECX]  = pCtx->rcx;
+    Assert(R_EDX == 2);
+    pVM->rem.s.Env.regs[R_EDX]  = pCtx->rdx;
+    Assert(R_EBX == 3);
+    pVM->rem.s.Env.regs[R_EBX]  = pCtx->rbx;
+    Assert(R_ESP == 4);
+    pVM->rem.s.Env.regs[R_ESP]  = pCtx->rsp;
+    Assert(R_EBP == 5);
+    pVM->rem.s.Env.regs[R_EBP]  = pCtx->rbp;
+    Assert(R_ESI == 6);
+    pVM->rem.s.Env.regs[R_ESI]  = pCtx->rsi;
+    Assert(R_EDI == 7);
+    pVM->rem.s.Env.regs[R_EDI]  = pCtx->rdi;
+    pVM->rem.s.Env.regs[8]      = pCtx->r8;
+    pVM->rem.s.Env.regs[9]      = pCtx->r9;
+    pVM->rem.s.Env.regs[10]     = pCtx->r10;
+    pVM->rem.s.Env.regs[11]     = pCtx->r11;
+    pVM->rem.s.Env.regs[12]     = pCtx->r12;
+    pVM->rem.s.Env.regs[13]     = pCtx->r13;
+    pVM->rem.s.Env.regs[14]     = pCtx->r14;
+    pVM->rem.s.Env.regs[15]     = pCtx->r15;
+
+    pVM->rem.s.Env.eip          = pCtx->rip;
+
+    pVM->rem.s.Env.eflags       = pCtx->rflags.u64;
+#else
+    Assert(R_EAX == 0);
+    pVM->rem.s.Env.regs[R_EAX]  = pCtx->eax;
+    Assert(R_ECX == 1);
+    pVM->rem.s.Env.regs[R_ECX]  = pCtx->ecx;
+    Assert(R_EDX == 2);
+    pVM->rem.s.Env.regs[R_EDX]  = pCtx->edx;
+    Assert(R_EBX == 3);
+    pVM->rem.s.Env.regs[R_EBX]  = pCtx->ebx;
+    Assert(R_ESP == 4);
+    pVM->rem.s.Env.regs[R_ESP]  = pCtx->esp;
+    Assert(R_EBP == 5);
+    pVM->rem.s.Env.regs[R_EBP]  = pCtx->ebp;
+    Assert(R_ESI == 6);
+    pVM->rem.s.Env.regs[R_ESI]  = pCtx->esi;
+    Assert(R_EDI == 7);
+    pVM->rem.s.Env.regs[R_EDI]  = pCtx->edi;
+    pVM->rem.s.Env.eip          = pCtx->eip;
+
+    pVM->rem.s.Env.eflags       = pCtx->eflags.u32;
+#endif
+
+    pVM->rem.s.Env.cr[2]        = pCtx->cr2;
+
+    /** @todo we could probably benefit from using a CPUM_CHANGED_DRx flag too! */
+    for (i=0;i<8;i++)
+        pVM->rem.s.Env.dr[i] = pCtx->dr[i];
+
+    /*
+     * Clear the halted hidden flag (the interrupt waking up the CPU may
+     * have been dispatched in raw mode).
+     */
+    pVM->rem.s.Env.hflags      &= ~HF_HALTED_MASK;
+
+    /*
+     * Replay invlpg?
+     */
+    if (pVM->rem.s.cInvalidatedPages)
+    {
+        pVM->rem.s.fIgnoreInvlPg = true;
+        RTUINT i;
+        for (i = 0; i < pVM->rem.s.cInvalidatedPages; i++)
+        {
+            Log2(("REMR3State: invlpg %VGv\n", pVM->rem.s.aGCPtrInvalidatedPages[i]));
+            tlb_flush_page(&pVM->rem.s.Env, pVM->rem.s.aGCPtrInvalidatedPages[i]);
+        }
+        pVM->rem.s.fIgnoreInvlPg = false;
+        pVM->rem.s.cInvalidatedPages = 0;
+    }
+
+    /* Replay notification changes? */
+    if (pVM->rem.s.cHandlerNotifications)
+        REMR3ReplayHandlerNotifications(pVM);
+
+    /* Update MSRs; before CRx registers! */
+    pVM->rem.s.Env.efer         = pCtx->msrEFER;
+    pVM->rem.s.Env.star         = pCtx->msrSTAR;
+    pVM->rem.s.Env.pat          = pCtx->msrPAT;
+#ifdef TARGET_X86_64
+    pVM->rem.s.Env.lstar        = pCtx->msrLSTAR;
+    pVM->rem.s.Env.cstar        = pCtx->msrCSTAR;
+    pVM->rem.s.Env.fmask        = pCtx->msrSFMASK;
+    pVM->rem.s.Env.kernelgsbase = pCtx->msrKERNELGSBASE;
+
+    /* Update the internal long mode activate flag according to the new EFER value. */
+    if (pCtx->msrEFER & MSR_K6_EFER_LMA)
+        pVM->rem.s.Env.hflags |= HF_LMA_MASK;
+    else
+        pVM->rem.s.Env.hflags &= ~(HF_LMA_MASK | HF_CS64_MASK);
+#endif
+
+
+    /*
+     * Registers which are rarely changed and require special handling / order when changed.
+     */
+    fFlags = CPUMGetAndClearChangedFlagsREM(pVM);
+    LogFlow(("CPUMGetAndClearChangedFlagsREM %x\n", fFlags));
+    if (fFlags & (  CPUM_CHANGED_CR4  | CPUM_CHANGED_CR3  | CPUM_CHANGED_CR0
+                  | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_LDTR | CPUM_CHANGED_TR
+                  | CPUM_CHANGED_FPU_REM | CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_CPUID))
+    {
+        if (fFlags & CPUM_CHANGED_FPU_REM)
+            save_raw_fp_state(&pVM->rem.s.Env, (uint8_t *)&pCtx->fpu); /* 'save' is an excellent name. */
+
+        if (fFlags & CPUM_CHANGED_GLOBAL_TLB_FLUSH)
+        {
+            pVM->rem.s.fIgnoreCR3Load = true;
+            tlb_flush(&pVM->rem.s.Env, true);
+            pVM->rem.s.fIgnoreCR3Load = false;
+        }
+
+        /* CR4 before CR0! */
+        if (fFlags & CPUM_CHANGED_CR4)
+        {
+            pVM->rem.s.fIgnoreCR3Load = true;
+            pVM->rem.s.fIgnoreCpuMode = true;
+            cpu_x86_update_cr4(&pVM->rem.s.Env, pCtx->cr4);
+            pVM->rem.s.fIgnoreCpuMode = false;
+            pVM->rem.s.fIgnoreCR3Load = false;
+        }
+
+        if (fFlags & CPUM_CHANGED_CR0)
+        {
+            pVM->rem.s.fIgnoreCR3Load = true;
+            pVM->rem.s.fIgnoreCpuMode = true;
+            cpu_x86_update_cr0(&pVM->rem.s.Env, pCtx->cr0);
+            pVM->rem.s.fIgnoreCpuMode = false;
+            pVM->rem.s.fIgnoreCR3Load = false;
+        }
+
+        if (fFlags & CPUM_CHANGED_CR3)
+        {
+            pVM->rem.s.fIgnoreCR3Load = true;
+            cpu_x86_update_cr3(&pVM->rem.s.Env, pCtx->cr3);
+            pVM->rem.s.fIgnoreCR3Load = false;
+        }
+
+        if (fFlags & CPUM_CHANGED_GDTR)
+        {
+            pVM->rem.s.Env.gdt.base     = pCtx->gdtr.pGdt;
+            pVM->rem.s.Env.gdt.limit    = pCtx->gdtr.cbGdt;
+        }
+
+        if (fFlags & CPUM_CHANGED_IDTR)
+        {
+            pVM->rem.s.Env.idt.base     = pCtx->idtr.pIdt;
+            pVM->rem.s.Env.idt.limit    = pCtx->idtr.cbIdt;
+        }
+
+        if (fFlags & CPUM_CHANGED_SYSENTER_MSR)
+        {
+            pVM->rem.s.Env.sysenter_cs  = pCtx->SysEnter.cs;
+            pVM->rem.s.Env.sysenter_eip = pCtx->SysEnter.eip;
+            pVM->rem.s.Env.sysenter_esp = pCtx->SysEnter.esp;
+        }
+
+        if (fFlags & CPUM_CHANGED_LDTR)
+        {
+            if (fHiddenSelRegsValid)
+            {
+                pVM->rem.s.Env.ldt.selector = pCtx->ldtr;
+                pVM->rem.s.Env.ldt.base     = pCtx->ldtrHid.u64Base;
+                pVM->rem.s.Env.ldt.limit    = pCtx->ldtrHid.u32Limit;
+                pVM->rem.s.Env.ldt.flags    = (pCtx->ldtrHid.Attr.u << 8) & 0xFFFFFF;
+            }
+            else
+                sync_ldtr(&pVM->rem.s.Env, pCtx->ldtr);
+        }
+
+        if (fFlags & CPUM_CHANGED_TR)
+        {
+            if (fHiddenSelRegsValid)
+            {
+                pVM->rem.s.Env.tr.selector = pCtx->tr;
+                pVM->rem.s.Env.tr.base     = pCtx->trHid.u64Base;
+                pVM->rem.s.Env.tr.limit    = pCtx->trHid.u32Limit;
+                pVM->rem.s.Env.tr.flags    = (pCtx->trHid.Attr.u << 8) & 0xFFFFFF;
+            }
+            else
+                sync_tr(&pVM->rem.s.Env, pCtx->tr);
+
+            /** @note do_interrupt will fault if the busy flag is still set.... */
+            pVM->rem.s.Env.tr.flags &= ~DESC_TSS_BUSY_MASK;
+        }
+
+        if (fFlags & CPUM_CHANGED_CPUID)
+        {
+            uint32_t u32Dummy;
+
+            /*
+             * Get the CPUID features.
+             */
+            CPUMGetGuestCpuId(pVM,          1, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext_features, &pVM->rem.s.Env.cpuid_features);
+            CPUMGetGuestCpuId(pVM, 0x80000001, &u32Dummy, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext2_features);
+        }
+    }
+
+    /*
+     * Update selector registers.
+     * This must be done *after* we've synced gdt, ldt and crX registers
+     * since we're reading the GDT/LDT in sync_seg. This can happen with a
+     * saved state that takes a quick dip into raw mode, for instance.
+     */
+    /*
+     * Stack: note that we check this one first, as the CPL might have changed.
+     * The wrong CPL can cause QEmu to raise an exception in sync_seg!!
+     */
+
+    if (fHiddenSelRegsValid)
+    {
+        /* The hidden selector registers are valid in the CPU context. */
+        /** @note QEmu saves the 2nd dword of the descriptor; we should convert the attribute word back! */
+
+        /* Set current CPL */
+        cpu_x86_set_cpl(&pVM->rem.s.Env, CPUMGetGuestCPL(pVM, CPUMCTX2CORE(pCtx)));
+
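+        /* Load the hidden selector caches; (Attr.u << 8) & 0xFFFFFF rebuilds the descriptor
+         * dword that QEMU expects in the 'flags' field. */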
+        cpu_x86_load_seg_cache(&pVM->rem.s.Env, R_CS, pCtx->cs, pCtx->csHid.u64Base, pCtx->csHid.u32Limit, (pCtx->csHid.Attr.u << 8) & 0xFFFFFF);
+        cpu_x86_load_seg_cache(&pVM->rem.s.Env, R_SS, pCtx->ss, pCtx->ssHid.u64Base, pCtx->ssHid.u32Limit, (pCtx->ssHid.Attr.u << 8) & 0xFFFFFF);
+        cpu_x86_load_seg_cache(&pVM->rem.s.Env, R_DS, pCtx->ds, pCtx->dsHid.u64Base, pCtx->dsHid.u32Limit, (pCtx->dsHid.Attr.u << 8) & 0xFFFFFF);
+        cpu_x86_load_seg_cache(&pVM->rem.s.Env, R_ES, pCtx->es, pCtx->esHid.u64Base, pCtx->esHid.u32Limit, (pCtx->esHid.Attr.u << 8) & 0xFFFFFF);
+        cpu_x86_load_seg_cache(&pVM->rem.s.Env, R_FS, pCtx->fs, pCtx->fsHid.u64Base, pCtx->fsHid.u32Limit, (pCtx->fsHid.Attr.u << 8) & 0xFFFFFF);
+        cpu_x86_load_seg_cache(&pVM->rem.s.Env, R_GS, pCtx->gs, pCtx->gsHid.u64Base, pCtx->gsHid.u32Limit, (pCtx->gsHid.Attr.u << 8) & 0xFFFFFF);
+    }
+    else
+    {
+        /* In 'normal' raw mode we don't have access to the hidden selector registers. */
+        if (pVM->rem.s.Env.segs[R_SS].selector != (uint16_t)pCtx->ss)
+        {
+            Log2(("REMR3State: SS changed from %04x to %04x!\n", pVM->rem.s.Env.segs[R_SS].selector, pCtx->ss));
+
+            cpu_x86_set_cpl(&pVM->rem.s.Env, CPUMGetGuestCPL(pVM, CPUMCTX2CORE(pCtx)));
+            sync_seg(&pVM->rem.s.Env, R_SS, pCtx->ss);
+#ifdef VBOX_WITH_STATISTICS
+            if (pVM->rem.s.Env.segs[R_SS].newselector)
+            {
+                STAM_COUNTER_INC(&gStatSelOutOfSync[R_SS]);
+            }
+#endif
+        }
+        else
+            pVM->rem.s.Env.segs[R_SS].newselector = 0;
+
+        if (pVM->rem.s.Env.segs[R_ES].selector != pCtx->es)
+        {
+            Log2(("REMR3State: ES changed from %04x to %04x!\n", pVM->rem.s.Env.segs[R_ES].selector, pCtx->es));
+            sync_seg(&pVM->rem.s.Env, R_ES, pCtx->es);
+#ifdef VBOX_WITH_STATISTICS
+            if (pVM->rem.s.Env.segs[R_ES].newselector)
+            {
+                STAM_COUNTER_INC(&gStatSelOutOfSync[R_ES]);
+            }
+#endif
+        }
+        else
+            pVM->rem.s.Env.segs[R_ES].newselector = 0;
+
+        if (pVM->rem.s.Env.segs[R_CS].selector != pCtx->cs)
+        {
+            Log2(("REMR3State: CS changed from %04x to %04x!\n", pVM->rem.s.Env.segs[R_CS].selector, pCtx->cs));
+            sync_seg(&pVM->rem.s.Env, R_CS, pCtx->cs);
+#ifdef VBOX_WITH_STATISTICS
+            if (pVM->rem.s.Env.segs[R_CS].newselector)
+            {
+                STAM_COUNTER_INC(&gStatSelOutOfSync[R_CS]);
+            }
+#endif
+        }
+        else
+            pVM->rem.s.Env.segs[R_CS].newselector = 0;
+
+        if (pVM->rem.s.Env.segs[R_DS].selector != pCtx->ds)
+        {
+            Log2(("REMR3State: DS changed from %04x to %04x!\n", pVM->rem.s.Env.segs[R_DS].selector, pCtx->ds));
+            sync_seg(&pVM->rem.s.Env, R_DS, pCtx->ds);
+#ifdef VBOX_WITH_STATISTICS
+            if (pVM->rem.s.Env.segs[R_DS].newselector)
+            {
+                STAM_COUNTER_INC(&gStatSelOutOfSync[R_DS]);
+            }
+#endif
+        }
+        else
+            pVM->rem.s.Env.segs[R_DS].newselector = 0;
+
+        /** @todo need to find a way to communicate potential GDT/LDT changes and thread switches. The selector might
+         * be the same but not the base/limit. */
+        if (pVM->rem.s.Env.segs[R_FS].selector != pCtx->fs)
+        {
+            Log2(("REMR3State: FS changed from %04x to %04x!\n", pVM->rem.s.Env.segs[R_FS].selector, pCtx->fs));
+            sync_seg(&pVM->rem.s.Env, R_FS, pCtx->fs);
+#ifdef VBOX_WITH_STATISTICS
+            if (pVM->rem.s.Env.segs[R_FS].newselector)
+            {
+                STAM_COUNTER_INC(&gStatSelOutOfSync[R_FS]);
+            }
+#endif
+        }
+        else
+            pVM->rem.s.Env.segs[R_FS].newselector = 0;
+
+        if (pVM->rem.s.Env.segs[R_GS].selector != pCtx->gs)
+        {
+            Log2(("REMR3State: GS changed from %04x to %04x!\n", pVM->rem.s.Env.segs[R_GS].selector, pCtx->gs));
+            sync_seg(&pVM->rem.s.Env, R_GS, pCtx->gs);
+#ifdef VBOX_WITH_STATISTICS
+            if (pVM->rem.s.Env.segs[R_GS].newselector)
+            {
+                STAM_COUNTER_INC(&gStatSelOutOfSync[R_GS]);
+            }
+#endif
+        }
+        else
+            pVM->rem.s.Env.segs[R_GS].newselector = 0;
+    }
+
+    /*
+     * Check for traps.
+     */
+    pVM->rem.s.Env.exception_index = -1; /** @todo this won't work :/ */
+    TRPMEVENT   enmType;
+    uint8_t     u8TrapNo;
+    int rc = TRPMQueryTrap(pVM, &u8TrapNo, &enmType);
+    if (VBOX_SUCCESS(rc))
+    {
+#ifdef DEBUG
+        if (u8TrapNo == 0x80)
+        {
+            remR3DumpLnxSyscall(pVM);
+            remR3DumpOBsdSyscall(pVM);
+        }
+#endif
+
+        pVM->rem.s.Env.exception_index = u8TrapNo;
+        if (enmType != TRPM_SOFTWARE_INT)
+        {
+            pVM->rem.s.Env.exception_is_int     = 0;
+            pVM->rem.s.Env.exception_next_eip   = pVM->rem.s.Env.eip;
+        }
+        else
+        {
+            /*
+             * There are two 1-byte opcodes and one 2-byte opcode for software interrupts.
+             * We ASSUME that there are no prefixes, so we set the default to 2 bytes and check
+             * for int3 and into.
+             */
+            pVM->rem.s.Env.exception_is_int     = 1;
+            pVM->rem.s.Env.exception_next_eip   = pCtx->rip + 2;
+            /* int 3 may be generated by one-byte 0xcc */
+            if (u8TrapNo == 3)
+            {
+                if (read_byte(&pVM->rem.s.Env, pVM->rem.s.Env.segs[R_CS].base + pCtx->rip) == 0xcc)
+                    pVM->rem.s.Env.exception_next_eip = pCtx->rip + 1;
+            }
+            /* int 4 may be generated by one-byte 0xce */
+            else if (u8TrapNo == 4)
+            {
+                if (read_byte(&pVM->rem.s.Env, pVM->rem.s.Env.segs[R_CS].base + pCtx->rip) == 0xce)
+                    pVM->rem.s.Env.exception_next_eip = pCtx->rip + 1;
+            }
+        }
+
+        /* get error code and cr2 if needed. */
+        switch (u8TrapNo)
+        {
+            case 0x0e:
+                pVM->rem.s.Env.cr[2] = TRPMGetFaultAddress(pVM);
+                /* fallthru */
+            case 0x0a: case 0x0b: case 0x0c: case 0x0d:
+                pVM->rem.s.Env.error_code = TRPMGetErrorCode(pVM);
+                break;
+
+            case 0x11: case 0x08:
+            default:
+                pVM->rem.s.Env.error_code = 0;
+                break;
+        }
+
+        /*
+         * We can now reset the active trap since the recompiler is gonna have a go at it.
+         */
+        rc = TRPMResetTrap(pVM);
+        AssertRC(rc);
+        Log2(("REMR3State: trap=%02x errcd=%VGv cr2=%VGv nexteip=%VGv%s\n", pVM->rem.s.Env.exception_index, pVM->rem.s.Env.error_code,
+              pVM->rem.s.Env.cr[2], pVM->rem.s.Env.exception_next_eip, pVM->rem.s.Env.exception_is_int ? " software" : ""));
+    }
+
+    /*
+     * Clear old interrupt request flags; Check for pending hardware interrupts.
+     * (See @remark for why we don't check for other FFs.)
+     */
+    pVM->rem.s.Env.interrupt_request &= ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXIT | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER);
+    if (    pVM->rem.s.u32PendingInterrupt != REM_NO_PENDING_IRQ
+        ||  VM_FF_ISPENDING(pVM, VM_FF_INTERRUPT_APIC | VM_FF_INTERRUPT_PIC))
+        pVM->rem.s.Env.interrupt_request |= CPU_INTERRUPT_HARD;
+
+    /*
+     * We're now in REM mode.
+     */
+    pVM->rem.s.fInREM = true;
+    pVM->rem.s.fInStateSync = false;
+    pVM->rem.s.cCanExecuteRaw = 0;
+    STAM_PROFILE_STOP(&pVM->rem.s.StatsState, a);
+    Log2(("REMR3State: returns VINF_SUCCESS\n"));
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Syncs back changes in the REM state to the VM state.
+ *
+ * This must be called after invoking REMR3Run().
+ * Calling it several times in a row is not permitted.
+ *
+ * @returns VBox status code.
+ *
+ * @param   pVM         VM Handle.
+ */
+REMR3DECL(int) REMR3StateBack(PVM pVM)
+{
+    Log2(("REMR3StateBack:\n"));
+    Assert(pVM->rem.s.fInREM);
+    STAM_PROFILE_START(&pVM->rem.s.StatsStateBack, a);
+    register PCPUMCTX pCtx = pVM->rem.s.pCtx;
+    unsigned          i;
+
+    /*
+     * Copy back the registers.
+     * This is done in the order they are declared in the CPUMCTX structure.
+     */
+
+    /** @todo FOP */
+    /** @todo FPUIP */
+    /** @todo CS */
+    /** @todo FPUDP */
+    /** @todo DS */
+    /** @todo Fix MXCSR support in QEMU so we don't overwrite MXCSR with 0 when we shouldn't! */
+    pCtx->fpu.MXCSR         = 0;
+    pCtx->fpu.MXCSR_MASK    = 0;
+
+    /** @todo check if FPU/XMM was actually used in the recompiler */
+    restore_raw_fp_state(&pVM->rem.s.Env, (uint8_t *)&pCtx->fpu);
+////    dprintf2(("FPU state CW=%04X TT=%04X SW=%04X (%04X)\n", env->fpuc, env->fpstt, env->fpus, pVMCtx->fpu.FSW));
+
+#ifdef TARGET_X86_64
+    /* Note that the high dwords of the 64-bit registers are undefined in 32-bit mode and after a mode change. */
+    pCtx->rdi           = pVM->rem.s.Env.regs[R_EDI];
+    pCtx->rsi           = pVM->rem.s.Env.regs[R_ESI];
+    pCtx->rbp           = pVM->rem.s.Env.regs[R_EBP];
+    pCtx->rax           = pVM->rem.s.Env.regs[R_EAX];
+    pCtx->rbx           = pVM->rem.s.Env.regs[R_EBX];
+    pCtx->rdx           = pVM->rem.s.Env.regs[R_EDX];
+    pCtx->rcx           = pVM->rem.s.Env.regs[R_ECX];
+    pCtx->r8            = pVM->rem.s.Env.regs[8];
+    pCtx->r9            = pVM->rem.s.Env.regs[9];
+    pCtx->r10           = pVM->rem.s.Env.regs[10];
+    pCtx->r11           = pVM->rem.s.Env.regs[11];
+    pCtx->r12           = pVM->rem.s.Env.regs[12];
+    pCtx->r13           = pVM->rem.s.Env.regs[13];
+    pCtx->r14           = pVM->rem.s.Env.regs[14];
+    pCtx->r15           = pVM->rem.s.Env.regs[15];
+
+    pCtx->rsp           = pVM->rem.s.Env.regs[R_ESP];
+
+#else
+    pCtx->edi           = pVM->rem.s.Env.regs[R_EDI];
+    pCtx->esi           = pVM->rem.s.Env.regs[R_ESI];
+    pCtx->ebp           = pVM->rem.s.Env.regs[R_EBP];
+    pCtx->eax           = pVM->rem.s.Env.regs[R_EAX];
+    pCtx->ebx           = pVM->rem.s.Env.regs[R_EBX];
+    pCtx->edx           = pVM->rem.s.Env.regs[R_EDX];
+    pCtx->ecx           = pVM->rem.s.Env.regs[R_ECX];
+
+    pCtx->esp           = pVM->rem.s.Env.regs[R_ESP];
+#endif
+
+    pCtx->ss            = pVM->rem.s.Env.segs[R_SS].selector;
+
+#ifdef VBOX_WITH_STATISTICS
+    if (pVM->rem.s.Env.segs[R_SS].newselector)
+    {
+        STAM_COUNTER_INC(&gStatSelOutOfSyncStateBack[R_SS]);
+    }
+    if (pVM->rem.s.Env.segs[R_GS].newselector)
+    {
+        STAM_COUNTER_INC(&gStatSelOutOfSyncStateBack[R_GS]);
+    }
+    if (pVM->rem.s.Env.segs[R_FS].newselector)
+    {
+        STAM_COUNTER_INC(&gStatSelOutOfSyncStateBack[R_FS]);
+    }
+    if (pVM->rem.s.Env.segs[R_ES].newselector)
+    {
+        STAM_COUNTER_INC(&gStatSelOutOfSyncStateBack[R_ES]);
+    }
+    if (pVM->rem.s.Env.segs[R_DS].newselector)
+    {
+        STAM_COUNTER_INC(&gStatSelOutOfSyncStateBack[R_DS]);
+    }
+    if (pVM->rem.s.Env.segs[R_CS].newselector)
+    {
+        STAM_COUNTER_INC(&gStatSelOutOfSyncStateBack[R_CS]);
+    }
+#endif
+    pCtx->gs            = pVM->rem.s.Env.segs[R_GS].selector;
+    pCtx->fs            = pVM->rem.s.Env.segs[R_FS].selector;
+    pCtx->es            = pVM->rem.s.Env.segs[R_ES].selector;
+    pCtx->ds            = pVM->rem.s.Env.segs[R_DS].selector;
+    pCtx->cs            = pVM->rem.s.Env.segs[R_CS].selector;
+
+#ifdef TARGET_X86_64
+    pCtx->rip           = pVM->rem.s.Env.eip;
+    pCtx->rflags.u64    = pVM->rem.s.Env.eflags;
+#else
+    pCtx->eip           = pVM->rem.s.Env.eip;
+    pCtx->eflags.u32    = pVM->rem.s.Env.eflags;
+#endif
+
+    pCtx->cr0           = pVM->rem.s.Env.cr[0];
+    pCtx->cr2           = pVM->rem.s.Env.cr[2];
+    pCtx->cr3           = pVM->rem.s.Env.cr[3];
+    pCtx->cr4           = pVM->rem.s.Env.cr[4];
+
+    for (i=0;i<8;i++)
+        pCtx->dr[i] = pVM->rem.s.Env.dr[i];
+
+    pCtx->gdtr.cbGdt    = pVM->rem.s.Env.gdt.limit;
+    if (pCtx->gdtr.pGdt != pVM->rem.s.Env.gdt.base)
+    {
+        pCtx->gdtr.pGdt = pVM->rem.s.Env.gdt.base;
+        STAM_COUNTER_INC(&gStatREMGDTChange);
+        VM_FF_SET(pVM, VM_FF_SELM_SYNC_GDT);
+    }
+
+    pCtx->idtr.cbIdt    = pVM->rem.s.Env.idt.limit;
+    if (pCtx->idtr.pIdt != pVM->rem.s.Env.idt.base)
+    {
+        pCtx->idtr.pIdt = pVM->rem.s.Env.idt.base;
+        STAM_COUNTER_INC(&gStatREMIDTChange);
+        VM_FF_SET(pVM, VM_FF_TRPM_SYNC_IDT);
+    }
+
+    if (pCtx->ldtr != pVM->rem.s.Env.ldt.selector)
+    {
+        pCtx->ldtr      = pVM->rem.s.Env.ldt.selector;
+        STAM_COUNTER_INC(&gStatREMLDTRChange);
+        VM_FF_SET(pVM, VM_FF_SELM_SYNC_LDT);
+    }
+    if (pCtx->tr != pVM->rem.s.Env.tr.selector)
+    {
+        pCtx->tr        = pVM->rem.s.Env.tr.selector;
+        STAM_COUNTER_INC(&gStatREMTRChange);
+        VM_FF_SET(pVM, VM_FF_SELM_SYNC_TSS);
+    }
+
+    /** @todo These values could still be out of sync! */
+    pCtx->csHid.u64Base    = pVM->rem.s.Env.segs[R_CS].base;
+    pCtx->csHid.u32Limit   = pVM->rem.s.Env.segs[R_CS].limit;
+    /** @note QEmu saves the 2nd dword of the descriptor; we should store the attribute word only! */
+    pCtx->csHid.Attr.u     = (pVM->rem.s.Env.segs[R_CS].flags >> 8) & 0xF0FF;
+
+    pCtx->dsHid.u64Base    = pVM->rem.s.Env.segs[R_DS].base;
+    pCtx->dsHid.u32Limit   = pVM->rem.s.Env.segs[R_DS].limit;
+    pCtx->dsHid.Attr.u     = (pVM->rem.s.Env.segs[R_DS].flags >> 8) & 0xF0FF;
+
+    pCtx->esHid.u64Base    = pVM->rem.s.Env.segs[R_ES].base;
+    pCtx->esHid.u32Limit   = pVM->rem.s.Env.segs[R_ES].limit;
+    pCtx->esHid.Attr.u     = (pVM->rem.s.Env.segs[R_ES].flags >> 8) & 0xF0FF;
+
+    pCtx->fsHid.u64Base    = pVM->rem.s.Env.segs[R_FS].base;
+    pCtx->fsHid.u32Limit   = pVM->rem.s.Env.segs[R_FS].limit;
+    pCtx->fsHid.Attr.u     = (pVM->rem.s.Env.segs[R_FS].flags >> 8) & 0xF0FF;
+
+    pCtx->gsHid.u64Base    = pVM->rem.s.Env.segs[R_GS].base;
+    pCtx->gsHid.u32Limit   = pVM->rem.s.Env.segs[R_GS].limit;
+    pCtx->gsHid.Attr.u     = (pVM->rem.s.Env.segs[R_GS].flags >> 8) & 0xF0FF;
+
+    pCtx->ssHid.u64Base    = pVM->rem.s.Env.segs[R_SS].base;
+    pCtx->ssHid.u32Limit   = pVM->rem.s.Env.segs[R_SS].limit;
+    pCtx->ssHid.Attr.u     = (pVM->rem.s.Env.segs[R_SS].flags >> 8) & 0xF0FF;
+
+    pCtx->ldtrHid.u64Base  = pVM->rem.s.Env.ldt.base;
+    pCtx->ldtrHid.u32Limit = pVM->rem.s.Env.ldt.limit;
+    pCtx->ldtrHid.Attr.u   = (pVM->rem.s.Env.ldt.flags >> 8) & 0xF0FF;
+
+    pCtx->trHid.u64Base    = pVM->rem.s.Env.tr.base;
+    pCtx->trHid.u32Limit   = pVM->rem.s.Env.tr.limit;
+    pCtx->trHid.Attr.u     = (pVM->rem.s.Env.tr.flags >> 8) & 0xF0FF;
+
+    /* Sysenter MSR */
+    pCtx->SysEnter.cs      = pVM->rem.s.Env.sysenter_cs;
+    pCtx->SysEnter.eip     = pVM->rem.s.Env.sysenter_eip;
+    pCtx->SysEnter.esp     = pVM->rem.s.Env.sysenter_esp;
+
+    /* System MSRs. */
+    pCtx->msrEFER          = pVM->rem.s.Env.efer;
+    pCtx->msrSTAR          = pVM->rem.s.Env.star;
+    pCtx->msrPAT           = pVM->rem.s.Env.pat;
+#ifdef TARGET_X86_64
+    pCtx->msrLSTAR         = pVM->rem.s.Env.lstar;
+    pCtx->msrCSTAR         = pVM->rem.s.Env.cstar;
+    pCtx->msrSFMASK        = pVM->rem.s.Env.fmask;
+    pCtx->msrKERNELGSBASE  = pVM->rem.s.Env.kernelgsbase;
+#endif
+
+    remR3TrapClear(pVM);
+
+    /*
+     * Check for traps.
+     */
+    if (    pVM->rem.s.Env.exception_index >= 0
+        &&  pVM->rem.s.Env.exception_index < 256)
+    {
+        Log(("REMR3StateBack: Pending trap %x %d\n", pVM->rem.s.Env.exception_index, pVM->rem.s.Env.exception_is_int));
+        int rc = TRPMAssertTrap(pVM, pVM->rem.s.Env.exception_index, (pVM->rem.s.Env.exception_is_int) ? TRPM_SOFTWARE_INT : TRPM_HARDWARE_INT);
+        AssertRC(rc);
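+        /* Forward the error code for the exceptions that define one:
+         * #DF(8), #TS(10), #NP(11), #SS(12), #GP(13), #PF(14) and #AC(17);
+         * a page fault additionally reports the faulting address via CR2. */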
+        switch (pVM->rem.s.Env.exception_index)
+        {
+            case 0x0e:
+                TRPMSetFaultAddress(pVM, pCtx->cr2);
+                /* fallthru */
+            case 0x0a: case 0x0b: case 0x0c: case 0x0d:
+            case 0x11: case 0x08: /* 0 */
+                TRPMSetErrorCode(pVM, pVM->rem.s.Env.error_code);
+                break;
+        }
+
+    }
+
+    /*
+     * We're no longer in REM mode.
+     */
+    pVM->rem.s.fInREM   = false;
+    STAM_PROFILE_STOP(&pVM->rem.s.StatsStateBack, a);
+    Log2(("REMR3StateBack: returns VINF_SUCCESS\n"));
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * This is called by the disassembler when it wants to update the cpu state
+ * before, for instance, doing a register dump.
+ */
+static void remR3StateUpdate(PVM pVM)
+{
+    Assert(pVM->rem.s.fInREM);
+    register PCPUMCTX pCtx = pVM->rem.s.pCtx;
+    unsigned          i;
+
+    /*
+     * Copy back the registers.
+     * This is done in the order they are declared in the CPUMCTX structure.
+     */
+
+    /** @todo FOP */
+    /** @todo FPUIP */
+    /** @todo CS */
+    /** @todo FPUDP */
+    /** @todo DS */
+    /** @todo Fix MXCSR support in QEMU so we don't overwrite MXCSR with 0 when we shouldn't! */
+    pCtx->fpu.MXCSR         = 0;
+    pCtx->fpu.MXCSR_MASK    = 0;
+
+    /** @todo check if FPU/XMM was actually used in the recompiler */
+    restore_raw_fp_state(&pVM->rem.s.Env, (uint8_t *)&pCtx->fpu);
+////    dprintf2(("FPU state CW=%04X TT=%04X SW=%04X (%04X)\n", env->fpuc, env->fpstt, env->fpus, pVMCtx->fpu.FSW));
+
+#ifdef TARGET_X86_64
+    pCtx->rdi           = pVM->rem.s.Env.regs[R_EDI];
+    pCtx->rsi           = pVM->rem.s.Env.regs[R_ESI];
+    pCtx->rbp           = pVM->rem.s.Env.regs[R_EBP];
+    pCtx->rax           = pVM->rem.s.Env.regs[R_EAX];
+    pCtx->rbx           = pVM->rem.s.Env.regs[R_EBX];
+    pCtx->rdx           = pVM->rem.s.Env.regs[R_EDX];
+    pCtx->rcx           = pVM->rem.s.Env.regs[R_ECX];
+    pCtx->r8            = pVM->rem.s.Env.regs[8];
+    pCtx->r9            = pVM->rem.s.Env.regs[9];
+    pCtx->r10           = pVM->rem.s.Env.regs[10];
+    pCtx->r11           = pVM->rem.s.Env.regs[11];
+    pCtx->r12           = pVM->rem.s.Env.regs[12];
+    pCtx->r13           = pVM->rem.s.Env.regs[13];
+    pCtx->r14           = pVM->rem.s.Env.regs[14];
+    pCtx->r15           = pVM->rem.s.Env.regs[15];
+
+    pCtx->rsp           = pVM->rem.s.Env.regs[R_ESP];
+#else
+    pCtx->edi           = pVM->rem.s.Env.regs[R_EDI];
+    pCtx->esi           = pVM->rem.s.Env.regs[R_ESI];
+    pCtx->ebp           = pVM->rem.s.Env.regs[R_EBP];
+    pCtx->eax           = pVM->rem.s.Env.regs[R_EAX];
+    pCtx->ebx           = pVM->rem.s.Env.regs[R_EBX];
+    pCtx->edx           = pVM->rem.s.Env.regs[R_EDX];
+    pCtx->ecx           = pVM->rem.s.Env.regs[R_ECX];
+
+    pCtx->esp           = pVM->rem.s.Env.regs[R_ESP];
+#endif
+
+    pCtx->ss            = pVM->rem.s.Env.segs[R_SS].selector;
+
+    pCtx->gs            = pVM->rem.s.Env.segs[R_GS].selector;
+    pCtx->fs            = pVM->rem.s.Env.segs[R_FS].selector;
+    pCtx->es            = pVM->rem.s.Env.segs[R_ES].selector;
+    pCtx->ds            = pVM->rem.s.Env.segs[R_DS].selector;
+    pCtx->cs            = pVM->rem.s.Env.segs[R_CS].selector;
+
+#ifdef TARGET_X86_64
+    pCtx->rip           = pVM->rem.s.Env.eip;
+    pCtx->rflags.u64    = pVM->rem.s.Env.eflags;
+#else
+    pCtx->eip           = pVM->rem.s.Env.eip;
+    pCtx->eflags.u32    = pVM->rem.s.Env.eflags;
+#endif
+
+    pCtx->cr0           = pVM->rem.s.Env.cr[0];
+    pCtx->cr2           = pVM->rem.s.Env.cr[2];
+    pCtx->cr3           = pVM->rem.s.Env.cr[3];
+    pCtx->cr4           = pVM->rem.s.Env.cr[4];
+
+    for (i = 0; i < 8; i++)
+        pCtx->dr[i] = pVM->rem.s.Env.dr[i];
+
+    pCtx->gdtr.cbGdt    = pVM->rem.s.Env.gdt.limit;
+    if (pCtx->gdtr.pGdt != (uint32_t)pVM->rem.s.Env.gdt.base)
+    {
+        pCtx->gdtr.pGdt     = (uint32_t)pVM->rem.s.Env.gdt.base;
+        STAM_COUNTER_INC(&gStatREMGDTChange);
+        VM_FF_SET(pVM, VM_FF_SELM_SYNC_GDT);
+    }
+
+    pCtx->idtr.cbIdt    = pVM->rem.s.Env.idt.limit;
+    if (pCtx->idtr.pIdt != (uint32_t)pVM->rem.s.Env.idt.base)
+    {
+        pCtx->idtr.pIdt     = (uint32_t)pVM->rem.s.Env.idt.base;
+        STAM_COUNTER_INC(&gStatREMIDTChange);
+        VM_FF_SET(pVM, VM_FF_TRPM_SYNC_IDT);
+    }
+
+    if (pCtx->ldtr != pVM->rem.s.Env.ldt.selector)
+    {
+        pCtx->ldtr      = pVM->rem.s.Env.ldt.selector;
+        STAM_COUNTER_INC(&gStatREMLDTRChange);
+        VM_FF_SET(pVM, VM_FF_SELM_SYNC_LDT);
+    }
+    if (pCtx->tr != pVM->rem.s.Env.tr.selector)
+    {
+        pCtx->tr        = pVM->rem.s.Env.tr.selector;
+        STAM_COUNTER_INC(&gStatREMTRChange);
+        VM_FF_SET(pVM, VM_FF_SELM_SYNC_TSS);
+    }
+
+    /** @todo These values could still be out of sync! */
+    pCtx->csHid.u64Base    = pVM->rem.s.Env.segs[R_CS].base;
+    pCtx->csHid.u32Limit   = pVM->rem.s.Env.segs[R_CS].limit;
+    /** @note QEmu saves the 2nd dword of the descriptor; we should store the attribute word only! */
+    pCtx->csHid.Attr.u     = (pVM->rem.s.Env.segs[R_CS].flags >> 8) & 0xFFFF;
+
+    pCtx->dsHid.u64Base    = pVM->rem.s.Env.segs[R_DS].base;
+    pCtx->dsHid.u32Limit   = pVM->rem.s.Env.segs[R_DS].limit;
+    pCtx->dsHid.Attr.u     = (pVM->rem.s.Env.segs[R_DS].flags >> 8) & 0xFFFF;
+
+    pCtx->esHid.u64Base    = pVM->rem.s.Env.segs[R_ES].base;
+    pCtx->esHid.u32Limit   = pVM->rem.s.Env.segs[R_ES].limit;
+    pCtx->esHid.Attr.u     = (pVM->rem.s.Env.segs[R_ES].flags >> 8) & 0xFFFF;
+
+    pCtx->fsHid.u64Base    = pVM->rem.s.Env.segs[R_FS].base;
+    pCtx->fsHid.u32Limit   = pVM->rem.s.Env.segs[R_FS].limit;
+    pCtx->fsHid.Attr.u     = (pVM->rem.s.Env.segs[R_FS].flags >> 8) & 0xFFFF;
+
+    pCtx->gsHid.u64Base    = pVM->rem.s.Env.segs[R_GS].base;
+    pCtx->gsHid.u32Limit   = pVM->rem.s.Env.segs[R_GS].limit;
+    pCtx->gsHid.Attr.u     = (pVM->rem.s.Env.segs[R_GS].flags >> 8) & 0xFFFF;
+
+    pCtx->ssHid.u64Base    = pVM->rem.s.Env.segs[R_SS].base;
+    pCtx->ssHid.u32Limit   = pVM->rem.s.Env.segs[R_SS].limit;
+    pCtx->ssHid.Attr.u     = (pVM->rem.s.Env.segs[R_SS].flags >> 8) & 0xFFFF;
+
+    pCtx->ldtrHid.u64Base  = pVM->rem.s.Env.ldt.base;
+    pCtx->ldtrHid.u32Limit = pVM->rem.s.Env.ldt.limit;
+    pCtx->ldtrHid.Attr.u   = (pVM->rem.s.Env.ldt.flags >> 8) & 0xFFFF;
+
+    pCtx->trHid.u64Base    = pVM->rem.s.Env.tr.base;
+    pCtx->trHid.u32Limit   = pVM->rem.s.Env.tr.limit;
+    pCtx->trHid.Attr.u     = (pVM->rem.s.Env.tr.flags >> 8) & 0xFFFF;
+
+    /* Sysenter MSR */
+    pCtx->SysEnter.cs      = pVM->rem.s.Env.sysenter_cs;
+    pCtx->SysEnter.eip     = pVM->rem.s.Env.sysenter_eip;
+    pCtx->SysEnter.esp     = pVM->rem.s.Env.sysenter_esp;
+
+    /* System MSRs. */
+    pCtx->msrEFER          = pVM->rem.s.Env.efer;
+    pCtx->msrSTAR          = pVM->rem.s.Env.star;
+    pCtx->msrPAT           = pVM->rem.s.Env.pat;
+#ifdef TARGET_X86_64
+    pCtx->msrLSTAR         = pVM->rem.s.Env.lstar;
+    pCtx->msrCSTAR         = pVM->rem.s.Env.cstar;
+    pCtx->msrSFMASK        = pVM->rem.s.Env.fmask;
+    pCtx->msrKERNELGSBASE  = pVM->rem.s.Env.kernelgsbase;
+#endif
+
+}
+
+
+/**
+ * Update the VMM state information if we're currently in REM.
+ *
+ * This method is used by DBGF and the PDM devices when there is any uncertainty about
+ * whether we're currently executing in REM and the VMM state is invalid. It will of
+ * course check that we're executing in REM before syncing any data over to the VMM.
+ *
+ * @param   pVM         The VM handle.
+ */
+REMR3DECL(void) REMR3StateUpdate(PVM pVM)
+{
+    if (pVM->rem.s.fInREM)
+        remR3StateUpdate(pVM);
+}
+
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM
+
+
+/**
+ * Notify the recompiler about Address Gate 20 state change.
+ *
+ * This notification is required since A20 gate changes are
+ * initiated from a device driver and the VM might just as
+ * well be in REM mode as in RAW mode.
+ *
+ * @param   pVM         VM handle.
+ * @param   fEnable     True if the gate should be enabled.
+ *                      False if the gate should be disabled.
+ */
+REMR3DECL(void) REMR3A20Set(PVM pVM, bool fEnable)
+{
+    LogFlow(("REMR3A20Set: fEnable=%d\n", fEnable));
+    VM_ASSERT_EMT(pVM);
+
+    bool fSaved = pVM->rem.s.fIgnoreAll; /* just in case. */
+    pVM->rem.s.fIgnoreAll = fSaved || !pVM->rem.s.fInREM;
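+    /* cpu_x86_set_a20() flushes the recompiler TLB; raising fIgnoreAll while
+     * we're not actually in REM presumably keeps the resulting callbacks from
+     * being recorded as notifications (assumption based on how fIgnoreAll is
+     * used elsewhere in this file). */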
+
+    cpu_x86_set_a20(&pVM->rem.s.Env, fEnable);
+
+    pVM->rem.s.fIgnoreAll = fSaved;
+}
+
+
+/**
+ * Replays the invalidated recorded pages.
+ * Called in response to VERR_REM_FLUSHED_PAGES_OVERFLOW from the RAW execution loop.
+ *
+ * @param   pVM         VM handle.
+ */
+REMR3DECL(void) REMR3ReplayInvalidatedPages(PVM pVM)
+{
+    VM_ASSERT_EMT(pVM);
+
+    /*
+     * Sync the required registers.
+     */
+    pVM->rem.s.Env.cr[0] = pVM->rem.s.pCtx->cr0;
+    pVM->rem.s.Env.cr[2] = pVM->rem.s.pCtx->cr2;
+    pVM->rem.s.Env.cr[3] = pVM->rem.s.pCtx->cr3;
+    pVM->rem.s.Env.cr[4] = pVM->rem.s.pCtx->cr4;
+
+    /*
+     * Replay the flushes.
+     */
+    pVM->rem.s.fIgnoreInvlPg = true;
+    RTUINT i;
+    for (i = 0; i < pVM->rem.s.cInvalidatedPages; i++)
+    {
+        Log2(("REMR3ReplayInvalidatedPages: invlpg %VGv\n", pVM->rem.s.aGCPtrInvalidatedPages[i]));
+        tlb_flush_page(&pVM->rem.s.Env, pVM->rem.s.aGCPtrInvalidatedPages[i]);
+    }
+    pVM->rem.s.fIgnoreInvlPg = false;
+    pVM->rem.s.cInvalidatedPages = 0;
+}
+
+
+/**
+ * Replays the handler notification changes.
+ * Called in response to VM_FF_REM_HANDLER_NOTIFY from the RAW execution loop.
+ *
+ * @param   pVM         VM handle.
+ */
+REMR3DECL(void) REMR3ReplayHandlerNotifications(PVM pVM)
+{
+    LogFlow(("REMR3ReplayInvalidatedPages:\n"));
+    VM_ASSERT_EMT(pVM);
+
+    /*
+     * Replay the flushes.
+     */
+    RTUINT i;
+    const RTUINT c = pVM->rem.s.cHandlerNotifications;
+    pVM->rem.s.cHandlerNotifications = 0;
+    for (i = 0; i < c; i++)
+    {
+        PREMHANDLERNOTIFICATION pRec = &pVM->rem.s.aHandlerNotifications[i];
+        switch (pRec->enmKind)
+        {
+            case REMHANDLERNOTIFICATIONKIND_PHYSICAL_REGISTER:
+                REMR3NotifyHandlerPhysicalRegister(pVM,
+                                                   pRec->u.PhysicalRegister.enmType,
+                                                   pRec->u.PhysicalRegister.GCPhys,
+                                                   pRec->u.PhysicalRegister.cb,
+                                                   pRec->u.PhysicalRegister.fHasHCHandler);
+                break;
+
+            case REMHANDLERNOTIFICATIONKIND_PHYSICAL_DEREGISTER:
+                REMR3NotifyHandlerPhysicalDeregister(pVM,
+                                                     pRec->u.PhysicalDeregister.enmType,
+                                                     pRec->u.PhysicalDeregister.GCPhys,
+                                                     pRec->u.PhysicalDeregister.cb,
+                                                     pRec->u.PhysicalDeregister.fHasHCHandler,
+                                                     pRec->u.PhysicalDeregister.fRestoreAsRAM);
+                break;
+
+            case REMHANDLERNOTIFICATIONKIND_PHYSICAL_MODIFY:
+                REMR3NotifyHandlerPhysicalModify(pVM,
+                                                 pRec->u.PhysicalModify.enmType,
+                                                 pRec->u.PhysicalModify.GCPhysOld,
+                                                 pRec->u.PhysicalModify.GCPhysNew,
+                                                 pRec->u.PhysicalModify.cb,
+                                                 pRec->u.PhysicalModify.fHasHCHandler,
+                                                 pRec->u.PhysicalModify.fRestoreAsRAM);
+                break;
+
+            default:
+                AssertReleaseMsgFailed(("enmKind=%d\n", pRec->enmKind));
+                break;
+        }
+    }
+    VM_FF_CLEAR(pVM, VM_FF_REM_HANDLER_NOTIFY);
+}
+
+
+/**
+ * Notify REM about changed code page.
+ *
+ * @returns VBox status code.
+ * @param   pVM         VM handle.
+ * @param   pvCodePage  Code page address
+ */
+REMR3DECL(int) REMR3NotifyCodePageChanged(PVM pVM, RTGCPTR pvCodePage)
+{
+#ifdef VBOX_REM_PROTECT_PAGES_FROM_SMC
+    int      rc;
+    RTGCPHYS PhysGC;
+    uint64_t flags;
+
+    VM_ASSERT_EMT(pVM);
+
+    /*
+     * Get the physical page address.
+     */
+    rc = PGMGstGetPage(pVM, pvCodePage, &flags, &PhysGC);
+    if (rc == VINF_SUCCESS)
+    {
+        /*
+         * Sync the required registers and flush the whole page.
+         * (Easier to do the whole page than notifying it about each physical
+         * byte that was changed.
+         */
+        pVM->rem.s.Env.cr[0] = pVM->rem.s.pCtx->cr0;
+        pVM->rem.s.Env.cr[2] = pVM->rem.s.pCtx->cr2;
+        pVM->rem.s.Env.cr[3] = pVM->rem.s.pCtx->cr3;
+        pVM->rem.s.Env.cr[4] = pVM->rem.s.pCtx->cr4;
+
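+        /* Throw away every translation block overlapping the page; the code
+         * will be retranslated the next time it is executed. */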
+        tb_invalidate_phys_page_range(PhysGC, PhysGC + PAGE_SIZE - 1, 0);
+    }
+#endif
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Notification about a successful MMR3PhysRegister() call.
+ *
+ * @param   pVM         VM handle.
+ * @param   GCPhys      The physical address of the RAM.
+ * @param   cb          Size of the memory.
+ * @param   fFlags      Flags of the MM_RAM_FLAGS_* defines.
+ */
+REMR3DECL(void) REMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb, unsigned fFlags)
+{
+    Log(("REMR3NotifyPhysRamRegister: GCPhys=%VGp cb=%d fFlags=%d\n", GCPhys, cb, fFlags));
+    VM_ASSERT_EMT(pVM);
+
+    /*
+     * Validate input - we trust the caller.
+     */
+    Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+    Assert(cb);
+    Assert(RT_ALIGN_Z(cb, PAGE_SIZE) == cb);
+
+    /*
+     * Base ram?
+     */
+    if (!GCPhys)
+    {
+        phys_ram_size = cb;
+        phys_ram_dirty_size = cb >> PAGE_SHIFT;
+#ifndef VBOX_STRICT
+        phys_ram_dirty = MMR3HeapAlloc(pVM, MM_TAG_REM, phys_ram_dirty_size);
+        AssertReleaseMsg(phys_ram_dirty, ("failed to allocate %d bytes of dirty bytes\n", phys_ram_dirty_size));
+#else /* VBOX_STRICT: allocate a full map and make the out of bounds pages invalid. */
+        phys_ram_dirty = RTMemPageAlloc(_4G >> PAGE_SHIFT);
+        AssertReleaseMsg(phys_ram_dirty, ("failed to allocate %d bytes of dirty bytes\n", _4G >> PAGE_SHIFT));
+        uint32_t cbBitmap = RT_ALIGN_32(phys_ram_dirty_size, PAGE_SIZE);
+        int rc = RTMemProtect(phys_ram_dirty + cbBitmap, (_4G >> PAGE_SHIFT) - cbBitmap, RTMEM_PROT_NONE);
+        AssertRC(rc);
+        phys_ram_dirty += cbBitmap - phys_ram_dirty_size;
+#endif
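+        /* One dirty byte per guest page; setting them all to 0xff marks every
+         * page dirty initially. */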
+        memset(phys_ram_dirty, 0xff, phys_ram_dirty_size);
+    }
+
+    /*
+     * Register the ram.
+     */
+    Assert(!pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = true;
+
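+    /* The third argument to cpu_register_physical_memory() is either an
+     * offset into the recompiler's RAM space (plain RAM, here identical to
+     * GCPhys) or one of the special IO_MEM_* memory types. */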
+#ifdef VBOX_WITH_NEW_PHYS_CODE
+    if (fFlags & MM_RAM_FLAGS_RESERVED)
+        cpu_register_physical_memory(GCPhys, cb, IO_MEM_UNASSIGNED);
+    else
+        cpu_register_physical_memory(GCPhys, cb, GCPhys);
+#else
+    if (!GCPhys)
+        cpu_register_physical_memory(GCPhys, cb, GCPhys | IO_MEM_RAM_MISSING);
+    else
+    {
+        if (fFlags & MM_RAM_FLAGS_RESERVED)
+            cpu_register_physical_memory(GCPhys, cb, IO_MEM_UNASSIGNED);
+        else
+            cpu_register_physical_memory(GCPhys, cb, GCPhys);
+    }
+#endif
+    Assert(pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = false;
+}
+
+#ifndef VBOX_WITH_NEW_PHYS_CODE
+
+/**
+ * Notification about a successful PGMR3PhysRegisterChunk() call.
+ *
+ * @param   pVM         VM handle.
+ * @param   GCPhys      The physical address of the RAM.
+ * @param   cb          Size of the memory.
+ * @param   pvRam       The HC address of the RAM.
+ * @param   fFlags      Flags of the MM_RAM_FLAGS_* defines.
+ */
+REMR3DECL(void) REMR3NotifyPhysRamChunkRegister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb, RTHCUINTPTR pvRam, unsigned fFlags)
+{
+    Log(("REMR3NotifyPhysRamChunkRegister: GCPhys=%VGp cb=%d pvRam=%p fFlags=%d\n", GCPhys, cb, pvRam, fFlags));
+    VM_ASSERT_EMT(pVM);
+
+    /*
+     * Validate input - we trust the caller.
+     */
+    Assert(pvRam);
+    Assert(RT_ALIGN(pvRam, PAGE_SIZE) == pvRam);
+    Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+    Assert(cb == PGM_DYNAMIC_CHUNK_SIZE);
+    Assert(fFlags == 0 /* normal RAM */);
+    Assert(!pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = true;
+
+    cpu_register_physical_memory(GCPhys, cb, GCPhys);
+
+    Assert(pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = false;
+}
+
+
+/**
+ *  Grows dynamically allocated guest RAM.
+ *  Will raise a fatal error if the operation fails.
+ *
+ * @param   physaddr    The physical address.
+ */
+void remR3GrowDynRange(unsigned long physaddr)
+{
+    int rc;
+    PVM pVM = cpu_single_env->pVM;
+
+    LogFlow(("remR3GrowDynRange %VGp\n", physaddr));
+    const RTGCPHYS GCPhys = physaddr;
+    rc = PGM3PhysGrowRange(pVM, &GCPhys);
+    if (VBOX_SUCCESS(rc))
+        return;
+
+    LogRel(("\nUnable to allocate guest RAM chunk at %VGp\n", physaddr));
+    cpu_abort(cpu_single_env, "Unable to allocate guest RAM chunk at %VGp\n", physaddr);
+    AssertFatalFailed();
+}
+
+#endif /* !VBOX_WITH_NEW_PHYS_CODE */
+
+/**
+ * Notification about a successful MMR3PhysRomRegister() call.
+ *
+ * @param   pVM         VM handle.
+ * @param   GCPhys      The physical address of the ROM.
+ * @param   cb          The size of the ROM.
+ * @param   pvCopy      Pointer to the ROM copy.
+ * @param   fShadow     Whether it's currently writable shadow ROM or normal readonly ROM.
+ *                      This function will be called whenever the protection of the
+ *                      shadow ROM changes (at reset and end of POST).
+ */
+REMR3DECL(void) REMR3NotifyPhysRomRegister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb, void *pvCopy, bool fShadow)
+{
+    Log(("REMR3NotifyPhysRomRegister: GCPhys=%VGp cb=%d pvCopy=%p fShadow=%RTbool\n", GCPhys, cb, pvCopy, fShadow));
+    VM_ASSERT_EMT(pVM);
+
+    /*
+     * Validate input - we trust the caller.
+     */
+    Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+    Assert(cb);
+    Assert(RT_ALIGN_Z(cb, PAGE_SIZE) == cb);
+    Assert(pvCopy);
+    Assert(RT_ALIGN_P(pvCopy, PAGE_SIZE) == pvCopy);
+
+    /*
+     * Register the rom.
+     */
+    Assert(!pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = true;
+
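+    /* While the shadow ROM is writable it is registered as plain RAM; normal
+     * read-only ROM gets the IO_MEM_ROM type so the recompiler ignores writes. */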
+    cpu_register_physical_memory(GCPhys, cb, GCPhys | (fShadow ? 0 : IO_MEM_ROM));
+
+    Log2(("%.64Vhxd\n", (char *)pvCopy + cb - 64));
+
+    Assert(pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = false;
+}
+
+
+/**
+ * Notification about a successful memory deregistration or reservation.
+ *
+ * @param   pVM         VM Handle.
+ * @param   GCPhys      Start physical address.
+ * @param   cb          The size of the range.
+ * @todo    Rename to REMR3NotifyPhysRamDeregister (for MMIO2) as we won't
+ *          reserve any memory soon.
+ */
+REMR3DECL(void) REMR3NotifyPhysReserve(PVM pVM, RTGCPHYS GCPhys, RTUINT cb)
+{
+    Log(("REMR3NotifyPhysReserve: GCPhys=%VGp cb=%d\n", GCPhys, cb));
+    VM_ASSERT_EMT(pVM);
+
+    /*
+     * Validate input - we trust the caller.
+     */
+    Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+    Assert(cb);
+    Assert(RT_ALIGN_Z(cb, PAGE_SIZE) == cb);
+
+    /*
+     * Unassign the memory.
+     */
+    Assert(!pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = true;
+
+    cpu_register_physical_memory(GCPhys, cb, IO_MEM_UNASSIGNED);
+
+    Assert(pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = false;
+}
+
+
+/**
+ * Notification about a successful PGMR3HandlerPhysicalRegister() call.
+ *
+ * @param   pVM             VM Handle.
+ * @param   enmType         Handler type.
+ * @param   GCPhys          Handler range address.
+ * @param   cb              Size of the handler range.
+ * @param   fHasHCHandler   Set if the handler has a HC callback function.
+ *
+ * @remark  MMR3PhysRomRegister assumes that this function will not apply the
+ *          Handler memory type to memory which has no HC handler.
+ */
+REMR3DECL(void) REMR3NotifyHandlerPhysicalRegister(PVM pVM, PGMPHYSHANDLERTYPE enmType, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler)
+{
+    Log(("REMR3NotifyHandlerPhysicalRegister: enmType=%d GCPhys=%VGp cb=%VGp fHasHCHandler=%d\n",
+          enmType, GCPhys, cb, fHasHCHandler));
+    VM_ASSERT_EMT(pVM);
+    Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+    Assert(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb);
+
+    if (pVM->rem.s.cHandlerNotifications)
+        REMR3ReplayHandlerNotifications(pVM);
+
+    Assert(!pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = true;
+
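+    /* iMMIOMemType and iHandlerMemType are the io-memory type indices the
+     * recompiler was given at init time (cpu_register_io_memory); pages mapped
+     * with them dispatch all accesses to the remR3MMIO* / remR3Handler*
+     * callbacks further down in this file. */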
+    if (enmType == PGMPHYSHANDLERTYPE_MMIO)
+        cpu_register_physical_memory(GCPhys, cb, pVM->rem.s.iMMIOMemType);
+    else if (fHasHCHandler)
+        cpu_register_physical_memory(GCPhys, cb, pVM->rem.s.iHandlerMemType);
+
+    Assert(pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = false;
+}
+
+
+/**
+ * Notification about a successful PGMR3HandlerPhysicalDeregister() operation.
+ *
+ * @param   pVM             VM Handle.
+ * @param   enmType         Handler type.
+ * @param   GCPhys          Handler range address.
+ * @param   cb              Size of the handler range.
+ * @param   fHasHCHandler   Set if the handler has a HC callback function.
+ * @param   fRestoreAsRAM   Whether to restore it as normal RAM or as unassigned memory.
+ */
+REMR3DECL(void) REMR3NotifyHandlerPhysicalDeregister(PVM pVM, PGMPHYSHANDLERTYPE enmType, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM)
+{
+    Log(("REMR3NotifyHandlerPhysicalDeregister: enmType=%d GCPhys=%VGp cb=%VGp fHasHCHandler=%RTbool fRestoreAsRAM=%RTbool RAM=%08x\n",
+          enmType, GCPhys, cb, fHasHCHandler, fRestoreAsRAM, MMR3PhysGetRamSize(pVM)));
+    VM_ASSERT_EMT(pVM);
+
+    if (pVM->rem.s.cHandlerNotifications)
+        REMR3ReplayHandlerNotifications(pVM);
+
+    Assert(!pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = true;
+
+/** @todo this isn't right, MMIO can (in theory) be restored as RAM. */
+    if (enmType == PGMPHYSHANDLERTYPE_MMIO)
+        cpu_register_physical_memory(GCPhys, cb, IO_MEM_UNASSIGNED);
+    else if (fHasHCHandler)
+    {
+        if (!fRestoreAsRAM)
+        {
+            Assert(GCPhys > MMR3PhysGetRamSize(pVM));
+            cpu_register_physical_memory(GCPhys, cb, IO_MEM_UNASSIGNED);
+        }
+        else
+        {
+            Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+            Assert(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb);
+            cpu_register_physical_memory(GCPhys, cb, GCPhys);
+        }
+    }
+
+    Assert(pVM->rem.s.fIgnoreAll);
+    pVM->rem.s.fIgnoreAll = false;
+}
+
+
+/**
+ * Notification about a successful PGMR3HandlerPhysicalModify() call.
+ *
+ * @param   pVM             VM Handle.
+ * @param   enmType         Handler type.
+ * @param   GCPhysOld       Old handler range address.
+ * @param   GCPhysNew       New handler range address.
+ * @param   cb              Size of the handler range.
+ * @param   fHasHCHandler   Set if the handler has a HC callback function.
+ * @param   fRestoreAsRAM   Whether to restore it as normal RAM or as unassigned memory.
+ */
+REMR3DECL(void) REMR3NotifyHandlerPhysicalModify(PVM pVM, PGMPHYSHANDLERTYPE enmType, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM)
+{
+    Log(("REMR3NotifyHandlerPhysicalModify: enmType=%d GCPhysOld=%VGp GCPhysNew=%VGp cb=%VGp fHasHCHandler=%RTbool fRestoreAsRAM=%RTbool\n",
+          enmType, GCPhysOld, GCPhysNew, cb, fHasHCHandler, fRestoreAsRAM));
+    VM_ASSERT_EMT(pVM);
+    AssertReleaseMsg(enmType != PGMPHYSHANDLERTYPE_MMIO, ("enmType=%d\n", enmType));
+
+    if (pVM->rem.s.cHandlerNotifications)
+        REMR3ReplayHandlerNotifications(pVM);
+
+    if (fHasHCHandler)
+    {
+        Assert(!pVM->rem.s.fIgnoreAll);
+        pVM->rem.s.fIgnoreAll = true;
+
+        /*
+         * Reset the old page.
+         */
+        if (!fRestoreAsRAM)
+            cpu_register_physical_memory(GCPhysOld, cb, IO_MEM_UNASSIGNED);
+        else
+        {
+            /* This is not perfect, but it'll do for PD monitoring... */
+            Assert(cb == PAGE_SIZE);
+            Assert(RT_ALIGN_T(GCPhysOld, PAGE_SIZE, RTGCPHYS) == GCPhysOld);
+            cpu_register_physical_memory(GCPhysOld, cb, GCPhysOld);
+        }
+
+        /*
+         * Update the new page.
+         */
+        Assert(RT_ALIGN_T(GCPhysNew, PAGE_SIZE, RTGCPHYS) == GCPhysNew);
+        Assert(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb);
+        cpu_register_physical_memory(GCPhysNew, cb, pVM->rem.s.iHandlerMemType);
+
+        Assert(pVM->rem.s.fIgnoreAll);
+        pVM->rem.s.fIgnoreAll = false;
+    }
+}
+
+
+/**
+ * Checks if we're handling access to this page or not.
+ *
+ * @returns true if we're trapping access.
+ * @returns false if we aren't.
+ * @param   pVM         The VM handle.
+ * @param   GCPhys      The physical address.
+ *
+ * @remark  This function will only work correctly in VBOX_STRICT builds!
+ */
+REMR3DECL(bool) REMR3IsPageAccessHandled(PVM pVM, RTGCPHYS GCPhys)
+{
+#ifdef VBOX_STRICT
+    if (pVM->rem.s.cHandlerNotifications)
+        REMR3ReplayHandlerNotifications(pVM);
+
+    unsigned long off = get_phys_page_offset(GCPhys);
+    return (off & PAGE_OFFSET_MASK) == pVM->rem.s.iHandlerMemType
+        || (off & PAGE_OFFSET_MASK) == pVM->rem.s.iMMIOMemType
+        || (off & PAGE_OFFSET_MASK) == IO_MEM_ROM;
+#else
+    return false;
+#endif
+}
+
+
+/**
+ * Deals with a rare case in get_phys_addr_code where the code
+ * is being monitored.
+ *
+ * It could also be an MMIO page, in which case we will raise a fatal error.
+ *
+ * @returns The physical address corresponding to addr.
+ * @param   env         The cpu environment.
+ * @param   addr        The virtual address.
+ * @param   pTLBEntry   The TLB entry.
+ */
+target_ulong remR3PhysGetPhysicalAddressCode(CPUState *env, target_ulong addr, CPUTLBEntry *pTLBEntry)
+{
+    PVM pVM = env->pVM;
+    if ((pTLBEntry->addr_code & ~TARGET_PAGE_MASK) == pVM->rem.s.iHandlerMemType)
+    {
+        target_ulong ret = pTLBEntry->addend + addr;
+        AssertMsg2("remR3PhysGetPhysicalAddressCode: addr=%VGv addr_code=%VGv addend=%VGp ret=%VGp\n",
+                   (RTGCPTR)addr, (RTGCPTR)pTLBEntry->addr_code, (RTGCPHYS)pTLBEntry->addend, ret);
+        return ret;
+    }
+    LogRel(("\nTrying to execute code with memory type addr_code=%VGv addend=%VGp at %VGv! (iHandlerMemType=%#x iMMIOMemType=%#x)\n"
+            "*** handlers\n",
+            (RTGCPTR)pTLBEntry->addr_code, (RTGCPHYS)pTLBEntry->addend, (RTGCPTR)addr, pVM->rem.s.iHandlerMemType, pVM->rem.s.iMMIOMemType));
+    DBGFR3Info(pVM, "handlers", NULL, DBGFR3InfoLogRelHlp());
+    LogRel(("*** mmio\n"));
+    DBGFR3Info(pVM, "mmio", NULL, DBGFR3InfoLogRelHlp());
+    LogRel(("*** phys\n"));
+    DBGFR3Info(pVM, "phys", NULL, DBGFR3InfoLogRelHlp());
+    cpu_abort(env, "Trying to execute code with memory type addr_code=%VGv addend=%VGp at %VGv. (iHandlerMemType=%#x iMMIOMemType=%#x)\n",
+              (RTGCPTR)pTLBEntry->addr_code, (RTGCPHYS)pTLBEntry->addend, (RTGCPTR)addr, pVM->rem.s.iHandlerMemType, pVM->rem.s.iMMIOMemType);
+    AssertFatalFailed();
+}
+
+
+/** Validate the physical address passed to the read functions.
+ * Useful for finding non-guest-ram reads/writes.  */
+#if 0 //1 /* disable if it becomes bothersome... */
+# define VBOX_CHECK_ADDR(GCPhys) AssertMsg(PGMPhysIsGCPhysValid(cpu_single_env->pVM, (GCPhys)), ("%VGp\n", (GCPhys)))
+#else
+# define VBOX_CHECK_ADDR(GCPhys) do { } while (0)
+#endif
+
+/**
+ * Read guest RAM and ROM.
+ *
+ * @param   SrcGCPhys       The source address (guest physical).
+ * @param   pvDst           The destination address.
+ * @param   cb              Number of bytes
+ */
+void remR3PhysRead(RTGCPHYS SrcGCPhys, void *pvDst, unsigned cb)
+{
+    STAM_PROFILE_ADV_START(&gStatMemRead, a);
+    VBOX_CHECK_ADDR(SrcGCPhys);
+    PGMPhysRead(cpu_single_env->pVM, SrcGCPhys, pvDst, cb);
+    STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+}
+
+
+/**
+ * Read guest RAM and ROM, unsigned 8-bit.
+ *
+ * @param   SrcGCPhys       The source address (guest physical).
+ */
+uint8_t remR3PhysReadU8(RTGCPHYS SrcGCPhys)
+{
+    uint8_t val;
+    STAM_PROFILE_ADV_START(&gStatMemRead, a);
+    VBOX_CHECK_ADDR(SrcGCPhys);
+    val = PGMR3PhysReadU8(cpu_single_env->pVM, SrcGCPhys);
+    STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+    return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, signed 8-bit.
+ *
+ * @param   SrcGCPhys       The source address (guest physical).
+ */
+int8_t remR3PhysReadS8(RTGCPHYS SrcGCPhys)
+{
+    int8_t val;
+    STAM_PROFILE_ADV_START(&gStatMemRead, a);
+    VBOX_CHECK_ADDR(SrcGCPhys);
+    val = PGMR3PhysReadU8(cpu_single_env->pVM, SrcGCPhys);
+    STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+    return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, unsigned 16-bit.
+ *
+ * @param   SrcGCPhys       The source address (guest physical).
+ */
+uint16_t remR3PhysReadU16(RTGCPHYS SrcGCPhys)
+{
+    uint16_t val;
+    STAM_PROFILE_ADV_START(&gStatMemRead, a);
+    VBOX_CHECK_ADDR(SrcGCPhys);
+    val = PGMR3PhysReadU16(cpu_single_env->pVM, SrcGCPhys);
+    STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+    return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, signed 16-bit.
+ *
+ * @param   SrcGCPhys       The source address (guest physical).
+ */
+int16_t remR3PhysReadS16(RTGCPHYS SrcGCPhys)
+{
+    int16_t val;
+    STAM_PROFILE_ADV_START(&gStatMemRead, a);
+    VBOX_CHECK_ADDR(SrcGCPhys);
+    val = PGMR3PhysReadU16(cpu_single_env->pVM, SrcGCPhys);
+    STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+    return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, unsigned 32-bit.
+ *
+ * @param   SrcGCPhys       The source address (guest physical).
+ */
+uint32_t remR3PhysReadU32(RTGCPHYS SrcGCPhys)
+{
+    uint32_t val;
+    STAM_PROFILE_ADV_START(&gStatMemRead, a);
+    VBOX_CHECK_ADDR(SrcGCPhys);
+    val = PGMR3PhysReadU32(cpu_single_env->pVM, SrcGCPhys);
+    STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+    return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, signed 32-bit.
+ *
+ * @param   SrcGCPhys       The source address (guest physical).
+ */
+int32_t remR3PhysReadS32(RTGCPHYS SrcGCPhys)
+{
+    int32_t val;
+    STAM_PROFILE_ADV_START(&gStatMemRead, a);
+    VBOX_CHECK_ADDR(SrcGCPhys);
+    val = PGMR3PhysReadU32(cpu_single_env->pVM, SrcGCPhys);
+    STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+    return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, unsigned 64-bit.
+ *
+ * @param   SrcGCPhys       The source address (guest physical).
+ */
+uint64_t remR3PhysReadU64(RTGCPHYS SrcGCPhys)
+{
+    uint64_t val;
+    STAM_PROFILE_ADV_START(&gStatMemRead, a);
+    VBOX_CHECK_ADDR(SrcGCPhys);
+    val = PGMR3PhysReadU64(cpu_single_env->pVM, SrcGCPhys);
+    STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+    return val;
+}
+
+
+/**
+ * Write guest RAM.
+ *
+ * @param   DstGCPhys       The destination address (guest physical).
+ * @param   pvSrc           The source address.
+ * @param   cb              Number of bytes to write
+ */
+void remR3PhysWrite(RTGCPHYS DstGCPhys, const void *pvSrc, unsigned cb)
+{
+    STAM_PROFILE_ADV_START(&gStatMemWrite, a);
+    VBOX_CHECK_ADDR(DstGCPhys);
+    PGMPhysWrite(cpu_single_env->pVM, DstGCPhys, pvSrc, cb);
+    STAM_PROFILE_ADV_STOP(&gStatMemWrite, a);
+}
+
+
+/**
+ * Write guest RAM, unsigned 8-bit.
+ *
+ * @param   DstGCPhys       The destination address (guest physical).
+ * @param   val             Value
+ */
+void remR3PhysWriteU8(RTGCPHYS DstGCPhys, uint8_t val)
+{
+    STAM_PROFILE_ADV_START(&gStatMemWrite, a);
+    VBOX_CHECK_ADDR(DstGCPhys);
+    PGMR3PhysWriteU8(cpu_single_env->pVM, DstGCPhys, val);
+    STAM_PROFILE_ADV_STOP(&gStatMemWrite, a);
+}
+
+
+/**
+ * Write guest RAM, unsigned 16-bit.
+ *
+ * @param   DstGCPhys       The destination address (guest physical).
+ * @param   val             Value
+ */
+void remR3PhysWriteU16(RTGCPHYS DstGCPhys, uint16_t val)
+{
+    STAM_PROFILE_ADV_START(&gStatMemWrite, a);
+    VBOX_CHECK_ADDR(DstGCPhys);
+    PGMR3PhysWriteU16(cpu_single_env->pVM, DstGCPhys, val);
+    STAM_PROFILE_ADV_STOP(&gStatMemWrite, a);
+}
+
+
+/**
+ * Write guest RAM, unsigned 32-bit.
+ *
+ * @param   DstGCPhys       The destination address (guest physical).
+ * @param   val             Value
+ */
+void remR3PhysWriteU32(RTGCPHYS DstGCPhys, uint32_t val)
+{
+    STAM_PROFILE_ADV_START(&gStatMemWrite, a);
+    VBOX_CHECK_ADDR(DstGCPhys);
+    PGMR3PhysWriteU32(cpu_single_env->pVM, DstGCPhys, val);
+    STAM_PROFILE_ADV_STOP(&gStatMemWrite, a);
+}
+
+
+/**
+ * Write guest RAM, unsigned 64-bit.
+ *
+ * @param   DstGCPhys       The destination address (guest physical).
+ * @param   val             Value
+ */
+void remR3PhysWriteU64(RTGCPHYS DstGCPhys, uint64_t val)
+{
+    STAM_PROFILE_ADV_START(&gStatMemWrite, a);
+    VBOX_CHECK_ADDR(DstGCPhys);
+    PGMR3PhysWriteU64(cpu_single_env->pVM, DstGCPhys, val);
+    STAM_PROFILE_ADV_STOP(&gStatMemWrite, a);
+}
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM_MMIO
+
+/** Read MMIO memory. */
+static uint32_t remR3MMIOReadU8(void *pvVM, target_phys_addr_t GCPhys)
+{
+    uint32_t u32 = 0;
+    int rc = IOMMMIORead((PVM)pvVM, GCPhys, &u32, 1);
+    AssertMsg(rc == VINF_SUCCESS, ("rc=%Vrc\n", rc)); NOREF(rc);
+    Log2(("remR3MMIOReadU8: GCPhys=%VGp -> %02x\n", GCPhys, u32));
+    return u32;
+}
+
+/** Read MMIO memory. */
+static uint32_t remR3MMIOReadU16(void *pvVM, target_phys_addr_t GCPhys)
+{
+    uint32_t u32 = 0;
+    int rc = IOMMMIORead((PVM)pvVM, GCPhys, &u32, 2);
+    AssertMsg(rc == VINF_SUCCESS, ("rc=%Vrc\n", rc)); NOREF(rc);
+    Log2(("remR3MMIOReadU16: GCPhys=%VGp -> %04x\n", GCPhys, u32));
+    return u32;
+}
+
+/** Read MMIO memory. */
+static uint32_t remR3MMIOReadU32(void *pvVM, target_phys_addr_t GCPhys)
+{
+    uint32_t u32 = 0;
+    int rc = IOMMMIORead((PVM)pvVM, GCPhys, &u32, 4);
+    AssertMsg(rc == VINF_SUCCESS, ("rc=%Vrc\n", rc)); NOREF(rc);
+    Log2(("remR3MMIOReadU32: GCPhys=%VGp -> %08x\n", GCPhys, u32));
+    return u32;
+}
+
+/** Write to MMIO memory. */
+static void     remR3MMIOWriteU8(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32)
+{
+    Log2(("remR3MMIOWriteU8: GCPhys=%VGp u32=%#x\n", GCPhys, u32));
+    int rc = IOMMMIOWrite((PVM)pvVM, GCPhys, u32, 1);
+    AssertMsg(rc == VINF_SUCCESS, ("rc=%Vrc\n", rc)); NOREF(rc);
+}
+
+/** Write to MMIO memory. */
+static void     remR3MMIOWriteU16(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32)
+{
+    Log2(("remR3MMIOWriteU16: GCPhys=%VGp u32=%#x\n", GCPhys, u32));
+    int rc = IOMMMIOWrite((PVM)pvVM, GCPhys, u32, 2);
+    AssertMsg(rc == VINF_SUCCESS, ("rc=%Vrc\n", rc)); NOREF(rc);
+}
+
+/** Write to MMIO memory. */
+static void     remR3MMIOWriteU32(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32)
+{
+    Log2(("remR3MMIOWriteU32: GCPhys=%VGp u32=%#x\n", GCPhys, u32));
+    int rc = IOMMMIOWrite((PVM)pvVM, GCPhys, u32, 4);
+    AssertMsg(rc == VINF_SUCCESS, ("rc=%Vrc\n", rc)); NOREF(rc);
+}
+
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM_HANDLER
+
+/*  !!!WARNING!!! This is extremely hackish right now, we assume it's only for LFB access!  !!!WARNING!!!  */
+
+static uint32_t remR3HandlerReadU8(void *pvVM, target_phys_addr_t GCPhys)
+{
+    Log2(("remR3HandlerReadU8: GCPhys=%VGp\n", GCPhys));
+    uint8_t u8;
+    PGMPhysRead((PVM)pvVM, GCPhys, &u8, sizeof(u8));
+    return u8;
+}
+
+static uint32_t remR3HandlerReadU16(void *pvVM, target_phys_addr_t GCPhys)
+{
+    Log2(("remR3HandlerReadU16: GCPhys=%VGp\n", GCPhys));
+    uint16_t u16;
+    PGMPhysRead((PVM)pvVM, GCPhys, &u16, sizeof(u16));
+    return u16;
+}
+
+static uint32_t remR3HandlerReadU32(void *pvVM, target_phys_addr_t GCPhys)
+{
+    Log2(("remR3HandlerReadU32: GCPhys=%VGp\n", GCPhys));
+    uint32_t u32;
+    PGMPhysRead((PVM)pvVM, GCPhys, &u32, sizeof(u32));
+    return u32;
+}
+
+static void     remR3HandlerWriteU8(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32)
+{
+    Log2(("remR3HandlerWriteU8: GCPhys=%VGp u32=%#x\n", GCPhys, u32));
+    PGMPhysWrite((PVM)pvVM, GCPhys, &u32, sizeof(uint8_t));
+}
+
+static void     remR3HandlerWriteU16(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32)
+{
+    Log2(("remR3HandlerWriteU16: GCPhys=%VGp u32=%#x\n", GCPhys, u32));
+    PGMPhysWrite((PVM)pvVM, GCPhys, &u32, sizeof(uint16_t));
+}
+
+static void     remR3HandlerWriteU32(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32)
+{
+    Log2(("remR3HandlerWriteU32: GCPhys=%VGp u32=%#x\n", GCPhys, u32));
+    PGMPhysWrite((PVM)pvVM, GCPhys, &u32, sizeof(uint32_t));
+}
+
+/* -+- disassembly -+- */
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM_DISAS
+
+
+/**
+ * Enables or disables single stepped disassembly.
+ *
+ * @returns VBox status code.
+ * @param   pVM         VM handle.
+ * @param   fEnable     To enable set this flag, to disable clear it.
+ */
+static DECLCALLBACK(int) remR3DisasEnableStepping(PVM pVM, bool fEnable)
+{
+    LogFlow(("remR3DisasEnableStepping: fEnable=%d\n", fEnable));
+    VM_ASSERT_EMT(pVM);
+
+    if (fEnable)
+        pVM->rem.s.Env.state |= CPU_EMULATE_SINGLE_STEP;
+    else
+        pVM->rem.s.Env.state &= ~CPU_EMULATE_SINGLE_STEP;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Enables or disables single stepped disassembly.
+ *
+ * @returns VBox status code.
+ * @param   pVM         VM handle.
+ * @param   fEnable     To enable set this flag, to disable clear it.
+ */
+REMR3DECL(int) REMR3DisasEnableStepping(PVM pVM, bool fEnable)
+{
+    PVMREQ  pReq;
+    int     rc;
+
+    LogFlow(("REMR3DisasEnableStepping: fEnable=%d\n", fEnable));
+    if (VM_IS_EMT(pVM))
+        return remR3DisasEnableStepping(pVM, fEnable);
+
+    rc = VMR3ReqCall(pVM, &pReq, RT_INDEFINITE_WAIT, (PFNRT)remR3DisasEnableStepping, 2, pVM, fEnable);
+    AssertRC(rc);
+    if (VBOX_SUCCESS(rc))
+        rc = pReq->iStatus;
+    VMR3ReqFree(pReq);
+    return rc;
+}
+
+
+#if defined(VBOX_WITH_DEBUGGER) && !(defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64))
+/**
+ * External Debugger Command: .remstep [on|off|1|0]
+ */
+static DECLCALLBACK(int) remR3CmdDisasEnableStepping(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR paArgs, unsigned cArgs, PDBGCVAR pResult)
+{
+    bool fEnable;
+    int rc;
+
+    /* print status */
+    if (cArgs == 0)
+        return pCmdHlp->pfnPrintf(pCmdHlp, NULL, "DisasStepping is %s\n",
+                                  pVM->rem.s.Env.state & CPU_EMULATE_SINGLE_STEP ? "enabled" : "disabled");
+
+    /* convert the argument and change the mode. */
+    rc = pCmdHlp->pfnVarToBool(pCmdHlp, &paArgs[0], &fEnable);
+    if (VBOX_FAILURE(rc))
+        return pCmdHlp->pfnVBoxError(pCmdHlp, rc, "boolean conversion failed!\n");
+    rc = REMR3DisasEnableStepping(pVM, fEnable);
+    if (VBOX_FAILURE(rc))
+        return pCmdHlp->pfnVBoxError(pCmdHlp, rc, "REMR3DisasEnableStepping failed!\n");
+    return rc;
+}
+#endif
+
+
+/**
+ * Disassembles n instructions and prints them to the log.
+ *
+ * @returns Success indicator.
+ * @param   env         Pointer to the recompiler CPU structure.
+ * @param   f32BitCode  Indicates whether the code should be disassembled
+ *                      as 16 or 32 bit code. If -1 the CS selector will
+ *                      be inspected.
+ * @param   nrInstructions  Nr of instructions to disassemble
+ * @param   pszPrefix
+ * @remark  not currently used for anything but ad-hoc debugging.
+ */
+bool remR3DisasBlock(CPUState *env, int f32BitCode, int nrInstructions, char *pszPrefix)
+{
+    int i;
+
+    /*
+     * Determine 16/32 bit mode.
+     */
+    if (f32BitCode == -1)
+        f32BitCode = !!(env->segs[R_CS].flags & X86_DESC_DB); /** @todo is this right?!!?!?!?!? */
+
+    /*
+     * Convert cs:eip to host context address.
+     * We don't care too much about cross page correctness presently.
+     */
+    RTGCPTR    GCPtrPC = env->segs[R_CS].base + env->eip;
+    void      *pvPC;
+    if (f32BitCode && (env->cr[0] & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG))
+    {
+        Assert(PGMGetGuestMode(env->pVM) < PGMMODE_AMD64);
+
+        /* convert eip to physical address. */
+        int rc = PGMPhysGCPtr2HCPtrByGstCR3(env->pVM,
+                                            GCPtrPC,
+                                            env->cr[3],
+                                            env->cr[4] & (X86_CR4_PSE | X86_CR4_PAE), /** @todo add longmode flag */
+                                            &pvPC);
+        if (VBOX_FAILURE(rc))
+        {
+            if (!PATMIsPatchGCAddr(env->pVM, GCPtrPC))
+                return false;
+            pvPC = (char *)PATMR3QueryPatchMemHC(env->pVM, NULL)
+                + (GCPtrPC - PATMR3QueryPatchMemGC(env->pVM, NULL));
+        }
+    }
+    else
+    {
+        /* physical address */
+        int rc = PGMPhysGCPhys2HCPtr(env->pVM, (RTGCPHYS)GCPtrPC, nrInstructions * 16, &pvPC);
+        if (VBOX_FAILURE(rc))
+            return false;
+    }
+
+    /*
+     * Disassemble.
+     */
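+    /* 'off' is added to the host address when DISInstr formats its output, so
+     * the disassembly shows guest eip-relative addresses rather than host
+     * pointers (judging by the computation below). */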
+    RTINTPTR        off = env->eip - (RTGCUINTPTR)pvPC;
+    DISCPUSTATE     Cpu;
+    Cpu.mode = f32BitCode ? CPUMODE_32BIT : CPUMODE_16BIT;
+    Cpu.pfnReadBytes = NULL;            /** @todo make cs:eip reader for the disassembler. */
+    //Cpu.dwUserData[0] = (uintptr_t)pVM;
+    //Cpu.dwUserData[1] = (uintptr_t)pvPC;
+    //Cpu.dwUserData[2] = GCPtrPC;
+
+    for (i = 0; i < nrInstructions; i++)
+    {
+        char szOutput[256];
+        uint32_t    cbOp;
+        if (RT_FAILURE(DISInstr(&Cpu, (uintptr_t)pvPC, off, &cbOp, &szOutput[0])))
+            return false;
+        if (pszPrefix)
+            Log(("%s: %s", pszPrefix, szOutput));
+        else
+            Log(("%s", szOutput));
+
+        pvPC += cbOp;
+    }
+    return true;
+}
+
+
+/** @todo need to test the new code, using the old code in the mean while. */
+#define USE_OLD_DUMP_AND_DISASSEMBLY
+
+/**
+ * Disassembles one instruction and prints it to the log.
+ *
+ * @returns Success indicator.
+ * @param   env         Pointer to the recompiler CPU structure.
+ * @param   f32BitCode  Indicates whether the code should be disassembled
+ *                      as 16 or 32 bit code. If -1 the CS selector will
+ *                      be inspected.
+ * @param   pszPrefix
+ */
+bool remR3DisasInstr(CPUState *env, int f32BitCode, char *pszPrefix)
+{
+#ifdef USE_OLD_DUMP_AND_DISASSEMBLY
+    PVM pVM = env->pVM;
+
+    /* Doesn't work in long mode. */
+    if (env->hflags & HF_LMA_MASK)
+        return false;
+
+    /*
+     * Determine 16/32 bit mode.
+     */
+    if (f32BitCode == -1)
+        f32BitCode = !!(env->segs[R_CS].flags & X86_DESC_DB); /** @todo is this right?!!?!?!?!? */
+
+    /*
+     * Log registers
+     */
+    if (LogIs2Enabled())
+    {
+        remR3StateUpdate(pVM);
+        DBGFR3InfoLog(pVM, "cpumguest", pszPrefix);
+    }
+
+    /*
+     * Convert cs:eip to host context address.
+     * We don't care too much about cross page correctness presently.
+     */
+    RTGCPTR    GCPtrPC = env->segs[R_CS].base + env->eip;
+    void      *pvPC;
+    if ((env->cr[0] & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG))
+    {
+        /* convert eip to physical address. */
+        int rc = PGMPhysGCPtr2HCPtrByGstCR3(pVM,
+                                            GCPtrPC,
+                                            env->cr[3],
+                                            env->cr[4] & (X86_CR4_PSE | X86_CR4_PAE),
+                                            &pvPC);
+        if (VBOX_FAILURE(rc))
+        {
+            if (!PATMIsPatchGCAddr(pVM, GCPtrPC))
+                return false;
+            pvPC = (char *)PATMR3QueryPatchMemHC(pVM, NULL)
+                + (GCPtrPC - PATMR3QueryPatchMemGC(pVM, NULL));
+        }
+    }
+    else
+    {
+
+        /* physical address */
+        int rc = PGMPhysGCPhys2HCPtr(pVM, (RTGCPHYS)GCPtrPC, 16, &pvPC);
+        if (VBOX_FAILURE(rc))
+            return false;
+    }
+
+    /*
+     * Disassemble.
+     */
+    RTINTPTR        off = env->eip - (RTGCUINTPTR)pvPC;
+    DISCPUSTATE     Cpu;
+    Cpu.mode = f32BitCode ? CPUMODE_32BIT : CPUMODE_16BIT;
+    Cpu.pfnReadBytes = NULL;            /** @todo make cs:eip reader for the disassembler. */
+    //Cpu.dwUserData[0] = (uintptr_t)pVM;
+    //Cpu.dwUserData[1] = (uintptr_t)pvPC;
+    //Cpu.dwUserData[2] = GCPtrPC;
+    char szOutput[256];
+    uint32_t    cbOp;
+    if (RT_FAILURE(DISInstr(&Cpu, (uintptr_t)pvPC, off, &cbOp, &szOutput[0])))
+        return false;
+
+    if (!f32BitCode)
+    {
+        if (pszPrefix)
+            Log(("%s: %04X:%s", pszPrefix, env->segs[R_CS].selector, szOutput));
+        else
+            Log(("%04X:%s", env->segs[R_CS].selector, szOutput));
+    }
+    else
+    {
+        if (pszPrefix)
+            Log(("%s: %s", pszPrefix, szOutput));
+        else
+            Log(("%s", szOutput));
+    }
+    return true;
+
+#else /* !USE_OLD_DUMP_AND_DISASSEMBLY */
+    PVM pVM = env->pVM;
+    const bool fLog = LogIsEnabled();
+    const bool fLog2 = LogIs2Enabled();
+    int rc = VINF_SUCCESS;
+
+    /*
+     * Don't bother if there ain't any log output to do.
+     */
+    if (!fLog && !fLog2)
+        return true;
+
+    /*
+     * Update the state so DBGF reads the correct register values.
+     */
+    remR3StateUpdate(pVM);
+
+    /*
+     * Log registers if requested.
+     */
+    if (!fLog2)
+        DBGFR3InfoLog(pVM, "cpumguest", pszPrefix);
+
+    /*
+     * Disassemble to log.
+     */
+    if (fLog)
+        rc = DBGFR3DisasInstrCurrentLogInternal(pVM, pszPrefix);
+
+    return VBOX_SUCCESS(rc);
+#endif
+}
+
+
+/**
+ * Disassemble recompiled code.
+ *
+ * @param   phFileIgnored   Ignored, logfile usually.
+ * @param   pvCode          Pointer to the code block.
+ * @param   cb              Size of the code block.
+ */
+void disas(FILE *phFileIgnored, void *pvCode, unsigned long cb)
+{
+    if (LogIs2Enabled())
+    {
+        unsigned        off = 0;
+        char            szOutput[256];
+        DISCPUSTATE     Cpu;
+
+        memset(&Cpu, 0, sizeof(Cpu));
+#ifdef RT_ARCH_X86
+        Cpu.mode = CPUMODE_32BIT;
+#else
+        Cpu.mode = CPUMODE_64BIT;
+#endif
+
+        RTLogPrintf("Recompiled Code: %p %#lx (%ld) bytes\n", pvCode, cb, cb);
+        while (off < cb)
+        {
+            uint32_t cbInstr;
+            if (RT_SUCCESS(DISInstr(&Cpu, (uintptr_t)pvCode + off, 0, &cbInstr, szOutput)))
+                RTLogPrintf("%s", szOutput);
+            else
+            {
+                RTLogPrintf("disas error\n");
+                cbInstr = 1;
+#ifdef RT_ARCH_AMD64 /** @todo remove when DISInstr starts supporting 64-bit code. */
+                break;
+#endif
+            }
+            off += cbInstr;
+        }
+    }
+    NOREF(phFileIgnored);
+}
+
+
+/**
+ * Disassemble guest code.
+ *
+ * @param   phFileIgnored   Ignored, logfile usually.
+ * @param   uCode           The guest address of the code to disassemble. (flat?)
+ * @param   cb              Number of bytes to disassemble.
+ * @param   fFlags          Flags, probably something which tells if this is 16, 32 or 64 bit code.
+ */
+void target_disas(FILE *phFileIgnored, target_ulong uCode, target_ulong cb, int fFlags)
+{
+    if (LogIs2Enabled())
+    {
+        PVM pVM = cpu_single_env->pVM;
+
+        /*
+         * Update the state so DBGF reads the correct register values (flags).
+         */
+        remR3StateUpdate(pVM);
+
+        /*
+         * Do the disassembling.
+         */
+        RTLogPrintf("Guest Code: PC=%VGp #VGp (%VGp) bytes fFlags=%d\n", uCode, cb, cb, fFlags);
+        RTSEL cs = cpu_single_env->segs[R_CS].selector;
+        RTGCUINTPTR eip = uCode - cpu_single_env->segs[R_CS].base;
+        for (;;)
+        {
+            char        szBuf[256];
+            uint32_t    cbInstr;
+            int rc = DBGFR3DisasInstrEx(pVM,
+                                        cs,
+                                        eip,
+                                        0,
+                                        szBuf, sizeof(szBuf),
+                                        &cbInstr);
+            if (VBOX_SUCCESS(rc))
+                RTLogPrintf("%VGp %s\n", uCode, szBuf);
+            else
+            {
+                RTLogPrintf("%VGp %04x:%VGp: %s\n", uCode, cs, eip, szBuf);
+                cbInstr = 1;
+            }
+
+            /* next */
+            if (cb <= cbInstr)
+                break;
+            cb -= cbInstr;
+            uCode += cbInstr;
+            eip += cbInstr;
+        }
+    }
+    NOREF(phFileIgnored);
+}
+
+
+/**
+ * Looks up a guest symbol.
+ *
+ * @returns Pointer to symbol name. This is a static buffer.
+ * @param   orig_addr   The address in question.
+ */
+const char *lookup_symbol(target_ulong orig_addr)
+{
+    RTGCINTPTR off = 0;
+    DBGFSYMBOL Sym;
+    PVM pVM = cpu_single_env->pVM;
+    int rc = DBGFR3SymbolByAddr(pVM, orig_addr, &off, &Sym);
+    if (VBOX_SUCCESS(rc))
+    {
+        static char szSym[sizeof(Sym.szName) + 48];
+        if (!off)
+            RTStrPrintf(szSym,  sizeof(szSym), "%s\n", Sym.szName);
+        else if (off > 0)
+            RTStrPrintf(szSym,  sizeof(szSym), "%s+%x\n", Sym.szName,  off);
+        else
+            RTStrPrintf(szSym,  sizeof(szSym), "%s-%x\n", Sym.szName,  -off);
+        return szSym;
+    }
+    return "<N/A>";
+}
+
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM
+
+
+/* -+- FF notifications -+- */
+
+
+/**
+ * Notification about a pending interrupt.
+ *
+ * @param   pVM             VM Handle.
+ * @param   u8Interrupt     Interrupt
+ * @thread  The emulation thread.
+ */
+REMR3DECL(void) REMR3NotifyPendingInterrupt(PVM pVM, uint8_t u8Interrupt)
+{
+    Assert(pVM->rem.s.u32PendingInterrupt == REM_NO_PENDING_IRQ);
+    pVM->rem.s.u32PendingInterrupt = u8Interrupt;
+}
+
+/**
+ * Queries the pending interrupt.
+ *
+ * @returns Pending interrupt or REM_NO_PENDING_IRQ
+ * @param   pVM             VM Handle.
+ * @thread  The emulation thread.
+ */
+REMR3DECL(uint32_t) REMR3QueryPendingInterrupt(PVM pVM)
+{
+    return pVM->rem.s.u32PendingInterrupt;
+}
+
+/**
+ * Notification about the interrupt FF being set.
+ *
+ * @param   pVM             VM Handle.
+ * @thread  The emulation thread.
+ */
+REMR3DECL(void) REMR3NotifyInterruptSet(PVM pVM)
+{
+    LogFlow(("REMR3NotifyInterruptSet: fInRem=%d interrupts %s\n", pVM->rem.s.fInREM,
+             (pVM->rem.s.Env.eflags & IF_MASK) && !(pVM->rem.s.Env.hflags & HF_INHIBIT_IRQ_MASK) ? "enabled" : "disabled"));
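+    /* On the EMT we can poke the recompiler directly; from any other thread
+     * we only set the request bit atomically and let the EMT pick it up. */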
+    if (pVM->rem.s.fInREM)
+    {
+        if (VM_IS_EMT(pVM))
+            cpu_interrupt(cpu_single_env, CPU_INTERRUPT_HARD);
+        else
+            ASMAtomicOrS32(&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_HARD);
+    }
+}
+
+
+/**
+ * Notification about the interrupt FF being cleared.
+ *
+ * @param   pVM             VM Handle.
+ * @thread  Any.
+ */
+REMR3DECL(void) REMR3NotifyInterruptClear(PVM pVM)
+{
+    LogFlow(("REMR3NotifyInterruptClear:\n"));
+    if (pVM->rem.s.fInREM)
+        cpu_reset_interrupt(cpu_single_env, CPU_INTERRUPT_HARD);
+}
+
+
+/**
+ * Notification about pending timer(s).
+ *
+ * @param   pVM             VM Handle.
+ * @thread  Any.
+ */
+REMR3DECL(void) REMR3NotifyTimerPending(PVM pVM)
+{
+#ifndef DEBUG_bird
+    LogFlow(("REMR3NotifyTimerPending: fInRem=%d\n", pVM->rem.s.fInREM));
+#endif
+    if (pVM->rem.s.fInREM)
+    {
+        if (VM_IS_EMT(pVM))
+            cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+        else
+            ASMAtomicOrS32(&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_TIMER);
+    }
+}
+
+
+/**
+ * Notification about pending DMA transfers.
+ *
+ * @param   pVM             VM Handle.
+ * @thread  Any.
+ */
+REMR3DECL(void) REMR3NotifyDmaPending(PVM pVM)
+{
+    LogFlow(("REMR3NotifyDmaPending: fInRem=%d\n", pVM->rem.s.fInREM));
+    if (pVM->rem.s.fInREM)
+    {
+        if (VM_IS_EMT(pVM))
+            cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+        else
+            ASMAtomicOrS32(&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_DMA);
+    }
+}
+
+
+/**
+ * Notification about pending queues.
+ *
+ * @param   pVM             VM Handle.
+ * @thread  Any.
+ */
+REMR3DECL(void) REMR3NotifyQueuePending(PVM pVM)
+{
+    LogFlow(("REMR3NotifyQueuePending: fInRem=%d\n", pVM->rem.s.fInREM));
+    if (pVM->rem.s.fInREM)
+    {
+        if (VM_IS_EMT(pVM))
+            cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+        else
+            ASMAtomicOrS32(&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_EXIT);
+    }
+}
+
+
+/**
+ * Notification about pending FF set by an external thread.
+ *
+ * @param   pVM             VM handle.
+ * @thread  Any.
+ */
+REMR3DECL(void) REMR3NotifyFF(PVM pVM)
+{
+    LogFlow(("REMR3NotifyFF: fInRem=%d\n", pVM->rem.s.fInREM));
+    if (pVM->rem.s.fInREM)
+    {
+        if (VM_IS_EMT(pVM))
+            cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+        else
+            ASMAtomicOrS32(&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_EXIT);
+    }
+}
+
+
+#ifdef VBOX_WITH_STATISTICS
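+/* Usage sketch (editorial note, not from the original sources): the profiling
+ * helpers below are meant to be used as a matched pair around the code of
+ * interest, e.g.
+ *     remR3ProfileStart(STATS_QEMU_TOTAL);
+ *     ...run the recompiled code...
+ *     remR3ProfileStop(STATS_QEMU_TOTAL);
+ * An unknown stat code only triggers an assertion and returns, so a typo or
+ * an unbalanced pair skews the STAM data rather than crashing. */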
+void remR3ProfileStart(int statcode)
+{
+    STAMPROFILEADV *pStat;
+    switch(statcode)
+    {
+    case STATS_EMULATE_SINGLE_INSTR:
+        pStat = &gStatExecuteSingleInstr;
+        break;
+    case STATS_QEMU_COMPILATION:
+        pStat = &gStatCompilationQEmu;
+        break;
+    case STATS_QEMU_RUN_EMULATED_CODE:
+        pStat = &gStatRunCodeQEmu;
+        break;
+    case STATS_QEMU_TOTAL:
+        pStat = &gStatTotalTimeQEmu;
+        break;
+    case STATS_QEMU_RUN_TIMERS:
+        pStat = &gStatTimers;
+        break;
+    case STATS_TLB_LOOKUP:
+        pStat = &gStatTBLookup;
+        break;
+    case STATS_IRQ_HANDLING:
+        pStat = &gStatIRQ;
+        break;
+    case STATS_RAW_CHECK:
+        pStat = &gStatRawCheck;
+        break;
+
+    default:
+        AssertMsgFailed(("unknown stat %d\n", statcode));
+        return;
+    }
+    STAM_PROFILE_ADV_START(pStat, a);
+}
+
+
+void remR3ProfileStop(int statcode)
+{
+    STAMPROFILEADV *pStat;
+    switch(statcode)
+    {
+    case STATS_EMULATE_SINGLE_INSTR:
+        pStat = &gStatExecuteSingleInstr;
+        break;
+    case STATS_QEMU_COMPILATION:
+        pStat = &gStatCompilationQEmu;
+        break;
+    case STATS_QEMU_RUN_EMULATED_CODE:
+        pStat = &gStatRunCodeQEmu;
+        break;
+    case STATS_QEMU_TOTAL:
+        pStat = &gStatTotalTimeQEmu;
+        break;
+    case STATS_QEMU_RUN_TIMERS:
+        pStat = &gStatTimers;
+        break;
+    case STATS_TLB_LOOKUP:
+        pStat = &gStatTBLookup;
+        break;
+    case STATS_IRQ_HANDLING:
+        pStat = &gStatIRQ;
+        break;
+    case STATS_RAW_CHECK:
+        pStat = &gStatRawCheck;
+        break;
+    default:
+        AssertMsgFailed(("unknown stat %d\n", statcode));
+        return;
+    }
+    STAM_PROFILE_ADV_STOP(pStat, a);
+}
+#endif
+
+/**
+ * Raise an RC, force rem exit.
+ *
+ * @param   pVM     VM handle.
+ * @param   rc      The rc.
+ */
+void remR3RaiseRC(PVM pVM, int rc)
+{
+    Log(("remR3RaiseRC: rc=%Vrc\n", rc));
+    Assert(pVM->rem.s.fInREM);
+    VM_ASSERT_EMT(pVM);
+    pVM->rem.s.rc = rc;
+    cpu_interrupt(&pVM->rem.s.Env, CPU_INTERRUPT_RC);
+}
+
+
+/* -+- timers -+- */
+
+uint64_t cpu_get_tsc(CPUX86State *env)
+{
+    STAM_COUNTER_INC(&gStatCpuGetTSC);
+    return TMCpuTickGet(env->pVM);
+}
+
+
+/* -+- interrupts -+- */
+
+void cpu_set_ferr(CPUX86State *env)
+{
+    int rc = PDMIsaSetIrq(env->pVM, 13, 1);
+    LogFlow(("cpu_set_ferr: rc=%d\n", rc)); NOREF(rc);
+}
+
+int cpu_get_pic_interrupt(CPUState *env)
+{
+    uint8_t u8Interrupt;
+    int     rc;
+
+    /* When we fail to forward interrupts directly in raw mode, we fall back to the recompiler.
+     * In that case we can't call PDMGetInterrupt anymore, because it has already cleared the interrupt
+     * with the (a)pic.
+     */
+    /** @note We assume we will go directly to the recompiler to handle the pending interrupt! */
+    /** @todo r=bird: In the long run we should just do the interrupt handling in EM/CPUM/TRPM/somewhere and
+     * if we cannot execute the interrupt handler in raw-mode just reschedule to REM. Once that is done we
+     * remove this kludge. */
+    if (env->pVM->rem.s.u32PendingInterrupt != REM_NO_PENDING_IRQ)
+    {
+        rc = VINF_SUCCESS;
+        Assert(env->pVM->rem.s.u32PendingInterrupt <= 255);
+        u8Interrupt = env->pVM->rem.s.u32PendingInterrupt;
+        env->pVM->rem.s.u32PendingInterrupt = REM_NO_PENDING_IRQ;
+    }
+    else
+        rc = PDMGetInterrupt(env->pVM, &u8Interrupt);
+
+    LogFlow(("cpu_get_pic_interrupt: u8Interrupt=%d rc=%Vrc\n", u8Interrupt, rc));
+    if (VBOX_SUCCESS(rc))
+    {
+        if (VM_FF_ISPENDING(env->pVM, VM_FF_INTERRUPT_APIC | VM_FF_INTERRUPT_PIC))
+            env->interrupt_request |= CPU_INTERRUPT_HARD;
+        return u8Interrupt;
+    }
+    return -1;
+}
+
+
+/* -+- local apic -+- */
+
+void cpu_set_apic_base(CPUX86State *env, uint64_t val)
+{
+    int rc = PDMApicSetBase(env->pVM, val);
+    LogFlow(("cpu_set_apic_base: val=%#llx rc=%Vrc\n", val, rc)); NOREF(rc);
+}
+
+uint64_t cpu_get_apic_base(CPUX86State *env)
+{
+    uint64_t u64;
+    int rc = PDMApicGetBase(env->pVM, &u64);
+    if (VBOX_SUCCESS(rc))
+    {
+        LogFlow(("cpu_get_apic_base: returns %#llx \n", u64));
+        return u64;
+    }
+    LogFlow(("cpu_get_apic_base: returns 0 (rc=%Vrc)\n", rc));
+    return 0;
+}
+
+void cpu_set_apic_tpr(CPUX86State *env, uint8_t val)
+{
+    int rc = PDMApicSetTPR(env->pVM, val);
+    LogFlow(("cpu_set_apic_tpr: val=%#x rc=%Vrc\n", val, rc)); NOREF(rc);
+}
+
+uint8_t cpu_get_apic_tpr(CPUX86State *env)
+{
+    uint8_t u8;
+    int rc = PDMApicGetTPR(env->pVM, &u8, NULL);
+    if (VBOX_SUCCESS(rc))
+    {
+        LogFlow(("cpu_get_apic_tpr: returns %#x\n", u8));
+        return u8;
+    }
+    LogFlow(("cpu_get_apic_tpr: returns 0 (rc=%Vrc)\n", rc));
+    return 0;
+}
+
+
+uint64_t cpu_apic_rdmsr(CPUX86State *env, uint32_t reg)
+{
+    uint64_t value;
+    int rc = PDMApicReadMSR(env->pVM, 0/* cpu */, reg, &value);
+    if (VBOX_SUCCESS(rc))
+    {
+        LogFlow(("cpu_apic_rdms returns %#x\n", value));
+        return value;
+    }
+    /** @todo: exception ? */
+    LogFlow(("cpu_apic_rdms returns 0 (rc=%Vrc)\n", rc));
+    return value;
+}
+
+void     cpu_apic_wrmsr(CPUX86State *env, uint32_t reg, uint64_t value)
+{
+    int rc = PDMApicWriteMSR(env->pVM, 0 /* cpu */, reg, value);
+    /** @todo: exception if error ? */
+    LogFlow(("cpu_apic_wrmsr: rc=%Vrc\n", rc)); NOREF(rc);
+}
+/* -+- I/O Ports -+- */
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM_IOPORT
+
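+/* Editorial note: each port accessor below follows the same error convention --
+ * VINF_SUCCESS completes the port access, any status in the VINF_EM_FIRST..
+ * VINF_EM_LAST range is propagated as a forced recompiler exit through
+ * remR3RaiseRC(), and everything else is treated as fatal via remAbort(). */
+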
+void cpu_outb(CPUState *env, int addr, int val)
+{
+    if (addr != 0x80 && addr != 0x70 && addr != 0x61)
+        Log2(("cpu_outb: addr=%#06x val=%#x\n", addr, val));
+
+    int rc = IOMIOPortWrite(env->pVM, (RTIOPORT)addr, val, 1);
+    if (RT_LIKELY(rc == VINF_SUCCESS))
+        return;
+    if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+    {
+        Log(("cpu_outb: addr=%#06x val=%#x -> %Vrc\n", addr, val, rc));
+        remR3RaiseRC(env->pVM, rc);
+        return;
+    }
+    remAbort(rc, __FUNCTION__);
+}
+
+void cpu_outw(CPUState *env, int addr, int val)
+{
+    //Log2(("cpu_outw: addr=%#06x val=%#x\n", addr, val));
+    int rc = IOMIOPortWrite(env->pVM, (RTIOPORT)addr, val, 2);
+    if (RT_LIKELY(rc == VINF_SUCCESS))
+        return;
+    if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+    {
+        Log(("cpu_outw: addr=%#06x val=%#x -> %Vrc\n", addr, val, rc));
+        remR3RaiseRC(env->pVM, rc);
+        return;
+    }
+    remAbort(rc, __FUNCTION__);
+}
+
+void cpu_outl(CPUState *env, int addr, int val)
+{
+    Log2(("cpu_outl: addr=%#06x val=%#x\n", addr, val));
+    int rc = IOMIOPortWrite(env->pVM, (RTIOPORT)addr, val, 4);
+    if (RT_LIKELY(rc == VINF_SUCCESS))
+        return;
+    if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+    {
+        Log(("cpu_outl: addr=%#06x val=%#x -> %Vrc\n", addr, val, rc));
+        remR3RaiseRC(env->pVM, rc);
+        return;
+    }
+    remAbort(rc, __FUNCTION__);
+}
+
+int cpu_inb(CPUState *env, int addr)
+{
+    uint32_t u32 = 0;
+    int rc = IOMIOPortRead(env->pVM, (RTIOPORT)addr, &u32, 1);
+    if (RT_LIKELY(rc == VINF_SUCCESS))
+    {
+        if (/*addr != 0x61 && */addr != 0x71)
+            Log2(("cpu_inb: addr=%#06x -> %#x\n", addr, u32));
+        return (int)u32;
+    }
+    if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+    {
+        Log(("cpu_inb: addr=%#06x -> %#x rc=%Vrc\n", addr, u32, rc));
+        remR3RaiseRC(env->pVM, rc);
+        return (int)u32;
+    }
+    remAbort(rc, __FUNCTION__);
+    return 0xff;
+}
+
+int cpu_inw(CPUState *env, int addr)
+{
+    uint32_t u32 = 0;
+    int rc = IOMIOPortRead(env->pVM, (RTIOPORT)addr, &u32, 2);
+    if (RT_LIKELY(rc == VINF_SUCCESS))
+    {
+        Log2(("cpu_inw: addr=%#06x -> %#x\n", addr, u32));
+        return (int)u32;
+    }
+    if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+    {
+        Log(("cpu_inw: addr=%#06x -> %#x rc=%Vrc\n", addr, u32, rc));
+        remR3RaiseRC(env->pVM, rc);
+        return (int)u32;
+    }
+    remAbort(rc, __FUNCTION__);
+    return 0xffff;
+}
+
+int cpu_inl(CPUState *env, int addr)
+{
+    uint32_t u32 = 0;
+    int rc = IOMIOPortRead(env->pVM, (RTIOPORT)addr, &u32, 4);
+    if (RT_LIKELY(rc == VINF_SUCCESS))
+    {
+//if (addr==0x01f0 && u32 == 0x6b6d)
+//    loglevel = ~0;
+        Log2(("cpu_inl: addr=%#06x -> %#x\n", addr, u32));
+        return (int)u32;
+    }
+    if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+    {
+        Log(("cpu_inl: addr=%#06x -> %#x rc=%Vrc\n", addr, u32, rc));
+        remR3RaiseRC(env->pVM, rc);
+        return (int)u32;
+    }
+    remAbort(rc, __FUNCTION__);
+    return 0xffffffff;
+}
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM
+
+
+/* -+- helpers and misc other interfaces -+- */
+
+/**
+ * Perform the CPUID instruction.
+ *
+ * ASMCpuId cannot be invoked from some source files where this is used because of global
+ * register allocations.
+ *
+ * @param   env         Pointer to the recompiler CPU structure.
+ * @param   uOperator   CPUID operation (eax).
+ * @param   pvEAX       Where to store eax.
+ * @param   pvEBX       Where to store ebx.
+ * @param   pvECX       Where to store ecx.
+ * @param   pvEDX       Where to store edx.
+ */
+void remR3CpuId(CPUState *env, unsigned uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
+{
+    CPUMGetGuestCpuId(env->pVM, uOperator, (uint32_t *)pvEAX, (uint32_t *)pvEBX, (uint32_t *)pvECX, (uint32_t *)pvEDX);
+}
+
+
+#if 0 /* not used */
+/**
+ * Interface for qemu hardware to report back fatal errors.
+ */
+void hw_error(const char *pszFormat, ...)
+{
+    /*
+     * Bitch about it.
+     */
+    /** @todo Add support for nested arg lists in the LogPrintfV routine! I've code for
+     * this in my Odin32 tree at home! */
+    va_list args;
+    va_start(args, pszFormat);
+    RTLogPrintf("fatal error in virtual hardware:");
+    RTLogPrintfV(pszFormat, args);
+    va_end(args);
+    AssertReleaseMsgFailed(("fatal error in virtual hardware: %s\n", pszFormat));
+
+    /*
+     * If we're in REM context we'll sync back the state before 'jumping' to
+     * the EMs failure handling.
+     */
+    PVM pVM = cpu_single_env->pVM;
+    if (pVM->rem.s.fInREM)
+        REMR3StateBack(pVM);
+    EMR3FatalError(pVM, VERR_REM_VIRTUAL_HARDWARE_ERROR);
+    AssertMsgFailed(("EMR3FatalError returned!\n"));
+}
+#endif
+
+/**
+ * Interface for the qemu cpu to report unhandled situation
+ * raising a fatal VM error.
+ */
+void cpu_abort(CPUState *env, const char *pszFormat, ...)
+{
+    /*
+     * Bitch about it.
+     */
+    RTLogFlags(NULL, "nodisabled nobuffered");
+    va_list args;
+    va_start(args, pszFormat);
+    RTLogPrintf("fatal error in recompiler cpu: %N\n", pszFormat, &args);
+    va_end(args);
+    va_start(args, pszFormat);
+    AssertReleaseMsgFailed(("fatal error in recompiler cpu: %N\n", pszFormat, &args));
+    va_end(args);
+
+    /*
+     * If we're in REM context we'll sync back the state before 'jumping' to
+     * the EMs failure handling.
+     */
+    PVM pVM = cpu_single_env->pVM;
+    if (pVM->rem.s.fInREM)
+        REMR3StateBack(pVM);
+    EMR3FatalError(pVM, VERR_REM_VIRTUAL_CPU_ERROR);
+    AssertMsgFailed(("EMR3FatalError returned!\n"));
+}
+
+
+/**
+ * Aborts the VM.
+ *
+ * @param   rc      VBox error code.
+ * @param   pszTip  Hint about why/when this happened.
+ */
+static void remAbort(int rc, const char *pszTip)
+{
+    /*
+     * Bitch about it.
+     */
+    RTLogPrintf("internal REM fatal error: rc=%Vrc %s\n", rc, pszTip);
+    AssertReleaseMsgFailed(("internal REM fatal error: rc=%Vrc %s\n", rc, pszTip));
+
+    /*
+     * Jump back to where we entered the recompiler.
+     */
+    PVM pVM = cpu_single_env->pVM;
+    if (pVM->rem.s.fInREM)
+        REMR3StateBack(pVM);
+    EMR3FatalError(pVM, rc);
+    AssertMsgFailed(("EMR3FatalError returned!\n"));
+}
+
+
+/**
+ * Dumps a linux system call.
+ * @param   pVM     VM handle.
+ */
+void remR3DumpLnxSyscall(PVM pVM)
+{
+    static const char *apsz[] =
+    {
+    	"sys_restart_syscall",	/* 0 - old "setup()" system call, used for restarting */
+    	"sys_exit",
+    	"sys_fork",
+    	"sys_read",
+    	"sys_write",
+    	"sys_open",		/* 5 */
+    	"sys_close",
+    	"sys_waitpid",
+    	"sys_creat",
+    	"sys_link",
+    	"sys_unlink",	/* 10 */
+    	"sys_execve",
+    	"sys_chdir",
+    	"sys_time",
+    	"sys_mknod",
+    	"sys_chmod",		/* 15 */
+    	"sys_lchown16",
+    	"sys_ni_syscall",	/* old break syscall holder */
+    	"sys_stat",
+    	"sys_lseek",
+    	"sys_getpid",	/* 20 */
+    	"sys_mount",
+    	"sys_oldumount",
+    	"sys_setuid16",
+    	"sys_getuid16",
+    	"sys_stime",		/* 25 */
+    	"sys_ptrace",
+    	"sys_alarm",
+    	"sys_fstat",
+    	"sys_pause",
+    	"sys_utime",		/* 30 */
+    	"sys_ni_syscall",	/* old stty syscall holder */
+    	"sys_ni_syscall",	/* old gtty syscall holder */
+    	"sys_access",
+    	"sys_nice",
+    	"sys_ni_syscall",	/* 35 - old ftime syscall holder */
+    	"sys_sync",
+    	"sys_kill",
+    	"sys_rename",
+    	"sys_mkdir",
+    	"sys_rmdir",		/* 40 */
+    	"sys_dup",
+    	"sys_pipe",
+    	"sys_times",
+    	"sys_ni_syscall",	/* old prof syscall holder */
+    	"sys_brk",		/* 45 */
+    	"sys_setgid16",
+    	"sys_getgid16",
+    	"sys_signal",
+    	"sys_geteuid16",
+    	"sys_getegid16",	/* 50 */
+    	"sys_acct",
+    	"sys_umount",	/* recycled never used phys() */
+    	"sys_ni_syscall",	/* old lock syscall holder */
+    	"sys_ioctl",
+    	"sys_fcntl",		/* 55 */
+    	"sys_ni_syscall",	/* old mpx syscall holder */
+    	"sys_setpgid",
+    	"sys_ni_syscall",	/* old ulimit syscall holder */
+    	"sys_olduname",
+    	"sys_umask",		/* 60 */
+    	"sys_chroot",
+    	"sys_ustat",
+    	"sys_dup2",
+    	"sys_getppid",
+    	"sys_getpgrp",	/* 65 */
+    	"sys_setsid",
+    	"sys_sigaction",
+    	"sys_sgetmask",
+    	"sys_ssetmask",
+    	"sys_setreuid16",	/* 70 */
+    	"sys_setregid16",
+    	"sys_sigsuspend",
+    	"sys_sigpending",
+    	"sys_sethostname",
+    	"sys_setrlimit",	/* 75 */
+    	"sys_old_getrlimit",
+    	"sys_getrusage",
+    	"sys_gettimeofday",
+    	"sys_settimeofday",
+    	"sys_getgroups16",	/* 80 */
+    	"sys_setgroups16",
+    	"old_select",
+    	"sys_symlink",
+    	"sys_lstat",
+    	"sys_readlink",	/* 85 */
+    	"sys_uselib",
+    	"sys_swapon",
+    	"sys_reboot",
+    	"old_readdir",
+    	"old_mmap",		/* 90 */
+    	"sys_munmap",
+    	"sys_truncate",
+    	"sys_ftruncate",
+    	"sys_fchmod",
+    	"sys_fchown16",	/* 95 */
+    	"sys_getpriority",
+    	"sys_setpriority",
+    	"sys_ni_syscall",	/* old profil syscall holder */
+    	"sys_statfs",
+    	"sys_fstatfs",	/* 100 */
+    	"sys_ioperm",
+    	"sys_socketcall",
+    	"sys_syslog",
+    	"sys_setitimer",
+    	"sys_getitimer",	/* 105 */
+    	"sys_newstat",
+    	"sys_newlstat",
+    	"sys_newfstat",
+    	"sys_uname",
+    	"sys_iopl",		/* 110 */
+    	"sys_vhangup",
+    	"sys_ni_syscall",	/* old "idle" system call */
+    	"sys_vm86old",
+    	"sys_wait4",
+    	"sys_swapoff",	/* 115 */
+    	"sys_sysinfo",
+    	"sys_ipc",
+    	"sys_fsync",
+    	"sys_sigreturn",
+    	"sys_clone",		/* 120 */
+    	"sys_setdomainname",
+    	"sys_newuname",
+    	"sys_modify_ldt",
+    	"sys_adjtimex",
+    	"sys_mprotect",	/* 125 */
+    	"sys_sigprocmask",
+    	"sys_ni_syscall",	/* old "create_module" */
+    	"sys_init_module",
+    	"sys_delete_module",
+    	"sys_ni_syscall",	/* 130:	old "get_kernel_syms" */
+    	"sys_quotactl",
+    	"sys_getpgid",
+    	"sys_fchdir",
+    	"sys_bdflush",
+    	"sys_sysfs",		/* 135 */
+    	"sys_personality",
+    	"sys_ni_syscall",	/* reserved for afs_syscall */
+    	"sys_setfsuid16",
+    	"sys_setfsgid16",
+    	"sys_llseek",	/* 140 */
+    	"sys_getdents",
+    	"sys_select",
+    	"sys_flock",
+    	"sys_msync",
+    	"sys_readv",		/* 145 */
+    	"sys_writev",
+    	"sys_getsid",
+    	"sys_fdatasync",
+    	"sys_sysctl",
+    	"sys_mlock",		/* 150 */
+    	"sys_munlock",
+    	"sys_mlockall",
+    	"sys_munlockall",
+    	"sys_sched_setparam",
+    	"sys_sched_getparam",   /* 155 */
+    	"sys_sched_setscheduler",
+    	"sys_sched_getscheduler",
+    	"sys_sched_yield",
+    	"sys_sched_get_priority_max",
+    	"sys_sched_get_priority_min",  /* 160 */
+    	"sys_sched_rr_get_interval",
+    	"sys_nanosleep",
+    	"sys_mremap",
+    	"sys_setresuid16",
+    	"sys_getresuid16",	/* 165 */
+    	"sys_vm86",
+    	"sys_ni_syscall",	/* Old sys_query_module */
+    	"sys_poll",
+    	"sys_nfsservctl",
+    	"sys_setresgid16",	/* 170 */
+    	"sys_getresgid16",
+    	"sys_prctl",
+    	"sys_rt_sigreturn",
+    	"sys_rt_sigaction",
+    	"sys_rt_sigprocmask",	/* 175 */
+    	"sys_rt_sigpending",
+    	"sys_rt_sigtimedwait",
+    	"sys_rt_sigqueueinfo",
+    	"sys_rt_sigsuspend",
+    	"sys_pread64",	/* 180 */
+    	"sys_pwrite64",
+    	"sys_chown16",
+    	"sys_getcwd",
+    	"sys_capget",
+    	"sys_capset",	/* 185 */
+    	"sys_sigaltstack",
+    	"sys_sendfile",
+    	"sys_ni_syscall",	/* reserved for streams1 */
+    	"sys_ni_syscall",	/* reserved for streams2 */
+    	"sys_vfork",		/* 190 */
+    	"sys_getrlimit",
+    	"sys_mmap2",
+    	"sys_truncate64",
+    	"sys_ftruncate64",
+    	"sys_stat64",	/* 195 */
+    	"sys_lstat64",
+    	"sys_fstat64",
+    	"sys_lchown",
+    	"sys_getuid",
+    	"sys_getgid",	/* 200 */
+    	"sys_geteuid",
+    	"sys_getegid",
+    	"sys_setreuid",
+    	"sys_setregid",
+    	"sys_getgroups",	/* 205 */
+    	"sys_setgroups",
+    	"sys_fchown",
+    	"sys_setresuid",
+    	"sys_getresuid",
+    	"sys_setresgid",	/* 210 */
+    	"sys_getresgid",
+    	"sys_chown",
+    	"sys_setuid",
+    	"sys_setgid",
+    	"sys_setfsuid",	/* 215 */
+    	"sys_setfsgid",
+    	"sys_pivot_root",
+    	"sys_mincore",
+    	"sys_madvise",
+    	"sys_getdents64",	/* 220 */
+    	"sys_fcntl64",
+    	"sys_ni_syscall",	/* reserved for TUX */
+    	"sys_ni_syscall",
+    	"sys_gettid",
+    	"sys_readahead",	/* 225 */
+    	"sys_setxattr",
+    	"sys_lsetxattr",
+    	"sys_fsetxattr",
+    	"sys_getxattr",
+    	"sys_lgetxattr",	/* 230 */
+    	"sys_fgetxattr",
+    	"sys_listxattr",
+    	"sys_llistxattr",
+    	"sys_flistxattr",
+    	"sys_removexattr",	/* 235 */
+    	"sys_lremovexattr",
+    	"sys_fremovexattr",
+    	"sys_tkill",
+    	"sys_sendfile64",
+    	"sys_futex",		/* 240 */
+    	"sys_sched_setaffinity",
+    	"sys_sched_getaffinity",
+    	"sys_set_thread_area",
+    	"sys_get_thread_area",
+    	"sys_io_setup",	/* 245 */
+    	"sys_io_destroy",
+    	"sys_io_getevents",
+    	"sys_io_submit",
+    	"sys_io_cancel",
+    	"sys_fadvise64",	/* 250 */
+    	"sys_ni_syscall",
+    	"sys_exit_group",
+    	"sys_lookup_dcookie",
+    	"sys_epoll_create",
+    	"sys_epoll_ctl",	/* 255 */
+    	"sys_epoll_wait",
+     	"sys_remap_file_pages",
+     	"sys_set_tid_address",
+     	"sys_timer_create",
+     	"sys_timer_settime",		/* 260 */
+     	"sys_timer_gettime",
+     	"sys_timer_getoverrun",
+     	"sys_timer_delete",
+     	"sys_clock_settime",
+     	"sys_clock_gettime",		/* 265 */
+     	"sys_clock_getres",
+     	"sys_clock_nanosleep",
+    	"sys_statfs64",
+    	"sys_fstatfs64",
+    	"sys_tgkill",	/* 270 */
+    	"sys_utimes",
+     	"sys_fadvise64_64",
+    	"sys_ni_syscall"	/* sys_vserver */
+    };
+
+    uint32_t    uEAX = CPUMGetGuestEAX(pVM);
+    switch (uEAX)
+    {
+        default:
+            if (uEAX < ELEMENTS(apsz))
+                Log(("REM: linux syscall %3d: %s (eip=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x ebp=%08x)\n",
+                     uEAX, apsz[uEAX], CPUMGetGuestEIP(pVM), CPUMGetGuestEBX(pVM), CPUMGetGuestECX(pVM),
+                     CPUMGetGuestEDX(pVM), CPUMGetGuestESI(pVM), CPUMGetGuestEDI(pVM), CPUMGetGuestEBP(pVM)));
+            else
+                Log(("eip=%08x: linux syscall %d (#%x) unknown\n", CPUMGetGuestEIP(pVM), uEAX, uEAX));
+            break;
+
+    }
+}
+
+
+/**
+ * Dumps an OpenBSD system call.
+ * @param   pVM     VM handle.
+ */
+void remR3DumpOBsdSyscall(PVM pVM)
+{
+    static const char *apsz[] =
+    {
+        "SYS_syscall",		//0
+        "SYS_exit",		//1
+        "SYS_fork",		//2
+        "SYS_read",		//3
+        "SYS_write",		//4
+        "SYS_open",		//5
+        "SYS_close",		//6
+        "SYS_wait4",		//7
+        "SYS_8",
+        "SYS_link",		//9
+        "SYS_unlink",		//10
+        "SYS_11",
+        "SYS_chdir",		//12
+        "SYS_fchdir",		//13
+        "SYS_mknod",		//14
+        "SYS_chmod",		//15
+        "SYS_chown",		//16
+        "SYS_break",		//17
+        "SYS_18",
+        "SYS_19",
+        "SYS_getpid",		//20
+        "SYS_mount",		//21
+        "SYS_unmount",		//22
+        "SYS_setuid",		//23
+        "SYS_getuid",		//24
+        "SYS_geteuid",		//25
+        "SYS_ptrace",		//26
+        "SYS_recvmsg",		//27
+        "SYS_sendmsg",		//28
+        "SYS_recvfrom",		//29
+        "SYS_accept",		//30
+        "SYS_getpeername",	//31
+        "SYS_getsockname",	//32
+        "SYS_access",		//33
+        "SYS_chflags",		//34
+        "SYS_fchflags",		//35
+        "SYS_sync",		//36
+        "SYS_kill",		//37
+        "SYS_38",
+        "SYS_getppid",		//39
+        "SYS_40",
+        "SYS_dup",		//41
+        "SYS_opipe",		//42
+        "SYS_getegid",		//43
+        "SYS_profil",		//44
+        "SYS_ktrace",		//45
+        "SYS_sigaction",	//46
+        "SYS_getgid",		//47
+        "SYS_sigprocmask",	//48
+        "SYS_getlogin",		//49
+        "SYS_setlogin",		//50
+        "SYS_acct",		//51
+        "SYS_sigpending",	//52
+        "SYS_osigaltstack",	//53
+        "SYS_ioctl",		//54
+        "SYS_reboot",		//55
+        "SYS_revoke",		//56
+        "SYS_symlink",		//57
+        "SYS_readlink",		//58
+        "SYS_execve",		//59
+        "SYS_umask",		//60
+        "SYS_chroot",		//61
+        "SYS_62",
+        "SYS_63",
+        "SYS_64",
+        "SYS_65",
+        "SYS_vfork",		//66
+        "SYS_67",
+        "SYS_68",
+        "SYS_sbrk",		//69
+        "SYS_sstk",		//70
+        "SYS_61",
+        "SYS_vadvise",		//72
+        "SYS_munmap",		//73
+        "SYS_mprotect",		//74
+        "SYS_madvise",		//75
+        "SYS_76",
+        "SYS_77",
+        "SYS_mincore",		//78
+        "SYS_getgroups",	//79
+        "SYS_setgroups",	//80
+        "SYS_getpgrp",		//81
+        "SYS_setpgid",		//82
+        "SYS_setitimer",	//83
+        "SYS_84",
+        "SYS_85",
+        "SYS_getitimer",	//86
+        "SYS_87",
+        "SYS_88",
+        "SYS_89",
+        "SYS_dup2",		//90
+        "SYS_91",
+        "SYS_fcntl",		//92
+        "SYS_select",		//93
+        "SYS_94",
+        "SYS_fsync",		//95
+        "SYS_setpriority",	//96
+        "SYS_socket",		//97
+        "SYS_connect",		//98
+        "SYS_99",
+        "SYS_getpriority",	//100
+        "SYS_101",
+        "SYS_102",
+        "SYS_sigreturn",	//103
+        "SYS_bind",		//104
+        "SYS_setsockopt",	//105
+        "SYS_listen",		//106
+        "SYS_107",
+        "SYS_108",
+        "SYS_109",
+        "SYS_110",
+        "SYS_sigsuspend",	//111
+        "SYS_112",
+        "SYS_113",
+        "SYS_114",
+        "SYS_115",
+        "SYS_gettimeofday",	//116
+        "SYS_getrusage",	//117
+        "SYS_getsockopt",	//118
+        "SYS_119",
+        "SYS_readv",		//120
+        "SYS_writev",		//121
+        "SYS_settimeofday",	//122
+        "SYS_fchown",		//123
+        "SYS_fchmod",		//124
+        "SYS_125",
+        "SYS_setreuid",		//126
+        "SYS_setregid",		//127
+        "SYS_rename",		//128
+        "SYS_129",
+        "SYS_130",
+        "SYS_flock",		//131
+        "SYS_mkfifo",		//132
+        "SYS_sendto",		//133
+        "SYS_shutdown",		//134
+        "SYS_socketpair",	//135
+        "SYS_mkdir",		//136
+        "SYS_rmdir",		//137
+        "SYS_utimes",		//138
+        "SYS_139",
+        "SYS_adjtime",		//140
+        "SYS_141",
+        "SYS_142",
+        "SYS_143",
+        "SYS_144",
+        "SYS_145",
+        "SYS_146",
+        "SYS_setsid",		//147
+        "SYS_quotactl",		//148
+        "SYS_149",
+        "SYS_150",
+        "SYS_151",
+        "SYS_152",
+        "SYS_153",
+        "SYS_154",
+        "SYS_nfssvc",		//155
+        "SYS_156",
+        "SYS_157",
+        "SYS_158",
+        "SYS_159",
+        "SYS_160",
+        "SYS_getfh",		//161
+        "SYS_162",
+        "SYS_163",
+        "SYS_164",
+        "SYS_sysarch",		//165
+        "SYS_166",
+        "SYS_167",
+        "SYS_168",
+        "SYS_169",
+        "SYS_170",
+        "SYS_171",
+        "SYS_172",
+        "SYS_pread",		//173
+        "SYS_pwrite",		//174
+        "SYS_175",
+        "SYS_176",
+        "SYS_177",
+        "SYS_178",
+        "SYS_179",
+        "SYS_180",
+        "SYS_setgid",		//181
+        "SYS_setegid",		//182
+        "SYS_seteuid",		//183
+        "SYS_lfs_bmapv",	//184
+        "SYS_lfs_markv",	//185
+        "SYS_lfs_segclean",	//186
+        "SYS_lfs_segwait",	//187
+        "SYS_188",
+        "SYS_189",
+        "SYS_190",
+        "SYS_pathconf",		//191
+        "SYS_fpathconf",	//192
+        "SYS_swapctl",		//193
+        "SYS_getrlimit",	//194
+        "SYS_setrlimit",	//195
+        "SYS_getdirentries",	//196
+        "SYS_mmap",		//197
+        "SYS___syscall",        //198
+        "SYS_lseek",		//199
+        "SYS_truncate",		//200
+        "SYS_ftruncate",	//201
+        "SYS___sysctl",         //202
+        "SYS_mlock",		//203
+        "SYS_munlock",		//204
+        "SYS_205",
+        "SYS_futimes",		//206
+        "SYS_getpgid",		//207
+        "SYS_xfspioctl",	//208
+        "SYS_209",
+        "SYS_210",
+        "SYS_211",
+        "SYS_212",
+        "SYS_213",
+        "SYS_214",
+        "SYS_215",
+        "SYS_216",
+        "SYS_217",
+        "SYS_218",
+        "SYS_219",
+        "SYS_220",
+        "SYS_semget",		//221
+        "SYS_222",
+        "SYS_223",
+        "SYS_224",
+        "SYS_msgget",		//225
+        "SYS_msgsnd",		//226
+        "SYS_msgrcv",		//227
+        "SYS_shmat",		//228
+        "SYS_229",
+        "SYS_shmdt",		//230
+        "SYS_231",
+        "SYS_clock_gettime",	//232
+        "SYS_clock_settime",	//233
+        "SYS_clock_getres",	//234
+        "SYS_235",
+        "SYS_236",
+        "SYS_237",
+        "SYS_238",
+        "SYS_239",
+        "SYS_nanosleep",	//240
+        "SYS_241",
+        "SYS_242",
+        "SYS_243",
+        "SYS_244",
+        "SYS_245",
+        "SYS_246",
+        "SYS_247",
+        "SYS_248",
+        "SYS_249",
+        "SYS_minherit",		//250
+        "SYS_rfork",		//251
+        "SYS_poll",		//252
+        "SYS_issetugid",	//253
+        "SYS_lchown",		//254
+        "SYS_getsid",		//255
+        "SYS_msync",		//256
+        "SYS_257",
+        "SYS_258",
+        "SYS_259",
+        "SYS_getfsstat",	//260
+        "SYS_statfs",		//261
+        "SYS_fstatfs",		//262
+        "SYS_pipe",		//263
+        "SYS_fhopen",		//264
+        "SYS_265",
+        "SYS_fhstatfs",		//266
+        "SYS_preadv",		//267
+        "SYS_pwritev",		//268
+        "SYS_kqueue",		//269
+        "SYS_kevent",		//270
+        "SYS_mlockall",		//271
+        "SYS_munlockall",	//272
+        "SYS_getpeereid",	//273
+        "SYS_274",
+        "SYS_275",
+        "SYS_276",
+        "SYS_277",
+        "SYS_278",
+        "SYS_279",
+        "SYS_280",
+        "SYS_getresuid",	//281
+        "SYS_setresuid",	//282
+        "SYS_getresgid",	//283
+        "SYS_setresgid",	//284
+        "SYS_285",
+        "SYS_mquery",		//286
+        "SYS_closefrom",	//287
+        "SYS_sigaltstack",	//288
+        "SYS_shmget",		//289
+        "SYS_semop",		//290
+        "SYS_stat",		//291
+        "SYS_fstat",		//292
+        "SYS_lstat",		//293
+        "SYS_fhstat",		//294
+        "SYS___semctl",         //295
+        "SYS_shmctl",		//296
+        "SYS_msgctl",		//297
+        "SYS_MAXSYSCALL",	//298
+        //299
+        //300
+    };
+    uint32_t    uEAX;
+    if (!LogIsEnabled())
+        return;
+    uEAX = CPUMGetGuestEAX(pVM);
+    switch (uEAX)
+    {
+        default:
+            if (uEAX < ELEMENTS(apsz))
+            {
+                uint32_t au32Args[8] = {0};
+                PGMPhysSimpleReadGCPtr(pVM, au32Args, CPUMGetGuestESP(pVM), sizeof(au32Args));
+                RTLogPrintf("REM: OpenBSD syscall %3d: %s (eip=%08x %08x %08x %08x %08x %08x %08x %08x %08x)\n",
+                            uEAX, apsz[uEAX], CPUMGetGuestEIP(pVM), au32Args[0], au32Args[1], au32Args[2], au32Args[3],
+                            au32Args[4], au32Args[5], au32Args[6], au32Args[7]);
+            }
+            else
+                RTLogPrintf("eip=%08x: OpenBSD syscall %d (#%x) unknown!!\n", CPUMGetGuestEIP(pVM), uEAX, uEAX);
+            break;
+    }
+}
+
+
+#if defined(IPRT_NO_CRT) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_X86)
+/**
+ * The Dll main entry point (stub).
+ */
+bool __stdcall _DllMainCRTStartup(void *hModule, uint32_t dwReason, void *pvReserved)
+{
+    return true;
+}
+
+void *memcpy(void *dst, const void *src, size_t size)
+{
+    uint8_t *pbDst = (uint8_t *)dst;
+    const uint8_t *pbSrc = (const uint8_t *)src;
+    while (size-- > 0)
+        *pbDst++ = *pbSrc++;
+    return dst;
+}
+
+#endif
+
Index: /trunk/src/recompiler_new/a.out.h
===================================================================
--- /trunk/src/recompiler_new/a.out.h	(revision 13168)
+++ /trunk/src/recompiler_new/a.out.h	(revision 13168)
@@ -0,0 +1,431 @@
+/* a.out.h
+
+   Copyright 1997, 1998, 1999, 2001 Red Hat, Inc.
+
+This file is part of Cygwin.
+
+This software is a copyrighted work licensed under the terms of the
+Cygwin license.  Please consult the file "CYGWIN_LICENSE" for
+details. */
+
+#ifndef _A_OUT_H_
+#define _A_OUT_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#define COFF_IMAGE_WITH_PE
+#define COFF_LONG_SECTION_NAMES
+
+/*** coff information for Intel 386/486.  */
+
+
+/********************** FILE HEADER **********************/
+
+struct external_filehdr {
+  short f_magic;	/* magic number			*/
+  short f_nscns;	/* number of sections		*/
+  unsigned long f_timdat;	/* time & date stamp		*/
+  unsigned long f_symptr;	/* file pointer to symtab	*/
+  unsigned long f_nsyms;	/* number of symtab entries	*/
+  short f_opthdr;	/* sizeof(optional hdr)		*/
+  short f_flags;	/* flags			*/
+};
+
+/* Bits for f_flags:
+ *	F_RELFLG	relocation info stripped from file
+ *	F_EXEC		file is executable (no unresolved external references)
+ *	F_LNNO		line numbers stripped from file
+ *	F_LSYMS		local symbols stripped from file
+ *	F_AR32WR	file has byte ordering of an AR32WR machine (e.g. vax)
+ */
+
+#define F_RELFLG	(0x0001)
+#define F_EXEC		(0x0002)
+#define F_LNNO		(0x0004)
+#define F_LSYMS		(0x0008)
+
+
+
+#define	I386MAGIC	0x14c
+#define I386PTXMAGIC	0x154
+#define I386AIXMAGIC	0x175
+
+/* This is Lynx's all-platform magic number for executables. */
+
+#define LYNXCOFFMAGIC	0415
+
+#define I386BADMAG(x) (((x).f_magic != I386MAGIC) \
+		       && (x).f_magic != I386AIXMAGIC \
+		       && (x).f_magic != I386PTXMAGIC \
+		       && (x).f_magic != LYNXCOFFMAGIC)
+
+#define	FILHDR	struct external_filehdr
+#define	FILHSZ	20
+
+
+/********************** AOUT "OPTIONAL HEADER" **********************/
+
+
+typedef struct
+{
+  unsigned short magic;		/* type of file				*/
+  unsigned short vstamp;	/* version stamp			*/
+  unsigned long	tsize;		/* text size in bytes, padded to FW bdry*/
+  unsigned long	dsize;		/* initialized data "  "		*/
+  unsigned long	bsize;		/* uninitialized data "   "		*/
+  unsigned long	entry;		/* entry pt.				*/
+  unsigned long text_start;	/* base of text used for this file */
+  unsigned long data_start;	/* base of data used for this file */
+}
+AOUTHDR;
+
+#define AOUTSZ 28
+#define AOUTHDRSZ 28
+
+#define OMAGIC          0404    /* object files, eg as output */
+#define ZMAGIC          0413    /* demand load format, eg normal ld output */
+#define STMAGIC		0401	/* target shlib */
+#define SHMAGIC		0443	/* host   shlib */
+
+
+/* define some NT default values */
+/*  #define NT_IMAGE_BASE        0x400000 moved to internal.h */
+#define NT_SECTION_ALIGNMENT 0x1000
+#define NT_FILE_ALIGNMENT    0x200
+#define NT_DEF_RESERVE       0x100000
+#define NT_DEF_COMMIT        0x1000
+
+/********************** SECTION HEADER **********************/
+
+
+struct external_scnhdr {
+  char		s_name[8];	/* section name			*/
+  unsigned long	s_paddr;	/* physical address, offset
+				   of last addr in scn */
+  unsigned long	s_vaddr;	/* virtual address		*/
+  unsigned long	s_size;		/* section size			*/
+  unsigned long	s_scnptr;	/* file ptr to raw data for section */
+  unsigned long	s_relptr;	/* file ptr to relocation	*/
+  unsigned long	s_lnnoptr;	/* file ptr to line numbers	*/
+  unsigned short s_nreloc;	/* number of relocation entries	*/
+  unsigned short s_nlnno;	/* number of line number entries*/
+  unsigned long	s_flags;	/* flags			*/
+};
+
+#define	SCNHDR	struct external_scnhdr
+#define	SCNHSZ	40
+
+/*
+ * names of "special" sections
+ */
+#define _TEXT	".text"
+#define _DATA	".data"
+#define _BSS	".bss"
+#define _COMMENT ".comment"
+#define _LIB ".lib"
+
+/********************** LINE NUMBERS **********************/
+
+/* 1 line number entry for every "breakpointable" source line in a section.
+ * Line numbers are grouped on a per function basis; first entry in a function
+ * grouping will have l_lnno = 0 and in place of physical address will be the
+ * symbol table index of the function name.
+ */
+struct external_lineno {
+  union {
+    unsigned long l_symndx; /* function name symbol index, iff l_lnno 0 */
+    unsigned long l_paddr;	/* (physical) address of line number	*/
+  } l_addr;
+  unsigned short l_lnno;	/* line number		*/
+};
+
+#define	LINENO	struct external_lineno
+#define	LINESZ	6
+
+/********************** SYMBOLS **********************/
+
+#define E_SYMNMLEN	8	/* # characters in a symbol name	*/
+#define E_FILNMLEN	14	/* # characters in a file name		*/
+#define E_DIMNUM	4	/* # array dimensions in auxiliary entry */
+
+struct __attribute__((packed)) external_syment
+{
+  union {
+    char e_name[E_SYMNMLEN];
+    struct {
+      unsigned long e_zeroes;
+      unsigned long e_offset;
+    } e;
+  } e;
+  unsigned long e_value;
+  unsigned short e_scnum;
+  unsigned short e_type;
+  char e_sclass[1];
+  char e_numaux[1];
+};
+
+#define N_BTMASK	(0xf)
+#define N_TMASK		(0x30)
+#define N_BTSHFT	(4)
+#define N_TSHIFT	(2)
+
+union external_auxent {
+  struct {
+    unsigned long x_tagndx;	/* str, un, or enum tag indx */
+    union {
+      struct {
+	unsigned short  x_lnno; /* declaration line number */
+	unsigned short  x_size; /* str/union/array size */
+      } x_lnsz;
+      unsigned long x_fsize;	/* size of function */
+    } x_misc;
+    union {
+      struct {			/* if ISFCN, tag, or .bb */
+	unsigned long x_lnnoptr;/* ptr to fcn line # */
+	unsigned long x_endndx;	/* entry ndx past block end */
+      } x_fcn;
+      struct {			/* if ISARY, up to 4 dimen. */
+	char x_dimen[E_DIMNUM][2];
+      } x_ary;
+    } x_fcnary;
+    unsigned short x_tvndx;	/* tv index */
+  } x_sym;
+
+  union {
+    char x_fname[E_FILNMLEN];
+    struct {
+      unsigned long x_zeroes;
+      unsigned long x_offset;
+    } x_n;
+  } x_file;
+
+  struct {
+    unsigned long x_scnlen;	/* section length */
+    unsigned short x_nreloc;	/* # relocation entries */
+    unsigned short x_nlinno;	/* # line numbers */
+    unsigned long x_checksum;	/* section COMDAT checksum */
+    unsigned short x_associated;/* COMDAT associated section index */
+    char x_comdat[1];		/* COMDAT selection number */
+  } x_scn;
+
+  struct {
+    unsigned long x_tvfill;	/* tv fill value */
+    unsigned short x_tvlen;	/* length of .tv */
+    char x_tvran[2][2];		/* tv range */
+  } x_tv;	/* info about .tv section (in auxent of symbol .tv)) */
+
+};
+
+#define	SYMENT	struct external_syment
+#define	SYMESZ	18
+#define	AUXENT	union external_auxent
+#define	AUXESZ	18
+
+#define _ETEXT	"etext"
+
+/********************** RELOCATION DIRECTIVES **********************/
+
+struct external_reloc {
+  char r_vaddr[4];
+  char r_symndx[4];
+  char r_type[2];
+};
+
+#define RELOC struct external_reloc
+#define RELSZ 10
+
+/* end of coff/i386.h */
+
+/* PE COFF header information */
+
+#ifndef _PE_H
+#define _PE_H
+
+/* NT specific file attributes */
+#define IMAGE_FILE_RELOCS_STRIPPED           0x0001
+#define IMAGE_FILE_EXECUTABLE_IMAGE          0x0002
+#define IMAGE_FILE_LINE_NUMS_STRIPPED        0x0004
+#define IMAGE_FILE_LOCAL_SYMS_STRIPPED       0x0008
+#define IMAGE_FILE_BYTES_REVERSED_LO         0x0080
+#define IMAGE_FILE_32BIT_MACHINE             0x0100
+#define IMAGE_FILE_DEBUG_STRIPPED            0x0200
+#define IMAGE_FILE_SYSTEM                    0x1000
+#define IMAGE_FILE_DLL                       0x2000
+#define IMAGE_FILE_BYTES_REVERSED_HI         0x8000
+
+/* additional flags to be set for section headers to allow the NT loader to
+   read and write to the section data (to replace the addresses of data in
+   dlls for one thing); also to execute the section in .text's case */
+#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000
+#define IMAGE_SCN_MEM_EXECUTE     0x20000000
+#define IMAGE_SCN_MEM_READ        0x40000000
+#define IMAGE_SCN_MEM_WRITE       0x80000000
+
+/*
+ * Section characteristics added for ppc-nt
+ */
+
+#define IMAGE_SCN_TYPE_NO_PAD                0x00000008  /* Reserved.  */
+
+#define IMAGE_SCN_CNT_CODE                   0x00000020  /* Section contains code. */
+#define IMAGE_SCN_CNT_INITIALIZED_DATA       0x00000040  /* Section contains initialized data. */
+#define IMAGE_SCN_CNT_UNINITIALIZED_DATA     0x00000080  /* Section contains uninitialized data. */
+
+#define IMAGE_SCN_LNK_OTHER                  0x00000100  /* Reserved.  */
+#define IMAGE_SCN_LNK_INFO                   0x00000200  /* Section contains comments or some other type of information. */
+#define IMAGE_SCN_LNK_REMOVE                 0x00000800  /* Section contents will not become part of image. */
+#define IMAGE_SCN_LNK_COMDAT                 0x00001000  /* Section contents comdat. */
+
+#define IMAGE_SCN_MEM_FARDATA                0x00008000
+
+#define IMAGE_SCN_MEM_PURGEABLE              0x00020000
+#define IMAGE_SCN_MEM_16BIT                  0x00020000
+#define IMAGE_SCN_MEM_LOCKED                 0x00040000
+#define IMAGE_SCN_MEM_PRELOAD                0x00080000
+
+#define IMAGE_SCN_ALIGN_1BYTES               0x00100000
+#define IMAGE_SCN_ALIGN_2BYTES               0x00200000
+#define IMAGE_SCN_ALIGN_4BYTES               0x00300000
+#define IMAGE_SCN_ALIGN_8BYTES               0x00400000
+#define IMAGE_SCN_ALIGN_16BYTES              0x00500000  /* Default alignment if no others are specified. */
+#define IMAGE_SCN_ALIGN_32BYTES              0x00600000
+#define IMAGE_SCN_ALIGN_64BYTES              0x00700000
+
+
+#define IMAGE_SCN_LNK_NRELOC_OVFL            0x01000000  /* Section contains extended relocations. */
+#define IMAGE_SCN_MEM_NOT_CACHED             0x04000000  /* Section is not cachable.               */
+#define IMAGE_SCN_MEM_NOT_PAGED              0x08000000  /* Section is not pageable.               */
+#define IMAGE_SCN_MEM_SHARED                 0x10000000  /* Section is shareable.                  */
+
+/* COMDAT selection codes.  */
+
+#define IMAGE_COMDAT_SELECT_NODUPLICATES     (1) /* Warn if duplicates.  */
+#define IMAGE_COMDAT_SELECT_ANY		     (2) /* No warning.  */
+#define IMAGE_COMDAT_SELECT_SAME_SIZE	     (3) /* Warn if different size.  */
+#define IMAGE_COMDAT_SELECT_EXACT_MATCH	     (4) /* Warn if different.  */
+#define IMAGE_COMDAT_SELECT_ASSOCIATIVE	     (5) /* Base on other section.  */
+
+/* Magic values that are true for all dos/nt implementations */
+#define DOSMAGIC       0x5a4d
+#define NT_SIGNATURE   0x00004550
+
+/* NT allows long filenames, we want to accommodate this.  This may break
+     some of the bfd functions */
+#undef  FILNMLEN
+#define FILNMLEN	18	/* # characters in a file name		*/
+
+
+#ifdef COFF_IMAGE_WITH_PE
+/* The filehdr is only weird in images */
+
+#undef FILHDR
+struct external_PE_filehdr
+{
+  /* DOS header fields */
+  unsigned short e_magic;	/* Magic number, 0x5a4d */
+  unsigned short e_cblp;	/* Bytes on last page of file, 0x90 */
+  unsigned short e_cp;		/* Pages in file, 0x3 */
+  unsigned short e_crlc;	/* Relocations, 0x0 */
+  unsigned short e_cparhdr;	/* Size of header in paragraphs, 0x4 */
+  unsigned short e_minalloc;	/* Minimum extra paragraphs needed, 0x0 */
+  unsigned short e_maxalloc;	/* Maximum extra paragraphs needed, 0xFFFF */
+  unsigned short e_ss;		/* Initial (relative) SS value, 0x0 */
+  unsigned short e_sp;		/* Initial SP value, 0xb8 */
+  unsigned short e_csum;	/* Checksum, 0x0 */
+  unsigned short e_ip;		/* Initial IP value, 0x0 */
+  unsigned short e_cs;		/* Initial (relative) CS value, 0x0 */
+  unsigned short e_lfarlc;	/* File address of relocation table, 0x40 */
+  unsigned short e_ovno;	/* Overlay number, 0x0 */
+  char e_res[4][2];		/* Reserved words, all 0x0 */
+  unsigned short e_oemid;	/* OEM identifier (for e_oeminfo), 0x0 */
+  unsigned short e_oeminfo;	/* OEM information; e_oemid specific, 0x0 */
+  char e_res2[10][2];		/* Reserved words, all 0x0 */
+  unsigned long e_lfanew;	/* File address of new exe header, 0x80 */
+  char dos_message[16][4];	/* other stuff, always follow DOS header */
+  unsigned int nt_signature;	/* required NT signature, 0x4550 */
+
+  /* From standard header */
+
+  unsigned short f_magic;	/* magic number			*/
+  unsigned short f_nscns;	/* number of sections		*/
+  unsigned long f_timdat;	/* time & date stamp		*/
+  unsigned long f_symptr;	/* file pointer to symtab	*/
+  unsigned long f_nsyms;	/* number of symtab entries	*/
+  unsigned short f_opthdr;	/* sizeof(optional hdr)		*/
+  unsigned short f_flags;	/* flags			*/
+};
+
+
+#define FILHDR struct external_PE_filehdr
+#undef FILHSZ
+#define FILHSZ 152
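+/* Editorial size check: 64-byte DOS header + 64 bytes of dos_message
+ * + 4-byte NT signature + 20-byte standard COFF file header = 152,
+ * which is the FILHSZ value above. */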
+
+#endif
+
+typedef struct
+{
+  unsigned short magic;		/* type of file				*/
+  unsigned short vstamp;	/* version stamp			*/
+  unsigned long	tsize;		/* text size in bytes, padded to FW bdry*/
+  unsigned long	dsize;		/* initialized data "  "		*/
+  unsigned long	bsize;		/* uninitialized data "   "		*/
+  unsigned long	entry;		/* entry pt.				*/
+  unsigned long text_start;	/* base of text used for this file */
+  unsigned long data_start;	/* base of all data used for this file */
+
+  /* NT extra fields; see internal.h for descriptions */
+  unsigned long  ImageBase;
+  unsigned long  SectionAlignment;
+  unsigned long  FileAlignment;
+  unsigned short  MajorOperatingSystemVersion;
+  unsigned short  MinorOperatingSystemVersion;
+  unsigned short  MajorImageVersion;
+  unsigned short  MinorImageVersion;
+  unsigned short  MajorSubsystemVersion;
+  unsigned short  MinorSubsystemVersion;
+  char  Reserved1[4];
+  unsigned long  SizeOfImage;
+  unsigned long  SizeOfHeaders;
+  unsigned long  CheckSum;
+  unsigned short Subsystem;
+  unsigned short DllCharacteristics;
+  unsigned long  SizeOfStackReserve;
+  unsigned long  SizeOfStackCommit;
+  unsigned long  SizeOfHeapReserve;
+  unsigned long  SizeOfHeapCommit;
+  unsigned long  LoaderFlags;
+  unsigned long  NumberOfRvaAndSizes;
+  /* IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; */
+  char  DataDirectory[16][2][4]; /* 16 entries, 2 elements/entry, 4 chars */
+
+} PEAOUTHDR;
+
+
+#undef AOUTSZ
+#define AOUTSZ (AOUTHDRSZ + 196)
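+/* Editorial size check: AOUTHDRSZ (28) + 68 bytes of NT-specific fields
+ * + 128 bytes of DataDirectory (16 entries of 8 bytes each) = 224, the size
+ * of a PE32 optional header. */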
+
+#undef  E_FILNMLEN
+#define E_FILNMLEN	18	/* # characters in a file name		*/
+#endif
+
+/* end of coff/pe.h */
+
+#define DT_NON		(0)	/* no derived type */
+#define DT_PTR		(1)	/* pointer */
+#define DT_FCN		(2)	/* function */
+#define DT_ARY		(3)	/* array */
+
+#define ISPTR(x)	(((x) & N_TMASK) == (DT_PTR << N_BTSHFT))
+#define ISFCN(x)	(((x) & N_TMASK) == (DT_FCN << N_BTSHFT))
+#define ISARY(x)	(((x) & N_TMASK) == (DT_ARY << N_BTSHFT))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _A_OUT_H_ */
+
Index: /trunk/src/recompiler_new/bswap.h
===================================================================
--- /trunk/src/recompiler_new/bswap.h	(revision 13168)
+++ /trunk/src/recompiler_new/bswap.h	(revision 13168)
@@ -0,0 +1,242 @@
+#ifndef BSWAP_H
+#define BSWAP_H
+
+#include "config-host.h"
+
+#ifndef _MSC_VER
+#include <inttypes.h>
+#endif
+
+#ifdef HAVE_BYTESWAP_H
+#include <byteswap.h>
+#else
+#ifdef _MSC_VER
+static _inline uint16_t bswap_16(register uint16_t x)
+{
+    return ((uint16_t)(
+		(((uint16_t)(x) & (uint16_t)0x00ffU) << 8) |
+		(((uint16_t)(x) & (uint16_t)0xff00U) >> 8) ));
+}
+
+static _inline uint32_t bswap_32(register uint32_t x)
+{
+    return ((uint32_t)(
+		(((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) |
+		(((uint32_t)(x) & (uint32_t)0x0000ff00UL) <<  8) |
+		(((uint32_t)(x) & (uint32_t)0x00ff0000UL) >>  8) |
+		(((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24) ));
+}
+
+static _inline uint64_t bswap_64(register uint64_t x)
+{
+    return ((uint64_t)(
+		(uint64_t)(((uint64_t)(x) & (uint64_t)0x00000000000000ffULL) << 56) |
+		(uint64_t)(((uint64_t)(x) & (uint64_t)0x000000000000ff00ULL) << 40) |
+		(uint64_t)(((uint64_t)(x) & (uint64_t)0x0000000000ff0000ULL) << 24) |
+		(uint64_t)(((uint64_t)(x) & (uint64_t)0x00000000ff000000ULL) <<  8) |
+		(uint64_t)(((uint64_t)(x) & (uint64_t)0x000000ff00000000ULL) >>  8) |
+		(uint64_t)(((uint64_t)(x) & (uint64_t)0x0000ff0000000000ULL) >> 24) |
+		(uint64_t)(((uint64_t)(x) & (uint64_t)0x00ff000000000000ULL) >> 40) |
+		(uint64_t)(((uint64_t)(x) & (uint64_t)0xff00000000000000ULL) >> 56) ));
+}
+
+#else
+
+#define bswap_16(x) __extension__ /* <- VBOX */ \
+({ \
+	uint16_t __x = (x); \
+	((uint16_t)( \
+		(((uint16_t)(__x) & (uint16_t)0x00ffU) << 8) | \
+		(((uint16_t)(__x) & (uint16_t)0xff00U) >> 8) )); \
+})
+
+#define bswap_32(x) __extension__ /* <- VBOX */ \
+({ \
+	uint32_t __x = (x); \
+	((uint32_t)( \
+		(((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \
+		(((uint32_t)(__x) & (uint32_t)0x0000ff00UL) <<  8) | \
+		(((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >>  8) | \
+		(((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) )); \
+})
+
+#define bswap_64(x) __extension__ /* <- VBOX */ \
+({ \
+	uint64_t __x = (x); \
+	((uint64_t)( \
+		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000000000ffULL) << 56) | \
+		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000000000ff00ULL) << 40) | \
+		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \
+		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000ff000000ULL) <<  8) | \
+	        (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000ff00000000ULL) >>  8) | \
+		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \
+		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \
+		(uint64_t)(((uint64_t)(__x) & (uint64_t)0xff00000000000000ULL) >> 56) )); \
+})
+#endif
+
+#endif /* !HAVE_BYTESWAP_H */
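+/* Editorial sanity examples for the swappers above:
+ *   bswap_16(0x1234)                == 0x3412
+ *   bswap_32(0x12345678)            == 0x78563412
+ *   bswap_64(0x0102030405060708ULL) == 0x0807060504030201ULL
+ */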
+
+#ifndef bswap16 /* BSD endian.h clash */
+static inline uint16_t bswap16(uint16_t x)
+{
+    return bswap_16(x);
+}
+#endif
+
+#ifndef bswap32 /* BSD endian.h clash */
+static inline uint32_t bswap32(uint32_t x)
+{
+    return bswap_32(x);
+}
+#endif
+
+#ifndef bswap64 /* BSD endian.h clash. */
+static inline uint64_t bswap64(uint64_t x)
+{
+    return bswap_64(x);
+}
+#endif
+
+static inline void bswap16s(uint16_t *s)
+{
+    *s = bswap16(*s);
+}
+
+static inline void bswap32s(uint32_t *s)
+{
+    *s = bswap32(*s);
+}
+
+static inline void bswap64s(uint64_t *s)
+{
+    *s = bswap64(*s);
+}
+
+#if defined(WORDS_BIGENDIAN)
+#define be_bswap(v, size) (v)
+#define le_bswap(v, size) bswap ## size(v)
+#define be_bswaps(v, size)
+#define le_bswaps(p, size) *p = bswap ## size(*p);
+#else
+#define le_bswap(v, size) (v)
+#define be_bswap(v, size) bswap ## size(v)
+#define le_bswaps(v, size)
+#define be_bswaps(p, size) *p = bswap ## size(*p);
+#endif
+
+#define CPU_CONVERT(endian, size, type)\
+static inline type endian ## size ## _to_cpu(type v)\
+{\
+    return endian ## _bswap(v, size);\
+}\
+\
+static inline type cpu_to_ ## endian ## size(type v)\
+{\
+    return endian ## _bswap(v, size);\
+}\
+\
+static inline void endian ## size ## _to_cpus(type *p)\
+{\
+    endian ## _bswaps(p, size)\
+}\
+\
+static inline void cpu_to_ ## endian ## size ## s(type *p)\
+{\
+    endian ## _bswaps(p, size)\
+}\
+\
+static inline type endian ## size ## _to_cpup(const type *p)\
+{\
+    return endian ## size ## _to_cpu(*p);\
+}\
+\
+static inline void cpu_to_ ## endian ## size ## w(type *p, type v)\
+{\
+     *p = cpu_to_ ## endian ## size(v);\
+}
+
+CPU_CONVERT(be, 16, uint16_t)
+CPU_CONVERT(be, 32, uint32_t)
+CPU_CONVERT(be, 64, uint64_t)
+
+CPU_CONVERT(le, 16, uint16_t)
+CPU_CONVERT(le, 32, uint32_t)
+CPU_CONVERT(le, 64, uint64_t)
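+/* Editorial sketch of what CPU_CONVERT generates; e.g. CPU_CONVERT(le, 32,
+ * uint32_t) above defines the static inline helpers
+ *     uint32_t le32_to_cpu(uint32_t v);                  value conversion
+ *     uint32_t cpu_to_le32(uint32_t v);
+ *     void     le32_to_cpus(uint32_t *p);                in-place conversion
+ *     void     cpu_to_le32s(uint32_t *p);
+ *     uint32_t le32_to_cpup(const uint32_t *p);          load through pointer
+ *     void     cpu_to_le32w(uint32_t *p, uint32_t v);    store through pointer
+ * On a little-endian host the le-prefixed family is a no-op and the
+ * be-prefixed family byte-swaps; on a big-endian host it is the reverse. */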
+
+/* unaligned versions (optimized for frequent unaligned accesses) */
+
+#if defined(__i386__) || defined(__powerpc__)
+
+#define cpu_to_le16wu(p, v) cpu_to_le16w(p, v)
+#define cpu_to_le32wu(p, v) cpu_to_le32w(p, v)
+#define le16_to_cpupu(p) le16_to_cpup(p)
+#define le32_to_cpupu(p) le32_to_cpup(p)
+
+#define cpu_to_be16wu(p, v) cpu_to_be16w(p, v)
+#define cpu_to_be32wu(p, v) cpu_to_be32w(p, v)
+
+#else
+
+static inline void cpu_to_le16wu(uint16_t *p, uint16_t v)
+{
+    uint8_t *p1 = (uint8_t *)p;
+
+    p1[0] = (uint8_t)v;
+    p1[1] = v >> 8;
+}
+
+static inline void cpu_to_le32wu(uint32_t *p, uint32_t v)
+{
+    uint8_t *p1 = (uint8_t *)p;
+
+    p1[0] = (uint8_t)v;
+    p1[1] = v >> 8;
+    p1[2] = v >> 16;
+    p1[3] = v >> 24;
+}
+
+static inline uint16_t le16_to_cpupu(const uint16_t *p)
+{
+    const uint8_t *p1 = (const uint8_t *)p;
+    return p1[0] | (p1[1] << 8);
+}
+
+static inline uint32_t le32_to_cpupu(const uint32_t *p)
+{
+    const uint8_t *p1 = (const uint8_t *)p;
+    return p1[0] | (p1[1] << 8) | (p1[2] << 16) | (p1[3] << 24);
+}
+
+static inline void cpu_to_be16wu(uint16_t *p, uint16_t v)
+{
+    uint8_t *p1 = (uint8_t *)p;
+
+    p1[0] = v >> 8;
+    p1[1] = (uint8_t)v;
+}
+
+static inline void cpu_to_be32wu(uint32_t *p, uint32_t v)
+{
+    uint8_t *p1 = (uint8_t *)p;
+
+    p1[0] = v >> 24;
+    p1[1] = v >> 16;
+    p1[2] = v >> 8;
+    p1[3] = (uint8_t)v;
+}
+
+#endif
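+/* Editorial note: on i386 and powerpc hosts the unaligned "wu"/"pu" variants
+ * above simply alias the aligned helpers, since those CPUs handle misaligned
+ * loads and stores in hardware; elsewhere the fallback assembles the value
+ * one byte at a time in the requested endianness. */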
+
+#ifdef WORDS_BIGENDIAN
+#define cpu_to_32wu cpu_to_be32wu
+#else
+#define cpu_to_32wu cpu_to_le32wu
+#endif
+
+#undef le_bswap
+#undef be_bswap
+#undef le_bswaps
+#undef be_bswaps
+
+#endif /* BSWAP_H */
Index: /trunk/src/recompiler_new/cpu-all.h
===================================================================
--- /trunk/src/recompiler_new/cpu-all.h	(revision 13168)
+++ /trunk/src/recompiler_new/cpu-all.h	(revision 13168)
@@ -0,0 +1,1235 @@
+/*
+ * defines common to all virtual CPUs
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#ifndef CPU_ALL_H
+#define CPU_ALL_H
+
+#ifdef VBOX
+# ifndef LOG_GROUP
+#  include <VBox/log.h>
+#  define LOG_GROUP LOG_GROUP_REM
+# endif
+# include <VBox/pgm.h> /* PGM_DYNAMIC_RAM_ALLOC */
+#endif
+
+#if defined(__arm__) || defined(__sparc__)
+#define WORDS_ALIGNED
+#endif
+
+/* some important defines:
+ *
+ * WORDS_ALIGNED : if defined, the host cpu can only make word aligned
+ * memory accesses.
+ *
+ * WORDS_BIGENDIAN : if defined, the host cpu is big endian and
+ * otherwise little endian.
+ *
+ * (TARGET_WORDS_ALIGNED : same for target cpu (not supported yet))
+ *
+ * TARGET_WORDS_BIGENDIAN : same for target cpu
+ */
+
+#include "bswap.h"
+
+#if defined(WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
+#define BSWAP_NEEDED
+#endif
+
+#ifdef BSWAP_NEEDED
+
+static inline uint16_t tswap16(uint16_t s)
+{
+    return bswap16(s);
+}
+
+static inline uint32_t tswap32(uint32_t s)
+{
+    return bswap32(s);
+}
+
+static inline uint64_t tswap64(uint64_t s)
+{
+    return bswap64(s);
+}
+
+static inline void tswap16s(uint16_t *s)
+{
+    *s = bswap16(*s);
+}
+
+static inline void tswap32s(uint32_t *s)
+{
+    *s = bswap32(*s);
+}
+
+static inline void tswap64s(uint64_t *s)
+{
+    *s = bswap64(*s);
+}
+
+#else
+
+static inline uint16_t tswap16(uint16_t s)
+{
+    return s;
+}
+
+static inline uint32_t tswap32(uint32_t s)
+{
+    return s;
+}
+
+static inline uint64_t tswap64(uint64_t s)
+{
+    return s;
+}
+
+static inline void tswap16s(uint16_t *s)
+{
+}
+
+static inline void tswap32s(uint32_t *s)
+{
+}
+
+static inline void tswap64s(uint64_t *s)
+{
+}
+
+#endif
+
+#if TARGET_LONG_SIZE == 4
+#define tswapl(s) tswap32(s)
+#define tswapls(s) tswap32s((uint32_t *)(s))
+#define bswaptls(s) bswap32s(s)
+#else
+#define tswapl(s) tswap64(s)
+#define tswapls(s) tswap64s((uint64_t *)(s))
+#define bswaptls(s) bswap64s(s)
+#endif
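+/* Editorial note: the tswap helpers convert between target-CPU and host byte
+ * order; when both ends share the same endianness (BSWAP_NEEDED not defined)
+ * they reduce to plain copies.  tswapl()/tswapls()/bswaptls() select the
+ * 32- or 64-bit variant according to TARGET_LONG_SIZE. */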
+
+/* NOTE: arm FPA is horrible as double 32 bit words are stored in big
+   endian ! */
+typedef union {
+    float64 d;
+#if defined(WORDS_BIGENDIAN) \
+    || (defined(__arm__) && !defined(__VFP_FP__) && !defined(CONFIG_SOFTFLOAT))
+    struct {
+        uint32_t upper;
+        uint32_t lower;
+    } l;
+#else
+    struct {
+        uint32_t lower;
+        uint32_t upper;
+    } l;
+#endif
+    uint64_t ll;
+} CPU_DoubleU;
+
+/* CPU memory access without any memory or io remapping */
+
+/*
+ * the generic syntax for the memory accesses is:
+ *
+ * load: ld{type}{sign}{size}{endian}_{access_type}(ptr)
+ *
+ * store: st{type}{size}{endian}_{access_type}(ptr, val)
+ *
+ * type is:
+ * (empty): integer access
+ *   f    : float access
+ *
+ * sign is:
+ * (empty): for floats or 32 bit size
+ *   u    : unsigned
+ *   s    : signed
+ *
+ * size is:
+ *   b: 8 bits
+ *   w: 16 bits
+ *   l: 32 bits
+ *   q: 64 bits
+ *
+ * endian is:
+ * (empty): target cpu endianness or 8 bit access
+ *   r    : reversed target cpu endianness (not implemented yet)
+ *   be   : big endian (not implemented yet)
+ *   le   : little endian (not implemented yet)
+ *
+ * access_type is:
+ *   raw    : host memory access
+ *   user   : user mode access using soft MMU
+ *   kernel : kernel mode access using soft MMU
+ */
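+/*
+ * A few decoded examples of the scheme above (illustrative only):
+ *
+ *   ldub_raw(p)    : load unsigned 8 bit integer, host memory access
+ *   ldsw_raw(p)    : load signed 16 bit integer, target endianness, host access
+ *   ldl_kernel(p)  : load 32 bit integer, kernel mode access using soft MMU
+ *   stfq_raw(p, v) : store 64 bit float, target endianness, host memory access
+ */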
+#ifdef VBOX
+
+void     remR3PhysRead(RTGCPHYS SrcGCPhys, void *pvDst, unsigned cb);
+uint8_t  remR3PhysReadU8(RTGCPHYS SrcGCPhys);
+int8_t   remR3PhysReadS8(RTGCPHYS SrcGCPhys);
+uint16_t remR3PhysReadU16(RTGCPHYS SrcGCPhys);
+int16_t  remR3PhysReadS16(RTGCPHYS SrcGCPhys);
+uint32_t remR3PhysReadU32(RTGCPHYS SrcGCPhys);
+int32_t  remR3PhysReadS32(RTGCPHYS SrcGCPhys);
+uint64_t remR3PhysReadU64(RTGCPHYS SrcGCPhys);
+int64_t  remR3PhysReadS64(RTGCPHYS SrcGCPhys);
+void     remR3PhysWrite(RTGCPHYS DstGCPhys, const void *pvSrc, unsigned cb);
+void     remR3PhysWriteU8(RTGCPHYS DstGCPhys, uint8_t val);
+void     remR3PhysWriteU16(RTGCPHYS DstGCPhys, uint16_t val);
+void     remR3PhysWriteU32(RTGCPHYS DstGCPhys, uint32_t val);
+void     remR3PhysWriteU64(RTGCPHYS DstGCPhys, uint64_t val);
+
+#ifndef VBOX_WITH_NEW_PHYS_CODE
+void     remR3GrowDynRange(unsigned long physaddr);
+#endif
+#if 0 /*defined(RT_ARCH_AMD64) && defined(VBOX_STRICT)*/
+# define VBOX_CHECK_ADDR(ptr) do { if ((uintptr_t)(ptr) >= _4G) __asm__("int3"); } while (0)
+#else
+# define VBOX_CHECK_ADDR(ptr) do { } while (0)
+#endif
+
+static inline int ldub_p(void *ptr)
+{
+    VBOX_CHECK_ADDR(ptr);
+    return remR3PhysReadU8((uintptr_t)ptr);
+}
+
+static inline int ldsb_p(void *ptr)
+{
+    VBOX_CHECK_ADDR(ptr);
+    return remR3PhysReadS8((uintptr_t)ptr);
+}
+
+static inline void stb_p(void *ptr, int v)
+{
+    VBOX_CHECK_ADDR(ptr);
+    remR3PhysWriteU8((uintptr_t)ptr, v);
+}
+
+static inline int lduw_le_p(void *ptr)
+{
+    VBOX_CHECK_ADDR(ptr);
+    return remR3PhysReadU16((uintptr_t)ptr);
+}
+
+static inline int ldsw_le_p(void *ptr)
+{
+    VBOX_CHECK_ADDR(ptr);
+    return remR3PhysReadS16((uintptr_t)ptr);
+}
+
+static inline void stw_le_p(void *ptr, int v)
+{
+    VBOX_CHECK_ADDR(ptr);
+    remR3PhysWriteU16((uintptr_t)ptr, v);
+}
+
+static inline int ldl_le_p(void *ptr)
+{
+    VBOX_CHECK_ADDR(ptr);
+    return remR3PhysReadU32((uintptr_t)ptr);
+}
+
+static inline void stl_le_p(void *ptr, int v)
+{
+    VBOX_CHECK_ADDR(ptr);
+    remR3PhysWriteU32((uintptr_t)ptr, v);
+}
+
+static inline void stq_le_p(void *ptr, uint64_t v)
+{
+    VBOX_CHECK_ADDR(ptr);
+    remR3PhysWriteU64((uintptr_t)ptr, v);
+}
+
+static inline uint64_t ldq_le_p(void *ptr)
+{
+    VBOX_CHECK_ADDR(ptr);
+    return remR3PhysReadU64((uintptr_t)ptr);
+}
+
+#undef VBOX_CHECK_ADDR
+
+/* float access */
+
+static inline float32 ldfl_le_p(void *ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = ldl_le_p(ptr);
+    return u.f;
+}
+
+static inline void stfl_le_p(void *ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    stl_le_p(ptr, u.i);
+}
+
+static inline float64 ldfq_le_p(void *ptr)
+{
+    CPU_DoubleU u;
+    u.l.lower = ldl_le_p(ptr);
+    u.l.upper = ldl_le_p(ptr + 4);
+    return u.d;
+}
+
+static inline void stfq_le_p(void *ptr, float64 v)
+{
+    CPU_DoubleU u;
+    u.d = v;
+    stl_le_p(ptr, u.l.lower);
+    stl_le_p(ptr + 4, u.l.upper);
+}
+
+#else  /* !VBOX */
+
+static inline int ldub_p(void *ptr)
+{
+    return *(uint8_t *)ptr;
+}
+
+static inline int ldsb_p(void *ptr)
+{
+    return *(int8_t *)ptr;
+}
+
+static inline void stb_p(void *ptr, int v)
+{
+    *(uint8_t *)ptr = v;
+}
+
+/* NOTE: on ARM, writing 2 to /proc/sys/debug/alignment so that the
+   kernel handles unaligned loads/stores may give better results, but
+   it is a system wide setting: bad */
+#if defined(WORDS_BIGENDIAN) || defined(WORDS_ALIGNED)
+
+/* conservative code for little endian unaligned accesses */
+static inline int lduw_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return val;
+#else
+    uint8_t *p = ptr;
+    return p[0] | (p[1] << 8);
+#endif
+}
+
+static inline int ldsw_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (int16_t)val;
+#else
+    uint8_t *p = ptr;
+    return (int16_t)(p[0] | (p[1] << 8));
+#endif
+}
+
+static inline int ldl_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return val;
+#else
+    uint8_t *p = ptr;
+    return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+#endif
+}
+
+static inline uint64_t ldq_le_p(void *ptr)
+{
+    uint8_t *p = ptr;
+    uint32_t v1, v2;
+    v1 = ldl_le_p(p);
+    v2 = ldl_le_p(p + 4);
+    return v1 | ((uint64_t)v2 << 32);
+}
+
+static inline void stw_le_p(void *ptr, int v)
+{
+#ifdef __powerpc__
+    __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*(uint16_t *)ptr) : "r" (v), "r" (ptr));
+#else
+    uint8_t *p = ptr;
+    p[0] = v;
+    p[1] = v >> 8;
+#endif
+}
+
+static inline void stl_le_p(void *ptr, int v)
+{
+#ifdef __powerpc__
+    __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*(uint32_t *)ptr) : "r" (v), "r" (ptr));
+#else
+    uint8_t *p = ptr;
+    p[0] = v;
+    p[1] = v >> 8;
+    p[2] = v >> 16;
+    p[3] = v >> 24;
+#endif
+}
+
+static inline void stq_le_p(void *ptr, uint64_t v)
+{
+    uint8_t *p = ptr;
+    stl_le_p(p, (uint32_t)v);
+    stl_le_p(p + 4, v >> 32);
+}
+
+/* float access */
+
+static inline float32 ldfl_le_p(void *ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = ldl_le_p(ptr);
+    return u.f;
+}
+
+static inline void stfl_le_p(void *ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    stl_le_p(ptr, u.i);
+}
+
+static inline float64 ldfq_le_p(void *ptr)
+{
+    CPU_DoubleU u;
+    u.l.lower = ldl_le_p(ptr);
+    u.l.upper = ldl_le_p(ptr + 4);
+    return u.d;
+}
+
+static inline void stfq_le_p(void *ptr, float64 v)
+{
+    CPU_DoubleU u;
+    u.d = v;
+    stl_le_p(ptr, u.l.lower);
+    stl_le_p(ptr + 4, u.l.upper);
+}
+
+#else
+
+static inline int lduw_le_p(void *ptr)
+{
+    return *(uint16_t *)ptr;
+}
+
+static inline int ldsw_le_p(void *ptr)
+{
+    return *(int16_t *)ptr;
+}
+
+static inline int ldl_le_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+
+static inline uint64_t ldq_le_p(void *ptr)
+{
+    return *(uint64_t *)ptr;
+}
+
+static inline void stw_le_p(void *ptr, int v)
+{
+    *(uint16_t *)ptr = v;
+}
+
+static inline void stl_le_p(void *ptr, int v)
+{
+    *(uint32_t *)ptr = v;
+}
+
+static inline void stq_le_p(void *ptr, uint64_t v)
+{
+    *(uint64_t *)ptr = v;
+}
+
+/* float access */
+
+static inline float32 ldfl_le_p(void *ptr)
+{
+    return *(float32 *)ptr;
+}
+
+static inline float64 ldfq_le_p(void *ptr)
+{
+    return *(float64 *)ptr;
+}
+
+static inline void stfl_le_p(void *ptr, float32 v)
+{
+    *(float32 *)ptr = v;
+}
+
+static inline void stfq_le_p(void *ptr, float64 v)
+{
+    *(float64 *)ptr = v;
+}
+#endif
+#endif /* !VBOX */
+
+#if !defined(WORDS_BIGENDIAN) || defined(WORDS_ALIGNED)
+
+static inline int lduw_be_p(void *ptr)
+{
+#if defined(__i386__)
+    int val;
+    asm volatile ("movzwl %1, %0\n"
+                  "xchgb %b0, %h0\n"
+                  : "=q" (val)
+                  : "m" (*(uint16_t *)ptr));
+    return val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return ((b[0] << 8) | b[1]);
+#endif
+}
+
+static inline int ldsw_be_p(void *ptr)
+{
+#if defined(__i386__)
+    int val;
+    asm volatile ("movzwl %1, %0\n"
+                  "xchgb %b0, %h0\n"
+                  : "=q" (val)
+                  : "m" (*(uint16_t *)ptr));
+    return (int16_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (int16_t)((b[0] << 8) | b[1]);
+#endif
+}
+
+static inline int ldl_be_p(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
+#endif
+}
+
+static inline uint64_t ldq_be_p(void *ptr)
+{
+    uint32_t a,b;
+    a = ldl_be_p(ptr);
+    b = ldl_be_p(ptr+4);
+    return (((uint64_t)a<<32)|b);
+}
+
+static inline void stw_be_p(void *ptr, int v)
+{
+#if defined(__i386__)
+    asm volatile ("xchgb %b0, %h0\n"
+                  "movw %w0, %1\n"
+                  : "=q" (v)
+                  : "m" (*(uint16_t *)ptr), "0" (v));
+#else
+    uint8_t *d = (uint8_t *) ptr;
+    d[0] = v >> 8;
+    d[1] = v;
+#endif
+}
+
+static inline void stl_be_p(void *ptr, int v)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    asm volatile ("bswap %0\n"
+                  "movl %0, %1\n"
+                  : "=r" (v)
+                  : "m" (*(uint32_t *)ptr), "0" (v));
+#else
+    uint8_t *d = (uint8_t *) ptr;
+    d[0] = v >> 24;
+    d[1] = v >> 16;
+    d[2] = v >> 8;
+    d[3] = v;
+#endif
+}
+
+static inline void stq_be_p(void *ptr, uint64_t v)
+{
+    stl_be_p(ptr, v >> 32);
+    stl_be_p(ptr + 4, v);
+}
+
+/* float access */
+
+static inline float32 ldfl_be_p(void *ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = ldl_be_p(ptr);
+    return u.f;
+}
+
+static inline void stfl_be_p(void *ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    stl_be_p(ptr, u.i);
+}
+
+static inline float64 ldfq_be_p(void *ptr)
+{
+    CPU_DoubleU u;
+    u.l.upper = ldl_be_p(ptr);
+    u.l.lower = ldl_be_p(ptr + 4);
+    return u.d;
+}
+
+static inline void stfq_be_p(void *ptr, float64 v)
+{
+    CPU_DoubleU u;
+    u.d = v;
+    stl_be_p(ptr, u.l.upper);
+    stl_be_p(ptr + 4, u.l.lower);
+}
+
+#else
+
+static inline int lduw_be_p(void *ptr)
+{
+    return *(uint16_t *)ptr;
+}
+
+static inline int ldsw_be_p(void *ptr)
+{
+    return *(int16_t *)ptr;
+}
+
+static inline int ldl_be_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+
+static inline uint64_t ldq_be_p(void *ptr)
+{
+    return *(uint64_t *)ptr;
+}
+
+static inline void stw_be_p(void *ptr, int v)
+{
+    *(uint16_t *)ptr = v;
+}
+
+static inline void stl_be_p(void *ptr, int v)
+{
+    *(uint32_t *)ptr = v;
+}
+
+static inline void stq_be_p(void *ptr, uint64_t v)
+{
+    *(uint64_t *)ptr = v;
+}
+
+/* float access */
+
+static inline float32 ldfl_be_p(void *ptr)
+{
+    return *(float32 *)ptr;
+}
+
+static inline float64 ldfq_be_p(void *ptr)
+{
+    return *(float64 *)ptr;
+}
+
+static inline void stfl_be_p(void *ptr, float32 v)
+{
+    *(float32 *)ptr = v;
+}
+
+static inline void stfq_be_p(void *ptr, float64 v)
+{
+    *(float64 *)ptr = v;
+}
+
+#endif
+
+/* target CPU memory access functions */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define lduw_p(p) lduw_be_p(p)
+#define ldsw_p(p) ldsw_be_p(p)
+#define ldl_p(p) ldl_be_p(p)
+#define ldq_p(p) ldq_be_p(p)
+#define ldfl_p(p) ldfl_be_p(p)
+#define ldfq_p(p) ldfq_be_p(p)
+#define stw_p(p, v) stw_be_p(p, v)
+#define stl_p(p, v) stl_be_p(p, v)
+#define stq_p(p, v) stq_be_p(p, v)
+#define stfl_p(p, v) stfl_be_p(p, v)
+#define stfq_p(p, v) stfq_be_p(p, v)
+#else
+#define lduw_p(p) lduw_le_p(p)
+#define ldsw_p(p) ldsw_le_p(p)
+#define ldl_p(p) ldl_le_p(p)
+#define ldq_p(p) ldq_le_p(p)
+#define ldfl_p(p) ldfl_le_p(p)
+#define ldfq_p(p) ldfq_le_p(p)
+#define stw_p(p, v) stw_le_p(p, v)
+#define stl_p(p, v) stl_le_p(p, v)
+#define stq_p(p, v) stq_le_p(p, v)
+#define stfl_p(p, v) stfl_le_p(p, v)
+#define stfq_p(p, v) stfq_le_p(p, v)
+#endif
+
+/* MMU memory access macros */
+
+#if defined(CONFIG_USER_ONLY)
+/* On some host systems the guest address space is reserved on the host.
+ * This allows the guest address space to be offset to a convenient location.
+ */
+//#define GUEST_BASE 0x20000000
+#define GUEST_BASE 0
+
+/* All direct uses of g2h and h2g need to go away for usermode softmmu.  */
+#define g2h(x) ((void *)((unsigned long)(x) + GUEST_BASE))
+#define h2g(x) ((target_ulong)(x - GUEST_BASE))
+
+#define saddr(x) g2h(x)
+#define laddr(x) g2h(x)
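+
+/* Example (illustrative only): with the commented-out GUEST_BASE of
+   0x20000000, g2h(0x1000) would yield host address 0x20001000 and h2g()
+   the inverse; with GUEST_BASE 0 both are plain casts between guest and
+   host addresses. */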
+
+#else /* !CONFIG_USER_ONLY */
+/* NOTE: we use double casts if pointers and target_ulong have
+   different sizes */
+#define saddr(x) (uint8_t *)(long)(x)
+#define laddr(x) (uint8_t *)(long)(x)
+#endif
+
+#define ldub_raw(p) ldub_p(laddr((p)))
+#define ldsb_raw(p) ldsb_p(laddr((p)))
+#define lduw_raw(p) lduw_p(laddr((p)))
+#define ldsw_raw(p) ldsw_p(laddr((p)))
+#define ldl_raw(p) ldl_p(laddr((p)))
+#define ldq_raw(p) ldq_p(laddr((p)))
+#define ldfl_raw(p) ldfl_p(laddr((p)))
+#define ldfq_raw(p) ldfq_p(laddr((p)))
+#define stb_raw(p, v) stb_p(saddr((p)), v)
+#define stw_raw(p, v) stw_p(saddr((p)), v)
+#define stl_raw(p, v) stl_p(saddr((p)), v)
+#define stq_raw(p, v) stq_p(saddr((p)), v)
+#define stfl_raw(p, v) stfl_p(saddr((p)), v)
+#define stfq_raw(p, v) stfq_p(saddr((p)), v)
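+
+/* Expansion sketch (illustrative only): with the definitions above,
+ *
+ *     ldl_raw(p)    ->  ldl_p(laddr((p)))
+ *     stw_raw(p, v) ->  stw_p(saddr((p)), v)
+ *
+ * i.e. the _raw accessors are the target-endian host pointer accessors
+ * applied to an address translated by laddr()/saddr(). */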
+
+
+#if defined(CONFIG_USER_ONLY)
+
+/* if user mode, no other memory access functions */
+#define ldub(p) ldub_raw(p)
+#define ldsb(p) ldsb_raw(p)
+#define lduw(p) lduw_raw(p)
+#define ldsw(p) ldsw_raw(p)
+#define ldl(p) ldl_raw(p)
+#define ldq(p) ldq_raw(p)
+#define ldfl(p) ldfl_raw(p)
+#define ldfq(p) ldfq_raw(p)
+#define stb(p, v) stb_raw(p, v)
+#define stw(p, v) stw_raw(p, v)
+#define stl(p, v) stl_raw(p, v)
+#define stq(p, v) stq_raw(p, v)
+#define stfl(p, v) stfl_raw(p, v)
+#define stfq(p, v) stfq_raw(p, v)
+
+#define ldub_code(p) ldub_raw(p)
+#define ldsb_code(p) ldsb_raw(p)
+#define lduw_code(p) lduw_raw(p)
+#define ldsw_code(p) ldsw_raw(p)
+#define ldl_code(p) ldl_raw(p)
+
+#define ldub_kernel(p) ldub_raw(p)
+#define ldsb_kernel(p) ldsb_raw(p)
+#define lduw_kernel(p) lduw_raw(p)
+#define ldsw_kernel(p) ldsw_raw(p)
+#define ldl_kernel(p) ldl_raw(p)
+#define ldfl_kernel(p) ldfl_raw(p)
+#define ldfq_kernel(p) ldfq_raw(p)
+#define stb_kernel(p, v) stb_raw(p, v)
+#define stw_kernel(p, v) stw_raw(p, v)
+#define stl_kernel(p, v) stl_raw(p, v)
+#define stq_kernel(p, v) stq_raw(p, v)
+#define stfl_kernel(p, v) stfl_raw(p, v)
+#define stfq_kernel(p, v) stfq_raw(p, v)
+
+#endif /* defined(CONFIG_USER_ONLY) */
+
+/* page related stuff */
+
+#define TARGET_PAGE_SIZE (1 << TARGET_PAGE_BITS)
+#define TARGET_PAGE_MASK ~(TARGET_PAGE_SIZE - 1)
+#define TARGET_PAGE_ALIGN(addr) (((addr) + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK)
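+
+/* Worked example (assuming TARGET_PAGE_BITS == 12, i.e. 4 KiB pages):
+ *   TARGET_PAGE_SIZE          = 0x1000
+ *   TARGET_PAGE_MASK          = 0xfffff000 (for a 32 bit address type)
+ *   TARGET_PAGE_ALIGN(0x1234) = 0x2000
+ *   0x1234 & TARGET_PAGE_MASK = 0x1000
+ */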
+
+/* ??? These should be the larger of unsigned long and target_ulong.  */
+extern unsigned long qemu_real_host_page_size;
+extern unsigned long qemu_host_page_bits;
+extern unsigned long qemu_host_page_size;
+extern unsigned long qemu_host_page_mask;
+
+#define HOST_PAGE_ALIGN(addr) (((addr) + qemu_host_page_size - 1) & qemu_host_page_mask)
+
+/* same as PROT_xxx */
+#define PAGE_READ      0x0001
+#define PAGE_WRITE     0x0002
+#define PAGE_EXEC      0x0004
+#define PAGE_BITS      (PAGE_READ | PAGE_WRITE | PAGE_EXEC)
+#define PAGE_VALID     0x0008
+/* original state of the write flag (used when tracking self-modifying
+   code) */
+#define PAGE_WRITE_ORG 0x0010
+
+void page_dump(FILE *f);
+int page_get_flags(target_ulong address);
+void page_set_flags(target_ulong start, target_ulong end, int flags);
+void page_unprotect_range(target_ulong data, target_ulong data_size);
+
+#define SINGLE_CPU_DEFINES
+#ifdef SINGLE_CPU_DEFINES
+
+#if defined(TARGET_I386)
+
+#define CPUState CPUX86State
+#define cpu_init cpu_x86_init
+#define cpu_exec cpu_x86_exec
+#define cpu_gen_code cpu_x86_gen_code
+#define cpu_signal_handler cpu_x86_signal_handler
+
+#elif defined(TARGET_ARM)
+
+#define CPUState CPUARMState
+#define cpu_init cpu_arm_init
+#define cpu_exec cpu_arm_exec
+#define cpu_gen_code cpu_arm_gen_code
+#define cpu_signal_handler cpu_arm_signal_handler
+
+#elif defined(TARGET_SPARC)
+
+#define CPUState CPUSPARCState
+#define cpu_init cpu_sparc_init
+#define cpu_exec cpu_sparc_exec
+#define cpu_gen_code cpu_sparc_gen_code
+#define cpu_signal_handler cpu_sparc_signal_handler
+
+#elif defined(TARGET_PPC)
+
+#define CPUState CPUPPCState
+#define cpu_init cpu_ppc_init
+#define cpu_exec cpu_ppc_exec
+#define cpu_gen_code cpu_ppc_gen_code
+#define cpu_signal_handler cpu_ppc_signal_handler
+
+#elif defined(TARGET_M68K)
+#define CPUState CPUM68KState
+#define cpu_init cpu_m68k_init
+#define cpu_exec cpu_m68k_exec
+#define cpu_gen_code cpu_m68k_gen_code
+#define cpu_signal_handler cpu_m68k_signal_handler
+
+#elif defined(TARGET_MIPS)
+#define CPUState CPUMIPSState
+#define cpu_init cpu_mips_init
+#define cpu_exec cpu_mips_exec
+#define cpu_gen_code cpu_mips_gen_code
+#define cpu_signal_handler cpu_mips_signal_handler
+
+#elif defined(TARGET_SH4)
+#define CPUState CPUSH4State
+#define cpu_init cpu_sh4_init
+#define cpu_exec cpu_sh4_exec
+#define cpu_gen_code cpu_sh4_gen_code
+#define cpu_signal_handler cpu_sh4_signal_handler
+
+#else
+
+#error unsupported target CPU
+
+#endif
+
+#endif /* SINGLE_CPU_DEFINES */
+
+void cpu_dump_state(CPUState *env, FILE *f,
+                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+                    int flags);
+
+DECLNORETURN(void) cpu_abort(CPUState *env, const char *fmt, ...);
+extern CPUState *first_cpu;
+extern CPUState *cpu_single_env;
+extern int code_copy_enabled;
+
+#define CPU_INTERRUPT_EXIT   0x01 /* wants exit from main loop */
+#define CPU_INTERRUPT_HARD   0x02 /* hardware interrupt pending */
+#define CPU_INTERRUPT_EXITTB 0x04 /* exit the current TB (use for x86 a20 case) */
+#define CPU_INTERRUPT_TIMER  0x08 /* internal timer exception pending */
+#define CPU_INTERRUPT_FIQ    0x10 /* Fast interrupt pending.  */
+#define CPU_INTERRUPT_HALT   0x20 /* CPU halt wanted */
+#define CPU_INTERRUPT_SMI    0x40 /* (x86 only) SMI interrupt pending */
+
+#ifdef VBOX
+/** Executes a single instruction. cpu_exec() will normally return EXCP_SINGLE_INSTR. */
+#define CPU_INTERRUPT_SINGLE_INSTR              0x0200
+/** Executing a CPU_INTERRUPT_SINGLE_INSTR request, quit the cpu_loop. (for exceptions and suchlike) */
+#define CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT    0x0400
+/** VM execution was interrupted by VMR3Reset, VMR3Suspend or VMR3PowerOff. */
+#define CPU_INTERRUPT_RC                        0x0800
+/** Exit current TB to process an external interrupt request (also in op.c!!) */
+#define CPU_INTERRUPT_EXTERNAL_EXIT             0x1000
+/** Exit current TB to process an external interrupt request (also in op.c!!) */
+#define CPU_INTERRUPT_EXTERNAL_HARD             0x2000
+/** Exit current TB to process an external interrupt request (also in op.c!!) */
+#define CPU_INTERRUPT_EXTERNAL_TIMER            0x4000
+/** Exit current TB to process an external interrupt request (also in op.c!!) */
+#define CPU_INTERRUPT_EXTERNAL_DMA              0x8000
+#endif /* VBOX */
+void cpu_interrupt(CPUState *s, int mask);
+void cpu_reset_interrupt(CPUState *env, int mask);
+
+int cpu_breakpoint_insert(CPUState *env, target_ulong pc);
+int cpu_breakpoint_remove(CPUState *env, target_ulong pc);
+void cpu_single_step(CPUState *env, int enabled);
+void cpu_reset(CPUState *s);
+
+/* Return the physical page corresponding to a virtual one. Use it
+   only for debugging because no protection checks are done. Return -1
+   if no page found. */
+target_ulong cpu_get_phys_page_debug(CPUState *env, target_ulong addr);
+
+#define CPU_LOG_TB_OUT_ASM (1 << 0)
+#define CPU_LOG_TB_IN_ASM  (1 << 1)
+#define CPU_LOG_TB_OP      (1 << 2)
+#define CPU_LOG_TB_OP_OPT  (1 << 3)
+#define CPU_LOG_INT        (1 << 4)
+#define CPU_LOG_EXEC       (1 << 5)
+#define CPU_LOG_PCALL      (1 << 6)
+#define CPU_LOG_IOPORT     (1 << 7)
+#define CPU_LOG_TB_CPU     (1 << 8)
+
+/* define log items */
+typedef struct CPULogItem {
+    int mask;
+    const char *name;
+    const char *help;
+} CPULogItem;
+
+extern CPULogItem cpu_log_items[];
+
+void cpu_set_log(int log_flags);
+void cpu_set_log_filename(const char *filename);
+int cpu_str_to_log_mask(const char *str);
+
+/* IO ports API */
+
+/* NOTE: as these functions may even be used when there is an ISA
+   bridge on non-x86 targets, we always define them */
+#ifndef NO_CPU_IO_DEFS
+void cpu_outb(CPUState *env, int addr, int val);
+void cpu_outw(CPUState *env, int addr, int val);
+void cpu_outl(CPUState *env, int addr, int val);
+int cpu_inb(CPUState *env, int addr);
+int cpu_inw(CPUState *env, int addr);
+int cpu_inl(CPUState *env, int addr);
+#endif
+
+/* memory API */
+
+#ifndef VBOX
+extern int phys_ram_size;
+extern int phys_ram_fd;
+#else /* VBOX */
+extern RTGCPHYS phys_ram_size;
+/** This is required for bounds checking the phys_ram_dirty accesses. */
+extern uint32_t phys_ram_dirty_size;
+#endif /* VBOX */
+#if !defined(VBOX)
+extern uint8_t *phys_ram_base;
+#endif
+extern uint8_t *phys_ram_dirty;
+
+/* physical memory access */
+#define TLB_INVALID_MASK   (1 << 3)
+#define IO_MEM_SHIFT       4
+#define IO_MEM_NB_ENTRIES  (1 << (TARGET_PAGE_BITS  - IO_MEM_SHIFT))
+
+#define IO_MEM_RAM         (0 << IO_MEM_SHIFT) /* hardcoded offset */
+#define IO_MEM_ROM         (1 << IO_MEM_SHIFT) /* hardcoded offset */
+#define IO_MEM_UNASSIGNED  (2 << IO_MEM_SHIFT)
+#define IO_MEM_NOTDIRTY    (4 << IO_MEM_SHIFT) /* used internally, never use directly */
+#if defined(VBOX) && !defined(VBOX_WITH_NEW_PHYS_CODE)
+#define IO_MEM_RAM_MISSING (5 << IO_MEM_SHIFT) /* used internally, never use directly */
+#endif
+/* acts like a ROM when read and like a device when written. As an
+   exception, the write memory callback gets the ram offset instead of
+   the physical address */
+#define IO_MEM_ROMD        (1)
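+
+/* Illustration (not part of the original code): an io zone number i is kept
+   in the low bits of a page descriptor as (i << IO_MEM_SHIFT), so IO_MEM_RAM,
+   IO_MEM_ROM and IO_MEM_UNASSIGNED above are simply the pre-shifted indices
+   0, 1 and 2, while IO_MEM_ROMD, sitting below IO_MEM_SHIFT, can be OR'ed in
+   as a flag. */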
+
+typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
+typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
+
+void cpu_register_physical_memory(target_phys_addr_t start_addr,
+                                  unsigned long size,
+                                  unsigned long phys_offset);
+uint32_t cpu_get_physical_page_desc(target_phys_addr_t addr);
+int cpu_register_io_memory(int io_index,
+                           CPUReadMemoryFunc **mem_read,
+                           CPUWriteMemoryFunc **mem_write,
+                           void *opaque);
+CPUWriteMemoryFunc **cpu_get_io_memory_write(int io_index);
+CPUReadMemoryFunc **cpu_get_io_memory_read(int io_index);
+
+void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+                            int len, int is_write);
+static inline void cpu_physical_memory_read(target_phys_addr_t addr,
+                                            uint8_t *buf, int len)
+{
+    cpu_physical_memory_rw(addr, buf, len, 0);
+}
+static inline void cpu_physical_memory_write(target_phys_addr_t addr,
+                                             const uint8_t *buf, int len)
+{
+    cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 1);
+}
+uint32_t ldub_phys(target_phys_addr_t addr);
+uint32_t lduw_phys(target_phys_addr_t addr);
+uint32_t ldl_phys(target_phys_addr_t addr);
+uint64_t ldq_phys(target_phys_addr_t addr);
+void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val);
+void stb_phys(target_phys_addr_t addr, uint32_t val);
+void stw_phys(target_phys_addr_t addr, uint32_t val);
+void stl_phys(target_phys_addr_t addr, uint32_t val);
+void stq_phys(target_phys_addr_t addr, uint64_t val);
+
+void cpu_physical_memory_write_rom(target_phys_addr_t addr,
+                                   const uint8_t *buf, int len);
+int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
+                        uint8_t *buf, int len, int is_write);
+
+#define VGA_DIRTY_FLAG  0x01
+#define CODE_DIRTY_FLAG 0x02
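+
+/* Illustration (not part of the original code): phys_ram_dirty keeps one
+   byte of dirty flags per target page.  For a page containing physical
+   address addr:
+
+       phys_ram_dirty[addr >> TARGET_PAGE_BITS] == 0xff     // dirty for all clients
+       cpu_physical_memory_get_dirty(addr, VGA_DIRTY_FLAG)  // dirty for VGA only
+
+   cpu_physical_memory_set_dirty() below marks a page dirty for every client. */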
+
+/* read dirty bit (return 0 or 1) */
+static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
+{
+#ifdef VBOX
+    if (RT_UNLIKELY((addr >> TARGET_PAGE_BITS) >= phys_ram_dirty_size))
+    {
+        Log(("cpu_physical_memory_is_dirty: %VGp\n", (RTGCPHYS)addr));
+        /*AssertMsgFailed(("cpu_physical_memory_is_dirty: %VGp\n", (RTGCPHYS)addr));*/
+        return 0;
+    }
+#endif
+    return phys_ram_dirty[addr >> TARGET_PAGE_BITS] == 0xff;
+}
+
+static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
+                                                int dirty_flags)
+{
+#ifdef VBOX
+    if (RT_UNLIKELY((addr >> TARGET_PAGE_BITS) >= phys_ram_dirty_size))
+    {
+        Log(("cpu_physical_memory_is_dirty: %VGp\n", (RTGCPHYS)addr));
+        /*AssertMsgFailed(("cpu_physical_memory_is_dirty: %VGp\n", (RTGCPHYS)addr));*/
+        return 0xff & dirty_flags; /** @todo I don't think this is the right thing to return, fix! */
+    }
+#endif
+    return phys_ram_dirty[addr >> TARGET_PAGE_BITS] & dirty_flags;
+}
+
+static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
+{
+#ifdef VBOX
+    if (RT_UNLIKELY((addr >> TARGET_PAGE_BITS) >= phys_ram_dirty_size))
+    {
+        Log(("cpu_physical_memory_is_dirty: %VGp\n", (RTGCPHYS)addr));
+        /*AssertMsgFailed(("cpu_physical_memory_is_dirty: %VGp\n", (RTGCPHYS)addr));*/
+        return;
+    }
+#endif
+    phys_ram_dirty[addr >> TARGET_PAGE_BITS] = 0xff;
+}
+
+void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
+                                     int dirty_flags);
+void cpu_tlb_update_dirty(CPUState *env);
+
+void dump_exec_info(FILE *f,
+                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
+
+/*******************************************/
+/* host CPU ticks (if available) */
+
+#if defined(__powerpc__)
+
+static inline uint32_t get_tbl(void)
+{
+    uint32_t tbl;
+    asm volatile("mftb %0" : "=r" (tbl));
+    return tbl;
+}
+
+static inline uint32_t get_tbu(void)
+{
+    uint32_t tbl;
+    asm volatile("mftbu %0" : "=r" (tbl));
+    return tbl;
+}
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+    uint32_t l, h, h1;
+    /* NOTE: we test if wrapping has occurred */
+    do {
+        h = get_tbu();
+        l = get_tbl();
+        h1 = get_tbu();
+    } while (h != h1);
+    return ((int64_t)h << 32) | l;
+}
+
+#elif defined(__i386__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+    int64_t val;
+    asm volatile ("rdtsc" : "=A" (val));
+    return val;
+}
+
+#elif defined(__x86_64__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+    uint32_t low,high;
+    int64_t val;
+    asm volatile("rdtsc" : "=a" (low), "=d" (high));
+    val = high;
+    val <<= 32;
+    val |= low;
+    return val;
+}
+
+#elif defined(__ia64)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+    int64_t val;
+    asm volatile ("mov %0 = ar.itc" : "=r"(val) :: "memory");
+    return val;
+}
+
+#elif defined(__s390__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+    int64_t val;
+    asm volatile("stck 0(%1)" : "=m" (val) : "a" (&val) : "cc");
+    return val;
+}
+
+#elif defined(__sparc_v9__)
+
+static inline int64_t cpu_get_real_ticks (void)
+{
+#if     defined(_LP64)
+        uint64_t        rval;
+        asm volatile("rd %%tick,%0" : "=r"(rval));
+        return rval;
+#else
+        union {
+                uint64_t i64;
+                struct {
+                        uint32_t high;
+                        uint32_t low;
+                }       i32;
+        } rval;
+        asm volatile("rd %%tick,%1; srlx %1,32,%0"
+                : "=r"(rval.i32.high), "=r"(rval.i32.low));
+        return rval.i64;
+#endif
+}
+#else
+/* The host CPU doesn't have an easily accessible cycle counter.
+   Just return a monotonically increasing value.  This will be totally wrong,
+   but hopefully better than nothing.  */
+static inline int64_t cpu_get_real_ticks (void)
+{
+    static int64_t ticks = 0;
+    return ticks++;
+}
+#endif
+
+/* profiling */
+#ifdef CONFIG_PROFILER
+static inline int64_t profile_getclock(void)
+{
+    return cpu_get_real_ticks();
+}
+
+extern int64_t kqemu_time, kqemu_time_start;
+extern int64_t qemu_time, qemu_time_start;
+extern int64_t tlb_flush_time;
+extern int64_t kqemu_exec_count;
+extern int64_t dev_time;
+extern int64_t kqemu_ret_int_count;
+extern int64_t kqemu_ret_excp_count;
+extern int64_t kqemu_ret_intr_count;
+
+#endif
+
+#ifdef VBOX
+void tb_invalidate_virt(CPUState *env, uint32_t eip);
+#endif /* VBOX */
+
+#endif /* CPU_ALL_H */
Index: /trunk/src/recompiler_new/cpu-defs.h
===================================================================
--- /trunk/src/recompiler_new/cpu-defs.h	(revision 13168)
+++ /trunk/src/recompiler_new/cpu-defs.h	(revision 13168)
@@ -0,0 +1,148 @@
+/*
+ * common defines for all CPUs
+ * 
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#ifndef CPU_DEFS_H
+#define CPU_DEFS_H
+
+#include "config.h"
+#include <setjmp.h>
+#include <inttypes.h>
+#include "osdep.h"
+
+#ifndef TARGET_LONG_BITS
+#error TARGET_LONG_BITS must be defined before including this header
+#endif
+
+#ifndef TARGET_PHYS_ADDR_BITS 
+#if TARGET_LONG_BITS >= HOST_LONG_BITS
+#define TARGET_PHYS_ADDR_BITS TARGET_LONG_BITS
+#else
+#define TARGET_PHYS_ADDR_BITS HOST_LONG_BITS
+#endif
+#endif
+
+#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
+
+/* target_ulong is the type of a virtual address */
+#if TARGET_LONG_SIZE == 4
+typedef int32_t target_long;
+typedef uint32_t target_ulong;
+#define TARGET_FMT_lx "%08x"
+#elif TARGET_LONG_SIZE == 8
+typedef int64_t target_long;
+typedef uint64_t target_ulong;
+#define TARGET_FMT_lx "%016" PRIx64
+#else
+#error TARGET_LONG_SIZE undefined
+#endif
+
+/* target_phys_addr_t is the type of a physical address (its size can
+   be different from 'target_ulong').  We have
+   sizeof(target_phys_addr_t) = max(sizeof(unsigned long),
+   sizeof(target physical address)) because we must sometimes pass a
+   host pointer to memory operations */
+
+#if TARGET_PHYS_ADDR_BITS == 32
+typedef uint32_t target_phys_addr_t;
+#elif TARGET_PHYS_ADDR_BITS == 64
+typedef uint64_t target_phys_addr_t;
+#else
+#error TARGET_PHYS_ADDR_BITS undefined
+#endif
+
+/* address in the RAM (different from a physical address) */
+typedef unsigned long ram_addr_t;
+
+#define HOST_LONG_SIZE (HOST_LONG_BITS / 8)
+
+#define EXCP_INTERRUPT 	0x10000 /* async interruption */
+#define EXCP_HLT        0x10001 /* hlt instruction reached */
+#define EXCP_DEBUG      0x10002 /* cpu stopped after a breakpoint or singlestep */
+#define EXCP_HALTED     0x10003 /* cpu is halted (waiting for external event) */
+#if defined(VBOX)
+#define EXCP_EXECUTE_RAW    0x11024 /* execute raw mode. */
+#define EXCP_EXECUTE_HWACC  0x11025 /* execute hardware accelerated raw mode. */
+#define EXCP_SINGLE_INSTR   0x11026 /* executed single instruction. */
+#define EXCP_RC             0x11027 /* a EM rc was raised (VMR3Reset/Suspend/PowerOff). */
+#endif /* VBOX */
+#define MAX_BREAKPOINTS 32
+
+#define TB_JMP_CACHE_BITS 12
+#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
+
+/* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for
+   addresses on the same page.  The top bits are the same.  This allows
+   TLB invalidation to quickly clear a subset of the hash table.  */
+#define TB_JMP_PAGE_BITS (TB_JMP_CACHE_BITS / 2)
+#define TB_JMP_PAGE_SIZE (1 << TB_JMP_PAGE_BITS)
+#define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1)
+#define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
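+
+/* Worked example of the constants above: with TB_JMP_CACHE_BITS = 12 and
+   TB_JMP_PAGE_BITS = 6, TB_JMP_CACHE_SIZE = 4096, TB_JMP_PAGE_SIZE = 64,
+   TB_JMP_ADDR_MASK = 0x03f and TB_JMP_PAGE_MASK = 0xfc0: all cache entries
+   for one guest page share a single 64 entry stripe of the 4096 entry
+   tb_jmp_cache array, so invalidating a page only touches that stripe. */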
+
+#define CPU_TLB_BITS 8
+#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
+
+typedef struct CPUTLBEntry {
+    /* bit 31 to TARGET_PAGE_BITS : virtual address 
+       bit TARGET_PAGE_BITS-1..IO_MEM_SHIFT : if non zero, memory io
+                                              zone number
+       bit 3                      : indicates that the entry is invalid
+       bit 2..0                   : zero
+    */
+    target_ulong addr_read; 
+    target_ulong addr_write; 
+    target_ulong addr_code; 
+    /* addend to virtual address to get physical address */
+    target_phys_addr_t addend; 
+} CPUTLBEntry;
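+
+/* Usage sketch (illustrative only, see the field comment above): on a TLB
+ * hit the soft MMU obtains the address to access by adding the addend to
+ * the guest virtual address, roughly
+ *
+ *     target_phys_addr_t pa = (target_phys_addr_t)vaddr + tlb_entry->addend;
+ *
+ * while the io zone number encoded in the low bits of addr_read, addr_write
+ * and addr_code selects the memory callbacks for I/O pages. */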
+
+#define CPU_COMMON                                                      \
+    struct TranslationBlock *current_tb; /* currently executing TB  */  \
+    /* soft mmu support */                                              \
+    /* in order to avoid passing too many arguments to the memory       \
+       write helpers, we store some rarely used information in the CPU  \
+       context */                                                       \
+    unsigned long mem_write_pc; /* host pc at which the memory was      \
+                                   written */                           \
+    target_ulong mem_write_vaddr; /* target virtual addr at which the   \
+                                     memory was written */              \
+    /* 0 = kernel, 1 = user */                                          \
+    CPUTLBEntry tlb_table[2][CPU_TLB_SIZE];                             \
+    struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];           \
+                                                                        \
+    /* from this point: preserved by CPU reset */                       \
+    /* ice debug support */                                             \
+    target_ulong breakpoints[MAX_BREAKPOINTS];                          \
+    int nb_breakpoints;                                                 \
+    int singlestep_enabled;                                             \
+                                                                        \
+    void *next_cpu; /* next CPU sharing TB cache */                     \
+    int cpu_index; /* CPU index (informative) */                        \
+    /* user data */                                                     \
+    void *opaque;
+
+#endif
Index: /trunk/src/recompiler_new/cpu-exec.c
===================================================================
--- /trunk/src/recompiler_new/cpu-exec.c	(revision 13168)
+++ /trunk/src/recompiler_new/cpu-exec.c	(revision 13168)
@@ -0,0 +1,1997 @@
+/*
+ *  i386 emulator main execution loop
+ *
+ *  Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#include "config.h"
+#include "exec.h"
+#include "disas.h"
+
+#if !defined(CONFIG_SOFTMMU)
+#undef EAX
+#undef ECX
+#undef EDX
+#undef EBX
+#undef ESP
+#undef EBP
+#undef ESI
+#undef EDI
+#undef EIP
+#include <signal.h>
+#include <sys/ucontext.h>
+#endif
+
+int tb_invalidated_flag;
+
+//#define DEBUG_EXEC
+//#define DEBUG_SIGNAL
+
+#if defined(TARGET_ARM) || defined(TARGET_SPARC) || defined(TARGET_M68K)
+/* XXX: unify with i386 target */
+void cpu_loop_exit(void)
+{
+    longjmp(env->jmp_env, 1);
+}
+#endif
+#if !(defined(TARGET_SPARC) || defined(TARGET_SH4) || defined(TARGET_M68K))
+#define reg_T2
+#endif
+
+/* exit the current TB from a signal handler. The host registers are
+   restored in a state compatible with the CPU emulator
+ */
+void cpu_resume_from_signal(CPUState *env1, void *puc)
+{
+#if !defined(CONFIG_SOFTMMU)
+    struct ucontext *uc = puc;
+#endif
+
+    env = env1;
+
+    /* XXX: restore cpu registers saved in host registers */
+
+#if !defined(CONFIG_SOFTMMU)
+    if (puc) {
+        /* XXX: use siglongjmp ? */
+        sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL);
+    }
+#endif
+    longjmp(env->jmp_env, 1);
+}
+
+
+static TranslationBlock *tb_find_slow(target_ulong pc,
+                                      target_ulong cs_base,
+                                      unsigned int flags)
+{
+    TranslationBlock *tb, **ptb1;
+    int code_gen_size;
+    unsigned int h;
+    target_ulong phys_pc, phys_page1, phys_page2, virt_page2;
+    uint8_t *tc_ptr;
+
+    spin_lock(&tb_lock);
+
+    tb_invalidated_flag = 0;
+
+    regs_to_env(); /* XXX: do it just before cpu_gen_code() */
+
+    /* find translated block using physical mappings */
+    phys_pc = get_phys_addr_code(env, pc);
+    phys_page1 = phys_pc & TARGET_PAGE_MASK;
+    phys_page2 = -1;
+    h = tb_phys_hash_func(phys_pc);
+    ptb1 = &tb_phys_hash[h];
+    for(;;) {
+        tb = *ptb1;
+        if (!tb)
+            goto not_found;
+        if (tb->pc == pc &&
+            tb->page_addr[0] == phys_page1 &&
+            tb->cs_base == cs_base &&
+            tb->flags == flags) {
+            /* check next page if needed */
+            if (tb->page_addr[1] != -1) {
+                virt_page2 = (pc & TARGET_PAGE_MASK) +
+                    TARGET_PAGE_SIZE;
+                phys_page2 = get_phys_addr_code(env, virt_page2);
+                if (tb->page_addr[1] == phys_page2)
+                    goto found;
+            } else {
+                goto found;
+            }
+        }
+        ptb1 = &tb->phys_hash_next;
+    }
+ not_found:
+    /* if no translated code available, then translate it now */
+    tb = tb_alloc(pc);
+    if (!tb) {
+        /* flush must be done */
+        tb_flush(env);
+        /* cannot fail at this point */
+        tb = tb_alloc(pc);
+        /* don't forget to invalidate previous TB info */
+        tb_invalidated_flag = 1;
+    }
+    tc_ptr = code_gen_ptr;
+    tb->tc_ptr = tc_ptr;
+    tb->cs_base = cs_base;
+    tb->flags = flags;
+    cpu_gen_code(env, tb, CODE_GEN_MAX_SIZE, &code_gen_size);
+    code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+
+    /* check next page if needed */
+    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
+    phys_page2 = -1;
+    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
+        phys_page2 = get_phys_addr_code(env, virt_page2);
+    }
+    tb_link_phys(tb, phys_pc, phys_page2);
+
+ found:
+    /* we add the TB in the virtual pc hash table */
+    env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
+    spin_unlock(&tb_lock);
+    return tb;
+}
+
+static inline TranslationBlock *tb_find_fast(void)
+{
+    TranslationBlock *tb;
+    target_ulong cs_base, pc;
+    unsigned int flags;
+
+    /* we record a subset of the CPU state. It will
+       always be the same before a given translated block
+       is executed. */
+#if defined(TARGET_I386)
+    flags = env->hflags;
+    flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
+    cs_base = env->segs[R_CS].base;
+    pc = cs_base + env->eip;
+#elif defined(TARGET_ARM)
+    flags = env->thumb | (env->vfp.vec_len << 1)
+            | (env->vfp.vec_stride << 4);
+    if ((env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR)
+        flags |= (1 << 6);
+    if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30))
+        flags |= (1 << 7);
+    cs_base = 0;
+    pc = env->regs[15];
+#elif defined(TARGET_SPARC)
+#ifdef TARGET_SPARC64
+    // Combined FPU enable bits . PRIV . DMMU enabled . IMMU enabled
+    flags = (((env->pstate & PS_PEF) >> 1) | ((env->fprs & FPRS_FEF) << 2))
+        | (env->pstate & PS_PRIV) | ((env->lsu & (DMMU_E | IMMU_E)) >> 2);
+#else
+    // FPU enable . MMU enabled . MMU no-fault . Supervisor
+    flags = (env->psref << 3) | ((env->mmuregs[0] & (MMU_E | MMU_NF)) << 1)
+        | env->psrs;
+#endif
+    cs_base = env->npc;
+    pc = env->pc;
+#elif defined(TARGET_PPC)
+    flags = (msr_pr << MSR_PR) | (msr_fp << MSR_FP) |
+        (msr_se << MSR_SE) | (msr_le << MSR_LE);
+    cs_base = 0;
+    pc = env->nip;
+#elif defined(TARGET_MIPS)
+    flags = env->hflags & (MIPS_HFLAG_TMASK | MIPS_HFLAG_BMASK);
+    cs_base = 0;
+    pc = env->PC;
+#elif defined(TARGET_M68K)
+    flags = env->fpcr & M68K_FPCR_PREC;
+    cs_base = 0;
+    pc = env->pc;
+#elif defined(TARGET_SH4)
+    flags = env->sr & (SR_MD | SR_RB);
+    cs_base = 0;         /* XXXXX */
+    pc = env->pc;
+#else
+#error unsupported CPU
+#endif
+    tb = env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
+    if (__builtin_expect(!tb || tb->pc != pc || tb->cs_base != cs_base ||
+                         tb->flags != flags, 0)) {
+        tb = tb_find_slow(pc, cs_base, flags);
+        /* Note: we do it here to avoid a gcc bug on Mac OS X when
+           doing it in tb_find_slow */
+        if (tb_invalidated_flag) {
+            /* as some TB could have been invalidated because
+               of memory exceptions while generating the code, we
+               must recompute the hash index here */
+            T0 = 0;
+        }
+    }
+    return tb;
+}
+
+
+/* main execution loop */
+
+#ifdef VBOX
+
+int cpu_exec(CPUState *env1)
+{
+#define DECLARE_HOST_REGS 1
+#include "hostregs_helper.h"
+    int ret, interrupt_request;
+    void (*gen_func)(void);
+    TranslationBlock *tb;
+    uint8_t *tc_ptr;
+
+#if defined(TARGET_I386)
+    /* handle exit of HALTED state */
+    if (env1->hflags & HF_HALTED_MASK) {
+        /* disable halt condition */
+        if ((env1->interrupt_request & CPU_INTERRUPT_HARD) &&
+            (env1->eflags & IF_MASK)) {
+            env1->hflags &= ~HF_HALTED_MASK;
+        } else {
+            return EXCP_HALTED;
+        }
+    }
+#elif defined(TARGET_PPC)
+    if (env1->halted) {
+        if (env1->msr[MSR_EE] &&
+            (env1->interrupt_request &
+             (CPU_INTERRUPT_HARD | CPU_INTERRUPT_TIMER))) {
+            env1->halted = 0;
+        } else {
+            return EXCP_HALTED;
+        }
+    }
+#elif defined(TARGET_SPARC)
+    if (env1->halted) {
+        if ((env1->interrupt_request & CPU_INTERRUPT_HARD) &&
+            (env1->psret != 0)) {
+            env1->halted = 0;
+        } else {
+            return EXCP_HALTED;
+        }
+    }
+#elif defined(TARGET_ARM)
+    if (env1->halted) {
+        /* An interrupt wakes the CPU even if the I and F CPSR bits are
+           set.  */
+        if (env1->interrupt_request
+            & (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD)) {
+            env1->halted = 0;
+        } else {
+            return EXCP_HALTED;
+        }
+    }
+#elif defined(TARGET_MIPS)
+    if (env1->halted) {
+        if (env1->interrupt_request &
+            (CPU_INTERRUPT_HARD | CPU_INTERRUPT_TIMER)) {
+            env1->halted = 0;
+        } else {
+            return EXCP_HALTED;
+        }
+    }
+#endif
+
+    cpu_single_env = env1;
+
+    /* first we save global registers */
+#define SAVE_HOST_REGS 1
+#include "hostregs_helper.h"
+    env = env1;
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+    /* we also save i7 because longjmp may not restore it */
+    asm volatile ("mov %%i7, %0" : "=r" (saved_i7));
+#endif
+
+#if defined(TARGET_I386)
+
+    env_to_regs();
+    /* put eflags in CPU temporary format */
+    CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    DF = 1 - (2 * ((env->eflags >> 10) & 1));
+    CC_OP = CC_OP_EFLAGS;
+    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+#elif defined(TARGET_ARM)
+#elif defined(TARGET_SPARC)
+#if defined(reg_REGWPTR)
+    saved_regwptr = REGWPTR;
+#endif
+#elif defined(TARGET_PPC)
+#elif defined(TARGET_MIPS)
+#elif defined(TARGET_SH4)
+    /* XXXXX */
+#else
+#error unsupported target CPU
+#endif
+#ifndef VBOX /* VBOX: We need to raise traps and suchlike from the outside. */
+    env->exception_index = -1;
+#endif
+
+    /* prepare setjmp context for exception handling */
+    for(;;) {
+        if (setjmp(env->jmp_env) == 0)
+        {
+            env->current_tb = NULL;
+            VMMR3Unlock(env->pVM);
+            VMMR3Lock(env->pVM);
+
+            /*
+             * Check for fatal errors first
+             */
+            if (env->interrupt_request & CPU_INTERRUPT_RC) {
+                env->exception_index = EXCP_RC;
+                ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_RC);
+                ret = env->exception_index;
+                cpu_loop_exit();
+            }
+
+            /* if an exception is pending, we execute it here */
+            if (env->exception_index >= 0) {
+                Assert(!env->user_mode_only);
+                if (env->exception_index >= EXCP_INTERRUPT) {
+                    /* exit request from the cpu execution loop */
+                    ret = env->exception_index;
+                    break;
+                } else {
+                    /* simulate a real cpu exception. On i386, it can
+                       trigger new exceptions, but we do not handle
+                       double or triple faults yet. */
+                    RAWEx_ProfileStart(env, STATS_IRQ_HANDLING);
+                    Log(("do_interrupt %d %d %VGv\n", env->exception_index, env->exception_is_int, env->exception_next_eip));
+                    do_interrupt(env->exception_index,
+                                 env->exception_is_int,
+                                 env->error_code,
+                                 env->exception_next_eip, 0);
+                    RAWEx_ProfileStop(env, STATS_IRQ_HANDLING);
+                }
+                env->exception_index = -1;
+            }
+
+            T0 = 0; /* force lookup of first TB */
+            for(;;)
+            {
+                interrupt_request = env->interrupt_request;
+                if (__builtin_expect(interrupt_request, 0))
+                {
+                    /* Single instruction exec request, we execute it and return (one way or the other).
+                       The caller will always reschedule after doing this operation! */
+                    if (interrupt_request & CPU_INTERRUPT_SINGLE_INSTR)
+                    {
+                        /* not in flight are we? (if we are, we trapped) */
+                        if (!(env->interrupt_request & CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT))
+                        {
+                            ASMAtomicOrS32(&env->interrupt_request, CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT);
+                            env->exception_index = EXCP_SINGLE_INSTR;
+                            if (emulate_single_instr(env) == -1)
+                                AssertMsgFailed(("REM: emulate_single_instr failed for EIP=%VGv!!\n", env->eip));
+
+                            /* When we receive an external interrupt during execution of this single
+                               instruction, then we should stay here. We will leave when we're ready
+                               for raw-mode or when interrupted by pending EMT requests.  */
+                            interrupt_request = env->interrupt_request; /* reload this! */
+                            if (   !(interrupt_request & CPU_INTERRUPT_HARD)
+                                || !(env->eflags & IF_MASK)
+                                ||  (env->hflags & HF_INHIBIT_IRQ_MASK)
+                                ||  (env->state & CPU_RAW_HWACC)
+                               )
+                            {
+                                env->exception_index = ret = EXCP_SINGLE_INSTR;
+                                cpu_loop_exit();
+                            }
+                        }
+                        /* Clear CPU_INTERRUPT_SINGLE_INSTR and leave CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT set. */
+                        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_SINGLE_INSTR);
+                    }
+
+                    RAWEx_ProfileStart(env, STATS_IRQ_HANDLING);
+                    if ((interrupt_request & CPU_INTERRUPT_SMI) &&
+                        !(env->hflags & HF_SMM_MASK)) {
+                        env->interrupt_request &= ~CPU_INTERRUPT_SMI;
+                        do_smm_enter();
+                        T0 = 0;
+                    }
+                    else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+                             (env->eflags & IF_MASK) &&
+                             !(env->hflags & HF_INHIBIT_IRQ_MASK))
+                    {
+                        /* if hardware interrupt pending, we execute it */
+                        int intno;
+                        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_HARD);
+                        intno = cpu_get_pic_interrupt(env);
+                        if (intno >= 0)
+                        {
+                            Log(("do_interrupt %d\n", intno));
+                            do_interrupt(intno, 0, 0, 0, 1);
+                        }
+                        /* ensure that no TB jump will be modified as
+                           the program flow was changed */
+                        T0 = 0;
+                    }
+                    if (env->interrupt_request & CPU_INTERRUPT_EXITTB)
+                    {
+                        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_EXITTB);
+                        /* ensure that no TB jump will be modified as
+                           the program flow was changed */
+                        T0 = 0;
+                    }
+                    RAWEx_ProfileStop(env, STATS_IRQ_HANDLING);
+                    if (interrupt_request & CPU_INTERRUPT_EXIT)
+                    {
+                        env->exception_index = EXCP_INTERRUPT;
+                        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_EXIT);
+                        ret = env->exception_index;
+                        cpu_loop_exit();
+                    }
+                    if (interrupt_request & CPU_INTERRUPT_RC)
+                    {
+                        env->exception_index = EXCP_RC;
+                        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_RC);
+                        ret = env->exception_index;
+                        cpu_loop_exit();
+                    }
+                }
+
+                /*
+                 * Check if the CPU state allows us to execute the code in raw-mode.
+                 */
+                RAWEx_ProfileStart(env, STATS_RAW_CHECK);
+                if (remR3CanExecuteRaw(env,
+                                       env->eip + env->segs[R_CS].base,
+                                       env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK)),
+                                       &env->exception_index))
+                {
+                    RAWEx_ProfileStop(env, STATS_RAW_CHECK);
+                    ret = env->exception_index;
+                    cpu_loop_exit();
+                }
+                RAWEx_ProfileStop(env, STATS_RAW_CHECK);
+
+                RAWEx_ProfileStart(env, STATS_TLB_LOOKUP);
+                tb = tb_find_fast();
+
+                /* see if we can patch the calling TB. When the TB
+                   spans two pages, we cannot safely do a direct
+                   jump. */
+                if (T0 != 0
+                    && !(tb->cflags & CF_RAW_MODE)
+                    && tb->page_addr[1] == -1)
+                {
+                    spin_lock(&tb_lock);
+                    tb_add_jump((TranslationBlock *)(long)(T0 & ~3), T0 & 3, tb);
+                    spin_unlock(&tb_lock);
+                }
+                tc_ptr = tb->tc_ptr;
+                env->current_tb = tb;
+                /* execute the generated code */
+                gen_func = (void *)tc_ptr;
+                RAWEx_ProfileStop(env, STATS_TLB_LOOKUP);
+
+#if defined(DEBUG) && defined(VBOX) && !defined(DEBUG_dmik)
+#if !defined(DEBUG_bird)
+                if (((env->hflags >> HF_CPL_SHIFT) & 3) == 0 && (env->hflags & HF_PE_MASK) && (env->cr[0] & CR0_PG_MASK))
+                {
+                    if(!(env->state & CPU_EMULATE_SINGLE_STEP))
+                    {
+                        Log(("EMR0: %VGv ESP=%VGv IF=%d TF=%d CPL=%d\n", env->eip, ESP, (env->eflags & IF_MASK) ? 1 : 0, (env->eflags & TF_MASK) ? 1 : 0, (env->hflags >> HF_CPL_SHIFT) & 3));
+                    }
+                }
+                else
+                if (((env->hflags >> HF_CPL_SHIFT) & 3) == 3 && (env->hflags & HF_PE_MASK) && (env->cr[0] & CR0_PG_MASK))
+                {
+                    if(!(env->state & CPU_EMULATE_SINGLE_STEP))
+                    {
+                        if(env->eflags & VM_MASK)
+                        {
+                            Log(("EMV86: %04X:%VGv IF=%d TF=%d CPL=%d CR0=%RGr\n", env->segs[R_CS].selector, env->eip, (env->eflags & IF_MASK) ? 1 : 0, (env->eflags & TF_MASK) ? 1 : 0, (env->hflags >> HF_CPL_SHIFT) & 3, env->cr[0]));
+                        }
+                        else
+                        {
+                            Log(("EMR3: %VGv ESP=%VGv IF=%d TF=%d CPL=%d IOPL=%d CR0=%RGr\n", env->eip, ESP, (env->eflags & IF_MASK) ? 1 : 0, (env->eflags & TF_MASK) ? 1 : 0, (env->hflags >> HF_CPL_SHIFT) & 3, ((env->eflags >> IOPL_SHIFT) & 3), env->cr[0]));
+                        }
+                    }
+                }
+                else
+                {
+                    /* Seriously slows down realmode booting. */
+                    LogFlow(("EMRM: %04X:%VGv SS:ESP=%04X:%VGv IF=%d TF=%d CPL=%d PE=%d PG=%d\n", env->segs[R_CS].selector, env->eip, env->segs[R_SS].selector, ESP, (env->eflags & IF_MASK) ? 1 : 0, (env->eflags & TF_MASK) ? 1 : 0, (env->hflags >> HF_CPL_SHIFT) & 3, env->cr[0] & X86_CR0_PE, env->cr[0] & X86_CR0_PG));
+                }
+#endif /* !DEBUG_bird */
+                if(env->state & CPU_EMULATE_SINGLE_STEP)
+                {
+#ifdef DEBUG_bird
+                    static int s_cTimes = 0;
+                    if (s_cTimes++ > 1000000)
+                    {
+                        RTLogPrintf("Enough stepping!\n");
+                        #if 0
+                        env->exception_index = EXCP_DEBUG;
+                        ret = env->exception_index;
+                        cpu_loop_exit();
+                        #else
+                        env->state &= ~CPU_EMULATE_SINGLE_STEP;
+                        #endif
+                    }
+#endif
+                    TMCpuTickPause(env->pVM);
+                    remR3DisasInstr(env, -1, NULL);
+                    TMCpuTickResume(env->pVM);
+                    if(emulate_single_instr(env) == -1)
+                    {
+                        Log(("emulate_single_instr failed for EIP=%VGv!!\n", env->eip));
+                    }
+                }
+                else
+                {
+                    RAWEx_ProfileStart(env, STATS_QEMU_RUN_EMULATED_CODE);
+                    gen_func();
+                    RAWEx_ProfileStop(env, STATS_QEMU_RUN_EMULATED_CODE);
+                }
+#else /* !DEBUG || !VBOX || DEBUG_dmik */
+
+                RAWEx_ProfileStart(env, STATS_QEMU_RUN_EMULATED_CODE);
+                gen_func();
+                RAWEx_ProfileStop(env, STATS_QEMU_RUN_EMULATED_CODE);
+
+#endif /* !DEBUG || !VBOX || DEBUG_dmik */
+                env->current_tb = NULL;
+                /* reset soft MMU for next block (it can currently
+                   only be set by a memory fault) */
+#if defined(TARGET_I386) && !defined(CONFIG_SOFTMMU)
+                if (env->hflags & HF_SOFTMMU_MASK) {
+                    env->hflags &= ~HF_SOFTMMU_MASK;
+                    /* do not allow linking to another block */
+                    T0 = 0;
+                }
+#endif
+            }
+        } else {
+            env_to_regs();
+        }
+#ifdef VBOX_HIGH_RES_TIMERS_HACK
+        /* NULL the current_tb here so cpu_interrupt() doesn't do
+           anything unnecessary (like crashing during single-instruction emulation). */
+        env->current_tb = NULL;
+        TMTimerPoll(env1->pVM);
+#endif
+    } /* for(;;) */
+
+#if defined(TARGET_I386)
+    /* restore flags in standard format */
+    env->eflags = env->eflags | cc_table[CC_OP].compute_all() | (DF & DF_MASK);
+#else
+#error unsupported target CPU
+#endif
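+/* including hostregs_helper.h here (with no DECLARE/SAVE macro defined) expands to code restoring the host registers saved at function entry */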
+#include "hostregs_helper.h"
+    return ret;
+}
+
+
+#else /* !VBOX */
+
+
+int cpu_exec(CPUState *env1)
+{
+#define DECLARE_HOST_REGS 1
+#include "hostregs_helper.h"
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+    int saved_i7;
+    target_ulong tmp_T0;
+#endif
+    int ret, interrupt_request;
+    void (*gen_func)(void);
+    TranslationBlock *tb;
+    uint8_t *tc_ptr;
+
+#if defined(TARGET_I386)
+    /* handle exit of HALTED state */
+    if (env1->hflags & HF_HALTED_MASK) {
+        /* disable halt condition */
+        if ((env1->interrupt_request & CPU_INTERRUPT_HARD) &&
+            (env1->eflags & IF_MASK)) {
+            env1->hflags &= ~HF_HALTED_MASK;
+        } else {
+            return EXCP_HALTED;
+        }
+    }
+#elif defined(TARGET_PPC)
+    if (env1->halted) {
+        if (env1->msr[MSR_EE] &&
+            (env1->interrupt_request &
+             (CPU_INTERRUPT_HARD | CPU_INTERRUPT_TIMER))) {
+            env1->halted = 0;
+        } else {
+            return EXCP_HALTED;
+        }
+    }
+#elif defined(TARGET_SPARC)
+    if (env1->halted) {
+        if ((env1->interrupt_request & CPU_INTERRUPT_HARD) &&
+            (env1->psret != 0)) {
+            env1->halted = 0;
+        } else {
+            return EXCP_HALTED;
+        }
+    }
+#elif defined(TARGET_ARM)
+    if (env1->halted) {
+        /* An interrupt wakes the CPU even if the I and F CPSR bits are
+           set.  */
+        if (env1->interrupt_request
+            & (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD)) {
+            env1->halted = 0;
+        } else {
+            return EXCP_HALTED;
+        }
+    }
+#elif defined(TARGET_MIPS)
+    if (env1->halted) {
+        if (env1->interrupt_request &
+            (CPU_INTERRUPT_HARD | CPU_INTERRUPT_TIMER)) {
+            env1->halted = 0;
+        } else {
+            return EXCP_HALTED;
+        }
+    }
+#endif
+
+    cpu_single_env = env1;
+
+    /* first we save global registers */
+#define SAVE_HOST_REGS 1
+#include "hostregs_helper.h"
+    env = env1;
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+    /* we also save i7 because longjmp may not restore it */
+    asm volatile ("mov %%i7, %0" : "=r" (saved_i7));
+#endif
+
+#if defined(TARGET_I386)
+    env_to_regs();
+    /* put eflags in CPU temporary format */
+    CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    DF = 1 - (2 * ((env->eflags >> 10) & 1));
+    CC_OP = CC_OP_EFLAGS;
+    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+#elif defined(TARGET_ARM)
+#elif defined(TARGET_SPARC)
+#if defined(reg_REGWPTR)
+    saved_regwptr = REGWPTR;
+#endif
+#elif defined(TARGET_PPC)
+#elif defined(TARGET_M68K)
+    env->cc_op = CC_OP_FLAGS;
+    env->cc_dest = env->sr & 0xf;
+    env->cc_x = (env->sr >> 4) & 1;
+#elif defined(TARGET_MIPS)
+#elif defined(TARGET_SH4)
+    /* XXXXX */
+#else
+#error unsupported target CPU
+#endif
+#ifndef VBOX /* VBOX: We need to raise traps and suchlike from the outside. */
+    env->exception_index = -1;
+#endif
+
+    /* prepare setjmp context for exception handling */
+    for(;;) {
+        if (setjmp(env->jmp_env) == 0) {
+            env->current_tb = NULL;
+#ifdef VBOX
+            VMMR3Unlock(env->pVM);
+            VMMR3Lock(env->pVM);
+
+            /* Check for high priority requests first (like fatal
+               errors). */
+            if (env->interrupt_request & CPU_INTERRUPT_RC) {
+                env->exception_index = EXCP_RC;
+                ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_RC);
+                ret = env->exception_index;
+                cpu_loop_exit();
+            }
+#endif /* VBOX */
+
+
+            /* if an exception is pending, we execute it here */
+            if (env->exception_index >= 0) {
+                if (env->exception_index >= EXCP_INTERRUPT) {
+                    /* exit request from the cpu execution loop */
+                    ret = env->exception_index;
+                    break;
+                } else if (env->user_mode_only) {
+                    /* if user mode only, we simulate a fake exception
+                       which will be handled outside the cpu execution
+                       loop */
+#if defined(TARGET_I386)
+                    do_interrupt_user(env->exception_index,
+                                      env->exception_is_int,
+                                      env->error_code,
+                                      env->exception_next_eip);
+#endif
+                    ret = env->exception_index;
+                    break;
+                } else {
+#if defined(TARGET_I386)
+                    /* simulate a real cpu exception. On i386, it can
+                       trigger new exceptions, but we do not handle
+                       double or triple faults yet. */
+                    do_interrupt(env->exception_index,
+                                 env->exception_is_int,
+                                 env->error_code,
+                                 env->exception_next_eip, 0);
+#elif defined(TARGET_PPC)
+                    do_interrupt(env);
+#elif defined(TARGET_MIPS)
+                    do_interrupt(env);
+#elif defined(TARGET_SPARC)
+                    do_interrupt(env->exception_index);
+#elif defined(TARGET_ARM)
+                    do_interrupt(env);
+#elif defined(TARGET_SH4)
+		    do_interrupt(env);
+#endif
+                }
+                env->exception_index = -1;
+            }
+#ifdef USE_KQEMU
+            if (kqemu_is_ok(env) && env->interrupt_request == 0) {
+                int ret;
+                env->eflags = env->eflags | cc_table[CC_OP].compute_all() | (DF & DF_MASK);
+                ret = kqemu_cpu_exec(env);
+                /* put eflags in CPU temporary format */
+                CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+                DF = 1 - (2 * ((env->eflags >> 10) & 1));
+                CC_OP = CC_OP_EFLAGS;
+                env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+                if (ret == 1) {
+                    /* exception */
+                    longjmp(env->jmp_env, 1);
+                } else if (ret == 2) {
+                    /* softmmu execution needed */
+                } else {
+                    if (env->interrupt_request != 0) {
+                        /* hardware interrupt will be executed just after */
+                    } else {
+                        /* otherwise, we restart */
+                        longjmp(env->jmp_env, 1);
+                    }
+                }
+            }
+#endif
+
+            T0 = 0; /* force lookup of first TB */
+            for(;;) {
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+                /* g1 can be modified by some libc? functions */
+                tmp_T0 = T0;
+#endif
+                interrupt_request = env->interrupt_request;
+                if (__builtin_expect(interrupt_request, 0)) {
+#ifdef VBOX
+                    /* Single instruction exec request, we execute it and return (one way or the other).
+                       The caller will always reschedule after doing this operation! */
+                    if (interrupt_request & CPU_INTERRUPT_SINGLE_INSTR)
+                    {
+                        /* not in flight are we? */
+                        if (!(env->interrupt_request & CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT))
+                        {
+                            ASMAtomicOrS32(&env->interrupt_request, CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT);
+                            env->exception_index = EXCP_SINGLE_INSTR;
+                            if (emulate_single_instr(env) == -1)
+                                AssertMsgFailed(("REM: emulate_single_instr failed for EIP=%VGv!!\n", env->eip));
+
+                            /* If we receive an external interrupt during execution of this single
+                               instruction, we should stay here. We will leave when we're ready
+                               for raw-mode or when interrupted by pending EMT requests.  */
+                            interrupt_request = env->interrupt_request; /* reload this! */
+                            if (   !(interrupt_request & CPU_INTERRUPT_HARD)
+                                || !(env->eflags & IF_MASK)
+                                ||  (env->hflags & HF_INHIBIT_IRQ_MASK)
+                               )
+                            {
+                                env->exception_index = ret = EXCP_SINGLE_INSTR;
+                                cpu_loop_exit();
+                            }
+                        }
+                        env->exception_index = EXCP_SINGLE_INSTR;
+                        cpu_loop_exit();
+                    }
+
+                    RAWEx_ProfileStart(env, STATS_IRQ_HANDLING);
+#endif /* VBOX */
+#if defined(TARGET_I386)
+                    if ((interrupt_request & CPU_INTERRUPT_SMI) &&
+                        !(env->hflags & HF_SMM_MASK)) {
+                        env->interrupt_request &= ~CPU_INTERRUPT_SMI;
+                        do_smm_enter();
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+                        tmp_T0 = 0;
+#else
+                        T0 = 0;
+#endif
+                    } else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+                        (env->eflags & IF_MASK) &&
+                        !(env->hflags & HF_INHIBIT_IRQ_MASK)) {
+                        int intno;
+#if defined(VBOX)
+                        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_HARD);
+#else
+                        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+#endif
+                        intno = cpu_get_pic_interrupt(env);
+                        if (loglevel & CPU_LOG_TB_IN_ASM) {
+                            fprintf(logfile, "Servicing hardware INT=0x%02x\n", intno);
+                        }
+#if defined(VBOX)
+                        if (intno >= 0)
+#endif
+                        do_interrupt(intno, 0, 0, 0, 1);
+                        /* ensure that no TB jump will be modified as
+                           the program flow was changed */
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+                        tmp_T0 = 0;
+#else
+                        T0 = 0;
+#endif
+                    }
+#elif defined(TARGET_PPC)
+#if 0
+                    if ((interrupt_request & CPU_INTERRUPT_RESET)) {
+                        cpu_ppc_reset(env);
+                    }
+#endif
+                    if (msr_ee != 0) {
+                        if ((interrupt_request & CPU_INTERRUPT_HARD)) {
+			    /* Raise it */
+			    env->exception_index = EXCP_EXTERNAL;
+			    env->error_code = 0;
+                            do_interrupt(env);
+                            env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+                            tmp_T0 = 0;
+#else
+                            T0 = 0;
+#endif
+                        } else if ((interrupt_request & CPU_INTERRUPT_TIMER)) {
+                            /* Raise it */
+                            env->exception_index = EXCP_DECR;
+                            env->error_code = 0;
+                            do_interrupt(env);
+                            env->interrupt_request &= ~CPU_INTERRUPT_TIMER;
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+                            tmp_T0 = 0;
+#else
+                            T0 = 0;
+#endif
+                        }
+                    }
+#elif defined(TARGET_MIPS)
+                    if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+                        (env->CP0_Status & (1 << CP0St_IE)) &&
+                        (env->CP0_Status & env->CP0_Cause & 0x0000FF00) &&
+                        !(env->hflags & MIPS_HFLAG_EXL) &&
+                        !(env->hflags & MIPS_HFLAG_ERL) &&
+                        !(env->hflags & MIPS_HFLAG_DM)) {
+                        /* Raise it */
+                        env->exception_index = EXCP_EXT_INTERRUPT;
+                        env->error_code = 0;
+                        do_interrupt(env);
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+                        tmp_T0 = 0;
+#else
+                        T0 = 0;
+#endif
+                    }
+#elif defined(TARGET_SPARC)
+                    if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+			(env->psret != 0)) {
+			int pil = env->interrupt_index & 15;
+			int type = env->interrupt_index & 0xf0;
+
+			if (((type == TT_EXTINT) &&
+			     (pil == 15 || pil > env->psrpil)) ||
+			    type != TT_EXTINT) {
+			    env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+			    do_interrupt(env->interrupt_index);
+			    env->interrupt_index = 0;
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+                            tmp_T0 = 0;
+#else
+                            T0 = 0;
+#endif
+			}
+		    } else if (interrupt_request & CPU_INTERRUPT_TIMER) {
+			//do_interrupt(0, 0, 0, 0, 0);
+			env->interrupt_request &= ~CPU_INTERRUPT_TIMER;
+		    } else if (interrupt_request & CPU_INTERRUPT_HALT) {
+			env->interrupt_request &= ~CPU_INTERRUPT_HALT;
+			env->halted = 1;
+			env->exception_index = EXCP_HLT;
+			cpu_loop_exit();
+                    }
+#elif defined(TARGET_ARM)
+                    if (interrupt_request & CPU_INTERRUPT_FIQ
+                        && !(env->uncached_cpsr & CPSR_F)) {
+                        env->exception_index = EXCP_FIQ;
+                        do_interrupt(env);
+                    }
+                    if (interrupt_request & CPU_INTERRUPT_HARD
+                        && !(env->uncached_cpsr & CPSR_I)) {
+                        env->exception_index = EXCP_IRQ;
+                        do_interrupt(env);
+                    }
+#elif defined(TARGET_SH4)
+		    /* XXXXX */
+#endif
+                   /* Don't use the cached interrupt_request value,
+                      do_interrupt may have updated the EXITTB flag. */
+                    if (env->interrupt_request & CPU_INTERRUPT_EXITTB) {
+#if defined(VBOX)
+                        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_EXITTB);
+#else
+                        env->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
+#endif
+                        /* ensure that no TB jump will be modified as
+                           the program flow was changed */
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+                        tmp_T0 = 0;
+#else
+                        T0 = 0;
+#endif
+                    }
+#ifdef VBOX
+                    RAWEx_ProfileStop(env, STATS_IRQ_HANDLING);
+#endif
+                    if (interrupt_request & CPU_INTERRUPT_EXIT) {
+#if defined(VBOX)
+                        env->exception_index = EXCP_INTERRUPT;
+                        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_EXIT);
+#else
+                        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
+                        env->exception_index = EXCP_INTERRUPT;
+#endif
+                        cpu_loop_exit();
+                    }
+#if defined(VBOX)
+                    if (interrupt_request & CPU_INTERRUPT_RC) {
+                        env->exception_index = EXCP_RC;
+                        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_RC);
+                        cpu_loop_exit();
+                    }
+#endif
+                }
+#ifdef DEBUG_EXEC
+                if ((loglevel & CPU_LOG_TB_CPU)) {
+#if defined(TARGET_I386)
+                    /* restore flags in standard format */
+#ifdef reg_EAX
+                    env->regs[R_EAX] = EAX;
+#endif
+#ifdef reg_EBX
+                    env->regs[R_EBX] = EBX;
+#endif
+#ifdef reg_ECX
+                    env->regs[R_ECX] = ECX;
+#endif
+#ifdef reg_EDX
+                    env->regs[R_EDX] = EDX;
+#endif
+#ifdef reg_ESI
+                    env->regs[R_ESI] = ESI;
+#endif
+#ifdef reg_EDI
+                    env->regs[R_EDI] = EDI;
+#endif
+#ifdef reg_EBP
+                    env->regs[R_EBP] = EBP;
+#endif
+#ifdef reg_ESP
+                    env->regs[R_ESP] = ESP;
+#endif
+                    env->eflags = env->eflags | cc_table[CC_OP].compute_all() | (DF & DF_MASK);
+                    cpu_dump_state(env, logfile, fprintf, X86_DUMP_CCOP);
+                    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+#elif defined(TARGET_ARM)
+                    cpu_dump_state(env, logfile, fprintf, 0);
+#elif defined(TARGET_SPARC)
+		    REGWPTR = env->regbase + (env->cwp * 16);
+		    env->regwptr = REGWPTR;
+                    cpu_dump_state(env, logfile, fprintf, 0);
+#elif defined(TARGET_PPC)
+                    cpu_dump_state(env, logfile, fprintf, 0);
+#elif defined(TARGET_M68K)
+                    cpu_m68k_flush_flags(env, env->cc_op);
+                    env->cc_op = CC_OP_FLAGS;
+                    env->sr = (env->sr & 0xffe0)
+                              | env->cc_dest | (env->cc_x << 4);
+                    cpu_dump_state(env, logfile, fprintf, 0);
+#elif defined(TARGET_MIPS)
+                    cpu_dump_state(env, logfile, fprintf, 0);
+#elif defined(TARGET_SH4)
+		    cpu_dump_state(env, logfile, fprintf, 0);
+#else
+#error unsupported target CPU
+#endif
+                }
+#endif
+#ifdef VBOX
+                /*
+                 * Check whether the CPU state allows us to execute the code in raw-mode.
+                 */
+                RAWEx_ProfileStart(env, STATS_RAW_CHECK);
+                if (remR3CanExecuteRaw(env,
+                                       env->eip + env->segs[R_CS].base,
+                                       env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK)),
+                                       &env->exception_index))
+                {
+                    RAWEx_ProfileStop(env, STATS_RAW_CHECK);
+                    ret = env->exception_index;
+                    cpu_loop_exit();
+                }
+                RAWEx_ProfileStop(env, STATS_RAW_CHECK);
+#endif /* VBOX */
+                tb = tb_find_fast();
+#ifdef DEBUG_EXEC
+                if ((loglevel & CPU_LOG_EXEC)) {
+                    fprintf(logfile, "Trace 0x%08lx [" TARGET_FMT_lx "] %s\n",
+                            (long)tb->tc_ptr, tb->pc,
+                            lookup_symbol(tb->pc));
+                }
+#endif
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+                T0 = tmp_T0;
+#endif
+                /* see if we can patch the calling TB. When the TB
+                   spans two pages, we cannot safely do a direct
+                   jump. */
+                {
+                    if (T0 != 0 &&
+#if USE_KQEMU
+                        (env->kqemu_enabled != 2) &&
+#endif
+#ifdef VBOX
+                        !(tb->cflags & CF_RAW_MODE) &&
+#endif
+                        tb->page_addr[1] == -1
+#if defined(TARGET_I386) && defined(USE_CODE_COPY)
+                    && (tb->cflags & CF_CODE_COPY) ==
+                    (((TranslationBlock *)(T0 & ~3))->cflags & CF_CODE_COPY)
+#endif
+                    ) {
+                    spin_lock(&tb_lock);
+                    tb_add_jump((TranslationBlock *)(long)(T0 & ~3), T0 & 3, tb);
+#if defined(USE_CODE_COPY)
+                    /* propagates the FP use info */
+                    ((TranslationBlock *)(T0 & ~3))->cflags |=
+                        (tb->cflags & CF_FP_USED);
+#endif
+                    spin_unlock(&tb_lock);
+                }
+                }
+                tc_ptr = tb->tc_ptr;
+                env->current_tb = tb;
+                /* execute the generated code */
+                gen_func = (void *)tc_ptr;
+#if defined(__sparc__)
+                __asm__ __volatile__("call	%0\n\t"
+                                     "mov	%%o7,%%i0"
+                                     : /* no outputs */
+                                     : "r" (gen_func)
+                                     : "i0", "i1", "i2", "i3", "i4", "i5",
+                                       "l0", "l1", "l2", "l3", "l4", "l5",
+                                       "l6", "l7");
+#elif defined(__arm__)
+                asm volatile ("mov pc, %0\n\t"
+                              ".global exec_loop\n\t"
+                              "exec_loop:\n\t"
+                              : /* no outputs */
+                              : "r" (gen_func)
+                              : "r1", "r2", "r3", "r8", "r9", "r10", "r12", "r14");
+#elif defined(TARGET_I386) && defined(USE_CODE_COPY)
+{
+    if (!(tb->cflags & CF_CODE_COPY)) {
+        if ((tb->cflags & CF_FP_USED) && env->native_fp_regs) {
+            save_native_fp_state(env);
+        }
+        gen_func();
+    } else {
+        if ((tb->cflags & CF_FP_USED) && !env->native_fp_regs) {
+            restore_native_fp_state(env);
+        }
+        /* we work with native eflags */
+        CC_SRC = cc_table[CC_OP].compute_all();
+        CC_OP = CC_OP_EFLAGS;
+        asm(".globl exec_loop\n"
+            "\n"
+            "debug1:\n"
+            "    pushl %%ebp\n"
+            "    fs movl %10, %9\n"
+            "    fs movl %11, %%eax\n"
+            "    andl $0x400, %%eax\n"
+            "    fs orl %8, %%eax\n"
+            "    pushl %%eax\n"
+            "    popf\n"
+            "    fs movl %%esp, %12\n"
+            "    fs movl %0, %%eax\n"
+            "    fs movl %1, %%ecx\n"
+            "    fs movl %2, %%edx\n"
+            "    fs movl %3, %%ebx\n"
+            "    fs movl %4, %%esp\n"
+            "    fs movl %5, %%ebp\n"
+            "    fs movl %6, %%esi\n"
+            "    fs movl %7, %%edi\n"
+            "    fs jmp *%9\n"
+            "exec_loop:\n"
+            "    fs movl %%esp, %4\n"
+            "    fs movl %12, %%esp\n"
+            "    fs movl %%eax, %0\n"
+            "    fs movl %%ecx, %1\n"
+            "    fs movl %%edx, %2\n"
+            "    fs movl %%ebx, %3\n"
+            "    fs movl %%ebp, %5\n"
+            "    fs movl %%esi, %6\n"
+            "    fs movl %%edi, %7\n"
+            "    pushf\n"
+            "    popl %%eax\n"
+            "    movl %%eax, %%ecx\n"
+            "    andl $0x400, %%ecx\n"
+            "    shrl $9, %%ecx\n"
+            "    andl $0x8d5, %%eax\n"
+            "    fs movl %%eax, %8\n"
+            "    movl $1, %%eax\n"
+            "    subl %%ecx, %%eax\n"
+            "    fs movl %%eax, %11\n"
+            "    fs movl %9, %%ebx\n" /* get T0 value */
+            "    popl %%ebp\n"
+            :
+            : "m" (*(uint8_t *)offsetof(CPUState, regs[0])),
+            "m" (*(uint8_t *)offsetof(CPUState, regs[1])),
+            "m" (*(uint8_t *)offsetof(CPUState, regs[2])),
+            "m" (*(uint8_t *)offsetof(CPUState, regs[3])),
+            "m" (*(uint8_t *)offsetof(CPUState, regs[4])),
+            "m" (*(uint8_t *)offsetof(CPUState, regs[5])),
+            "m" (*(uint8_t *)offsetof(CPUState, regs[6])),
+            "m" (*(uint8_t *)offsetof(CPUState, regs[7])),
+            "m" (*(uint8_t *)offsetof(CPUState, cc_src)),
+            "m" (*(uint8_t *)offsetof(CPUState, tmp0)),
+            "a" (gen_func),
+            "m" (*(uint8_t *)offsetof(CPUState, df)),
+            "m" (*(uint8_t *)offsetof(CPUState, saved_esp))
+            : "%ecx", "%edx"
+            );
+    }
+}
+#elif defined(__ia64)
+		struct fptr {
+			void *ip;
+			void *gp;
+		} fp;
+
+		fp.ip = tc_ptr;
+		fp.gp = code_gen_buffer + 2 * (1 << 20);
+		(*(void (*)(void)) &fp)();
+#else
+#if defined(DEBUG) && defined(VBOX) && !defined(DEBUG_dmik)
+#if !defined(DEBUG_bird)
+                if (((env->hflags >> HF_CPL_SHIFT) & 3) == 0 && (env->hflags & HF_PE_MASK) && (env->cr[0] & CR0_PG_MASK))
+                {
+                    if(!(env->state & CPU_EMULATE_SINGLE_STEP))
+                    {
+                        Log(("EMR0: %VGv IF=%d TF=%d CPL=%d\n", env->eip, (env->eflags & IF_MASK) ? 1 : 0, (env->eflags & TF_MASK) ? 1 : 0, (env->hflags >> HF_CPL_SHIFT) & 3));
+                    }
+                }
+                else
+                if (((env->hflags >> HF_CPL_SHIFT) & 3) == 3 && (env->hflags & HF_PE_MASK) && (env->cr[0] & CR0_PG_MASK))
+                {
+                    if(!(env->state & CPU_EMULATE_SINGLE_STEP))
+                    {
+                        if(env->eflags & VM_MASK)
+                        {
+                            Log(("EMV86: %VGv IF=%d TF=%d CPL=%d CR0=%RGr\n", env->eip, (env->eflags & IF_MASK) ? 1 : 0, (env->eflags & TF_MASK) ? 1 : 0, (env->hflags >> HF_CPL_SHIFT) & 3, env->cr[0]));
+                        }
+                        else
+                        {
+                            Log(("EMR3: %VGv IF=%d TF=%d CPL=%d IOPL=%d CR0=%RGr\n", env->eip, (env->eflags & IF_MASK) ? 1 : 0, (env->eflags & TF_MASK) ? 1 : 0, (env->hflags >> HF_CPL_SHIFT) & 3, ((env->eflags >> IOPL_SHIFT) & 3), env->cr[0]));
+                        }
+                    }
+                }
+#endif /* !DEBUG_bird */
+                if(env->state & CPU_EMULATE_SINGLE_STEP)
+                {
+#ifdef DEBUG_bird
+                    static int s_cTimes = 0;
+                    if (s_cTimes++ > 1000000) /* 1 million */
+                    {
+                        RTLogPrintf("Enough stepping!\n");
+                        #if 0
+                        env->exception_index = EXCP_DEBUG;
+                        cpu_loop_exit();
+                        #else
+                        env->state &= ~CPU_EMULATE_SINGLE_STEP;
+                        #endif
+                    }
+#endif
+                    TMCpuTickPause(env->pVM);
+                    remR3DisasInstr(env, -1, NULL);
+                    TMCpuTickResume(env->pVM);
+                    if(emulate_single_instr(env) == -1)
+                    {
+                        printf("emulate_single_instr failed for EIP=%VGv!!\n", env->eip);
+                    }
+                }
+                else
+                {
+                    RAWEx_ProfileStart(env, STATS_QEMU_RUN_EMULATED_CODE);
+                    gen_func();
+                    RAWEx_ProfileStop(env, STATS_QEMU_RUN_EMULATED_CODE);
+                }
+#else /* !DEBUG || !VBOX || DEBUG_dmik */
+
+#ifdef VBOX
+                RAWEx_ProfileStart(env, STATS_QEMU_RUN_EMULATED_CODE);
+                gen_func();
+                RAWEx_ProfileStop(env, STATS_QEMU_RUN_EMULATED_CODE);
+#else /* !VBOX */
+                gen_func();
+#endif /* !VBOX */
+
+#endif /* !DEBUG || !VBOX || DEBUG_dmik */
+#endif
+                env->current_tb = NULL;
+                /* reset soft MMU for next block (it can currently
+                   only be set by a memory fault) */
+#if defined(TARGET_I386) && !defined(CONFIG_SOFTMMU)
+                if (env->hflags & HF_SOFTMMU_MASK) {
+                    env->hflags &= ~HF_SOFTMMU_MASK;
+                    /* do not allow linking to another block */
+                    T0 = 0;
+                }
+#endif
+#if defined(USE_KQEMU)
+#define MIN_CYCLE_BEFORE_SWITCH (100 * 1000)
+                if (kqemu_is_ok(env) &&
+                    (cpu_get_time_fast() - env->last_io_time) >= MIN_CYCLE_BEFORE_SWITCH) {
+                    cpu_loop_exit();
+                }
+#endif
+            }
+        } else {
+            env_to_regs();
+        }
+    } /* for(;;) */
+
+
+#if defined(TARGET_I386)
+#if defined(USE_CODE_COPY)
+    if (env->native_fp_regs) {
+        save_native_fp_state(env);
+    }
+#endif
+    /* restore flags in standard format */
+    env->eflags = env->eflags | cc_table[CC_OP].compute_all() | (DF & DF_MASK);
+#elif defined(TARGET_ARM)
+    /* XXX: Save/restore host FPU exception state? */
+#elif defined(TARGET_SPARC)
+#if defined(reg_REGWPTR)
+    REGWPTR = saved_regwptr;
+#endif
+#elif defined(TARGET_PPC)
+#elif defined(TARGET_M68K)
+    cpu_m68k_flush_flags(env, env->cc_op);
+    env->cc_op = CC_OP_FLAGS;
+    env->sr = (env->sr & 0xffe0)
+              | env->cc_dest | (env->cc_x << 4);
+#elif defined(TARGET_MIPS)
+#elif defined(TARGET_SH4)
+    /* XXXXX */
+#else
+#error unsupported target CPU
+#endif
+#if defined(__sparc__) && !defined(HOST_SOLARIS)
+    asm volatile ("mov %0, %%i7" : : "r" (saved_i7));
+#endif
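+/* third include of hostregs_helper.h: with neither DECLARE_HOST_REGS nor SAVE_HOST_REGS defined it expands to the code restoring the host registers saved above */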
+#include "hostregs_helper.h"
+
+    /* fail safe: never use cpu_single_env outside cpu_exec() */
+    cpu_single_env = NULL;
+    return ret;
+}
+
+#endif /* !VBOX */
+
+/* must only be called from the generated code as an exception can be
+   generated */
+void tb_invalidate_page_range(target_ulong start, target_ulong end)
+{
+    /* XXX: cannot enable it yet because it yields to MMU exception
+       where NIP != read address on PowerPC */
+#if 0
+    target_ulong phys_addr;
+    phys_addr = get_phys_addr_code(env, start);
+    tb_invalidate_phys_page_range(phys_addr, phys_addr + end - start, 0);
+#endif
+}
+
+#if defined(TARGET_I386) && defined(CONFIG_USER_ONLY)
+
+void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector)
+{
+    CPUX86State *saved_env;
+
+    saved_env = env;
+    env = s;
+    if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
+        selector &= 0xffff;
+        cpu_x86_load_seg_cache(env, seg_reg, selector,
+                               (selector << 4), 0xffff, 0);
+    } else {
+        load_seg(seg_reg, selector);
+    }
+    env = saved_env;
+}
+
+void cpu_x86_fsave(CPUX86State *s, uint8_t *ptr, int data32)
+{
+    CPUX86State *saved_env;
+
+    saved_env = env;
+    env = s;
+
+    helper_fsave((target_ulong)ptr, data32);
+
+    env = saved_env;
+}
+
+void cpu_x86_frstor(CPUX86State *s, uint8_t *ptr, int data32)
+{
+    CPUX86State *saved_env;
+
+    saved_env = env;
+    env = s;
+
+    helper_frstor((target_ulong)ptr, data32);
+
+    env = saved_env;
+}
+
+#endif /* TARGET_I386 */
+
+#if !defined(CONFIG_SOFTMMU)
+
+#if defined(TARGET_I386)
+
+/* 'pc' is the host PC at which the exception was raised. 'address' is
+   the effective address of the memory exception. 'is_write' is 1 if a
+   write caused the exception and 0 otherwise. 'old_set' is the
+   signal set which should be restored */
+static inline int handle_cpu_signal(unsigned long pc, unsigned long address,
+                                    int is_write, sigset_t *old_set,
+                                    void *puc)
+{
+    TranslationBlock *tb;
+    int ret;
+
+    if (cpu_single_env)
+        env = cpu_single_env; /* XXX: find a correct solution for multithread */
+#if defined(DEBUG_SIGNAL)
+    qemu_printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
+                pc, address, is_write, *(unsigned long *)old_set);
+#endif
+    /* XXX: locking issue */
+    if (is_write && page_unprotect(h2g(address), pc, puc)) {
+        return 1;
+    }
+
+    /* see if it is an MMU fault */
+    ret = cpu_x86_handle_mmu_fault(env, address, is_write,
+                                   ((env->hflags & HF_CPL_MASK) == 3), 0);
+    if (ret < 0)
+        return 0; /* not an MMU fault */
+    if (ret == 0)
+        return 1; /* the MMU fault was handled without causing real CPU fault */
+    /* now we have a real cpu fault */
+    tb = tb_find_pc(pc);
+    if (tb) {
+        /* the PC is inside the translated code. It means that we have
+           a virtual CPU fault */
+        cpu_restore_state(tb, env, pc, puc);
+    }
+    if (ret == 1) {
+#if 0
+        printf("PF exception: EIP=0x%VGv CR2=0x%VGv error=0x%x\n",
+               env->eip, env->cr[2], env->error_code);
+#endif
+        /* we restore the process signal mask as the sigreturn should
+           do it (XXX: use sigsetjmp) */
+        sigprocmask(SIG_SETMASK, old_set, NULL);
+        raise_exception_err(env->exception_index, env->error_code);
+    } else {
+        /* activate soft MMU for this block */
+        env->hflags |= HF_SOFTMMU_MASK;
+        cpu_resume_from_signal(env, puc);
+    }
+    /* never comes here */
+    return 1;
+}
+
+#elif defined(TARGET_ARM)
+static inline int handle_cpu_signal(unsigned long pc, unsigned long address,
+                                    int is_write, sigset_t *old_set,
+                                    void *puc)
+{
+    TranslationBlock *tb;
+    int ret;
+
+    if (cpu_single_env)
+        env = cpu_single_env; /* XXX: find a correct solution for multithread */
+#if defined(DEBUG_SIGNAL)
+    printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
+           pc, address, is_write, *(unsigned long *)old_set);
+#endif
+    /* XXX: locking issue */
+    if (is_write && page_unprotect(h2g(address), pc, puc)) {
+        return 1;
+    }
+    /* see if it is an MMU fault */
+    ret = cpu_arm_handle_mmu_fault(env, address, is_write, 1, 0);
+    if (ret < 0)
+        return 0; /* not an MMU fault */
+    if (ret == 0)
+        return 1; /* the MMU fault was handled without causing real CPU fault */
+    /* now we have a real cpu fault */
+    tb = tb_find_pc(pc);
+    if (tb) {
+        /* the PC is inside the translated code. It means that we have
+           a virtual CPU fault */
+        cpu_restore_state(tb, env, pc, puc);
+    }
+    /* we restore the process signal mask as the sigreturn should
+       do it (XXX: use sigsetjmp) */
+    sigprocmask(SIG_SETMASK, old_set, NULL);
+    cpu_loop_exit();
+}
+#elif defined(TARGET_SPARC)
+static inline int handle_cpu_signal(unsigned long pc, unsigned long address,
+                                    int is_write, sigset_t *old_set,
+                                    void *puc)
+{
+    TranslationBlock *tb;
+    int ret;
+
+    if (cpu_single_env)
+        env = cpu_single_env; /* XXX: find a correct solution for multithread */
+#if defined(DEBUG_SIGNAL)
+    printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
+           pc, address, is_write, *(unsigned long *)old_set);
+#endif
+    /* XXX: locking issue */
+    if (is_write && page_unprotect(h2g(address), pc, puc)) {
+        return 1;
+    }
+    /* see if it is an MMU fault */
+    ret = cpu_sparc_handle_mmu_fault(env, address, is_write, 1, 0);
+    if (ret < 0)
+        return 0; /* not an MMU fault */
+    if (ret == 0)
+        return 1; /* the MMU fault was handled without causing real CPU fault */
+    /* now we have a real cpu fault */
+    tb = tb_find_pc(pc);
+    if (tb) {
+        /* the PC is inside the translated code. It means that we have
+           a virtual CPU fault */
+        cpu_restore_state(tb, env, pc, puc);
+    }
+    /* we restore the process signal mask as the sigreturn should
+       do it (XXX: use sigsetjmp) */
+    sigprocmask(SIG_SETMASK, old_set, NULL);
+    cpu_loop_exit();
+}
+#elif defined (TARGET_PPC)
+static inline int handle_cpu_signal(unsigned long pc, unsigned long address,
+                                    int is_write, sigset_t *old_set,
+                                    void *puc)
+{
+    TranslationBlock *tb;
+    int ret;
+
+    if (cpu_single_env)
+        env = cpu_single_env; /* XXX: find a correct solution for multithread */
+#if defined(DEBUG_SIGNAL)
+    printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
+           pc, address, is_write, *(unsigned long *)old_set);
+#endif
+    /* XXX: locking issue */
+    if (is_write && page_unprotect(h2g(address), pc, puc)) {
+        return 1;
+    }
+
+    /* see if it is an MMU fault */
+    ret = cpu_ppc_handle_mmu_fault(env, address, is_write, msr_pr, 0);
+    if (ret < 0)
+        return 0; /* not an MMU fault */
+    if (ret == 0)
+        return 1; /* the MMU fault was handled without causing real CPU fault */
+
+    /* now we have a real cpu fault */
+    tb = tb_find_pc(pc);
+    if (tb) {
+        /* the PC is inside the translated code. It means that we have
+           a virtual CPU fault */
+        cpu_restore_state(tb, env, pc, puc);
+    }
+    if (ret == 1) {
+#if 0
+        printf("PF exception: NIP=0x%08x error=0x%x %p\n",
+               env->nip, env->error_code, tb);
+#endif
+    /* we restore the process signal mask as the sigreturn should
+       do it (XXX: use sigsetjmp) */
+        sigprocmask(SIG_SETMASK, old_set, NULL);
+        do_raise_exception_err(env->exception_index, env->error_code);
+    } else {
+        /* activate soft MMU for this block */
+        cpu_resume_from_signal(env, puc);
+    }
+    /* never comes here */
+    return 1;
+}
+
+#elif defined(TARGET_M68K)
+static inline int handle_cpu_signal(unsigned long pc, unsigned long address,
+                                    int is_write, sigset_t *old_set,
+                                    void *puc)
+{
+    TranslationBlock *tb;
+    int ret;
+
+    if (cpu_single_env)
+        env = cpu_single_env; /* XXX: find a correct solution for multithread */
+#if defined(DEBUG_SIGNAL)
+    printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
+           pc, address, is_write, *(unsigned long *)old_set);
+#endif
+    /* XXX: locking issue */
+    if (is_write && page_unprotect(address, pc, puc)) {
+        return 1;
+    }
+    /* see if it is an MMU fault */
+    ret = cpu_m68k_handle_mmu_fault(env, address, is_write, 1, 0);
+    if (ret < 0)
+        return 0; /* not an MMU fault */
+    if (ret == 0)
+        return 1; /* the MMU fault was handled without causing real CPU fault */
+    /* now we have a real cpu fault */
+    tb = tb_find_pc(pc);
+    if (tb) {
+        /* the PC is inside the translated code. It means that we have
+           a virtual CPU fault */
+        cpu_restore_state(tb, env, pc, puc);
+    }
+    /* we restore the process signal mask as the sigreturn should
+       do it (XXX: use sigsetjmp) */
+    sigprocmask(SIG_SETMASK, old_set, NULL);
+    cpu_loop_exit();
+    /* never comes here */
+    return 1;
+}
+
+#elif defined (TARGET_MIPS)
+static inline int handle_cpu_signal(unsigned long pc, unsigned long address,
+                                    int is_write, sigset_t *old_set,
+                                    void *puc)
+{
+    TranslationBlock *tb;
+    int ret;
+
+    if (cpu_single_env)
+        env = cpu_single_env; /* XXX: find a correct solution for multithread */
+#if defined(DEBUG_SIGNAL)
+    printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
+           pc, address, is_write, *(unsigned long *)old_set);
+#endif
+    /* XXX: locking issue */
+    if (is_write && page_unprotect(h2g(address), pc, puc)) {
+        return 1;
+    }
+
+    /* see if it is an MMU fault */
+    ret = cpu_mips_handle_mmu_fault(env, address, is_write, 1, 0);
+    if (ret < 0)
+        return 0; /* not an MMU fault */
+    if (ret == 0)
+        return 1; /* the MMU fault was handled without causing real CPU fault */
+
+    /* now we have a real cpu fault */
+    tb = tb_find_pc(pc);
+    if (tb) {
+        /* the PC is inside the translated code. It means that we have
+           a virtual CPU fault */
+        cpu_restore_state(tb, env, pc, puc);
+    }
+    if (ret == 1) {
+#if 0
+        printf("PF exception: NIP=0x%08x error=0x%x %p\n",
+               env->nip, env->error_code, tb);
+#endif
+    /* we restore the process signal mask as the sigreturn should
+       do it (XXX: use sigsetjmp) */
+        sigprocmask(SIG_SETMASK, old_set, NULL);
+        do_raise_exception_err(env->exception_index, env->error_code);
+    } else {
+        /* activate soft MMU for this block */
+        cpu_resume_from_signal(env, puc);
+    }
+    /* never comes here */
+    return 1;
+}
+
+#elif defined (TARGET_SH4)
+static inline int handle_cpu_signal(unsigned long pc, unsigned long address,
+                                    int is_write, sigset_t *old_set,
+                                    void *puc)
+{
+    TranslationBlock *tb;
+    int ret;
+
+    if (cpu_single_env)
+        env = cpu_single_env; /* XXX: find a correct solution for multithread */
+#if defined(DEBUG_SIGNAL)
+    printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
+           pc, address, is_write, *(unsigned long *)old_set);
+#endif
+    /* XXX: locking issue */
+    if (is_write && page_unprotect(h2g(address), pc, puc)) {
+        return 1;
+    }
+
+    /* see if it is an MMU fault */
+    ret = cpu_sh4_handle_mmu_fault(env, address, is_write, 1, 0);
+    if (ret < 0)
+        return 0; /* not an MMU fault */
+    if (ret == 0)
+        return 1; /* the MMU fault was handled without causing real CPU fault */
+
+    /* now we have a real cpu fault */
+    tb = tb_find_pc(pc);
+    if (tb) {
+        /* the PC is inside the translated code. It means that we have
+           a virtual CPU fault */
+        cpu_restore_state(tb, env, pc, puc);
+    }
+#if 0
+        printf("PF exception: NIP=0x%08x error=0x%x %p\n",
+               env->nip, env->error_code, tb);
+#endif
+    /* we restore the process signal mask as the sigreturn should
+       do it (XXX: use sigsetjmp) */
+    sigprocmask(SIG_SETMASK, old_set, NULL);
+    cpu_loop_exit();
+    /* never comes here */
+    return 1;
+}
+#else
+#error unsupported target CPU
+#endif
+
+#if defined(__i386__)
+
+#if defined(USE_CODE_COPY)
+static void cpu_send_trap(unsigned long pc, int trap,
+                          struct ucontext *uc)
+{
+    TranslationBlock *tb;
+
+    if (cpu_single_env)
+        env = cpu_single_env; /* XXX: find a correct solution for multithread */
+    /* now we have a real cpu fault */
+    tb = tb_find_pc(pc);
+    if (tb) {
+        /* the PC is inside the translated code. It means that we have
+           a virtual CPU fault */
+        cpu_restore_state(tb, env, pc, uc);
+    }
+    sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL);
+    raise_exception_err(trap, env->error_code);
+}
+#endif
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    struct ucontext *uc = puc;
+    unsigned long pc;
+    int trapno;
+
+#ifndef REG_EIP
+/* for glibc 2.1 */
+#define REG_EIP    EIP
+#define REG_ERR    ERR
+#define REG_TRAPNO TRAPNO
+#endif
+    pc = uc->uc_mcontext.gregs[REG_EIP];
+    trapno = uc->uc_mcontext.gregs[REG_TRAPNO];
+#if defined(TARGET_I386) && defined(USE_CODE_COPY)
+    if (trapno == 0x00 || trapno == 0x05) {
+        /* send division by zero or bound exception */
+        cpu_send_trap(pc, trapno, uc);
+        return 1;
+    } else
+#endif
+        return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                                 trapno == 0xe ?
+                                 (uc->uc_mcontext.gregs[REG_ERR] >> 1) & 1 : 0,
+                                 &uc->uc_sigmask, puc);
+}
+
+#elif defined(__x86_64__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    struct ucontext *uc = puc;
+    unsigned long pc;
+
+    pc = uc->uc_mcontext.gregs[REG_RIP];
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe ?
+                             (uc->uc_mcontext.gregs[REG_ERR] >> 1) & 1 : 0,
+                             &uc->uc_sigmask, puc);
+}
+
+#elif defined(__powerpc__)
+
+/***********************************************************************
+ * signal context platform-specific definitions
+ * From Wine
+ */
+#ifdef linux
+/* All Registers access - only for local access */
+# define REG_sig(reg_name, context)		((context)->uc_mcontext.regs->reg_name)
+/* Gpr Registers access  */
+# define GPR_sig(reg_num, context)		REG_sig(gpr[reg_num], context)
+# define IAR_sig(context)			REG_sig(nip, context)	/* Program counter */
+# define MSR_sig(context)			REG_sig(msr, context)   /* Machine State Register (Supervisor) */
+# define CTR_sig(context)			REG_sig(ctr, context)   /* Count register */
+# define XER_sig(context)			REG_sig(xer, context) /* User's integer exception register */
+# define LR_sig(context)			REG_sig(link, context) /* Link register */
+# define CR_sig(context)			REG_sig(ccr, context) /* Condition register */
+/* Float Registers access  */
+# define FLOAT_sig(reg_num, context)		(((double*)((char*)((context)->uc_mcontext.regs+48*4)))[reg_num])
+# define FPSCR_sig(context)			(*(int*)((char*)((context)->uc_mcontext.regs+(48+32*2)*4)))
+/* Exception Registers access */
+# define DAR_sig(context)			REG_sig(dar, context)
+# define DSISR_sig(context)			REG_sig(dsisr, context)
+# define TRAP_sig(context)			REG_sig(trap, context)
+#endif /* linux */
+
+#ifdef __APPLE__
+# include <sys/ucontext.h>
+typedef struct ucontext SIGCONTEXT;
+/* All Registers access - only for local access */
+# define REG_sig(reg_name, context)		((context)->uc_mcontext->ss.reg_name)
+# define FLOATREG_sig(reg_name, context)	((context)->uc_mcontext->fs.reg_name)
+# define EXCEPREG_sig(reg_name, context)	((context)->uc_mcontext->es.reg_name)
+# define VECREG_sig(reg_name, context)		((context)->uc_mcontext->vs.reg_name)
+/* Gpr Registers access */
+# define GPR_sig(reg_num, context)		REG_sig(r##reg_num, context)
+# define IAR_sig(context)			REG_sig(srr0, context)	/* Program counter */
+# define MSR_sig(context)			REG_sig(srr1, context)  /* Machine State Register (Supervisor) */
+# define CTR_sig(context)			REG_sig(ctr, context)
+# define XER_sig(context)			REG_sig(xer, context) /* User's integer exception register */
+# define LR_sig(context)			REG_sig(lr, context)  /* Link register */
+# define CR_sig(context)			REG_sig(cr, context)  /* Condition register */
+/* Float Registers access */
+# define FLOAT_sig(reg_num, context)		FLOATREG_sig(fpregs[reg_num], context)
+# define FPSCR_sig(context)			((double)FLOATREG_sig(fpscr, context))
+/* Exception Registers access */
+# define DAR_sig(context)			EXCEPREG_sig(dar, context)     /* Fault registers for coredump */
+# define DSISR_sig(context)			EXCEPREG_sig(dsisr, context)
+# define TRAP_sig(context)			EXCEPREG_sig(exception, context) /* number of powerpc exception taken */
+#endif /* __APPLE__ */
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    struct ucontext *uc = puc;
+    unsigned long pc;
+    int is_write;
+
+    pc = IAR_sig(uc);
+    is_write = 0;
+#if 0
+    /* ppc 4xx case */
+    if (DSISR_sig(uc) & 0x00800000)
+        is_write = 1;
+#else
+    if (TRAP_sig(uc) != 0x400 && (DSISR_sig(uc) & 0x02000000))
+        is_write = 1;
+#endif
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write, &uc->uc_sigmask, puc);
+}
+
+#elif defined(__alpha__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                           void *puc)
+{
+    siginfo_t *info = pinfo;
+    struct ucontext *uc = puc;
+    uint32_t *pc = uc->uc_mcontext.sc_pc;
+    uint32_t insn = *pc;
+    int is_write = 0;
+
+    /* XXX: need kernel patch to get write flag faster */
+    switch (insn >> 26) {
+    case 0x0d: // stw
+    case 0x0e: // stb
+    case 0x0f: // stq_u
+    case 0x24: // stf
+    case 0x25: // stg
+    case 0x26: // sts
+    case 0x27: // stt
+    case 0x2c: // stl
+    case 0x2d: // stq
+    case 0x2e: // stl_c
+    case 0x2f: // stq_c
+	is_write = 1;
+    }
+
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write, &uc->uc_sigmask, puc);
+}
+#elif defined(__sparc__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    uint32_t *regs = (uint32_t *)(info + 1);
+    void *sigmask = (regs + 20);
+    unsigned long pc;
+    int is_write;
+    uint32_t insn;
+
+    /* XXX: is there a standard glibc define ? */
+    pc = regs[1];
+    /* XXX: need kernel patch to get write flag faster */
+    is_write = 0;
+    insn = *(uint32_t *)pc;
+    if ((insn >> 30) == 3) {
+      switch((insn >> 19) & 0x3f) {
+      case 0x05: // stb
+      case 0x06: // sth
+      case 0x04: // st
+      case 0x07: // std
+      case 0x24: // stf
+      case 0x27: // stdf
+      case 0x25: // stfsr
+	is_write = 1;
+	break;
+      }
+    }
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write, sigmask, NULL);
+}
+
+#elif defined(__arm__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    struct ucontext *uc = puc;
+    unsigned long pc;
+    int is_write;
+
+    pc = uc->uc_mcontext.gregs[R15];
+    /* XXX: compute is_write */
+    is_write = 0;
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write,
+                             &uc->uc_sigmask, puc);
+}
+
+#elif defined(__mc68000)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    struct ucontext *uc = puc;
+    unsigned long pc;
+    int is_write;
+
+    pc = uc->uc_mcontext.gregs[16];
+    /* XXX: compute is_write */
+    is_write = 0;
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write,
+                             &uc->uc_sigmask, puc);
+}
+
+#elif defined(__ia64)
+
+#ifndef __ISR_VALID
+  /* This ought to be in <bits/siginfo.h>... */
+# define __ISR_VALID	1
+#endif
+
+int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
+{
+    siginfo_t *info = pinfo;
+    struct ucontext *uc = puc;
+    unsigned long ip;
+    int is_write = 0;
+
+    ip = uc->uc_mcontext.sc_ip;
+    switch (host_signum) {
+      case SIGILL:
+      case SIGFPE:
+      case SIGSEGV:
+      case SIGBUS:
+      case SIGTRAP:
+	  if (info->si_code && (info->si_segvflags & __ISR_VALID))
+	      /* ISR.W (write-access) is bit 33:  */
+	      is_write = (info->si_isr >> 33) & 1;
+	  break;
+
+      default:
+	  break;
+    }
+    return handle_cpu_signal(ip, (unsigned long)info->si_addr,
+                             is_write,
+                             &uc->uc_sigmask, puc);
+}
+
+#elif defined(__s390__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    struct ucontext *uc = puc;
+    unsigned long pc;
+    int is_write;
+
+    pc = uc->uc_mcontext.psw.addr;
+    /* XXX: compute is_write */
+    is_write = 0;
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write,
+                             &uc->uc_sigmask, puc);
+}
+
+#else
+
+#error host CPU specific signal handler needed
+
+#endif
+
+#endif /* !defined(CONFIG_SOFTMMU) */
Index: /trunk/src/recompiler_new/disas.h
===================================================================
--- /trunk/src/recompiler_new/disas.h	(revision 13168)
+++ /trunk/src/recompiler_new/disas.h	(revision 13168)
@@ -0,0 +1,21 @@
+#ifndef _QEMU_DISAS_H
+#define _QEMU_DISAS_H
+
+/* Disassemble this for me please... (debugging). */
+void disas(FILE *out, void *code, unsigned long size);
+void target_disas(FILE *out, target_ulong code, target_ulong size, int flags);
+void monitor_disas(CPUState *env,
+                   target_ulong pc, int nb_insn, int is_physical, int flags);
+
+/* Look up a symbol for debugging purposes.  Returns "" if unknown. */
+const char *lookup_symbol(target_ulong orig_addr);
+
+/* Filled in by elfload.c.  Simplistic, but will do for now. */
+extern struct syminfo {
+    unsigned int disas_num_syms;
+    void *disas_symtab;
+    const char *disas_strtab;
+    struct syminfo *next;
+} *syminfos;
+
+#endif /* _QEMU_DISAS_H */
Index: /trunk/src/recompiler_new/dyngen-exec.h
===================================================================
--- /trunk/src/recompiler_new/dyngen-exec.h	(revision 13168)
+++ /trunk/src/recompiler_new/dyngen-exec.h	(revision 13168)
@@ -0,0 +1,310 @@
+/*
+ *  dyngen defines for micro operation code
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#if !defined(__DYNGEN_EXEC_H__)
+#define __DYNGEN_EXEC_H__
+
+/* prevent Solaris from trying to typedef FILE in gcc's
+   include/floatingpoint.h, which would conflict with the
+   definition below */
+#ifdef __sun__
+#define _FILEDEFED
+#endif
+
+/* NOTE: standard headers should be used with special care at this
+   point because host CPU registers are used as global variables. Some
+   host headers do not allow that. */
+#include <stddef.h>
+
+#ifndef VBOX
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+// Linux/Sparc64 defines uint64_t
+#if !(defined (__sparc_v9__) && defined(__linux__))
+/* XXX: maybe this could be done for all 64-bit targets? */
+#if defined (__x86_64__) || defined(__ia64)
+typedef unsigned long uint64_t;
+#else
+typedef unsigned long long uint64_t;
+#endif
+#endif
+
+/* if Solaris/__sun__, don't typedef int8_t, as it will be typedef'd
+   prior to this and will cause an error in compilation, conflicting
+   with /usr/include/sys/int_types.h, line 75 */
+#ifndef __sun__
+typedef signed char int8_t;
+#endif
+typedef signed short int16_t;
+typedef signed int int32_t;
+// Linux/Sparc64 defines int64_t
+#if !(defined (__sparc_v9__) && defined(__linux__))
+#if defined (__x86_64__) || defined(__ia64)
+typedef signed long int64_t;
+#else
+typedef signed long long int64_t;
+#endif
+#endif
+
+/* XXX: This may be wrong for 64-bit ILP32 hosts.  */
+typedef void * host_reg_t;
+
+#define INT8_MIN		(-128)
+#define INT16_MIN		(-32767-1)
+#define INT32_MIN		(-2147483647-1)
+#define INT64_MIN		(-(int64_t)(9223372036854775807)-1)
+#define INT8_MAX		(127)
+#define INT16_MAX		(32767)
+#define INT32_MAX		(2147483647)
+#define INT64_MAX		((int64_t)(9223372036854775807))
+#define UINT8_MAX		(255)
+#define UINT16_MAX		(65535)
+#define UINT32_MAX		(4294967295U)
+#define UINT64_MAX		((uint64_t)(18446744073709551615))
+
+typedef struct FILE FILE;
+extern int fprintf(FILE *, const char *, ...);
+extern int printf(const char *, ...);
+#undef NULL
+#define NULL 0
+
+#else  /* VBOX */
+
+/* XXX: This may be wrong for 64-bit ILP32 hosts.  */
+typedef void * host_reg_t;
+
+#include <iprt/stdint.h>
+#include <stdio.h>
+
+#endif /* VBOX */
+
+#ifdef __i386__
+#define AREG0 "ebp"
+#define AREG1 "ebx"
+#define AREG2 "esi"
+#define AREG3 "edi"
+#endif
+#ifdef __x86_64__
+#ifdef VBOX 
+/* gcc 3.4.3 on 64-bit Solaris seems to miscompile when rbp is
+   used. (Setting AREG4 to "r15" causes a compiler error, so
+   don't try it.)  */
+# define AREG0 "rbx" 
+# define AREG1 "r12"
+# define AREG2 "r13"
+# define AREG3 "r14"
+#else
+#define AREG0 "rbp"
+#define AREG1 "rbx"
+#define AREG2 "r12"
+#define AREG3 "r13"
+#endif 
+//#define AREG4 "r14"
+//#define AREG5 "r15"
+#endif
+#ifdef __powerpc__
+#define AREG0 "r27"
+#define AREG1 "r24"
+#define AREG2 "r25"
+#define AREG3 "r26"
+/* XXX: suppress this hack */
+#if defined(CONFIG_USER_ONLY)
+#define AREG4 "r16"
+#define AREG5 "r17"
+#define AREG6 "r18"
+#define AREG7 "r19"
+#define AREG8 "r20"
+#define AREG9 "r21"
+#define AREG10 "r22"
+#define AREG11 "r23"
+#endif
+#define USE_INT_TO_FLOAT_HELPERS
+#define BUGGY_GCC_DIV64
+#endif
+#ifdef __arm__
+#define AREG0 "r7"
+#define AREG1 "r4"
+#define AREG2 "r5"
+#define AREG3 "r6"
+#endif
+#ifdef __mips__
+#define AREG0 "s3"
+#define AREG1 "s0"
+#define AREG2 "s1"
+#define AREG3 "s2"
+#endif
+#ifdef __sparc__
+#ifdef HOST_SOLARIS
+#define AREG0 "g2"
+#define AREG1 "g3"
+#define AREG2 "g4"
+#define AREG3 "g5"
+#define AREG4 "g6"
+#else
+#ifdef __sparc_v9__
+#define AREG0 "g1"
+#define AREG1 "g4"
+#define AREG2 "g5"
+#define AREG3 "g7"
+#else
+#define AREG0 "g6"
+#define AREG1 "g1"
+#define AREG2 "g2"
+#define AREG3 "g3"
+#define AREG4 "l0"
+#define AREG5 "l1"
+#define AREG6 "l2"
+#define AREG7 "l3"
+#define AREG8 "l4"
+#define AREG9 "l5"
+#define AREG10 "l6"
+#define AREG11 "l7"
+#endif
+#endif
+#define USE_FP_CONVERT
+#endif
+#ifdef __s390__
+#define AREG0 "r10"
+#define AREG1 "r7"
+#define AREG2 "r8"
+#define AREG3 "r9"
+#endif
+#ifdef __alpha__
+/* Note $15 is the frame pointer, so anything in op-i386.c that would
+   require a frame pointer, like alloca, would probably lose.  */
+#define AREG0 "$15"
+#define AREG1 "$9"
+#define AREG2 "$10"
+#define AREG3 "$11"
+#define AREG4 "$12"
+#define AREG5 "$13"
+#define AREG6 "$14"
+#endif
+#ifdef __mc68000
+#define AREG0 "%a5"
+#define AREG1 "%a4"
+#define AREG2 "%d7"
+#define AREG3 "%d6"
+#define AREG4 "%d5"
+#endif
+#ifdef __ia64__
+#define AREG0 "r7"
+#define AREG1 "r4"
+#define AREG2 "r5"
+#define AREG3 "r6"
+#endif
+
+/* force GCC to generate only one epilog at the end of the function */
+#define FORCE_RET() __asm__ __volatile__("" : : : "memory");
+
+#ifndef OPPROTO
+#define OPPROTO
+#endif
+
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s)	tostring(s)
+#define tostring(s)	#s
+
+#ifdef __alpha__
+/* the symbols are considered non-exported, so a br immediate is generated */
+#define __hidden __attribute__((visibility("hidden")))
+#else
+#define __hidden 
+#endif
+
+#if defined(__alpha__)
+/* Suggested by Richard Henderson. This will result in code like
+        ldah $0,__op_param1($29)        !gprelhigh
+        lda $0,__op_param1($0)          !gprellow
+   We can then conveniently change $29 to $31 and adapt the offsets to
+   emit the appropriate constant.  */
+extern int __op_param1 __hidden;
+extern int __op_param2 __hidden;
+extern int __op_param3 __hidden;
+#define PARAM1 ({ int _r; asm("" : "=r"(_r) : "0" (&__op_param1)); _r; })
+#define PARAM2 ({ int _r; asm("" : "=r"(_r) : "0" (&__op_param2)); _r; })
+#define PARAM3 ({ int _r; asm("" : "=r"(_r) : "0" (&__op_param3)); _r; })
+#else
+#if defined(__APPLE__)
+static int __op_param1, __op_param2, __op_param3;
+#else
+extern int __op_param1, __op_param2, __op_param3;
+#endif
+#define PARAM1 ((long)(&__op_param1))
+#define PARAM2 ((long)(&__op_param2))
+#define PARAM3 ((long)(&__op_param3))
+#endif /* !defined(__alpha__) */
+
+extern int __op_jmp0, __op_jmp1, __op_jmp2, __op_jmp3;
+
+#if defined(_WIN32) || defined(__APPLE__) || defined(__OS2__)
+#define ASM_NAME(x) "_" #x
+#else
+#define ASM_NAME(x) #x
+#endif
+
+#ifdef __i386__
+#define EXIT_TB() asm volatile ("ret")
+#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
+#endif
+#ifdef __x86_64__
+#define EXIT_TB() asm volatile ("ret")
+#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
+#endif
+#ifdef __powerpc__
+#define EXIT_TB() asm volatile ("blr")
+#define GOTO_LABEL_PARAM(n) asm volatile ("b " ASM_NAME(__op_gen_label) #n)
+#endif
+#ifdef __s390__
+#define EXIT_TB() asm volatile ("br %r14")
+#define GOTO_LABEL_PARAM(n) asm volatile ("b " ASM_NAME(__op_gen_label) #n)
+#endif
+#ifdef __alpha__
+#define EXIT_TB() asm volatile ("ret")
+#endif
+#ifdef __ia64__
+#define EXIT_TB() asm volatile ("br.ret.sptk.many b0;;")
+#define GOTO_LABEL_PARAM(n) asm volatile ("br.sptk.many " \
+					  ASM_NAME(__op_gen_label) #n)
+#endif
+#ifdef __sparc__
+#define EXIT_TB() asm volatile ("jmpl %i0 + 8, %g0; nop")
+#define GOTO_LABEL_PARAM(n) asm volatile ("ba " ASM_NAME(__op_gen_label) #n ";nop")
+#endif
+#ifdef __arm__
+#define EXIT_TB() asm volatile ("b exec_loop")
+#define GOTO_LABEL_PARAM(n) asm volatile ("b " ASM_NAME(__op_gen_label) #n)
+#endif
+#ifdef __mc68000
+#define EXIT_TB() asm volatile ("rts")
+#endif
+
+#endif /* !defined(__DYNGEN_EXEC_H__) */
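
Taken together, the macros in this header are what a micro-op source file is written against: AREG0 pins the CPU state pointer in a reserved host register, the PARAMn values are placeholder constants that dyngen patches when the op is copied into a translation block, and FORCE_RET()/EXIT_TB() control the op epilogue. A minimal sketch of an op using them, where CPUState and T0 are assumptions borrowed from the usual op files rather than definitions from this header:

    register struct CPUState *env asm(AREG0);   /* CPU state kept in the reserved register */

    void OPPROTO op_movl_T0_im(void)
    {
        T0 = PARAM1;     /* PARAM1 is a relocation that dyngen rewrites to the real constant */
        FORCE_RET();     /* keep gcc from emitting more than one epilogue */
    }
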
Index: /trunk/src/recompiler_new/dyngen-op.h
===================================================================
--- /trunk/src/recompiler_new/dyngen-op.h	(revision 13168)
+++ /trunk/src/recompiler_new/dyngen-op.h	(revision 13168)
@@ -0,0 +1,9 @@
+static inline int gen_new_label(void)
+{
+    return nb_gen_labels++;
+}
+
+static inline void gen_set_label(int n)
+{
+    gen_labels[n] = gen_opc_ptr - gen_opc_buf;
+}
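
These two inlines are the whole label interface the translators see: gen_new_label() hands out the next index into gen_labels[], and gen_set_label() records the current position in the opcode stream so a forward branch to that label can be resolved later. A minimal usage sketch; gen_op_jz_T0 is a hypothetical conditional-branch op that takes a label parameter:

    int l = gen_new_label();   /* reserve a label id for the not-taken path */
    gen_op_jz_T0(l);           /* hypothetical op: jump to label l if T0 == 0 */
    /* ... ops for the fall-through case ... */
    gen_set_label(l);          /* bind the label to the current opcode offset */
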
Index: /trunk/src/recompiler_new/dyngen.c
===================================================================
--- /trunk/src/recompiler_new/dyngen.c	(revision 13168)
+++ /trunk/src/recompiler_new/dyngen.c	(revision 13168)
@@ -0,0 +1,3214 @@
+/*
+ *  Generic Dynamic compiler generator
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ *  The COFF object format support was extracted from Kazu's QEMU port
+ *  to Win32.
+ *
+ *  Mach-O Support by Matt Reda and Pierre d'Herbemont
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * Sun GPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the General Public License version 2 (GPLv2) at this time for any software where
+ * a choice of GPL license versions is made available with the language indicating
+ * that GPLv2 or any later version may be used, or where a choice of which version
+ * of the GPL is applied is otherwise unspecified.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "config-host.h"
+
+/* NOTE: we test CONFIG_WIN32 instead of _WIN32 to enable
+   cross-compilation */
+#if defined(CONFIG_WIN32)
+#define CONFIG_FORMAT_COFF
+#elif defined(CONFIG_DARWIN)
+#define CONFIG_FORMAT_MACH
+#elif defined(CONFIG_OS2)
+#define CONFIG_FORMAT_AOUT
+#else
+#define CONFIG_FORMAT_ELF
+#endif
+
+#ifdef CONFIG_FORMAT_ELF
+
+/* ELF format definitions. We use these macros to test the CPU to
+   allow cross-compilation (this tool must be run on the build
+   platform) */
+#if defined(HOST_I386)
+
+#define ELF_CLASS	ELFCLASS32
+#define ELF_ARCH	EM_386
+#define elf_check_arch(x) ( ((x) == EM_386) || ((x) == EM_486) )
+#undef ELF_USES_RELOCA
+
+#elif defined(HOST_X86_64)
+
+#define ELF_CLASS	ELFCLASS64
+#define ELF_ARCH	EM_X86_64
+#define elf_check_arch(x) ((x) == EM_X86_64)
+#define ELF_USES_RELOCA
+
+#elif defined(HOST_PPC)
+
+#define ELF_CLASS	ELFCLASS32
+#define ELF_ARCH	EM_PPC
+#define elf_check_arch(x) ((x) == EM_PPC)
+#define ELF_USES_RELOCA
+
+#elif defined(HOST_S390)
+
+#define ELF_CLASS	ELFCLASS32
+#define ELF_ARCH	EM_S390
+#define elf_check_arch(x) ((x) == EM_S390)
+#define ELF_USES_RELOCA
+
+#elif defined(HOST_ALPHA)
+
+#define ELF_CLASS	ELFCLASS64
+#define ELF_ARCH	EM_ALPHA
+#define elf_check_arch(x) ((x) == EM_ALPHA)
+#define ELF_USES_RELOCA
+
+#elif defined(HOST_IA64)
+
+#define ELF_CLASS	ELFCLASS64
+#define ELF_ARCH	EM_IA_64
+#define elf_check_arch(x) ((x) == EM_IA_64)
+#define ELF_USES_RELOCA
+
+#elif defined(HOST_SPARC)
+
+#define ELF_CLASS	ELFCLASS32
+#define ELF_ARCH	EM_SPARC
+#define elf_check_arch(x) ((x) == EM_SPARC || (x) == EM_SPARC32PLUS)
+#define ELF_USES_RELOCA
+
+#elif defined(HOST_SPARC64)
+
+#define ELF_CLASS	ELFCLASS64
+#define ELF_ARCH	EM_SPARCV9
+#define elf_check_arch(x) ((x) == EM_SPARCV9)
+#define ELF_USES_RELOCA
+
+#elif defined(HOST_ARM)
+
+#define ELF_CLASS	ELFCLASS32
+#define ELF_ARCH	EM_ARM
+#define elf_check_arch(x) ((x) == EM_ARM)
+#define ELF_USES_RELOC
+
+#elif defined(HOST_M68K)
+
+#define ELF_CLASS	ELFCLASS32
+#define ELF_ARCH	EM_68K
+#define elf_check_arch(x) ((x) == EM_68K)
+#define ELF_USES_RELOCA
+
+#else
+#error unsupported CPU - please update the code
+#endif
+
+#include "elf.h"
+
+#if ELF_CLASS == ELFCLASS32
+typedef int32_t host_long;
+typedef uint32_t host_ulong;
+#define swabls(x) swab32s(x)
+#define swablss(x) swab32ss(x)
+#else
+typedef int64_t host_long;
+typedef uint64_t host_ulong;
+#define swabls(x) swab64s(x)
+#define swablss(x) swab64ss(x)
+#endif
+
+#ifdef ELF_USES_RELOCA
+#define SHT_RELOC SHT_RELA
+#else
+#define SHT_RELOC SHT_REL
+#endif
+
+#define EXE_RELOC ELF_RELOC
+#define EXE_SYM ElfW(Sym)
+
+#endif /* CONFIG_FORMAT_ELF */
+
+#ifdef CONFIG_FORMAT_COFF
+
+#include "a.out.h"
+
+typedef int32_t host_long;
+typedef uint32_t host_ulong;
+
+#define FILENAMELEN 256
+
+typedef struct coff_sym {
+    struct external_syment *st_syment;
+    char st_name[FILENAMELEN];
+    uint32_t st_value;
+    int  st_size;
+    uint8_t st_type;
+    uint8_t st_shndx;
+} coff_Sym;
+
+typedef struct coff_rel {
+    struct external_reloc *r_reloc;
+    int  r_offset;
+    uint8_t r_type;
+} coff_Rel;
+
+#define EXE_RELOC struct coff_rel
+#define EXE_SYM struct coff_sym
+
+#endif /* CONFIG_FORMAT_COFF */
+
+#ifdef CONFIG_FORMAT_MACH
+
+#include <mach-o/loader.h>
+#include <mach-o/nlist.h>
+#include <mach-o/reloc.h>
+#if !defined(HOST_I386)
+#include <mach-o/ppc/reloc.h>
+#endif 
+
+# define check_mach_header(x) (x.magic == MH_MAGIC)
+typedef int32_t host_long;
+typedef uint32_t host_ulong;
+
+struct nlist_extended
+{
+   union {
+   char *n_name; 
+   long  n_strx; 
+   } n_un;
+   unsigned char n_type; 
+   unsigned char n_sect; 
+   short st_desc;
+   unsigned long st_value;
+   unsigned long st_size;
+};
+
+#define EXE_RELOC struct relocation_info
+#define EXE_SYM struct nlist_extended
+#if defined(HOST_I386)
+# define r_offset r_address
+#endif 
+
+#endif /* CONFIG_FORMAT_MACH */
+
+#ifdef CONFIG_FORMAT_AOUT
+
+#include "a_out.h"
+
+typedef int32_t host_long;
+typedef uint32_t host_ulong;
+
+struct nlist_extended
+{
+    union {
+      char *n_name;
+      struct nlist *n_next;
+      long n_strx;
+    } n_un;
+    unsigned char n_type;
+    char n_other;
+    short n_desc;
+    unsigned long st_value; /* n_value -> st_value */
+    unsigned long st_size;  /* added */
+};
+
+#define EXE_RELOC struct relocation_info
+#define EXE_SYM struct nlist_extended
+#define r_offset r_address
+
+#endif /* CONFIG_FORMAT_AOUT */
+
+#include "bswap.h"
+
+enum {
+    OUT_GEN_OP,
+    OUT_CODE,
+    OUT_INDEX_OP
+};
+
+/* all dynamically generated functions begin with this code */
+#define OP_PREFIX "op_"
+
+int do_swap;
+
+void __attribute__((noreturn)) __attribute__((format (printf, 1, 2))) error(const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    fprintf(stderr, "dyngen: ");
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, "\n");
+    va_end(ap);
+    exit(1);
+}
+
+void *load_data(int fd, long offset, unsigned int size)
+{
+    char *data;
+
+    data = malloc(size);
+    if (!data)
+        return NULL;
+    lseek(fd, offset, SEEK_SET);
+    if (read(fd, data, size) != size) {
+        free(data);
+        return NULL;
+    }
+    return data;
+}
+
+int strstart(const char *str, const char *val, const char **ptr)
+{
+    const char *p, *q;
+    p = str;
+    q = val;
+    while (*q != '\0') {
+        if (*p != *q)
+            return 0;
+        p++;
+        q++;
+    }
+    if (ptr)
+        *ptr = p;
+    return 1;
+}
+
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+    int c;
+    char *q = buf;
+
+    if (buf_size <= 0)
+        return;
+
+    for(;;) {
+        c = *str++;
+        if (c == 0 || q >= buf + buf_size - 1)
+            break;
+        *q++ = c;
+    }
+    *q = '\0';
+}
+
+void swab16s(uint16_t *p)
+{
+    *p = bswap16(*p);
+}
+
+void swab32s(uint32_t *p)
+{
+    *p = bswap32(*p);
+}
+
+void swab32ss(int32_t *p)
+{
+    *p = bswap32(*p);
+}
+
+void swab64s(uint64_t *p)
+{
+    *p = bswap64(*p);
+}
+
+void swab64ss(int64_t *p)
+{
+    *p = bswap64(*p);
+}
+
+uint16_t get16(uint16_t *p)
+{
+    uint16_t val;
+    val = *p;
+    if (do_swap)
+        val = bswap16(val);
+    return val;
+}
+
+uint32_t get32(uint32_t *p)
+{
+    uint32_t val;
+    val = *p;
+    if (do_swap)
+        val = bswap32(val);
+    return val;
+}
+
+void put16(uint16_t *p, uint16_t val)
+{
+    if (do_swap)
+        val = bswap16(val);
+    *p = val;
+}
+
+void put32(uint32_t *p, uint32_t val)
+{
+    if (do_swap)
+        val = bswap32(val);
+    *p = val;
+}
+
+/* executable information */
+EXE_SYM *symtab;
+int nb_syms;
+int text_shndx;
+uint8_t *text;
+EXE_RELOC *relocs;
+int nb_relocs;
+
+#ifdef CONFIG_FORMAT_ELF
+
+/* ELF file info */
+struct elf_shdr *shdr;
+uint8_t **sdata;
+struct elfhdr ehdr;
+char *strtab;
+
+int elf_must_swap(struct elfhdr *h)
+{
+  union {
+      uint32_t i;
+      uint8_t b[4];
+  } swaptest;
+
+  swaptest.i = 1;
+  return (h->e_ident[EI_DATA] == ELFDATA2MSB) != 
+      (swaptest.b[0] == 0);
+}
+  
+void elf_swap_ehdr(struct elfhdr *h)
+{
+    swab16s(&h->e_type);			/* Object file type */
+    swab16s(&h->e_machine);		/* Architecture */
+    swab32s(&h->e_version);		/* Object file version */
+    swabls(&h->e_entry);		/* Entry point virtual address */
+    swabls(&h->e_phoff);		/* Program header table file offset */
+    swabls(&h->e_shoff);		/* Section header table file offset */
+    swab32s(&h->e_flags);		/* Processor-specific flags */
+    swab16s(&h->e_ehsize);		/* ELF header size in bytes */
+    swab16s(&h->e_phentsize);		/* Program header table entry size */
+    swab16s(&h->e_phnum);		/* Program header table entry count */
+    swab16s(&h->e_shentsize);		/* Section header table entry size */
+    swab16s(&h->e_shnum);		/* Section header table entry count */
+    swab16s(&h->e_shstrndx);		/* Section header string table index */
+}
+
+void elf_swap_shdr(struct elf_shdr *h)
+{
+  swab32s(&h->sh_name);		/* Section name (string tbl index) */
+  swab32s(&h->sh_type);		/* Section type */
+  swabls(&h->sh_flags);		/* Section flags */
+  swabls(&h->sh_addr);		/* Section virtual addr at execution */
+  swabls(&h->sh_offset);		/* Section file offset */
+  swabls(&h->sh_size);		/* Section size in bytes */
+  swab32s(&h->sh_link);		/* Link to another section */
+  swab32s(&h->sh_info);		/* Additional section information */
+  swabls(&h->sh_addralign);		/* Section alignment */
+  swabls(&h->sh_entsize);		/* Entry size if section holds table */
+}
+
+void elf_swap_phdr(struct elf_phdr *h)
+{
+    swab32s(&h->p_type);			/* Segment type */
+    swabls(&h->p_offset);		/* Segment file offset */
+    swabls(&h->p_vaddr);		/* Segment virtual address */
+    swabls(&h->p_paddr);		/* Segment physical address */
+    swabls(&h->p_filesz);		/* Segment size in file */
+    swabls(&h->p_memsz);		/* Segment size in memory */
+    swab32s(&h->p_flags);		/* Segment flags */
+    swabls(&h->p_align);		/* Segment alignment */
+}
+
+void elf_swap_rel(ELF_RELOC *rel)
+{
+    swabls(&rel->r_offset);
+    swabls(&rel->r_info);
+#ifdef ELF_USES_RELOCA
+    swablss(&rel->r_addend);
+#endif
+}
+
+struct elf_shdr *find_elf_section(struct elf_shdr *shdr, int shnum, const char *shstr, 
+                                  const char *name)
+{
+    int i;
+    const char *shname;
+    struct elf_shdr *sec;
+
+    for(i = 0; i < shnum; i++) {
+        sec = &shdr[i];
+        if (!sec->sh_name)
+            continue;
+        shname = shstr + sec->sh_name;
+        if (!strcmp(shname, name))
+            return sec;
+    }
+    return NULL;
+}
+
+int find_reloc(int sh_index)
+{
+    struct elf_shdr *sec;
+    int i;
+
+    for(i = 0; i < ehdr.e_shnum; i++) {
+        sec = &shdr[i];
+        if (sec->sh_type == SHT_RELOC && sec->sh_info == sh_index) 
+            return i;
+    }
+    return 0;
+}
+
+static host_ulong get_rel_offset(EXE_RELOC *rel)
+{
+    return rel->r_offset;
+}
+
+static char *get_rel_sym_name(EXE_RELOC *rel)
+{
+    return strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
+}
+
+static char *get_sym_name(EXE_SYM *sym)
+{
+    return strtab + sym->st_name;
+}
+
+/* load an elf object file */
+int load_object(const char *filename)
+{
+    int fd;
+    struct elf_shdr *sec, *symtab_sec, *strtab_sec, *text_sec;
+    int i, j;
+    ElfW(Sym) *sym;
+    char *shstr;
+    ELF_RELOC *rel;
+    
+    fd = open(filename, O_RDONLY
+#ifdef O_BINARY
+              | O_BINARY
+#endif 
+              );
+    if (fd < 0) 
+        error("can't open file '%s'", filename);
+    
+    /* Read ELF header.  */
+    if (read(fd, &ehdr, sizeof (ehdr)) != sizeof (ehdr))
+        error("unable to read file header");
+
+    /* Check ELF identification.  */
+    if (ehdr.e_ident[EI_MAG0] != ELFMAG0
+     || ehdr.e_ident[EI_MAG1] != ELFMAG1
+     || ehdr.e_ident[EI_MAG2] != ELFMAG2
+     || ehdr.e_ident[EI_MAG3] != ELFMAG3
+     || ehdr.e_ident[EI_VERSION] != EV_CURRENT) {
+        error("bad ELF header");
+    }
+
+    do_swap = elf_must_swap(&ehdr);
+    if (do_swap)
+        elf_swap_ehdr(&ehdr);
+    if (ehdr.e_ident[EI_CLASS] != ELF_CLASS)
+        error("Unsupported ELF class (%#x)", ehdr.e_ident[EI_CLASS]);
+    if (ehdr.e_type != ET_REL)
+        error("ELF object file expected");
+    if (ehdr.e_version != EV_CURRENT)
+        error("Invalid ELF version");
+    if (!elf_check_arch(ehdr.e_machine))
+        error("Unsupported CPU (e_machine=%d)", ehdr.e_machine);
+
+    /* read section headers */
+    shdr = load_data(fd, ehdr.e_shoff, ehdr.e_shnum * sizeof(struct elf_shdr));
+    if (do_swap) {
+        for(i = 0; i < ehdr.e_shnum; i++) {
+            elf_swap_shdr(&shdr[i]);
+        }
+    }
+
+    /* read all section data */
+    sdata = malloc(sizeof(void *) * ehdr.e_shnum);
+    memset(sdata, 0, sizeof(void *) * ehdr.e_shnum);
+    
+    for(i = 0;i < ehdr.e_shnum; i++) {
+        sec = &shdr[i];
+        if (sec->sh_type != SHT_NOBITS)
+            sdata[i] = load_data(fd, sec->sh_offset, sec->sh_size);
+    }
+
+    sec = &shdr[ehdr.e_shstrndx];
+    shstr = (char *)sdata[ehdr.e_shstrndx];
+
+    /* swap relocations */
+    for(i = 0; i < ehdr.e_shnum; i++) {
+        sec = &shdr[i];
+        if (sec->sh_type == SHT_RELOC) {
+            nb_relocs = sec->sh_size / sec->sh_entsize;
+            if (do_swap) {
+                for(j = 0, rel = (ELF_RELOC *)sdata[i]; j < nb_relocs; j++, rel++)
+                    elf_swap_rel(rel);
+            }
+        }
+    }
+    /* text section */
+
+    text_sec = find_elf_section(shdr, ehdr.e_shnum, shstr, ".text");
+    if (!text_sec)
+        error("could not find .text section");
+    text_shndx = text_sec - shdr;
+    text = sdata[text_shndx];
+
+    /* find text relocations, if any */
+    relocs = NULL;
+    nb_relocs = 0;
+    i = find_reloc(text_shndx);
+    if (i != 0) {
+        relocs = (ELF_RELOC *)sdata[i];
+        nb_relocs = shdr[i].sh_size / shdr[i].sh_entsize;
+    }
+
+    symtab_sec = find_elf_section(shdr, ehdr.e_shnum, shstr, ".symtab");
+    if (!symtab_sec)
+        error("could not find .symtab section");
+    strtab_sec = &shdr[symtab_sec->sh_link];
+
+    symtab = (ElfW(Sym) *)sdata[symtab_sec - shdr];
+    strtab = (char *)sdata[symtab_sec->sh_link];
+    
+    nb_syms = symtab_sec->sh_size / sizeof(ElfW(Sym));
+    if (do_swap) {
+        for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+            swab32s(&sym->st_name);
+            swabls(&sym->st_value);
+            swabls(&sym->st_size);
+            swab16s(&sym->st_shndx);
+        }
+    }
+    close(fd);
+    return 0;
+}
+
+#endif /* CONFIG_FORMAT_ELF */
+
+#ifdef CONFIG_FORMAT_COFF
+
+/* COFF file info */
+struct external_scnhdr *shdr;
+uint8_t **sdata;
+struct external_filehdr fhdr;
+struct external_syment *coff_symtab;
+char *strtab;
+int coff_text_shndx, coff_data_shndx;
+
+int data_shndx;
+
+#define STRTAB_SIZE 4
+
+#define DIR32   0x06
+#define DISP32  0x14
+
+#define T_FUNCTION  0x20
+#define C_EXTERNAL  2
+
+void sym_ent_name(struct external_syment *ext_sym, EXE_SYM *sym)
+{
+    char *q;
+    int c, i, len;
+    
+    if (ext_sym->e.e.e_zeroes != 0) {
+        q = sym->st_name;
+        for(i = 0; i < 8; i++) {
+            c = ext_sym->e.e_name[i];
+            if (c == '\0')
+                break;
+            *q++ = c;
+        }
+        *q = '\0';
+    } else {
+        pstrcpy(sym->st_name, sizeof(sym->st_name), strtab + ext_sym->e.e.e_offset);
+    }
+
+    /* now convert the name to a C name (suppress the leading '_') */
+    if (sym->st_name[0] == '_') {
+        len = strlen(sym->st_name);
+        memmove(sym->st_name, sym->st_name + 1, len - 1);
+        sym->st_name[len - 1] = '\0';
+    }
+}
+
+char *name_for_dotdata(struct coff_rel *rel)
+{
+	int i;
+	struct coff_sym *sym;
+	uint32_t text_data;
+
+	text_data = *(uint32_t *)(text + rel->r_offset);
+
+	for (i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+		if (sym->st_syment->e_scnum == data_shndx &&
+                    text_data >= sym->st_value &&
+                    text_data < sym->st_value + sym->st_size) {
+                    
+                    return sym->st_name;
+
+		}
+	}
+	return NULL;
+}
+
+static char *get_sym_name(EXE_SYM *sym)
+{
+    return sym->st_name;
+}
+
+static char *get_rel_sym_name(EXE_RELOC *rel)
+{
+    char *name;
+    name = get_sym_name(symtab + *(uint32_t *)(rel->r_reloc->r_symndx));
+    if (!strcmp(name, ".data"))
+        name = name_for_dotdata(rel);
+    if (name[0] == '.')
+        return NULL;
+    return name;
+}
+
+static host_ulong get_rel_offset(EXE_RELOC *rel)
+{
+    return rel->r_offset;
+}
+
+struct external_scnhdr *find_coff_section(struct external_scnhdr *shdr, int shnum, const char *name)
+{
+    int i;
+    const char *shname;
+    struct external_scnhdr *sec;
+
+    for(i = 0; i < shnum; i++) {
+        sec = &shdr[i];
+        if (!sec->s_name)
+            continue;
+        shname = sec->s_name;
+        if (!strcmp(shname, name))
+            return sec;
+    }
+    return NULL;
+}
+
+/* load a coff object file */
+int load_object(const char *filename)
+{
+    int fd;
+    struct external_scnhdr *sec, *text_sec, *data_sec;
+    int i;
+    struct external_syment *ext_sym;
+    struct external_reloc *coff_relocs;
+    struct external_reloc *ext_rel;
+    uint32_t *n_strtab;
+    EXE_SYM *sym;
+    EXE_RELOC *rel;
+	
+    fd = open(filename, O_RDONLY 
+#ifdef O_BINARY
+              | O_BINARY
+#endif
+              );
+    if (fd < 0) 
+        error("can't open file '%s'", filename);
+    
+    /* Read COFF header.  */
+    if (read(fd, &fhdr, sizeof (fhdr)) != sizeof (fhdr))
+        error("unable to read file header");
+
+    /* Check COFF identification.  */
+    if (fhdr.f_magic != I386MAGIC) {
+        error("bad COFF header");
+    }
+    do_swap = 0;
+
+    /* read section headers */
+    shdr = load_data(fd, sizeof(struct external_filehdr) + fhdr.f_opthdr, fhdr.f_nscns * sizeof(struct external_scnhdr));
+	
+    /* read all section data */
+    sdata = malloc(sizeof(void *) * fhdr.f_nscns);
+    memset(sdata, 0, sizeof(void *) * fhdr.f_nscns);
+    
+    const char *p;
+    for(i = 0;i < fhdr.f_nscns; i++) {
+        sec = &shdr[i];
+        if (!strstart(sec->s_name,  ".bss", &p))
+            sdata[i] = load_data(fd, sec->s_scnptr, sec->s_size);
+    }
+
+
+    /* text section */
+    text_sec = find_coff_section(shdr, fhdr.f_nscns, ".text");
+    if (!text_sec)
+        error("could not find .text section");
+    coff_text_shndx = text_sec - shdr;
+    text = sdata[coff_text_shndx];
+
+    /* data section */
+    data_sec = find_coff_section(shdr, fhdr.f_nscns, ".data");
+    if (!data_sec)
+        error("could not find .data section");
+    coff_data_shndx = data_sec - shdr;
+    
+    coff_symtab = load_data(fd, fhdr.f_symptr, fhdr.f_nsyms*SYMESZ);
+    for (i = 0, ext_sym = coff_symtab; i < nb_syms; i++, ext_sym++) {
+        for(i=0;i<8;i++)
+            printf(" %02x", ((uint8_t *)ext_sym->e.e_name)[i]);
+        printf("\n");
+    }
+
+
+    n_strtab = load_data(fd, (fhdr.f_symptr + fhdr.f_nsyms*SYMESZ), STRTAB_SIZE);
+    strtab = load_data(fd, (fhdr.f_symptr + fhdr.f_nsyms*SYMESZ), *n_strtab); 
+    
+    nb_syms = fhdr.f_nsyms;
+
+    for (i = 0, ext_sym = coff_symtab; i < nb_syms; i++, ext_sym++) {
+      if (strstart(ext_sym->e.e_name, ".text", NULL))
+		  text_shndx = ext_sym->e_scnum;
+	  if (strstart(ext_sym->e.e_name, ".data", NULL))
+		  data_shndx = ext_sym->e_scnum;
+    }
+
+	/* set coff symbol */
+	symtab = malloc(sizeof(struct coff_sym) * nb_syms);
+
+	int aux_size, j;
+	for (i = 0, ext_sym = coff_symtab, sym = symtab; i < nb_syms; i++, ext_sym++, sym++) {
+		memset(sym, 0, sizeof(*sym));
+		sym->st_syment = ext_sym;
+		sym_ent_name(ext_sym, sym);
+		sym->st_value = ext_sym->e_value;
+
+		aux_size = *(int8_t *)ext_sym->e_numaux;
+		if (ext_sym->e_scnum == text_shndx && ext_sym->e_type == T_FUNCTION) {
+			for (j = aux_size + 1; j < nb_syms - i; j++) {
+				if ((ext_sym + j)->e_scnum == text_shndx &&
+					(ext_sym + j)->e_type == T_FUNCTION ){
+					sym->st_size = (ext_sym + j)->e_value - ext_sym->e_value;
+					break;
+				} else if (j == nb_syms - i - 1) {
+					sec = &shdr[coff_text_shndx];
+					sym->st_size = sec->s_size - ext_sym->e_value;
+					break;
+				}
+			}
+		} else if (ext_sym->e_scnum == data_shndx && *(uint8_t *)ext_sym->e_sclass == C_EXTERNAL) {
+			for (j = aux_size + 1; j < nb_syms - i; j++) {
+				if ((ext_sym + j)->e_scnum == data_shndx) {
+					sym->st_size = (ext_sym + j)->e_value - ext_sym->e_value;
+					break;
+				} else if (j == nb_syms - i - 1) {
+					sec = &shdr[coff_data_shndx];
+					sym->st_size = sec->s_size - ext_sym->e_value;
+					break;
+				}
+			}
+		} else {
+			sym->st_size = 0;
+		}
+		
+		sym->st_type = ext_sym->e_type;
+		sym->st_shndx = ext_sym->e_scnum;
+	}
+
+		
+    /* find text relocations, if any */
+    sec = &shdr[coff_text_shndx];
+    coff_relocs = load_data(fd, sec->s_relptr, sec->s_nreloc*RELSZ);
+    nb_relocs = sec->s_nreloc;
+
+    /* set coff relocation */
+    relocs = malloc(sizeof(struct coff_rel) * nb_relocs);
+    for (i = 0, ext_rel = coff_relocs, rel = relocs; i < nb_relocs; 
+         i++, ext_rel++, rel++) {
+        memset(rel, 0, sizeof(*rel));
+        rel->r_reloc = ext_rel;
+        rel->r_offset = *(uint32_t *)ext_rel->r_vaddr;
+        rel->r_type = *(uint16_t *)ext_rel->r_type;
+    }
+    return 0;
+}
+
+#endif /* CONFIG_FORMAT_COFF */
+
+#ifdef CONFIG_FORMAT_MACH
+
+/* File Header */
+struct mach_header 	mach_hdr;
+
+/* commands */
+struct segment_command 	*segment = 0;
+struct dysymtab_command *dysymtabcmd = 0;
+struct symtab_command 	*symtabcmd = 0;
+
+/* section */
+struct section 	*section_hdr;
+struct section *text_sec_hdr;
+uint8_t 	**sdata;
+
+/* relocs */
+struct relocation_info *relocs;
+	
+/* symbols */
+EXE_SYM			*symtab;
+struct nlist 	*symtab_std;
+char			*strtab;
+
+/* indirect symbols */
+uint32_t 	*tocdylib;
+
+/* Utility functions */
+
+static inline char *find_str_by_index(int index)
+{
+    return strtab+index;
+}
+
+/* Used by dyngen common code */
+static char *get_sym_name(EXE_SYM *sym)
+{
+	char *name = find_str_by_index(sym->n_un.n_strx);
+	
+	if ( sym->n_type & N_STAB ) /* Debug symbols are ignored */
+		return "debug";
+			
+	if(!name)
+		return name;
+	if(name[0]=='_')
+		return name + 1;
+	else
+		return name;
+}
+
+/* find a section index given its segname, sectname */
+static int find_mach_sec_index(struct section *section_hdr, int shnum, const char *segname, 
+                                  const char *sectname)
+{
+    int i;
+    struct section *sec = section_hdr;
+
+    for(i = 0; i < shnum; i++, sec++) {
+        if (!sec->segname || !sec->sectname)
+            continue;
+        if (!strcmp(sec->sectname, sectname) && !strcmp(sec->segname, segname))
+            return i;
+    }
+    return -1;
+}
+
+/* find a section header given its segname, sectname */
+struct section *find_mach_sec_hdr(struct section *section_hdr, int shnum, const char *segname, 
+                                  const char *sectname)
+{
+    int index = find_mach_sec_index(section_hdr, shnum, segname, sectname);
+	if(index == -1)
+		return NULL;
+	return section_hdr+index;
+}
+
+
+#if defined(HOST_PPC)
+static inline void fetch_next_pair_value(struct relocation_info * rel, unsigned int *value)
+{
+    struct scattered_relocation_info * scarel;
+	
+    if(R_SCATTERED & rel->r_address) {
+        scarel = (struct scattered_relocation_info*)rel;
+        if(scarel->r_type != PPC_RELOC_PAIR)
+            error("fetch_next_pair_value: looking for a pair which was not found (1)");
+        *value = scarel->r_value;
+    } else {
+		if(rel->r_type != PPC_RELOC_PAIR)
+			error("fetch_next_pair_value: looking for a pair which was not found (2)");
+		*value = rel->r_address;
+	}
+}
+#endif
+
+/* find a sym name given its value, in a section number */
+static const char * find_sym_with_value_and_sec_number( int value, int sectnum, int * offset )
+{
+	int i, ret = -1;
+	
+	for( i = 0 ; i < nb_syms; i++ )
+	{
+	    if( !(symtab[i].n_type & N_STAB) && (symtab[i].n_type & N_SECT) &&
+			 (symtab[i].n_sect ==  sectnum) && (symtab[i].st_value <= value) )
+		{
+			if( (ret<0) || (symtab[i].st_value >= symtab[ret].st_value) )
+				ret = i;
+		}
+	}
+	if( ret < 0 ) {
+		*offset = 0;
+		return 0;
+	} else {
+		*offset = value - symtab[ret].st_value;
+		return get_sym_name(&symtab[ret]);
+	}
+}
+
+/* 
+ *  Find symbol name given a (virtual) address, and a section which is of type 
+ *  S_NON_LAZY_SYMBOL_POINTERS or S_LAZY_SYMBOL_POINTERS or S_SYMBOL_STUBS
+ */
+static const char * find_reloc_name_in_sec_ptr(int address, struct section * sec_hdr)
+{
+    unsigned int tocindex, symindex, size;
+    const char *name = 0;
+    
+    /* Sanity check */
+    if(!( address >= sec_hdr->addr && address < (sec_hdr->addr + sec_hdr->size) ) )
+        return (char*)0;
+		
+	if( sec_hdr->flags & S_SYMBOL_STUBS ){
+		size = sec_hdr->reserved2;
+		if(size == 0)
+		    error("size = 0");
+		
+	}
+	else if( sec_hdr->flags & S_LAZY_SYMBOL_POINTERS ||
+	            sec_hdr->flags & S_NON_LAZY_SYMBOL_POINTERS)
+		size = sizeof(unsigned long);
+	else
+		return 0;
+		
+    /* Compute our index in toc */
+	tocindex = (address - sec_hdr->addr)/size;
+	symindex = tocdylib[sec_hdr->reserved1 + tocindex];
+	
+	name = get_sym_name(&symtab[symindex]);
+
+    return name;
+}
+
+static const char * find_reloc_name_given_its_address(int address)
+{
+    unsigned int i;
+    for(i = 0; i < segment->nsects ; i++)
+    {
+        const char * name = find_reloc_name_in_sec_ptr(address, &section_hdr[i]);
+        if((long)name != -1)
+            return name;
+    }
+    return 0;
+}
+
+static const char * get_reloc_name(EXE_RELOC * rel, int * sslide)
+{
+	char * name = 0;
+	struct scattered_relocation_info * sca_rel = (struct scattered_relocation_info*)rel;
+	int sectnum = rel->r_symbolnum;
+	int sectoffset;
+	int other_half=0;
+	
+	/* init the slide value */
+	*sslide = 0;
+	
+	if(R_SCATTERED & rel->r_address)
+		return (char *)find_reloc_name_given_its_address(sca_rel->r_value);
+
+	if(rel->r_extern)
+	{
+		/* ignore debug sym */
+		if ( symtab[rel->r_symbolnum].n_type & N_STAB ) 
+			return 0;
+		return get_sym_name(&symtab[rel->r_symbolnum]);
+	}
+
+#if defined(HOST_I386)
+	/* ignore internal pc relative fixups where both ends are in the text section. */
+	if (rel->r_pcrel && !rel->r_extern && rel->r_symbolnum == 1 /* ASSUMES text */)
+		return NULL;
+#endif 
+
+	/* The instruction contains an offset to the symbol pointed to, in the rel->r_symbolnum section */
+	sectoffset = *(uint32_t *)(text + rel->r_address) & 0xffff;
+			
+	if(sectnum==0xffffff)
+		return 0;
+
+	/* Sanity Check */
+	if(sectnum > segment->nsects)
+		error("sectnum > segment->nsects");
+
+#if defined(HOST_PPC)
+	switch(rel->r_type)
+	{
+		case PPC_RELOC_LO16: fetch_next_pair_value(rel+1, &other_half); sectoffset |= (other_half << 16);
+			break;
+		case PPC_RELOC_HI16: fetch_next_pair_value(rel+1, &other_half); sectoffset = (sectoffset << 16) | (uint16_t)(other_half & 0xffff);
+			break;
+		case PPC_RELOC_HA16: fetch_next_pair_value(rel+1, &other_half); sectoffset = (sectoffset << 16) + (int16_t)(other_half & 0xffff);
+			break;
+		case PPC_RELOC_BR24:
+			sectoffset = ( *(uint32_t *)(text + rel->r_address) & 0x03fffffc );
+			if (sectoffset & 0x02000000) sectoffset |= 0xfc000000;
+			break;
+		default:
+			error("switch(rel->type) not found");
+	}
+#elif defined(HOST_I386)
+	/* The instruction contains the addend. */
+	sectoffset = *(uint32_t *)(text + rel->r_address);
+#else
+#error unsupported mach-o host
+#endif 
+
+	if(rel->r_pcrel)
+		sectoffset += rel->r_address;
+			
+#if defined(HOST_PPC)
+	if (rel->r_type == PPC_RELOC_BR24)
+		name = (char *)find_reloc_name_in_sec_ptr((int)sectoffset, &section_hdr[sectnum-1]);
+#endif
+
+	/* if it was not found yet, search the full symbol list */
+	if(!name)
+		name = (char *)find_sym_with_value_and_sec_number(sectoffset, sectnum, sslide);
+	
+	return name;
+}
+
+#if defined(HOST_I386)
+static const char *get_rel_sym_name_and_addend(EXE_RELOC *rel, int *addend)
+{
+	const char *name = NULL;
+
+    if (R_SCATTERED & rel->r_address) {
+		unsigned int i;
+		struct scattered_relocation_info * sca_rel = (struct scattered_relocation_info*)rel;
+		if (sca_rel->r_length != 2 || rel->r_pcrel) {
+			error("Fully implement R_SCATTERED! r_address=%#x r_type=%#x r_length=%d r_pcrel=%d r_value=%#x\n", 
+				  (int)sca_rel->r_address, sca_rel->r_type, sca_rel->r_length, sca_rel->r_pcrel, sca_rel->r_value);
+		}
+
+		/* this seems to be the way to calculate the addend. */
+		*addend = *(int32_t *)(text + sca_rel->r_address) - sca_rel->r_value;
+
+		/* todo: do we need to ignore internal relocations? */
+#if 0
+		if (sca_rel->r_pcrel ...)
+			return NULL;
+#endif
+
+		/* find_reloc_name_given_its_address doesn't do the right thing here, so 
+		   we locate the section and use find_sym_with_value_and_sec_number  */
+		for (i = 0; i < segment->nsects ; i++) {
+			if ((uintptr_t)sca_rel->r_value - section_hdr[i].addr < section_hdr[i].size) {
+				int off = 0;
+				name = find_sym_with_value_and_sec_number(sca_rel->r_value, i + 1, &off);
+				if (name) {
+					*addend += off;
+					 break;
+				}
+			}
+		}
+		if (!name)
+			error("Fully implement R_SCATTERED! r_address=%#x r_type=%#x r_length=%d r_pcrel=%d r_value=%#x\n", 
+				  (int)sca_rel->r_address, sca_rel->r_type, sca_rel->r_length, sca_rel->r_pcrel, sca_rel->r_value);
+	}
+	else 
+	{
+		/* ignore debug syms (paranoia). */
+		if (symtab[rel->r_symbolnum].n_type & N_STAB)
+			return NULL;
+
+		/* ignore internal pc relative fixups where both ends are in the text section. */
+		if (rel->r_pcrel && !rel->r_extern && rel->r_symbolnum == 1 /* ASSUMES text */)
+			return NULL;
+
+		/* get the addend, it is in the instruction stream. */
+		*addend = *(int32_t *)(text + rel->r_address);
+		if (rel->r_pcrel)
+			*addend += rel->r_address;
+
+		/* external fixups are easy. */
+		if (rel->r_extern)
+		{
+			if (rel->r_symbolnum >= nb_syms) 
+				error("rel->r_symbolnum (%d) >= nb_syms (%d)", rel->r_symbolnum, nb_syms);
+			name = get_sym_name(&symtab[rel->r_symbolnum]);
+		}
+		else
+		{
+			/* sanity checks. */
+			if (rel->r_symbolnum == 0xffffff)
+				return NULL;
+			if (rel->r_symbolnum > segment->nsects)
+				error("sectnum (%d) > segment->nsects (%d)", rel->r_symbolnum, segment->nsects);
+			if (rel->r_pcrel) 
+				error("internal pcrel fixups not implemented");
+
+			/* search for the symbol. */
+			name = find_sym_with_value_and_sec_number(*addend, rel->r_symbolnum, addend);
+		}
+	}
+    return name;
+}
+#endif /* HOST_I386 */
+
+/* Used by dyngen common code */
+static const char * get_rel_sym_name(EXE_RELOC * rel)
+{
+	int sslide;
+#if defined(HOST_I386)
+	return get_rel_sym_name_and_addend(rel, &sslide);
+#else
+	return get_reloc_name( rel, &sslide);
+#endif
+}
+
+/* Used by dyngen common code */
+static host_ulong get_rel_offset(EXE_RELOC *rel)
+{
+	struct scattered_relocation_info * sca_rel = (struct scattered_relocation_info*)rel;
+    if(R_SCATTERED & rel->r_address)
+		return sca_rel->r_address;
+	else
+		return rel->r_address;
+}
+
+/* load a mach-o object file */
+int load_object(const char *filename)
+{
+	int fd;
+	unsigned int offset_to_segment = 0;
+    unsigned int offset_to_dysymtab = 0;
+    unsigned int offset_to_symtab = 0;
+    struct load_command lc;
+    unsigned int i, j;
+	EXE_SYM *sym;
+	struct nlist *syment;
+    
+	fd = open(filename, O_RDONLY
+#ifdef O_BINARY
+                  | O_BINARY
+#endif
+                  );
+    if (fd < 0) 
+        error("can't open file '%s'", filename);
+		
+    /* Read Mach header.  */
+    if (read(fd, &mach_hdr, sizeof (mach_hdr)) != sizeof (mach_hdr))
+        error("unable to read file header");
+
+    /* Check Mach identification.  */
+    if (!check_mach_header(mach_hdr)) {
+        error("bad Mach header");
+    }
+
+#if defined(HOST_PPC)
+    if (mach_hdr.cputype != CPU_TYPE_POWERPC)
+#elif defined(HOST_I386)
+    if (mach_hdr.cputype != CPU_TYPE_X86)
+#else
+#error unsupported host
+#endif 
+        error("Unsupported CPU");
+        
+    if (mach_hdr.filetype != MH_OBJECT)
+        error("Unsupported Mach Object");
+    
+    /* read segment headers */
+    for(i=0, j=sizeof(mach_hdr); i<mach_hdr.ncmds ; i++)
+    {
+        if(read(fd, &lc, sizeof(struct load_command)) != sizeof(struct load_command))
+            error("unable to read load_command");
+        if(lc.cmd == LC_SEGMENT)
+        {
+            offset_to_segment = j;
+            lseek(fd, offset_to_segment, SEEK_SET);
+            segment = malloc(sizeof(struct segment_command));
+            if(read(fd, segment, sizeof(struct segment_command)) != sizeof(struct segment_command))
+                error("unable to read LC_SEGMENT");
+        }
+        if(lc.cmd == LC_DYSYMTAB)
+        {
+            offset_to_dysymtab = j;
+            lseek(fd, offset_to_dysymtab, SEEK_SET);
+            dysymtabcmd = malloc(sizeof(struct dysymtab_command));
+            if(read(fd, dysymtabcmd, sizeof(struct dysymtab_command)) != sizeof(struct dysymtab_command))
+                error("unable to read LC_DYSYMTAB");
+        }
+        if(lc.cmd == LC_SYMTAB)
+        {
+            offset_to_symtab = j;
+            lseek(fd, offset_to_symtab, SEEK_SET);
+            symtabcmd = malloc(sizeof(struct symtab_command));
+            if(read(fd, symtabcmd, sizeof(struct symtab_command)) != sizeof(struct symtab_command))
+                error("unable to read LC_SYMTAB");
+        }
+        j+=lc.cmdsize;
+
+        lseek(fd, j, SEEK_SET);
+    }
+
+    if(!segment)
+        error("unable to find LC_SEGMENT");
+
+    /* read section headers */
+    section_hdr = load_data(fd, offset_to_segment + sizeof(struct segment_command), segment->nsects * sizeof(struct section));
+
+    /* read all section data */
+    sdata = (uint8_t **)malloc(sizeof(void *) * segment->nsects);
+    memset(sdata, 0, sizeof(void *) * segment->nsects);
+    
+	/* Load the data in section data */
+	for(i = 0; i < segment->nsects; i++) {
+        sdata[i] = load_data(fd, section_hdr[i].offset, section_hdr[i].size);
+    }
+	
+    /* text section */
+	text_sec_hdr = find_mach_sec_hdr(section_hdr, segment->nsects, SEG_TEXT, SECT_TEXT);
+	i = find_mach_sec_index(section_hdr, segment->nsects, SEG_TEXT, SECT_TEXT);
+	if (i == -1 || !text_sec_hdr)
+        error("could not find __TEXT,__text section");
+    text = sdata[i];
+	
+    /* Make sure dysym was loaded */
+    if(!(int)dysymtabcmd)
+        error("could not find __DYSYMTAB segment");
+    
+    /* read the table of content of the indirect sym */
+    tocdylib = load_data( fd, dysymtabcmd->indirectsymoff, dysymtabcmd->nindirectsyms * sizeof(uint32_t) );
+    
+    /* Make sure symtab was loaded  */
+    if(!(int)symtabcmd)
+        error("could not find __SYMTAB segment");
+    nb_syms = symtabcmd->nsyms;
+
+    symtab_std = load_data(fd, symtabcmd->symoff, symtabcmd->nsyms * sizeof(struct nlist));
+    strtab = load_data(fd, symtabcmd->stroff, symtabcmd->strsize);
+	
+	symtab = malloc(sizeof(EXE_SYM) * nb_syms);
+	
+	/* Now transform the symtab into an extended version with the symbol size and the C name */
+	for(i = 0, sym = symtab, syment = symtab_std; i < nb_syms; i++, sym++, syment++) {
+        struct nlist *sym_cur, *sym_next = 0;
+        unsigned int j;
+		memset(sym, 0, sizeof(*sym));
+		
+		if ( syment->n_type & N_STAB ) /* Debug symbols are skipped */
+            continue;
+			
+		memcpy(sym, syment, sizeof(*syment));
+
+#if defined(VBOX)
+        /* don't bother calculating the size of internal local symbols. */
+        if (strstart(find_str_by_index(sym->n_un.n_strx), ".L", NULL)) {
+            sym->st_size = 0;
+            continue;
+        }
+#endif 
+			
+		/* Find the following symbol in order to get the current symbol size */
+        for (j = 0, sym_cur = symtab_std; j < nb_syms; j++, sym_cur++) {
+            if (    sym_cur->n_sect != /*syment->n_sect*/ 1
+                ||  (sym_cur->n_type & N_STAB)
+                ||  sym_cur->n_value <= syment->n_value)
+                continue;
+            if (     sym_next
+                &&   sym_next->n_value <= sym_cur->n_value)
+                continue;
+#if defined(HOST_I386)
+            /* Ignore local labels (.Lxxx). */
+            if (strstart(find_str_by_index(sym_cur->n_un.n_strx), ".L", NULL))
+                continue;
+#endif 
+            /* a good one */
+            sym_next = sym_cur;
+        }
+		if(sym_next)
+            sym->st_size = sym_next->n_value - sym->st_value;
+		else
+            sym->st_size = text_sec_hdr->size - sym->st_value;
+	}
+	
+    /* Find Reloc */
+    relocs = load_data(fd, text_sec_hdr->reloff, text_sec_hdr->nreloc * sizeof(struct relocation_info));
+    nb_relocs = text_sec_hdr->nreloc;
+
+	close(fd);
+	return 0;
+}
+
+#endif /* CONFIG_FORMAT_MACH */
+
+#ifdef CONFIG_FORMAT_AOUT
+
+struct exec *aout_hdr;
+struct nlist *symtab_std;
+char *strtab;
+
+
+/* Utility functions */
+
+static inline char *find_str_by_index(int index)
+{
+    return strtab+index;
+}
+
+/* Used by dyngen common code */
+static char *get_sym_name(EXE_SYM *sym)
+{
+    char *name = find_str_by_index(sym->n_un.n_strx);
+
+    if (sym->n_type & N_STAB) /* Debug symbols are ignored */
+        return "debug";
+    if (name && name[0] == '_')
+        return name + 1;
+    return name;
+}
+
+static int type_to_sec_number(unsigned type)
+{
+    switch (type)
+    {
+        case 0:       case 0     |N_EXT:    case N_WEAKU:   return N_UNDF;
+        case N_ABS:   case N_ABS |N_EXT:    case N_WEAKA:   return N_ABS;
+        case N_TEXT:  case N_TEXT|N_EXT:    case N_WEAKT:   return N_TEXT;
+        case N_DATA:  case N_DATA|N_EXT:    case N_WEAKD:   return N_DATA;
+        case N_BSS:   case N_BSS |N_EXT:    case N_WEAKB:   return N_BSS;
+        case N_SETA:  case N_SETA|N_EXT:                    return N_SETA;
+        case N_SETT:  case N_SETT|N_EXT:                    return N_SETT;
+        case N_SETD:  case N_SETD|N_EXT:                    return N_SETD;
+
+        default:
+          return type;
+    }
+}
+
+/* find a sym name given its value, in a section number */
+static const char *find_sym_with_value_and_sec_number(long value, int sec, int *offset)
+{
+    int i, ret = -1;
+	
+    for (i = 0; i < nb_syms; i++) {
+        if (    !(symtab[i].n_type & N_STAB)
+            &&  type_to_sec_number(symtab[i].n_type) == sec
+            &&  symtab[i].st_value <= value
+            &&  (   ret < 0
+                 || symtab[i].st_value >= symtab[ret].st_value)) {
+            ret = i;
+        }
+    }
+    if (ret < 0) {
+        *offset = 0;
+        return 0;
+    }
+    *offset = value - symtab[ret].st_value;
+    return get_sym_name(&symtab[ret]);
+}
+
+static const char *get_rel_sym_name_and_addend(EXE_RELOC *rel, int *sslide)
+{
+    int sec;
+    int off;
+
+    *sslide = 0;
+
+    if (rel->r_extern)
+    {
+        /* ignore debug sym */
+        if (symtab[rel->r_symbolnum].n_type & N_STAB)
+            return 0;
+
+        /* The instruction contains the addend. */
+        off = *(uint32_t *)(text + rel->r_address);
+        if (rel->r_pcrel)
+            off += rel->r_address;
+        *sslide = off;
+        return get_sym_name(&symtab[rel->r_symbolnum]);
+    }
+    if (rel->r_symbolnum == 0xffffff)
+        return 0;
+
+    sec = rel->r_symbolnum & ~N_EXT;
+    /* sanity */
+    switch (sec)
+    {
+        case N_TEXT: case N_DATA: case N_BSS: case N_ABS: break;
+        default: error("invalid section %d", sec);
+    }
+
+    /* The instruction contains the addend. */
+    off = *(uint32_t *)(text + rel->r_address);
+    if (rel->r_pcrel)
+        off += rel->r_address;
+			
+    /* look it up in the full symbol list */
+    return find_sym_with_value_and_sec_number(off, sec, sslide);
+}
+
+/* Used by dyngen common code */
+static const char * get_rel_sym_name(EXE_RELOC *rel)
+{
+    int ignored;
+    return get_rel_sym_name_and_addend(rel, &ignored);
+}
+
+/* Used by dyngen common code */
+static host_ulong get_rel_offset(EXE_RELOC *rel)
+{
+    return rel->r_address;
+}
+
+/* load an a.out object file */
+int load_object(const char *filename)
+{
+    FILE *pf;
+    long file_size;
+    unsigned i;
+    EXE_SYM *dst_sym;
+    struct nlist *src_sym;
+
+    /*
+     * Open the file and validate the header.
+     */
+    pf = fopen(filename, "rb");
+    if (!pf)
+        error("can't open file '%s'", filename);
+
+    /* we're optimistic, read the entire file first. */
+    if (fseek(pf, 0, SEEK_END) != 0)
+      error("Input file '%s' is not seekable", filename);
+    file_size = ftell(pf);
+    fseek(pf, 0L, SEEK_SET);
+
+    aout_hdr = malloc(file_size + 1);
+    if (!aout_hdr)
+        error("malloc(%ld) failed", file_size + 1);
+    if (fread(aout_hdr, 1, file_size, pf) != file_size)
+        error("error reading '%s'", filename);
+    fclose(pf);
+
+    /* validate the header. */
+    if (N_MAGIC(*aout_hdr) != OMAGIC)
+        error("unknown magic: %lo", N_MAGIC(*aout_hdr));
+    if (N_MACHTYPE(*aout_hdr) != M_386 && N_MACHTYPE(*aout_hdr) != 0)
+        error("unsupported machtype: %d", N_MACHTYPE(*aout_hdr));
+
+    /* setup globals. */
+    strtab = (char *)((uint8_t *)aout_hdr + N_STROFF(*aout_hdr));
+    symtab_std = (struct nlist *)((uint8_t *)aout_hdr + N_SYMOFF(*aout_hdr));
+
+    relocs = (struct relocation_info *)((uint8_t *)aout_hdr + N_TRELOFF(*aout_hdr));
+    nb_syms = aout_hdr->a_syms / sizeof(struct nlist);
+    text_shndx = 1;
+    text = (uint8_t *)aout_hdr + N_TXTOFF(*aout_hdr);
+    nb_relocs = aout_hdr->a_trsize / sizeof(relocs[0]);
+
+    /*
+     * Now transform the symtab into an extended version with the symbol size and the C name
+     */
+    src_sym = symtab_std;
+    dst_sym = symtab = malloc(sizeof(EXE_SYM) * nb_syms);
+    if (!dst_sym)
+        error("malloc(%zd) failed", sizeof(EXE_SYM) * nb_syms);
+    for (i = 0; i < nb_syms; i++, src_sym++, dst_sym++) {
+        struct nlist *sym_next = NULL;
+        struct nlist *sym_cur;
+        unsigned sec;
+        unsigned j;
+
+        /* copy the symbol and find the name. */
+        dst_sym->n_un.n_strx = src_sym->n_un.n_strx;
+        dst_sym->n_type = src_sym->n_type;
+        dst_sym->n_other = src_sym->n_other;
+        dst_sym->n_desc = src_sym->n_desc;
+        dst_sym->st_value = src_sym->n_value;
+        dst_sym->st_size = 0;
+        if (src_sym->n_type & N_STAB)
+            continue; /* skip debug symbols. */
+
+        /* Find the following symbol in order to get the current symbol size */
+        sec = type_to_sec_number(dst_sym->n_type);
+        for (j = 0, sym_cur = symtab_std; j < nb_syms; j++, sym_cur++) {
+            if (    type_to_sec_number(sym_cur->n_type) != sec
+                ||  (sym_cur->n_type & N_STAB)
+                ||  sym_cur->n_value <= dst_sym->st_value)
+                continue;
+            if (     sym_next
+                &&   sym_next->n_value <= sym_cur->n_value)
+                 continue;
+            /* good one */
+            sym_next = sym_cur;
+        }
+        if (sym_next)
+            dst_sym->st_size = sym_next->n_value - dst_sym->st_value;
+        else
+            dst_sym->st_size = aout_hdr->a_text - dst_sym->st_value;
+    }
+	
+    return 0;
+}
+
+#endif /* CONFIG_FORMAT_AOUT */
+
+
+void get_reloc_expr(char *name, int name_size, const char *sym_name)
+{
+    const char *p;
+
+    if (strstart(sym_name, "__op_param", &p)) {
+        snprintf(name, name_size, "param%s", p);
+    } else if (strstart(sym_name, "__op_gen_label", &p)) {
+        snprintf(name, name_size, "gen_labels[param%s]", p);
+    } else {
+#ifdef HOST_SPARC
+        if (sym_name[0] == '.')
+            snprintf(name, name_size,
+                     "(long)(&__dot_%s)",
+                     sym_name + 1);
+        else
+#endif
+            snprintf(name, name_size, "(long)(&%s)", sym_name);
+    }
+}
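+
+/* Illustrative only, not part of dyngen itself: what get_reloc_expr()
+   above produces for a few inputs; "helper_foo" is a made-up symbol name.
+
+       char buf[64];
+       get_reloc_expr(buf, sizeof(buf), "__op_param1");      -> "param1"
+       get_reloc_expr(buf, sizeof(buf), "__op_gen_label2");  -> "gen_labels[param2]"
+       get_reloc_expr(buf, sizeof(buf), "helper_foo");       -> "(long)(&helper_foo)"
+*/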
+
+#ifdef HOST_IA64
+
+#define PLT_ENTRY_SIZE	16	/* 1 bundle containing "brl" */
+
+struct plt_entry {
+    struct plt_entry *next;
+    const char *name;
+    unsigned long addend;
+} *plt_list;
+
+static int
+get_plt_index (const char *name, unsigned long addend)
+{
+    struct plt_entry *plt, *prev= NULL;
+    int index = 0;
+
+    /* see if we already have an entry for this target: */
+    for (plt = plt_list; plt; ++index, prev = plt, plt = plt->next)
+	if (strcmp(plt->name, name) == 0 && plt->addend == addend)
+	    return index;
+
+    /* nope; create a new PLT entry: */
+
+    plt = malloc(sizeof(*plt));
+    if (!plt) {
+	perror("malloc");
+	exit(1);
+    }
+    memset(plt, 0, sizeof(*plt));
+    plt->name = strdup(name);
+    plt->addend = addend;
+
+    /* append to plt-list: */
+    if (prev)
+	prev->next = plt;
+    else
+	plt_list = plt;
+    return index;
+}
+
+#endif
+
+#ifdef HOST_ARM
+
+int arm_emit_ldr_info(const char *name, unsigned long start_offset,
+                      FILE *outfile, uint8_t *p_start, uint8_t *p_end,
+                      ELF_RELOC *relocs, int nb_relocs)
+{
+    uint8_t *p;
+    uint32_t insn;
+    int offset, min_offset, pc_offset, data_size, spare, max_pool;
+    uint8_t data_allocated[1024];
+    unsigned int data_index;
+    int type;
+    
+    memset(data_allocated, 0, sizeof(data_allocated));
+    
+    p = p_start;
+    min_offset = p_end - p_start;
+    spare = 0x7fffffff;
+    while (p < p_start + min_offset) {
+        insn = get32((uint32_t *)p);
+        /* TODO: Armv5e ldrd.  */
+        /* TODO: VFP load.  */
+        if ((insn & 0x0d5f0000) == 0x051f0000) {
+            /* ldr reg, [pc, #im] */
+            offset = insn & 0xfff;
+            if (!(insn & 0x00800000))
+                offset = -offset;
+            max_pool = 4096;
+            type = 0;
+        } else if ((insn & 0x0e5f0f00) == 0x0c1f0100) {
+            /* FPA ldf.  */
+            offset = (insn & 0xff) << 2;
+            if (!(insn & 0x00800000))
+                offset = -offset;
+            max_pool = 1024;
+            type = 1;
+        } else if ((insn & 0x0fff0000) == 0x028f0000) {
+            /* Some gcc versions load a doubleword immediate with
+               add regN, pc, #imm
+               ldmia regN, {regN, regM}
+               Hope and pray the compiler never generates something like
+               add reg, pc, #imm1; ldr reg, [reg, #-imm2]; */
+            int r;
+
+            r = (insn & 0xf00) >> 7;
+            offset = ((insn & 0xff) >> r) | ((insn & 0xff) << (32 - r));
+            max_pool = 1024;
+            type = 2;
+        } else {
+            max_pool = 0;
+            type = -1;
+        }
+        if (type >= 0) {
+            /* PC-relative load needs fixing up.  */
+            if (spare > max_pool - offset)
+                spare = max_pool - offset;
+            if ((offset & 3) != 0)
+                error("%s:%04x: pc offset must be 32 bit aligned", 
+                      name, start_offset + p - p_start);
+            if (offset < 0)
+                error("%s:%04x: Embedded literal value",
+                      name, start_offset + p - p_start);
+            pc_offset = p - p_start + offset + 8;
+            if (pc_offset <= (p - p_start) || 
+                pc_offset >= (p_end - p_start))
+                error("%s:%04x: pc offset must point inside the function code", 
+                      name, start_offset + p - p_start);
+            if (pc_offset < min_offset)
+                min_offset = pc_offset;
+            if (outfile) {
+                /* The instruction position */
+                fprintf(outfile, "    arm_ldr_ptr->ptr = gen_code_ptr + %d;\n", 
+                        p - p_start);
+                /* The position of the constant pool data.  */
+                data_index = ((p_end - p_start) - pc_offset) >> 2;
+                fprintf(outfile, "    arm_ldr_ptr->data_ptr = arm_data_ptr - %d;\n", 
+                        data_index);
+                fprintf(outfile, "    arm_ldr_ptr->type = %d;\n", type);
+                fprintf(outfile, "    arm_ldr_ptr++;\n");
+            }
+        }
+        p += 4;
+    }
+
+    /* Copy and relocate the constant pool data.  */
+    data_size = (p_end - p_start) - min_offset;
+    if (data_size > 0 && outfile) {
+        spare += min_offset;
+        fprintf(outfile, "    arm_data_ptr -= %d;\n", data_size >> 2);
+        fprintf(outfile, "    arm_pool_ptr -= %d;\n", data_size);
+        fprintf(outfile, "    if (arm_pool_ptr > gen_code_ptr + %d)\n"
+                         "        arm_pool_ptr = gen_code_ptr + %d;\n",
+                         spare, spare);
+
+        data_index = 0;
+        for (pc_offset = min_offset;
+             pc_offset < p_end - p_start;
+             pc_offset += 4) {
+
+            ELF_RELOC *rel;
+            int i, addend, type;
+            const char *sym_name;
+            char relname[1024];
+
+            /* data value */
+            addend = get32((uint32_t *)(p_start + pc_offset));
+            relname[0] = '\0';
+            for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                if (rel->r_offset == (pc_offset + start_offset)) {
+                    sym_name = get_rel_sym_name(rel);
+                    /* the compiler leaves some unnecessary references in the code */
+                    get_reloc_expr(relname, sizeof(relname), sym_name);
+                    type = ELF32_R_TYPE(rel->r_info);
+                    if (type != R_ARM_ABS32)
+                        error("%s: unsupported data relocation", name);
+                    break;
+                }
+            }
+            fprintf(outfile, "    arm_data_ptr[%d] = 0x%x",
+                    data_index, addend);
+            if (relname[0] != '\0')
+                fprintf(outfile, " + %s", relname);
+            fprintf(outfile, ";\n");
+
+            data_index++;
+        }
+    }
+
+    if (p == p_start)
+        goto arm_ret_error;
+    p -= 4;
+    insn = get32((uint32_t *)p);
+    /* The last instruction must be an ldm instruction.  There are several
+       forms generated by gcc:
+        ldmib sp, {..., pc}  (implies a sp adjustment of +4)
+        ldmia sp, {..., pc}
+        ldmea fp, {..., pc} */
+    if ((insn & 0xffff8000) == 0xe99d8000) {
+        if (outfile) {
+            fprintf(outfile,
+                    "    *(uint32_t *)(gen_code_ptr + %d) = 0xe28dd004;\n",
+                    p - p_start);
+        }
+        p += 4;
+    } else if ((insn & 0xffff8000) != 0xe89d8000
+        && (insn & 0xffff8000) != 0xe91b8000) {
+    arm_ret_error:
+        if (!outfile)
+            printf("%s: invalid epilog\n", name);
+    }
+    return p - p_start;
+}
+#endif
+
+
+#define MAX_ARGS 3
+
+/* Generate the code for one op: gen_switch == 2 emits its DEF() entry, gen_switch == 1
+   emits the "case INDEX_op_xxx:" block that copies and relocates the host code, and
+   gen_switch == 0 emits the static inline gen_op_xxx() wrapper recording op and params. */
+void gen_code(const char *name, host_ulong offset, host_ulong size, 
+              FILE *outfile, int gen_switch)
+{
+    int copy_size = 0;
+    uint8_t *p_start, *p_end;
+    host_ulong start_offset;
+    int nb_args, i, n;
+    uint8_t args_present[MAX_ARGS];
+    const char *sym_name, *p;
+    EXE_RELOC *rel;
+
+    /* Compute the exact size excluding prologue and epilogue instructions.
+     * Increment start_offset to skip the prologue instructions, then compute
+     * copy_size to indicate the size of the remaining instructions (in
+     * bytes).
+     */
+    p_start = text + offset;
+    p_end = p_start + size;
+    start_offset = offset;
+#if defined(HOST_I386) || defined(HOST_X86_64)
+#if defined(CONFIG_FORMAT_COFF) || defined(CONFIG_FORMAT_AOUT) || defined(CONFIG_FORMAT_MACH)
+    {
+        uint8_t *p;
+        p = p_end - 1;
+        if (p == p_start)
+            error("empty code for %s", name);
+        while (*p != 0xc3) {
+            p--;
+            if (p <= p_start)
+                error("ret or jmp expected at the end of %s", name);
+        }
+        copy_size = p - p_start;
+    }
+#else
+    {
+        int len;
+        len = p_end - p_start;
+        if (len == 0)
+            error("empty code for %s", name);
+        if (p_end[-1] == 0xc3) {
+            len--;
+        } else {
+            error("ret or jmp expected at the end of %s", name);
+        }
+        copy_size = len;
+    }
+#endif    
+#elif defined(HOST_PPC)
+    {
+        uint8_t *p;
+        p = (void *)(p_end - 4);
+        if (p == p_start)
+            error("empty code for %s", name);
+        if (get32((uint32_t *)p) != 0x4e800020)
+            error("blr expected at the end of %s", name);
+        copy_size = p - p_start;
+    }
+#elif defined(HOST_S390)
+    {
+        uint8_t *p;
+        p = (void *)(p_end - 2);
+        if (p == p_start)
+            error("empty code for %s", name);
+        if (get16((uint16_t *)p) != 0x07fe && get16((uint16_t *)p) != 0x07f4)
+            error("br %%r14 expected at the end of %s", name);
+        copy_size = p - p_start;
+    }
+#elif defined(HOST_ALPHA)
+    {
+        uint8_t *p;
+        p = p_end - 4;
+#if 0
+        /* XXX: check why it occurs */
+        if (p == p_start)
+            error("empty code for %s", name);
+#endif
+        if (get32((uint32_t *)p) != 0x6bfa8001)
+            error("ret expected at the end of %s", name);
+        copy_size = p - p_start;	    
+    }
+#elif defined(HOST_IA64)
+    {
+        uint8_t *p;
+        p = (void *)(p_end - 4);
+        if (p == p_start)
+            error("empty code for %s", name);
+        /* br.ret.sptk.many b0;; */
+        /* 08 00 84 00 */
+        if (get32((uint32_t *)p) != 0x00840008)
+            error("br.ret.sptk.many b0;; expected at the end of %s", name);
+	copy_size = p_end - p_start;
+    }
+#elif defined(HOST_SPARC)
+    {
+#define INSN_SAVE       0x9de3a000
+#define INSN_RET        0x81c7e008
+#define INSN_RETL       0x81c3e008
+#define INSN_RESTORE    0x81e80000
+#define INSN_RETURN     0x81cfe008
+#define INSN_NOP        0x01000000
+#define INSN_ADD_SP     0x9c03a000 /* add %sp, nn, %sp */
+#define INSN_SUB_SP     0x9c23a000 /* sub %sp, nn, %sp */
+
+        uint32_t start_insn, end_insn1, end_insn2;
+        uint8_t *p;
+        p = (void *)(p_end - 8);
+        if (p <= p_start)
+            error("empty code for %s", name);
+        start_insn = get32((uint32_t *)(p_start + 0x0));
+        end_insn1 = get32((uint32_t *)(p + 0x0));
+        end_insn2 = get32((uint32_t *)(p + 0x4));
+        if (((start_insn & ~0x1fff) == INSN_SAVE) ||
+            (start_insn & ~0x1fff) == INSN_ADD_SP) {
+            p_start += 0x4;
+            start_offset += 0x4;
+            if (end_insn1 == INSN_RET && end_insn2 == INSN_RESTORE)
+                /* SPARC v7: ret; restore; */ ;
+            else if (end_insn1 == INSN_RETURN && end_insn2 == INSN_NOP)
+                /* SPARC v9: return; nop; */ ;
+            else if (end_insn1 == INSN_RETL && (end_insn2 & ~0x1fff) == INSN_SUB_SP)
+                /* SPARC v7: retl; sub %sp, nn, %sp; */ ;
+            else
+                error("ret; restore; not found at end of %s", name);
+        } else if (end_insn1 == INSN_RETL && end_insn2 == INSN_NOP) {
+            ;
+        } else {
+            error("No save at the beginning of %s", name);
+        }
+#if 0
+        /* Skip a preceding nop, if present.  */
+        if (p > p_start) {
+            skip_insn = get32((uint32_t *)(p - 0x4));
+            if (skip_insn == INSN_NOP)
+                p -= 4;
+        }
+#endif
+        copy_size = p - p_start;
+    }
+#elif defined(HOST_SPARC64)
+    {
+#define INSN_SAVE       0x9de3a000
+#define INSN_RET        0x81c7e008
+#define INSN_RETL       0x81c3e008
+#define INSN_RESTORE    0x81e80000
+#define INSN_RETURN     0x81cfe008
+#define INSN_NOP        0x01000000
+#define INSN_ADD_SP     0x9c03a000 /* add %sp, nn, %sp */
+#define INSN_SUB_SP     0x9c23a000 /* sub %sp, nn, %sp */
+
+        uint32_t start_insn, end_insn1, end_insn2, skip_insn;
+        uint8_t *p;
+        p = (void *)(p_end - 8);
+#if 0
+        /* XXX: check why it occurs */
+        if (p <= p_start)
+            error("empty code for %s", name);
+#endif
+        start_insn = get32((uint32_t *)(p_start + 0x0));
+        end_insn1 = get32((uint32_t *)(p + 0x0));
+        end_insn2 = get32((uint32_t *)(p + 0x4));
+        if (((start_insn & ~0x1fff) == INSN_SAVE) ||
+            (start_insn & ~0x1fff) == INSN_ADD_SP) {
+            p_start += 0x4;
+            start_offset += 0x4;
+            if (end_insn1 == INSN_RET && end_insn2 == INSN_RESTORE)
+                /* SPARC v7: ret; restore; */ ;
+            else if (end_insn1 == INSN_RETURN && end_insn2 == INSN_NOP)
+                /* SPARC v9: return; nop; */ ;
+            else if (end_insn1 == INSN_RETL && (end_insn2 & ~0x1fff) == INSN_SUB_SP)
+                /* SPARC v7: retl; sub %sp, nn, %sp; */ ;
+            else
+                error("ret; restore; not found at end of %s", name);
+        } else if (end_insn1 == INSN_RETL && end_insn2 == INSN_NOP) {
+            ;
+        } else {
+            error("No save at the beginning of %s", name);
+        }
+        
+        /* Skip a preceding nop, if present.  */
+        if (p > p_start) {
+            skip_insn = get32((uint32_t *)(p - 0x4));
+            if (skip_insn == 0x01000000)
+                p -= 4;
+        }
+        
+        copy_size = p - p_start;
+    }
+#elif defined(HOST_ARM)
+    {
+        uint32_t insn;
+
+        if ((p_end - p_start) <= 16)
+            error("%s: function too small", name);
+        if (get32((uint32_t *)p_start) != 0xe1a0c00d ||
+            (get32((uint32_t *)(p_start + 4)) & 0xffff0000) != 0xe92d0000 ||
+            get32((uint32_t *)(p_start + 8)) != 0xe24cb004)
+            error("%s: invalid prolog", name);
+        p_start += 12;
+        start_offset += 12;
+        insn = get32((uint32_t *)p_start);
+        if ((insn & 0xffffff00) == 0xe24dd000) {
+            /* Stack adjustment.  Assume op uses the frame pointer.  */
+            p_start -= 4;
+            start_offset -= 4;
+        }
+        copy_size = arm_emit_ldr_info(name, start_offset, NULL, p_start, p_end, 
+                                      relocs, nb_relocs);
+    }
+#elif defined(HOST_M68K)
+    {
+        uint8_t *p;
+        p = (void *)(p_end - 2);
+        if (p == p_start)
+            error("empty code for %s", name);
+        /* remove NOPs, probably added for alignment */
+        while ((get16((uint16_t *)p) == 0x4e71) &&
+               (p > p_start))
+            p -= 2;
+        if (get16((uint16_t *)p) != 0x4e75)
+            error("rts expected at the end of %s", name);
+        copy_size = p - p_start;
+    }
+#else
+#error unsupported CPU
+#endif
+
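+    /* Whatever the host, start_offset/p_start now point past any prologue that was
+       skipped above and copy_size holds the number of op code bytes to copy. */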
+    /* compute the number of arguments by looking at the relocations */
+    for(i = 0;i < MAX_ARGS; i++)
+        args_present[i] = 0;
+
+    for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+        host_ulong offset = get_rel_offset(rel);
+        if (offset >= start_offset &&
+	    offset < start_offset + (p_end - p_start)) {
+            sym_name = get_rel_sym_name(rel);
+            if(!sym_name)
+                continue;
+            if (strstart(sym_name, "__op_param", &p) ||
+                strstart(sym_name, "__op_gen_label", &p)) {
+                n = strtoul(p, NULL, 10);
+                if (n > MAX_ARGS)
+                    error("too many arguments in %s", name);
+                args_present[n - 1] = 1;
+            }
+        }
+    }
+    
+    nb_args = 0;
+    while (nb_args < MAX_ARGS && args_present[nb_args])
+        nb_args++;
+    for(i = nb_args; i < MAX_ARGS; i++) {
+        if (args_present[i])
+            error("inconsistent argument numbering in %s", name);
+    }
+
+    if (gen_switch == 2) {
+        fprintf(outfile, "DEF(%s, %d, %d)\n", name + 3, nb_args, copy_size);
+    } else if (gen_switch == 1) {
+
+        /* output C code */
+        fprintf(outfile, "case INDEX_%s: {\n", name);
+        if (nb_args > 0) {
+            fprintf(outfile, "    long ");
+            for(i = 0; i < nb_args; i++) {
+                if (i != 0)
+                    fprintf(outfile, ", ");
+                fprintf(outfile, "param%d", i + 1);
+            }
+            fprintf(outfile, ";\n");
+        }
+#if defined(HOST_IA64)
+        fprintf(outfile, "    extern char %s;\n", name);
+#else
+        fprintf(outfile, "    extern void %s();\n", name);
+#endif
+
+        for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+            host_ulong offset = get_rel_offset(rel);
+            if (offset >= start_offset &&
+                offset < start_offset + (p_end - p_start)) {
+                sym_name = get_rel_sym_name(rel);
+                if(!sym_name)
+                    continue;
+                if (*sym_name && 
+#ifdef VBOX
+                    !strstart(sym_name, "remR3PhysWrite", NULL) &&
+                    !strstart(sym_name, "remR3PhysRead", NULL) &&
+#endif
+                    !strstart(sym_name, "__op_param", NULL) &&
+                    !strstart(sym_name, "__op_jmp", NULL) &&
+                    !strstart(sym_name, "__op_gen_label", NULL)) {
+#if defined(HOST_SPARC)
+		    if (sym_name[0] == '.') {
+			fprintf(outfile,
+				"extern char __dot_%s __asm__(\"%s\");\n",
+				sym_name+1, sym_name);
+			continue;
+		    }
+#endif
+#if defined(__APPLE__)
+/* set __attribute__((unused)) on darwin because we want to avoid a warning when we don't use the symbol */
+                    fprintf(outfile, "extern char %s __attribute__((unused));\n", sym_name);
+#elif defined(HOST_IA64)
+			if (ELF64_R_TYPE(rel->r_info) != R_IA64_PCREL21B)
+				/*
+				 * PCREL21 br.call targets generally
+				 * are out of range and need to go
+				 * through an "import stub".
+				 */
+				fprintf(outfile, "    extern char %s;\n",
+					sym_name);
+#else
+                    /* don't include memcpy here as this external reference wouldn't work anyway! */
+                    if (strcmp(sym_name, "memcpy"))
+                        fprintf(outfile, "extern char %s;\n", sym_name);
+#endif
+                }
+            }
+        }
+
+        fprintf(outfile, "    memcpy(gen_code_ptr, (void *)((char *)&%s+%d), %d);\n",
+					name, (int)(start_offset - offset), copy_size);
+
+        /* emit code offset information */
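+        /* __op_labelN symbols are variables holding the address of a label inside an
+           op body; when that address falls inside the copied range, record it in
+           label_offsets[N] relative to the generated code buffer. */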
+        {
+            EXE_SYM *sym;
+            const char *sym_name, *p;
+            unsigned long val;
+            int n;
+
+            for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+                sym_name = get_sym_name(sym);
+                if (strstart(sym_name, "__op_label", &p)) {
+                    uint8_t *ptr;
+                    unsigned long offset;
+                    
+                    /* test if the variable refers to a label inside
+                       the code we are generating */
+#ifdef CONFIG_FORMAT_COFF
+                    if (sym->st_shndx == text_shndx) {
+                        ptr = sdata[coff_text_shndx];
+                    } else if (sym->st_shndx == data_shndx) {
+                        ptr = sdata[coff_data_shndx];
+                    } else {
+                        ptr = NULL;
+                    }
+#elif defined(CONFIG_FORMAT_MACH)
+                    if(!sym->n_sect)
+                        continue;
+                    ptr = sdata[sym->n_sect-1];
+#elif defined(CONFIG_FORMAT_AOUT)
+                    switch (type_to_sec_number(sym->n_type))
+                    {
+                        case N_TEXT:    ptr = (uint8_t *)aout_hdr + N_TXTOFF(*aout_hdr); break;
+                        case N_DATA:    ptr = (uint8_t *)aout_hdr + N_DATOFF(*aout_hdr); break;
+                        default:        ptr = NULL;
+                    }
+#else
+                    ptr = sdata[sym->st_shndx];
+#endif
+                    if (!ptr)
+                        error("__op_labelN in invalid section");
+                    offset = sym->st_value;
+#ifdef CONFIG_FORMAT_MACH
+                    offset -= section_hdr[sym->n_sect-1].addr;
+#elif defined(CONFIG_FORMAT_AOUT)
+                    if (type_to_sec_number(sym->n_type) == N_DATA)
+                        offset -= aout_hdr->a_text;
+#endif
+                    val = *(unsigned long *)(ptr + offset);
+#ifdef ELF_USES_RELOCA
+                    {
+                        int reloc_shndx, nb_relocs1, j;
+
+                        /* try to find a matching relocation */
+                        reloc_shndx = find_reloc(sym->st_shndx);
+                        if (reloc_shndx) {
+                            nb_relocs1 = shdr[reloc_shndx].sh_size / 
+                                shdr[reloc_shndx].sh_entsize;
+                            rel = (ELF_RELOC *)sdata[reloc_shndx];
+                            for(j = 0; j < nb_relocs1; j++) {
+                                if (rel->r_offset == offset) {
+				    val = rel->r_addend;
+                                    break;
+                                }
+				rel++;
+                            }
+                        }
+                    }
+#endif                    
+                    if (val >= start_offset && val <= start_offset + copy_size) {
+                        n = strtol(p, NULL, 10);
+                        fprintf(outfile, "    label_offsets[%d] = %ld + (gen_code_ptr - gen_code_buf);\n", n, (long)(val - start_offset));
+                    }
+                }
+            }
+        }
+
+        /* load parameters into variables */
+        for(i = 0; i < nb_args; i++) {
+            fprintf(outfile, "    param%d = *opparam_ptr++;\n", i + 1);
+        }
+
+        /* patch relocations */
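+        /* For each relocation that falls inside the copied range, emit a C statement
+           that patches the copied bytes at translation time.  For example, on i386 an
+           R_386_32 relocation against __op_param1 comes out roughly as
+               *(uint32_t *)(gen_code_ptr + 8) = param1 + 0;
+           (the offset and addend above are illustrative only). */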
+#if defined(HOST_I386) 
+            {
+                char name[256];
+                int type;
+                int addend;
+                int reloc_offset;
+                for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                host_ulong offset = get_rel_offset(rel);
+                if (offset >= start_offset &&
+		    offset < start_offset + copy_size) {
+#if defined(CONFIG_FORMAT_AOUT) || defined(CONFIG_FORMAT_MACH)
+                    sym_name = get_rel_sym_name_and_addend(rel, &addend);
+#else
+                    sym_name = get_rel_sym_name(rel);
+#endif
+                    if (!sym_name)
+                        continue;
+                    reloc_offset = offset - start_offset;
+                    if (strstart(sym_name, "__op_jmp", &p)) {
+                        int n;
+                        n = strtol(p, NULL, 10);
+                        /* __op_jmp relocations are done at
+                           runtime to do translated block
+                           chaining: the offset of the instruction
+                           needs to be stored */
+                        fprintf(outfile, "    jmp_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n",
+                                n, reloc_offset);
+                        continue;
+                    }
+
+                    get_reloc_expr(name, sizeof(name), sym_name);
+#if !defined(CONFIG_FORMAT_AOUT) && !defined(CONFIG_FORMAT_MACH)
+                    addend = get32((uint32_t *)(text + offset));
+#endif
+#ifdef CONFIG_FORMAT_ELF
+                    type = ELF32_R_TYPE(rel->r_info);
+                    switch(type) {
+                    case R_386_32:
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n", 
+                                reloc_offset, name, addend);
+                        break;
+                    case R_386_PC32:
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s - (long)(gen_code_ptr + %d) + %d;\n", 
+                                reloc_offset, name, reloc_offset, addend);
+                        break;
+                    default:
+                        error("unsupported i386 relocation (%d)", type);
+                    }
+#elif defined(CONFIG_FORMAT_COFF)
+                    {
+                        char *temp_name;
+                        int j;
+                        EXE_SYM *sym;
+                        temp_name = get_sym_name(symtab + *(uint32_t *)(rel->r_reloc->r_symndx));
+                        if (!strcmp(temp_name, ".data")) {
+                            for (j = 0, sym = symtab; j < nb_syms; j++, sym++) {
+                                if (strstart(sym->st_name, sym_name, NULL)) {
+                                    addend -= sym->st_value;
+                                }
+                            }
+                        }
+                    }
+                    type = rel->r_type;
+                    switch(type) {
+                    case DIR32:
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n", 
+                                reloc_offset, name, addend);
+                        break;
+                    case DISP32:
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s - (long)(gen_code_ptr + %d) + %d -4;\n", 
+                                reloc_offset, name, reloc_offset, addend);
+                        break;
+                    default:
+                        error("unsupported i386 relocation (%d)", type);
+                    }
+#elif defined(CONFIG_FORMAT_AOUT) || defined(CONFIG_FORMAT_MACH)
+                    if (rel->r_pcrel) {
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s - (long)(gen_code_ptr + %d) + %d;\n",
+                                offset - start_offset, name, offset - start_offset, addend);
+                    } else {
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n",
+                                offset - start_offset, name, addend);
+                    }
+                    (void)type;
+#else
+#error unsupported object format
+#endif
+                }
+                }
+            }
+#elif defined(HOST_X86_64)
+            {
+                char name[256];
+                int type;
+                int addend;
+                int reloc_offset;
+                for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                if (rel->r_offset >= start_offset &&
+		    rel->r_offset < start_offset + copy_size) {
+                    sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
+                    get_reloc_expr(name, sizeof(name), sym_name);
+                    type = ELF32_R_TYPE(rel->r_info);
+                    addend = rel->r_addend;
+                    reloc_offset = rel->r_offset - start_offset;
+                    switch(type) {
+                    case R_X86_64_32:
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = (uint32_t)%s + %d;\n", 
+                                reloc_offset, name, addend);
+                        break;
+                    case R_X86_64_32S:
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = (int32_t)%s + %d;\n", 
+                                reloc_offset, name, addend);
+                        break;
+                    case R_X86_64_PC32:
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s - (long)(gen_code_ptr + %d) + %d;\n", 
+                                reloc_offset, name, reloc_offset, addend);
+                        break;
+#ifdef VBOX /** @todo Re-check the sanity of this */
+                    case R_X86_64_64:
+                        fprintf(outfile, "    *(uint64_t *)(gen_code_ptr + %d) = (uint64_t)%s + %d;\n", 
+                                reloc_offset, name, addend);
+                        break;
+#endif 
+                    default:
+                        error("unsupported X86_64 relocation (%d)", type);
+                    }
+                }
+                }
+            }
+#elif defined(HOST_PPC)
+            {
+#ifdef CONFIG_FORMAT_ELF
+                char name[256];
+                int type;
+                int addend;
+                int reloc_offset;
+                for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                    if (rel->r_offset >= start_offset &&
+			rel->r_offset < start_offset + copy_size) {
+                        sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
+                        reloc_offset = rel->r_offset - start_offset;
+                        if (strstart(sym_name, "__op_jmp", &p)) {
+                            int n;
+                            n = strtol(p, NULL, 10);
+                            /* __op_jmp relocations are done at
+                               runtime to do translated block
+                               chaining: the offset of the instruction
+                               needs to be stored */
+                            fprintf(outfile, "    jmp_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n",
+                                    n, reloc_offset);
+                            continue;
+                        }
+                        
+                        get_reloc_expr(name, sizeof(name), sym_name);
+                        type = ELF32_R_TYPE(rel->r_info);
+                        addend = rel->r_addend;
+                        switch(type) {
+                        case R_PPC_ADDR32:
+                            fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n", 
+                                    reloc_offset, name, addend);
+                            break;
+                        case R_PPC_ADDR16_LO:
+                            fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d) = (%s + %d);\n", 
+                                    reloc_offset, name, addend);
+                            break;
+                        case R_PPC_ADDR16_HI:
+                            fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d) = (%s + %d) >> 16;\n", 
+                                    reloc_offset, name, addend);
+                            break;
+                        case R_PPC_ADDR16_HA:
+                            fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d) = (%s + %d + 0x8000) >> 16;\n", 
+                                    reloc_offset, name, addend);
+                            break;
+                        case R_PPC_REL24:
+                            /* warning: target must be within 32 MB */
+                            fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = (*(uint32_t *)(gen_code_ptr + %d) & ~0x03fffffc) | ((%s - (long)(gen_code_ptr + %d) + %d) & 0x03fffffc);\n", 
+                                    reloc_offset, reloc_offset, name, reloc_offset, addend);
+                            break;
+                        default:
+                            error("unsupported powerpc relocation (%d)", type);
+                        }
+                    }
+                }
+#elif defined(CONFIG_FORMAT_MACH)
+				struct scattered_relocation_info *scarel;
+				struct relocation_info * rel;
+				char final_sym_name[256];
+				const char *sym_name;
+				const char *p;
+				int slide, sslide;
+				int i;
+	
+				for(i = 0, rel = relocs; i < nb_relocs; i++, rel++) {
+					unsigned int offset, length, value = 0;
+					unsigned int type, pcrel, isym = 0;
+					unsigned int usesym = 0;
+				
+					if(R_SCATTERED & rel->r_address) {
+						scarel = (struct scattered_relocation_info*)rel;
+						offset = (unsigned int)scarel->r_address;
+						length = scarel->r_length;
+						pcrel = scarel->r_pcrel;
+						type = scarel->r_type;
+						value = scarel->r_value;
+					} else {
+						value = isym = rel->r_symbolnum;
+						usesym = (rel->r_extern);
+						offset = rel->r_address;
+						length = rel->r_length;
+						pcrel = rel->r_pcrel;
+						type = rel->r_type;
+					}
+				
+					slide = offset - start_offset;
+		
+					if (!(offset >= start_offset && offset < start_offset + size)) 
+						continue;  /* not in our range */
+
+					sym_name = get_reloc_name(rel, &sslide);
+					
+					if(usesym && symtab[isym].n_type & N_STAB)
+						continue; /* don't handle STAB (debug sym) */
+					
+					if (sym_name && strstart(sym_name, "__op_jmp", &p)) {
+						int n;
+						n = strtol(p, NULL, 10);
+						fprintf(outfile, "    jmp_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n",
+							n, slide);
+						continue; /* Nothing more to do */
+					}
+					
+					if(!sym_name)
+					{
+						fprintf(outfile, "/* #warning relocation not handled in %s (value 0x%x, %s, offset 0x%x, length 0x%x, %s, type 0x%x) */\n",
+						           name, value, usesym ? "use sym" : "don't use sym", offset, length, pcrel ? "pcrel":"", type);
+						continue; /* dunno how to handle without final_sym_name */
+					}
+													   
+                                        get_reloc_expr(final_sym_name, sizeof(final_sym_name), 
+                                                       sym_name);
+					switch(type) {
+					case PPC_RELOC_BR24:
+					    if (!strstart(sym_name,"__op_gen_label",&p)) {
+    						fprintf(outfile, "{\n");
+    						fprintf(outfile, "    uint32_t imm = *(uint32_t *)(gen_code_ptr + %d) & 0x3fffffc;\n", slide);
+    						fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = (*(uint32_t *)(gen_code_ptr + %d) & ~0x03fffffc) | ((imm + ((long)%s - (long)gen_code_ptr) + %d) & 0x03fffffc);\n", 
+											slide, slide, name, sslide );
+    						fprintf(outfile, "}\n");
+    					} else {
+							fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = (*(uint32_t *)(gen_code_ptr + %d) & ~0x03fffffc) | (((long)%s - (long)gen_code_ptr - %d) & 0x03fffffc);\n",
+											slide, slide, final_sym_name, slide);
+    					}
+						break;
+					case PPC_RELOC_HI16:
+						fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d + 2) = (%s + %d) >> 16;\n", 
+							slide, final_sym_name, sslide);
+						break;
+					case PPC_RELOC_LO16:
+						fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d + 2) = (%s + %d);\n", 
+					slide, final_sym_name, sslide);
+                            break;
+					case PPC_RELOC_HA16:
+						fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d + 2) = (%s + %d + 0x8000) >> 16;\n", 
+							slide, final_sym_name, sslide);
+						break;
+				default:
+					error("unsupported powerpc relocation (%d)", type);
+				}
+			}
+#else
+#error unsupported object format
+#endif
+            }
+#elif defined(HOST_S390)
+            {
+                char name[256];
+                int type;
+                int addend;
+                int reloc_offset;
+                for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                    if (rel->r_offset >= start_offset &&
+			rel->r_offset < start_offset + copy_size) {
+                        sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
+                        get_reloc_expr(name, sizeof(name), sym_name);
+                        type = ELF32_R_TYPE(rel->r_info);
+                        addend = rel->r_addend;
+                        reloc_offset = rel->r_offset - start_offset;
+                        switch(type) {
+                        case R_390_32:
+                            fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n", 
+                                    reloc_offset, name, addend);
+                            break;
+                        case R_390_16:
+                            fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d) = %s + %d;\n", 
+                                    reloc_offset, name, addend);
+                            break;
+                        case R_390_8:
+                            fprintf(outfile, "    *(uint8_t *)(gen_code_ptr + %d) = %s + %d;\n", 
+                                    reloc_offset, name, addend);
+                            break;
+                        default:
+                            error("unsupported s390 relocation (%d)", type);
+                        }
+                    }
+                }
+            }
+#elif defined(HOST_ALPHA)
+            {
+                for (i = 0, rel = relocs; i < nb_relocs; i++, rel++) {
+		    if (rel->r_offset >= start_offset && rel->r_offset < start_offset + copy_size) {
+			int type;
+                        long reloc_offset;
+
+			type = ELF64_R_TYPE(rel->r_info);
+			sym_name = strtab + symtab[ELF64_R_SYM(rel->r_info)].st_name;
+                        reloc_offset = rel->r_offset - start_offset;
+			switch (type) {
+			case R_ALPHA_GPDISP:
+			    /* The gp is just 32 bit, and never changes, so it's easiest to emit it
+			       as an immediate instead of constructing it from the pv or ra.  */
+			    fprintf(outfile, "    immediate_ldah(gen_code_ptr + %ld, gp);\n",
+				    reloc_offset);
+			    fprintf(outfile, "    immediate_lda(gen_code_ptr + %ld, gp);\n",
+				    reloc_offset + (int)rel->r_addend);
+			    break;
+			case R_ALPHA_LITUSE:
+			    /* jsr to literal hint. Could be used to optimize to bsr. Ignore for
+			       now, since some called functions (libc) need pv to be set up.  */
+			    break;
+			case R_ALPHA_HINT:
+			    /* Branch target prediction hint. Ignore for now.  Should be already
+			       correct for in-function jumps.  */
+			    break;
+			case R_ALPHA_LITERAL:
+			    /* Load a literal from the GOT relative to the gp.  Since there's only a
+			       single gp, nothing is to be done.  */
+			    break;
+			case R_ALPHA_GPRELHIGH:
+			    /* Handle fake relocations against __op_param symbol.  Need to emit the
+			       high part of the immediate value instead.  Other symbols need no
+			       special treatment.  */
+			    if (strstart(sym_name, "__op_param", &p))
+				fprintf(outfile, "    immediate_ldah(gen_code_ptr + %ld, param%s);\n",
+					reloc_offset, p);
+			    break;
+			case R_ALPHA_GPRELLOW:
+			    if (strstart(sym_name, "__op_param", &p))
+				fprintf(outfile, "    immediate_lda(gen_code_ptr + %ld, param%s);\n",
+					reloc_offset, p);
+			    break;
+			case R_ALPHA_BRSGP:
+			    /* PC-relative jump. Tweak offset to skip the two instructions that try to
+			       set up the gp from the pv.  */
+			    fprintf(outfile, "    fix_bsr(gen_code_ptr + %ld, (uint8_t *) &%s - (gen_code_ptr + %ld + 4) + 8);\n",
+				    reloc_offset, sym_name, reloc_offset);
+			    break;
+			default:
+			    error("unsupported Alpha relocation (%d)", type);
+			}
+		    }
+                }
+            }
+#elif defined(HOST_IA64)
+            {
+		unsigned long sym_idx;
+		long code_offset;
+                char name[256];
+                int type;
+                long addend;
+
+                for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+		    sym_idx = ELF64_R_SYM(rel->r_info);
+                    if (rel->r_offset < start_offset
+			|| rel->r_offset >= start_offset + copy_size)
+			continue;
+		    sym_name = (strtab + symtab[sym_idx].st_name);
+		    code_offset = rel->r_offset - start_offset;
+		    if (strstart(sym_name, "__op_jmp", &p)) {
+			int n;
+			n = strtol(p, NULL, 10);
+			/* __op_jmp relocations are done at
+			   runtime to do translated block
+			   chaining: the offset of the instruction
+			   needs to be stored */
+			fprintf(outfile, "    jmp_offsets[%d] ="
+				"%ld + (gen_code_ptr - gen_code_buf);\n",
+				n, code_offset);
+			continue;
+		    }
+		    get_reloc_expr(name, sizeof(name), sym_name);
+		    type = ELF64_R_TYPE(rel->r_info);
+		    addend = rel->r_addend;
+		    switch(type) {
+		      case R_IA64_IMM64:
+			  fprintf(outfile,
+				  "    ia64_imm64(gen_code_ptr + %ld, "
+				  "%s + %ld);\n",
+				  code_offset, name, addend);
+			  break;
+		      case R_IA64_LTOFF22X:
+		      case R_IA64_LTOFF22:
+			  fprintf(outfile, "    IA64_LTOFF(gen_code_ptr + %ld,"
+				  " %s + %ld, %d);\n",
+				  code_offset, name, addend,
+				  (type == R_IA64_LTOFF22X));
+			  break;
+		      case R_IA64_LDXMOV:
+			  fprintf(outfile,
+				  "    ia64_ldxmov(gen_code_ptr + %ld,"
+				  " %s + %ld);\n", code_offset, name, addend);
+			  break;
+
+		      case R_IA64_PCREL21B:
+			  if (strstart(sym_name, "__op_gen_label", NULL)) {
+			      fprintf(outfile,
+				      "    ia64_imm21b(gen_code_ptr + %ld,"
+				      " (long) (%s + %ld -\n\t\t"
+				      "((long) gen_code_ptr + %ld)) >> 4);\n",
+				      code_offset, name, addend,
+				      code_offset & ~0xfUL);
+			  } else {
+			      fprintf(outfile,
+				      "    IA64_PLT(gen_code_ptr + %ld, "
+				      "%d);\t/* %s + %ld */\n",
+				      code_offset,
+				      get_plt_index(sym_name, addend),
+				      sym_name, addend);
+			  }
+			  break;
+		      default:
+			  error("unsupported ia64 relocation (0x%x)",
+				type);
+		    }
+                }
+		fprintf(outfile, "    ia64_nop_b(gen_code_ptr + %d);\n",
+			copy_size - 16 + 2);
+            }
+#elif defined(HOST_SPARC)
+            {
+                char name[256];
+                int type;
+                int addend;
+                int reloc_offset;
+                for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                    if (rel->r_offset >= start_offset &&
+			rel->r_offset < start_offset + copy_size) {
+                        sym_name = strtab + symtab[ELF32_R_SYM(rel->r_info)].st_name;
+                        get_reloc_expr(name, sizeof(name), sym_name);
+                        type = ELF32_R_TYPE(rel->r_info);
+                        addend = rel->r_addend;
+                        reloc_offset = rel->r_offset - start_offset;
+                        switch(type) {
+                        case R_SPARC_32:
+                            fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n", 
+                                    reloc_offset, name, addend);
+			    break;
+			case R_SPARC_HI22:
+                            fprintf(outfile,
+				    "    *(uint32_t *)(gen_code_ptr + %d) = "
+				    "((*(uint32_t *)(gen_code_ptr + %d)) "
+				    " & ~0x3fffff) "
+				    " | (((%s + %d) >> 10) & 0x3fffff);\n",
+                                    reloc_offset, reloc_offset, name, addend);
+			    break;
+			case R_SPARC_LO10:
+                            fprintf(outfile,
+				    "    *(uint32_t *)(gen_code_ptr + %d) = "
+				    "((*(uint32_t *)(gen_code_ptr + %d)) "
+				    " & ~0x3ff) "
+				    " | ((%s + %d) & 0x3ff);\n",
+                                    reloc_offset, reloc_offset, name, addend);
+			    break;
+			case R_SPARC_WDISP30:
+			    fprintf(outfile,
+				    "    *(uint32_t *)(gen_code_ptr + %d) = "
+				    "((*(uint32_t *)(gen_code_ptr + %d)) "
+				    " & ~0x3fffffff) "
+				    " | ((((%s + %d) - (long)(gen_code_ptr + %d))>>2) "
+				    "    & 0x3fffffff);\n",
+				    reloc_offset, reloc_offset, name, addend,
+				    reloc_offset);
+			    break;
+                        case R_SPARC_WDISP22:
+                            fprintf(outfile,
+                                    "    *(uint32_t *)(gen_code_ptr + %d) = "
+                                    "((*(uint32_t *)(gen_code_ptr + %d)) "
+                                    " & ~0x3fffff) "
+                                    " | ((((%s + %d) - (long)(gen_code_ptr + %d))>>2) "
+                                    "    & 0x3fffff);\n",
+                                    rel->r_offset - start_offset,
+                                    rel->r_offset - start_offset,
+                                    name, addend,
+                                    rel->r_offset - start_offset);
+                            break;
+                        default:
+                            error("unsupported sparc relocation (%d)", type);
+                        }
+                    }
+                }
+            }
+#elif defined(HOST_SPARC64)
+            {
+                char name[256];
+                int type;
+                int addend;
+                int reloc_offset;
+                for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                    if (rel->r_offset >= start_offset &&
+			rel->r_offset < start_offset + copy_size) {
+                        sym_name = strtab + symtab[ELF64_R_SYM(rel->r_info)].st_name;
+                        get_reloc_expr(name, sizeof(name), sym_name);
+                        type = ELF32_R_TYPE(rel->r_info);
+                        addend = rel->r_addend;
+                        reloc_offset = rel->r_offset - start_offset;
+                        switch(type) {
+                        case R_SPARC_32:
+                            fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n",
+                                    reloc_offset, name, addend);
+			    break;
+			case R_SPARC_HI22:
+                            fprintf(outfile,
+				    "    *(uint32_t *)(gen_code_ptr + %d) = "
+				    "((*(uint32_t *)(gen_code_ptr + %d)) "
+				    " & ~0x3fffff) "
+				    " | (((%s + %d) >> 10) & 0x3fffff);\n",
+                                    reloc_offset, reloc_offset, name, addend);
+			    break;
+			case R_SPARC_LO10:
+                            fprintf(outfile,
+				    "    *(uint32_t *)(gen_code_ptr + %d) = "
+				    "((*(uint32_t *)(gen_code_ptr + %d)) "
+				    " & ~0x3ff) "
+				    " | ((%s + %d) & 0x3ff);\n",
+                                    reloc_offset, reloc_offset, name, addend);
+			    break;
+                        case R_SPARC_OLO10:
+                            addend += ELF64_R_TYPE_DATA (rel->r_info);
+                            fprintf(outfile,
+				    "    *(uint32_t *)(gen_code_ptr + %d) = "
+				    "((*(uint32_t *)(gen_code_ptr + %d)) "
+				    " & ~0x3ff) "
+				    " | ((%s + %d) & 0x3ff);\n",
+                                    reloc_offset, reloc_offset, name, addend);
+			    break;
+			case R_SPARC_WDISP30:
+			    fprintf(outfile,
+				    "    *(uint32_t *)(gen_code_ptr + %d) = "
+				    "((*(uint32_t *)(gen_code_ptr + %d)) "
+				    " & ~0x3fffffff) "
+				    " | ((((%s + %d) - (long)(gen_code_ptr + %d))>>2) "
+				    "    & 0x3fffffff);\n",
+				    reloc_offset, reloc_offset, name, addend,
+				    reloc_offset);
+			    break;
+                        case R_SPARC_WDISP22:
+                            fprintf(outfile,
+                                    "    *(uint32_t *)(gen_code_ptr + %d) = "
+                                    "((*(uint32_t *)(gen_code_ptr + %d)) "
+                                    " & ~0x3fffff) "
+                                    " | ((((%s + %d) - (long)(gen_code_ptr + %d))>>2) "
+                                    "    & 0x3fffff);\n",
+                                    reloc_offset, reloc_offset, name, addend,
+				    reloc_offset);
+                            break;
+                        default:
+			    error("unsupported sparc64 relocation (%d) for symbol %s", type, name);
+                        }
+                    }
+                }
+            }
+#elif defined(HOST_ARM)
+            {
+                char name[256];
+                int type;
+                int addend;
+                int reloc_offset;
+                uint32_t insn;
+
+                insn = get32((uint32_t *)(p_start + 4));
+                /* If prologue ends in sub sp, sp, #const then assume
+                   op has a stack frame and needs the frame pointer.  */
+                if ((insn & 0xffffff00) == 0xe24dd000) {
+                    int i;
+                    uint32_t opcode;
+                    opcode = 0xe28db000; /* add fp, sp, #0.  */
+#if 0
+/* ??? Need to undo the extra stack adjustment at the end of the op.
+   For now just leave the stack misaligned and hope it doesn't break anything
+   too important.  */
+                    if ((insn & 4) != 0) {
+                        /* Preserve doubleword stack alignment.  */
+                        fprintf(outfile,
+                                "    *(uint32_t *)(gen_code_ptr + 4)= 0x%x;\n",
+                                insn + 4);
+                        opcode -= 4;
+                    }
+#endif
+                    insn = get32((uint32_t *)(p_start - 4));
+                    /* Calculate the size of the saved registers,
+                       excluding pc.  */
+                    for (i = 0; i < 15; i++) {
+                        if (insn & (1 << i))
+                            opcode += 4;
+                    }
+                    fprintf(outfile,
+                            "    *(uint32_t *)gen_code_ptr = 0x%x;\n", opcode);
+                }
+                arm_emit_ldr_info(name, start_offset, outfile, p_start, p_end,
+                                  relocs, nb_relocs);
+
+                for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                if (rel->r_offset >= start_offset &&
+		    rel->r_offset < start_offset + copy_size) {
+                    sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
+                    /* the compiler leaves some unnecessary references in the code */
+                    if (sym_name[0] == '\0')
+                        continue;
+                    get_reloc_expr(name, sizeof(name), sym_name);
+                    type = ELF32_R_TYPE(rel->r_info);
+                    addend = get32((uint32_t *)(text + rel->r_offset));
+                    reloc_offset = rel->r_offset - start_offset;
+                    switch(type) {
+                    case R_ARM_ABS32:
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n", 
+                                reloc_offset, name, addend);
+                        break;
+                    case R_ARM_PC24:
+                    case R_ARM_JUMP24:
+                    case R_ARM_CALL:
+                        fprintf(outfile, "    arm_reloc_pc24((uint32_t *)(gen_code_ptr + %d), 0x%x, %s);\n", 
+                                reloc_offset, addend, name);
+                        break;
+                    default:
+                        error("unsupported arm relocation (%d)", type);
+                    }
+                }
+                }
+            }
+#elif defined(HOST_M68K)
+            {
+                char name[256];
+                int type;
+                int addend;
+                int reloc_offset;
+		Elf32_Sym *sym;
+                for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                if (rel->r_offset >= start_offset &&
+		    rel->r_offset < start_offset + copy_size) {
+		    sym = &(symtab[ELFW(R_SYM)(rel->r_info)]);
+                    sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
+                    get_reloc_expr(name, sizeof(name), sym_name);
+                    type = ELF32_R_TYPE(rel->r_info);
+                    addend = get32((uint32_t *)(text + rel->r_offset)) + rel->r_addend;
+                    reloc_offset = rel->r_offset - start_offset;
+                    switch(type) {
+                    case R_68K_32:
+		        fprintf(outfile, "    /* R_68K_32 RELOC, offset %x */\n", rel->r_offset) ;
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %#x;\n", 
+                                reloc_offset, name, addend );
+                        break;
+                    case R_68K_PC32:
+		        fprintf(outfile, "    /* R_68K_PC32 RELOC, offset %x */\n", rel->r_offset);
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s - (long)(gen_code_ptr + %#x) + %#x;\n", 
+                                reloc_offset, name, reloc_offset, /*sym->st_value+*/ addend);
+                        break;
+                    default:
+                        error("unsupported m68k relocation (%d)", type);
+                    }
+                }
+                }
+            }
+#else
+#error unsupported CPU
+#endif
+        fprintf(outfile, "    gen_code_ptr += %d;\n", copy_size);
+        fprintf(outfile, "}\n");
+        fprintf(outfile, "break;\n\n");
+    } else {
+        fprintf(outfile, "static inline void gen_%s(", name);
+        if (nb_args == 0) {
+            fprintf(outfile, "void");
+        } else {
+            for(i = 0; i < nb_args; i++) {
+                if (i != 0)
+                    fprintf(outfile, ", ");
+                fprintf(outfile, "long param%d", i + 1);
+            }
+        }
+        fprintf(outfile, ")\n");
+        fprintf(outfile, "{\n");
+        for(i = 0; i < nb_args; i++) {
+            fprintf(outfile, "    *gen_opparam_ptr++ = param%d;\n", i + 1);
+        }
+        fprintf(outfile, "    *gen_opc_ptr++ = INDEX_%s;\n", name);
+        fprintf(outfile, "}\n\n");
+    }
+}
+
+int gen_file(FILE *outfile, int out_type)
+{
+    int i;
+    EXE_SYM *sym;
+
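+    /* Three output modes: OUT_INDEX_OP emits the DEF() opcode table, OUT_GEN_OP emits
+       the static inline gen_op_xxx() wrappers, and the default mode emits dyngen_code()
+       containing the big code generation switch. */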
+    if (out_type == OUT_INDEX_OP) {
+        fprintf(outfile, "DEF(end, 0, 0)\n");
+        fprintf(outfile, "DEF(nop, 0, 0)\n");
+        fprintf(outfile, "DEF(nop1, 1, 0)\n");
+        fprintf(outfile, "DEF(nop2, 2, 0)\n");
+        fprintf(outfile, "DEF(nop3, 3, 0)\n");
+        for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+            const char *name;
+            name = get_sym_name(sym);
+            if (strstart(name, OP_PREFIX, NULL)) {
+                gen_code(name, sym->st_value, sym->st_size, outfile, 2);
+            }
+        }
+    } else if (out_type == OUT_GEN_OP) {
+        /* generate gen_xxx functions */
+        fprintf(outfile, "#include \"dyngen-op.h\"\n");
+        for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+            const char *name;
+            name = get_sym_name(sym);
+            if (strstart(name, OP_PREFIX, NULL)) {
+#if defined(CONFIG_FORMAT_ELF) || defined(CONFIG_FORMAT_COFF)
+                if (sym->st_shndx != text_shndx)
+                    error("invalid section for opcode (0x%x)", sym->st_shndx);
+#endif
+                gen_code(name, sym->st_value, sym->st_size, outfile, 0);
+            }
+        }
+        
+    } else {
+        /* generate big code generation switch */
+
+#ifdef HOST_ARM
+        /* We need to know the size of all the ops so we can figure out when
+           to emit constant pools.  This must be consistent with opc.h.  */
+fprintf(outfile,
+"static const uint32_t arm_opc_size[] = {\n"
+"  0,\n" /* end */
+"  0,\n" /* nop */
+"  0,\n" /* nop1 */
+"  0,\n" /* nop2 */
+"  0,\n"); /* nop3 */
+        for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+            const char *name;
+            name = get_sym_name(sym);
+            if (strstart(name, OP_PREFIX, NULL)) {
+                fprintf(outfile, "  %d,\n", sym->st_size);
+            }
+	}
+fprintf(outfile,
+"};\n");
+#endif
+
+fprintf(outfile,
+"int dyngen_code(uint8_t *gen_code_buf,\n"
+"                uint16_t *label_offsets, uint16_t *jmp_offsets,\n"
+"                const uint16_t *opc_buf, const uint32_t *opparam_buf, const long *gen_labels)\n"
+"{\n"
+"    uint8_t *gen_code_ptr;\n"
+"    const uint16_t *opc_ptr;\n"
+"    const uint32_t *opparam_ptr;\n");
+
+#ifdef HOST_ARM
+/* Arm is tricky because it uses constant pools for loading immediate values.
+   We assume (and require) each function is code followed by a constant pool.
+   All the ops are small so this should be ok.  For each op we figure
+   out how much "spare" range we have in the load instructions.  This allows
+   us to insert subsequent ops in between the op and the constant pool,
+   eliminating the need to jump around the pool.
+
+   We currently generate:
+   
+   [ For this example we assume merging would move op1_pool out of range.
+     In practice we should be able to combine many ops before the offset
+     limits are reached. ]
+   op1_code;
+   op2_code;
+   goto op3;
+   op2_pool;
+   op1_pool;
+op3:
+   op3_code;
+   ret;
+   op3_pool;
+
+   Ideally we'd put op1_pool before op2_pool, but that requires two passes.
+ */
+fprintf(outfile,
+"    uint8_t *last_gen_code_ptr = gen_code_buf;\n"
+"    LDREntry *arm_ldr_ptr = arm_ldr_table;\n"
+"    uint32_t *arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n"
+/* Initialise the permissible pool offset to an arbitrarily large value.  */
+"    uint8_t *arm_pool_ptr = gen_code_buf + 0x1000000;\n");
+#endif
+#ifdef HOST_IA64
+    {
+	long addend, not_first = 0;
+	unsigned long sym_idx;
+	int index, max_index;
+	const char *sym_name;
+	EXE_RELOC *rel;
+
+	max_index = -1;
+	for (i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+	    sym_idx = ELF64_R_SYM(rel->r_info);
+	    sym_name = (strtab + symtab[sym_idx].st_name);
+	    if (strstart(sym_name, "__op_gen_label", NULL))
+		continue;
+	    if (ELF64_R_TYPE(rel->r_info) != R_IA64_PCREL21B)
+		continue;
+
+	    addend = rel->r_addend;
+	    index = get_plt_index(sym_name, addend);
+	    if (index <= max_index)
+		continue;
+	    max_index = index;
+	    fprintf(outfile, "    extern void %s(void);\n", sym_name);
+	}
+
+	fprintf(outfile,
+		"    struct ia64_fixup *plt_fixes = NULL, "
+		"*ltoff_fixes = NULL;\n"
+		"    static long plt_target[] = {\n\t");
+
+	max_index = -1;
+	for (i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+	    sym_idx = ELF64_R_SYM(rel->r_info);
+	    sym_name = (strtab + symtab[sym_idx].st_name);
+	    if (strstart(sym_name, "__op_gen_label", NULL))
+		continue;
+	    if (ELF64_R_TYPE(rel->r_info) != R_IA64_PCREL21B)
+		continue;
+
+	    addend = rel->r_addend;
+	    index = get_plt_index(sym_name, addend);
+	    if (index <= max_index)
+		continue;
+	    max_index = index;
+
+	    if (not_first)
+		fprintf(outfile, ",\n\t");
+	    not_first = 1;
+	    if (addend)
+		fprintf(outfile, "(long) &%s + %ld", sym_name, addend);
+	    else
+		fprintf(outfile, "(long) &%s", sym_name);
+	}
+	fprintf(outfile, "\n    };\n"
+	    "    unsigned int plt_offset[%u] = { 0 };\n", max_index + 1);
+    }
+#endif
+
+fprintf(outfile,
+"\n"
+"    gen_code_ptr = gen_code_buf;\n"
+"    opc_ptr = opc_buf;\n"
+"    opparam_ptr = opparam_buf;\n");
+
+	/* Generate prologue, if needed. */ 
+
+fprintf(outfile,
+"    for(;;) {\n");
+
+#ifdef HOST_ARM
+/* Generate constant pool if needed */
+fprintf(outfile,
+"            if (gen_code_ptr + arm_opc_size[*opc_ptr] >= arm_pool_ptr) {\n"
+"                gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, "
+"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 1);\n"
+"                last_gen_code_ptr = gen_code_ptr;\n"
+"                arm_ldr_ptr = arm_ldr_table;\n"
+"                arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n"
+"                arm_pool_ptr = gen_code_ptr + 0x1000000;\n"
+"            }\n");
+#endif
+
+fprintf(outfile,
+"        switch(*opc_ptr++) {\n");
+
+        for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+            const char *name;
+            name = get_sym_name(sym);
+            if (strstart(name, OP_PREFIX, NULL)) {
+#if 0
+                printf("%4d: %s pos=0x%08x len=%d\n", 
+                       i, name, sym->st_value, sym->st_size);
+#endif
+#if defined(CONFIG_FORMAT_ELF) || defined(CONFIG_FORMAT_COFF)
+                if (sym->st_shndx != text_shndx)
+                    error("invalid section for opcode (0x%x)", sym->st_shndx);
+#endif
+                gen_code(name, sym->st_value, sym->st_size, outfile, 1);
+            }
+        }
+
+fprintf(outfile,
+"        case INDEX_op_nop:\n"
+"            break;\n"
+"        case INDEX_op_nop1:\n"
+"            opparam_ptr++;\n"
+"            break;\n"
+"        case INDEX_op_nop2:\n"
+"            opparam_ptr += 2;\n"
+"            break;\n"
+"        case INDEX_op_nop3:\n"
+"            opparam_ptr += 3;\n"
+"            break;\n"
+"        default:\n"
+"            goto the_end;\n"
+"        }\n");
+
+
+fprintf(outfile,
+"    }\n"
+" the_end:\n"
+);
+#ifdef HOST_IA64
+    fprintf(outfile,
+	    "    {\n"
+	    "      extern char code_gen_buffer[];\n"
+	    "      ia64_apply_fixes(&gen_code_ptr, ltoff_fixes, "
+	    "(uint64_t) code_gen_buffer + 2*(1<<20), plt_fixes,\n\t\t\t"
+	    "sizeof(plt_target)/sizeof(plt_target[0]),\n\t\t\t"
+	    "plt_target, plt_offset);\n    }\n");
+#endif
+
+/* generate some code patching */ 
+#ifdef HOST_ARM
+fprintf(outfile,
+"if (arm_data_ptr != arm_data_table + ARM_LDR_TABLE_SIZE)\n"
+"    gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, "
+"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 0);\n");
+#endif
+    /* flush instruction cache */
+    fprintf(outfile, "flush_icache_range((unsigned long)gen_code_buf, (unsigned long)gen_code_ptr);\n");
+
+    fprintf(outfile, "return gen_code_ptr -  gen_code_buf;\n");
+    fprintf(outfile, "}\n\n");
+
+    }
+
+    return 0;
+}
+
+void usage(void)
+{
+    printf("dyngen (c) 2003 Fabrice Bellard\n"
+           "usage: dyngen [-o outfile] [-c] objfile\n"
+           "Generate a dynamic code generator from an object file\n"
+           "-c     output enum of operations\n"
+           "-g     output gen_op_xx() functions\n"
+           );
+    exit(1);
+}
+
+int main(int argc, char **argv)
+{
+    int c, out_type;
+    const char *filename, *outfilename;
+    FILE *outfile;
+
+    outfilename = "out.c";
+    out_type = OUT_CODE;
+    for(;;) {
+        c = getopt(argc, argv, "ho:cg");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'h':
+            usage();
+            break;
+        case 'o':
+            outfilename = optarg;
+            break;
+        case 'c':
+            out_type = OUT_INDEX_OP;
+            break;
+        case 'g':
+            out_type = OUT_GEN_OP;
+            break;
+        }
+    }
+    if (optind >= argc)
+        usage();
+    filename = argv[optind];
+    outfile = fopen(outfilename, "w");
+    if (!outfile)
+        error("could not open '%s'", outfilename);
+
+    load_object(filename);
+    gen_file(outfile, out_type);
+    fclose(outfile);
+    return 0;
+}
+
+/* bird added: */
+/*
+ * Local Variables:
+ *  mode: c
+ *  c-file-style: k&r
+ *  c-basic-offset: 4
+ *  tab-width: 4
+ *  indent-tabs-mode: t
+ * End:
+ */
+
Index: /trunk/src/recompiler_new/dyngen.h
===================================================================
--- /trunk/src/recompiler_new/dyngen.h	(revision 13168)
+++ /trunk/src/recompiler_new/dyngen.h	(revision 13168)
@@ -0,0 +1,474 @@
+/*
+ * dyngen helpers
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
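+/* Dummy symbols referenced by the op_xxx() micro operations.  dyngen never
+   executes these; it scans the relocations made against them in the object
+   file to locate the spots where operands (__op_paramN), generated labels
+   (__op_gen_labelN) and chained-jump targets (__op_jmpN) must be patched
+   into the copied code at translation time.  On sparc and arm the label
+   markers are defined as functions, presumably so that branches to them
+   produce relocations of the expected kind.  */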
+int __op_param1, __op_param2, __op_param3;
+#if defined(__sparc__) || defined(__arm__)
+  void __op_gen_label1(){}
+  void __op_gen_label2(){}
+  void __op_gen_label3(){}
+#else
+  int __op_gen_label1, __op_gen_label2, __op_gen_label3;
+#endif
+int __op_jmp0, __op_jmp1, __op_jmp2, __op_jmp3;
+
+#ifdef __i386__
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+}
+#endif
+
+#ifdef __x86_64__
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+}
+#endif
+
+#ifdef __s390__
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+}
+#endif
+
+#ifdef __ia64__
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+    while (start < stop) {
+	asm volatile ("fc %0" :: "r"(start));
+	start += 32;
+    }
+    asm volatile (";;sync.i;;srlz.i;;");
+}
+#endif
+
+#ifdef __powerpc__
+
+#define MIN_CACHE_LINE_SIZE 8 /* conservative value */
+
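+/* Classic PowerPC self-modifying-code sequence: write the modified data
+   cache lines back to memory (dcbst), wait for completion (sync), then
+   invalidate the corresponding instruction cache lines (icbi) and make sure
+   no stale instructions are executed (sync + isync).  */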
+static void inline flush_icache_range(unsigned long start, unsigned long stop)
+{
+    unsigned long p;
+
+    start &= ~(MIN_CACHE_LINE_SIZE - 1);
+    stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1);
+    
+    for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
+        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
+    }
+    asm volatile ("sync" : : : "memory");
+    for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
+        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
+    }
+    asm volatile ("sync" : : : "memory");
+    asm volatile ("isync" : : : "memory");
+}
+#endif
+
+#ifdef __alpha__
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+    asm ("imb");
+}
+#endif
+
+#ifdef __sparc__
+
+static void inline flush_icache_range(unsigned long start, unsigned long stop)
+{
+	unsigned long p;
+
+	p = start & ~(8UL - 1UL);
+	stop = (stop + (8UL - 1UL)) & ~(8UL - 1UL);
+
+	for (; p < stop; p += 8)
+		__asm__ __volatile__("flush\t%0" : : "r" (p));
+}
+
+#endif
+
+#ifdef __arm__
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+    register unsigned long _beg __asm ("a1") = start;
+    register unsigned long _end __asm ("a2") = stop;
+    register unsigned long _flg __asm ("a3") = 0;
+    __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
+}
+#endif
+
+#ifdef __mc68000
+#include <asm/cachectl.h>
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+    cacheflush(start,FLUSH_SCOPE_LINE,FLUSH_CACHE_BOTH,stop-start+16);
+}
+#endif
+
+#ifdef __alpha__
+
+register int gp asm("$29");
+
+static inline void immediate_ldah(void *p, int val) {
+    uint32_t *dest = p;
+    long high = ((val >> 16) + ((val >> 15) & 1)) & 0xffff;
+
+    *dest &= ~0xffff;
+    *dest |= high;
+    *dest |= 31 << 16;
+}
+static inline void immediate_lda(void *dest, int val) {
+    *(uint16_t *) dest = val;
+}
+void fix_bsr(void *p, int offset) {
+    uint32_t *dest = p;
+    *dest &= ~((1 << 21) - 1);
+    *dest |= (offset >> 2) & ((1 << 21) - 1);
+}
+
+#endif /* __alpha__ */
+
+#ifdef __arm__
+
+#define ARM_LDR_TABLE_SIZE 1024
+
+typedef struct LDREntry {
+    uint8_t *ptr;
+    uint32_t *data_ptr;
+    unsigned type:2;
+} LDREntry;
+
+static LDREntry arm_ldr_table[ARM_LDR_TABLE_SIZE];
+static uint32_t arm_data_table[ARM_LDR_TABLE_SIZE];
+
+extern char exec_loop;
+
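+/* Patch the signed 24-bit word offset of an ARM branch at 'ptr' so that it
+   targets 'val'.  The computed offset is added to the one already encoded in
+   the template 'insn', so the usual -8 PC bias is expected to be part of the
+   template (e.g. 0xeafffffe as used by arm_flush_ldr below).  */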
+static inline void arm_reloc_pc24(uint32_t *ptr, uint32_t insn, int val)
+{
+    *ptr = (insn & ~0xffffff) | ((insn + ((val - (int)ptr) >> 2)) & 0xffffff);
+}
+
+static uint8_t *arm_flush_ldr(uint8_t *gen_code_ptr,
+                              LDREntry *ldr_start, LDREntry *ldr_end, 
+                              uint32_t *data_start, uint32_t *data_end, 
+                              int gen_jmp)
+{
+    LDREntry *le;
+    uint32_t *ptr;
+    int offset, data_size, target;
+    uint8_t *data_ptr;
+    uint32_t insn;
+    uint32_t mask;
+ 
+    data_size = (data_end - data_start) << 2;
+
+    if (gen_jmp) {
+        /* generate branch to skip the data */
+        if (data_size == 0)
+            return gen_code_ptr;
+        target = (long)gen_code_ptr + data_size + 4;
+        arm_reloc_pc24((uint32_t *)gen_code_ptr, 0xeafffffe, target);
+        gen_code_ptr += 4;
+    }
+   
+    /* copy the data */
+    data_ptr = gen_code_ptr;
+    memcpy(gen_code_ptr, data_start, data_size);
+    gen_code_ptr += data_size;
+    
+    /* patch the ldr to point to the data */
+    for(le = ldr_start; le < ldr_end; le++) {
+        ptr = (uint32_t *)le->ptr;
+        offset = ((unsigned long)(le->data_ptr) - (unsigned long)data_start) + 
+            (unsigned long)data_ptr - 
+            (unsigned long)ptr - 8;
+        if (offset < 0) {
+            fprintf(stderr, "Negative constant pool offset\n");
+            abort();
+        }
+        switch (le->type) {
+          case 0: /* ldr */
+            mask = ~0x00800fff;
+            if (offset >= 4096) {
+                fprintf(stderr, "Bad ldr offset\n");
+                abort();
+            }
+            break;
+          case 1: /* ldc */
+            mask = ~0x008000ff;
+            if (offset >= 1024 ) {
+                fprintf(stderr, "Bad ldc offset\n");
+                abort();
+            }
+            break;
+          case 2: /* add */
+            mask = ~0xfff;
+            if (offset >= 1024 ) {
+                fprintf(stderr, "Bad add offset\n");
+                abort();
+            }
+            break;
+          default:
+            fprintf(stderr, "Bad pc relative fixup\n");
+            abort();
+          }
+        insn = *ptr & mask;
+        switch (le->type) {
+          case 0: /* ldr */
+            insn |= offset | 0x00800000;
+            break;
+          case 1: /* ldc */
+            insn |= (offset >> 2) | 0x00800000;
+            break;
+          case 2: /* add */
+            insn |= (offset >> 2) | 0xf00;
+            break;
+          }
+        *ptr = insn;
+    }
+    return gen_code_ptr;
+}
+
+#endif /* __arm__ */
+
+#ifdef __ia64
+
+
+/* Patch instruction with "val" where "mask" has 1 bits. */
+static inline void ia64_patch (uint64_t insn_addr, uint64_t mask, uint64_t val)
+{
+    uint64_t m0, m1, v0, v1, b0, b1, *b = (uint64_t *) (insn_addr & -16);
+#   define insn_mask ((1UL << 41) - 1)
+    unsigned long shift;
+
+    b0 = b[0]; b1 = b[1];
+    shift = 5 + 41 * (insn_addr % 16); /* 5 template, 3 x 41-bit insns */
+    if (shift >= 64) {
+	m1 = mask << (shift - 64);
+	v1 = val << (shift - 64);
+    } else {
+	m0 = mask << shift; m1 = mask >> (64 - shift);
+	v0 = val  << shift; v1 = val >> (64 - shift);
+	b[0] = (b0 & ~m0) | (v0 & m0);
+    }
+    b[1] = (b1 & ~m1) | (v1 & m1);
+}
+
+static inline void ia64_patch_imm60 (uint64_t insn_addr, uint64_t val)
+{
+	ia64_patch(insn_addr,
+		   0x011ffffe000UL,
+		   (  ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
+		    | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */));
+	ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18);
+}
+
+static inline void ia64_imm64 (void *insn, uint64_t val)
+{
+    /* Ignore the slot number of the relocation; GCC and Intel
+       toolchains differed for some time on whether IMM64 relocs are
+       against slot 1 (Intel) or slot 2 (GCC).  */
+    uint64_t insn_addr = (uint64_t) insn & ~3UL;
+
+    ia64_patch(insn_addr + 2,
+	       0x01fffefe000UL,
+	       (  ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
+		| ((val & 0x0000000000200000UL) <<  0) /* bit 21 -> 21 */
+		| ((val & 0x00000000001f0000UL) <<  6) /* bit 16 -> 22 */
+		| ((val & 0x000000000000ff80UL) << 20) /* bit  7 -> 27 */
+		| ((val & 0x000000000000007fUL) << 13) /* bit  0 -> 13 */)
+	    );
+    ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22);
+}
+
+static inline void ia64_imm60b (void *insn, uint64_t val)
+{
+    /* Ignore the slot number of the relocation; GCC and Intel
+       toolchains differed for some time on whether IMM64 relocs are
+       against slot 1 (Intel) or slot 2 (GCC).  */
+    uint64_t insn_addr = (uint64_t) insn & ~3UL;
+
+    if (val + ((uint64_t) 1 << 59) >= (1UL << 60))
+	fprintf(stderr, "%s: value %ld out of IMM60 range\n",
+		__FUNCTION__, (int64_t) val);
+    ia64_patch_imm60(insn_addr + 2, val);
+}
+
+static inline void ia64_imm22 (void *insn, uint64_t val)
+{
+    if (val + (1 << 21) >= (1 << 22))
+	fprintf(stderr, "%s: value %li out of IMM22 range\n",
+		__FUNCTION__, (int64_t)val);
+    ia64_patch((uint64_t) insn, 0x01fffcfe000UL,
+	       (  ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
+		| ((val & 0x1f0000UL) <<  6) /* bit 16 -> 22 */
+		| ((val & 0x00ff80UL) << 20) /* bit  7 -> 27 */
+		| ((val & 0x00007fUL) << 13) /* bit  0 -> 13 */));
+}
+
+/* Like ia64_imm22(), but also clear bits 20-21.  For addl, this has
+   the effect of turning "addl rX=imm22,rY" into "addl
+   rX=imm22,r0".  */
+static inline void ia64_imm22_r0 (void *insn, uint64_t val)
+{
+    if (val + (1 << 21) >= (1 << 22))
+	fprintf(stderr, "%s: value %li out of IMM22 range\n",
+		__FUNCTION__, (int64_t)val);
+    ia64_patch((uint64_t) insn, 0x01fffcfe000UL | (0x3UL << 20),
+	       (  ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
+		| ((val & 0x1f0000UL) <<  6) /* bit 16 -> 22 */
+		| ((val & 0x00ff80UL) << 20) /* bit  7 -> 27 */
+		| ((val & 0x00007fUL) << 13) /* bit  0 -> 13 */));
+}
+
+static inline void ia64_imm21b (void *insn, uint64_t val)
+{
+    if (val + (1 << 20) >= (1 << 21))
+	fprintf(stderr, "%s: value %li out of IMM21b range\n",
+		__FUNCTION__, (int64_t)val);
+    ia64_patch((uint64_t) insn, 0x11ffffe000UL,
+	       (  ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
+		| ((val & 0x0fffffUL) << 13) /* bit  0 -> 13 */));
+}
+
+static inline void ia64_nop_b (void *insn)
+{
+    ia64_patch((uint64_t) insn, (1UL << 41) - 1, 2UL << 37);
+}
+
+static inline void ia64_ldxmov(void *insn, uint64_t val)
+{
+    if (val + (1 << 21) < (1 << 22))
+	ia64_patch((uint64_t) insn, 0x1fff80fe000UL, 8UL << 37);
+}
+
+static inline int ia64_patch_ltoff(void *insn, uint64_t val,
+				   int relaxable)
+{
+    if (relaxable && (val + (1 << 21) < (1 << 22))) {
+	ia64_imm22_r0(insn, val);
+	return 0;
+    }
+    return 1;
+}
+
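+/* On ia64 the branch and @ltoff relocations cannot always be resolved while
+   the ops are being copied, so IA64_PLT() and IA64_LTOFF() below just record
+   a fixup (allocated with alloca, i.e. live for the duration of the
+   generated dyngen_code() call).  ia64_apply_fixes() then runs once at the
+   end: it appends a minimal PLT (movl gp + brl bundles) and a small GOT to
+   the generated code and patches the recorded instructions to point at
+   them.  */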
+struct ia64_fixup {
+    struct ia64_fixup *next;
+    void *addr;			/* address that needs to be patched */
+    long value;
+};
+
+#define IA64_PLT(insn, plt_index)			\
+do {							\
+    struct ia64_fixup *fixup = alloca(sizeof(*fixup));	\
+    fixup->next = plt_fixes;				\
+    plt_fixes = fixup;					\
+    fixup->addr = (insn);				\
+    fixup->value = (plt_index);				\
+    plt_offset[(plt_index)] = 1;			\
+} while (0)
+
+#define IA64_LTOFF(insn, val, relaxable)			\
+do {								\
+    if (ia64_patch_ltoff(insn, val, relaxable)) {		\
+	struct ia64_fixup *fixup = alloca(sizeof(*fixup));	\
+	fixup->next = ltoff_fixes;				\
+	ltoff_fixes = fixup;					\
+	fixup->addr = (insn);					\
+	fixup->value = (val);					\
+    }								\
+} while (0)
+
+static inline void ia64_apply_fixes (uint8_t **gen_code_pp,
+				     struct ia64_fixup *ltoff_fixes,
+				     uint64_t gp,
+				     struct ia64_fixup *plt_fixes,
+				     int num_plts,
+				     unsigned long *plt_target,
+				     unsigned int *plt_offset)
+{
+    static const uint8_t plt_bundle[] = {
+	0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,	/* nop 0; movl r1=GP */
+	0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60,
+
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,	/* nop 0; brl IP */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0
+    };
+    uint8_t *gen_code_ptr = *gen_code_pp, *plt_start, *got_start, *vp;
+    struct ia64_fixup *fixup;
+    unsigned int offset = 0;
+    struct fdesc {
+	long ip;
+	long gp;
+    } *fdesc;
+    int i;
+
+    if (plt_fixes) {
+	plt_start = gen_code_ptr;
+
+	for (i = 0; i < num_plts; ++i) {
+	    if (plt_offset[i]) {
+		plt_offset[i] = offset;
+		offset += sizeof(plt_bundle);
+
+		fdesc = (struct fdesc *) plt_target[i];
+		memcpy(gen_code_ptr, plt_bundle, sizeof(plt_bundle));
+		ia64_imm64 (gen_code_ptr + 0x02, fdesc->gp);
+		ia64_imm60b(gen_code_ptr + 0x12,
+			    (fdesc->ip - (long) (gen_code_ptr + 0x10)) >> 4);
+		gen_code_ptr += sizeof(plt_bundle);
+	    }
+	}
+
+	for (fixup = plt_fixes; fixup; fixup = fixup->next)
+	    ia64_imm21b(fixup->addr,
+			((long) plt_start + plt_offset[fixup->value]
+			 - ((long) fixup->addr & ~0xf)) >> 4);
+    }
+
+    got_start = gen_code_ptr;
+
+    /* First, create the GOT: */
+    for (fixup = ltoff_fixes; fixup; fixup = fixup->next) {
+	/* first check if we already have this value in the GOT: */
+	for (vp = got_start; vp < gen_code_ptr; ++vp)
+	    if (*(uint64_t *) vp == fixup->value)
+		break;
+	if (vp == gen_code_ptr) {
+	    /* Nope, we need to put the value in the GOT: */
+	    *(uint64_t *) vp = fixup->value;
+	    gen_code_ptr += 8;
+	}
+	ia64_imm22(fixup->addr, (long) vp - gp);
+    }
+    /* Keep code ptr aligned. */
+    if ((long) gen_code_ptr & 15)
+	gen_code_ptr += 8;
+    *gen_code_pp = gen_code_ptr;
+}
+
+#endif
Index: /trunk/src/recompiler_new/elf.h
===================================================================
--- /trunk/src/recompiler_new/elf.h	(revision 13168)
+++ /trunk/src/recompiler_new/elf.h	(revision 13168)
@@ -0,0 +1,1164 @@
+#ifndef _QEMU_ELF_H
+#define _QEMU_ELF_H
+
+#include <inttypes.h>
+
+/* 32-bit ELF base types. */
+typedef uint32_t Elf32_Addr;
+typedef uint16_t Elf32_Half;
+typedef uint32_t Elf32_Off;
+typedef int32_t  Elf32_Sword;
+typedef uint32_t Elf32_Word;
+
+/* 64-bit ELF base types. */
+typedef uint64_t Elf64_Addr;
+typedef uint16_t Elf64_Half;
+typedef int16_t	 Elf64_SHalf;
+typedef uint64_t Elf64_Off;
+typedef int32_t	 Elf64_Sword;
+typedef uint32_t Elf64_Word;
+typedef uint64_t Elf64_Xword;
+typedef int64_t  Elf64_Sxword;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL    0
+#define PT_LOAD    1
+#define PT_DYNAMIC 2
+#define PT_INTERP  3
+#define PT_NOTE    4
+#define PT_SHLIB   5
+#define PT_PHDR    6
+#define PT_LOPROC  0x70000000
+#define PT_HIPROC  0x7fffffff
+#define PT_MIPS_REGINFO		0x70000000
+#define PT_MIPS_OPTIONS		0x70000001
+
+/* Flags in the e_flags field of the header */
+/* MIPS architecture level. */
+#define EF_MIPS_ARCH_1		0x00000000	/* -mips1 code.  */
+#define EF_MIPS_ARCH_2		0x10000000	/* -mips2 code.  */
+#define EF_MIPS_ARCH_3		0x20000000	/* -mips3 code.  */
+#define EF_MIPS_ARCH_4		0x30000000	/* -mips4 code.  */
+#define EF_MIPS_ARCH_5		0x40000000	/* -mips5 code.  */
+#define EF_MIPS_ARCH_32		0x50000000	/* MIPS32 code.  */
+#define EF_MIPS_ARCH_64		0x60000000	/* MIPS64 code.  */
+
+/* The ABI of a file. */
+#define EF_MIPS_ABI_O32		0x00001000	/* O32 ABI.  */
+#define EF_MIPS_ABI_O64		0x00002000	/* O32 extended for 64 bit.  */
+
+#define EF_MIPS_NOREORDER 0x00000001
+#define EF_MIPS_PIC       0x00000002
+#define EF_MIPS_CPIC      0x00000004
+#define EF_MIPS_ABI2		0x00000020
+#define EF_MIPS_OPTIONS_FIRST	0x00000080
+#define EF_MIPS_32BITMODE	0x00000100
+#define EF_MIPS_ABI		0x0000f000
+#define EF_MIPS_ARCH      0xf0000000
+
+/* These constants define the different elf file types */
+#define ET_NONE   0
+#define ET_REL    1
+#define ET_EXEC   2
+#define ET_DYN    3
+#define ET_CORE   4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* These constants define the various ELF target machines */
+#define EM_NONE  0
+#define EM_M32   1
+#define EM_SPARC 2
+#define EM_386   3
+#define EM_68K   4
+#define EM_88K   5
+#define EM_486   6   /* Perhaps disused */
+#define EM_860   7
+
+#define EM_MIPS		8	/* MIPS R3000 (officially, big-endian only) */
+
+#define EM_MIPS_RS4_BE 10	/* MIPS R4000 big-endian */
+
+#define EM_PARISC      15	/* HPPA */
+
+#define EM_SPARC32PLUS 18	/* Sun's "v8plus" */
+
+#define EM_PPC	       20	/* PowerPC */
+#define EM_PPC64       21       /* PowerPC64 */
+
+#define EM_ARM		40		/* ARM */
+
+#define EM_SH	       42	/* SuperH */
+
+#define EM_SPARCV9     43	/* SPARC v9 64-bit */
+
+#define EM_IA_64	50	/* HP/Intel IA-64 */
+
+#define EM_X86_64	62	/* AMD x86-64 */
+
+#define EM_S390		22	/* IBM S/390 */
+
+#define EM_CRIS         76      /* Axis Communications 32-bit embedded processor */
+
+#define EM_V850		87	/* NEC v850 */
+
+#define EM_H8_300H      47      /* Hitachi H8/300H */
+#define EM_H8S          48      /* Hitachi H8S     */
+
+/*
+ * This is an interim value that we will use until the committee comes
+ * up with a final number.
+ */
+#define EM_ALPHA	0x9026
+
+/* Bogus old v850 magic number, used by old tools.  */
+#define EM_CYGNUS_V850	0x9080
+
+/*
+ * This is the old interim value for S/390 architecture
+ */
+#define EM_S390_OLD     0xA390
+
+/* This is the info that is needed to parse the dynamic section of the file */
+#define DT_NULL		0
+#define DT_NEEDED	1
+#define DT_PLTRELSZ	2
+#define DT_PLTGOT	3
+#define DT_HASH		4
+#define DT_STRTAB	5
+#define DT_SYMTAB	6
+#define DT_RELA		7
+#define DT_RELASZ	8
+#define DT_RELAENT	9
+#define DT_STRSZ	10
+#define DT_SYMENT	11
+#define DT_INIT		12
+#define DT_FINI		13
+#define DT_SONAME	14
+#define DT_RPATH 	15
+#define DT_SYMBOLIC	16
+#define DT_REL	        17
+#define DT_RELSZ	18
+#define DT_RELENT	19
+#define DT_PLTREL	20
+#define DT_DEBUG	21
+#define DT_TEXTREL	22
+#define DT_JMPREL	23
+#define DT_LOPROC	0x70000000
+#define DT_HIPROC	0x7fffffff
+#define DT_MIPS_RLD_VERSION	0x70000001
+#define DT_MIPS_TIME_STAMP	0x70000002
+#define DT_MIPS_ICHECKSUM	0x70000003
+#define DT_MIPS_IVERSION	0x70000004
+#define DT_MIPS_FLAGS		0x70000005
+  #define RHF_NONE		  0
+  #define RHF_HARDWAY		  1
+  #define RHF_NOTPOT		  2
+#define DT_MIPS_BASE_ADDRESS	0x70000006
+#define DT_MIPS_CONFLICT	0x70000008
+#define DT_MIPS_LIBLIST		0x70000009
+#define DT_MIPS_LOCAL_GOTNO	0x7000000a
+#define DT_MIPS_CONFLICTNO	0x7000000b
+#define DT_MIPS_LIBLISTNO	0x70000010
+#define DT_MIPS_SYMTABNO	0x70000011
+#define DT_MIPS_UNREFEXTNO	0x70000012
+#define DT_MIPS_GOTSYM		0x70000013
+#define DT_MIPS_HIPAGENO	0x70000014
+#define DT_MIPS_RLD_MAP		0x70000016
+
+/* This info is needed when parsing the symbol table */
+#define STB_LOCAL  0
+#define STB_GLOBAL 1
+#define STB_WEAK   2
+
+#define STT_NOTYPE  0
+#define STT_OBJECT  1
+#define STT_FUNC    2
+#define STT_SECTION 3
+#define STT_FILE    4
+
+#define ELF_ST_BIND(x)		((x) >> 4)
+#define ELF_ST_TYPE(x)		(((unsigned int) x) & 0xf)
+#define ELF32_ST_BIND(x)	ELF_ST_BIND(x)
+#define ELF32_ST_TYPE(x)	ELF_ST_TYPE(x)
+#define ELF64_ST_BIND(x)	ELF_ST_BIND(x)
+#define ELF64_ST_TYPE(x)	ELF_ST_TYPE(x)
+
+/* Symbolic values for the entries in the auxiliary table
+   put on the initial stack */
+#define AT_NULL   0	/* end of vector */
+#define AT_IGNORE 1	/* entry should be ignored */
+#define AT_EXECFD 2	/* file descriptor of program */
+#define AT_PHDR   3	/* program headers for program */
+#define AT_PHENT  4	/* size of program header entry */
+#define AT_PHNUM  5	/* number of program headers */
+#define AT_PAGESZ 6	/* system page size */
+#define AT_BASE   7	/* base address of interpreter */
+#define AT_FLAGS  8	/* flags */
+#define AT_ENTRY  9	/* entry point of program */
+#define AT_NOTELF 10	/* program is not ELF */
+#define AT_UID    11	/* real uid */
+#define AT_EUID   12	/* effective uid */
+#define AT_GID    13	/* real gid */
+#define AT_EGID   14	/* effective gid */
+#define AT_PLATFORM 15  /* string identifying CPU for optimizations */
+#define AT_HWCAP  16    /* arch dependent hints at CPU capabilities */
+#define AT_CLKTCK 17	/* frequency at which times() increments */
+
+typedef struct dynamic{
+  Elf32_Sword d_tag;
+  union{
+    Elf32_Sword	d_val;
+    Elf32_Addr	d_ptr;
+  } d_un;
+} Elf32_Dyn;
+
+typedef struct {
+  Elf64_Sxword d_tag;		/* entry tag value */
+  union {
+    Elf64_Xword d_val;
+    Elf64_Addr d_ptr;
+  } d_un;
+} Elf64_Dyn;
+
+/* The following are used with relocations */
+#define ELF32_R_SYM(x) ((x) >> 8)
+#define ELF32_R_TYPE(x) ((x) & 0xff)
+
+#define ELF64_R_SYM(i)			((i) >> 32)
+#define ELF64_R_TYPE(i)			((i) & 0xffffffff)
+#define ELF64_R_TYPE_DATA(i)            (((ELF64_R_TYPE(i) >> 8) ^ 0x00800000) - 0x00800000)
+
+#define R_386_NONE	0
+#define R_386_32	1
+#define R_386_PC32	2
+#define R_386_GOT32	3
+#define R_386_PLT32	4
+#define R_386_COPY	5
+#define R_386_GLOB_DAT	6
+#define R_386_JMP_SLOT	7
+#define R_386_RELATIVE	8
+#define R_386_GOTOFF	9
+#define R_386_GOTPC	10
+#define R_386_NUM	11
+
+#define R_MIPS_NONE		0
+#define R_MIPS_16		1
+#define R_MIPS_32		2
+#define R_MIPS_REL32		3
+#define R_MIPS_26		4
+#define R_MIPS_HI16		5
+#define R_MIPS_LO16		6
+#define R_MIPS_GPREL16		7
+#define R_MIPS_LITERAL		8
+#define R_MIPS_GOT16		9
+#define R_MIPS_PC16		10
+#define R_MIPS_CALL16		11
+#define R_MIPS_GPREL32		12
+/* The remaining relocs are defined on Irix, although they are not
+   in the MIPS ELF ABI.  */
+#define R_MIPS_UNUSED1		13
+#define R_MIPS_UNUSED2		14
+#define R_MIPS_UNUSED3		15
+#define R_MIPS_SHIFT5		16
+#define R_MIPS_SHIFT6		17
+#define R_MIPS_64		18
+#define R_MIPS_GOT_DISP		19
+#define R_MIPS_GOT_PAGE		20
+#define R_MIPS_GOT_OFST		21
+/*
+ * The following two relocation types are specified in the MIPS ABI
+ * conformance guide version 1.2 but not yet in the psABI.
+ */
+#define R_MIPS_GOTHI16		22
+#define R_MIPS_GOTLO16		23
+#define R_MIPS_SUB		24
+#define R_MIPS_INSERT_A		25
+#define R_MIPS_INSERT_B		26
+#define R_MIPS_DELETE		27
+#define R_MIPS_HIGHER		28
+#define R_MIPS_HIGHEST		29
+/*
+ * The following two relocation types are specified in the MIPS ABI
+ * conformance guide version 1.2 but not yet in the psABI.
+ */
+#define R_MIPS_CALLHI16		30
+#define R_MIPS_CALLLO16		31
+/*
+ * This range is reserved for vendor specific relocations.
+ */
+#define R_MIPS_LOVENDOR		100
+#define R_MIPS_HIVENDOR		127
+
+
+/*
+ * Sparc ELF relocation types
+ */
+#define	R_SPARC_NONE		0
+#define	R_SPARC_8		1
+#define	R_SPARC_16		2
+#define	R_SPARC_32		3
+#define	R_SPARC_DISP8		4
+#define	R_SPARC_DISP16		5
+#define	R_SPARC_DISP32		6
+#define	R_SPARC_WDISP30		7
+#define	R_SPARC_WDISP22		8
+#define	R_SPARC_HI22		9
+#define	R_SPARC_22		10
+#define	R_SPARC_13		11
+#define	R_SPARC_LO10		12
+#define	R_SPARC_GOT10		13
+#define	R_SPARC_GOT13		14
+#define	R_SPARC_GOT22		15
+#define	R_SPARC_PC10		16
+#define	R_SPARC_PC22		17
+#define	R_SPARC_WPLT30		18
+#define	R_SPARC_COPY		19
+#define	R_SPARC_GLOB_DAT	20
+#define	R_SPARC_JMP_SLOT	21
+#define	R_SPARC_RELATIVE	22
+#define	R_SPARC_UA32		23
+#define R_SPARC_PLT32		24
+#define R_SPARC_HIPLT22		25
+#define R_SPARC_LOPLT10		26
+#define R_SPARC_PCPLT32		27
+#define R_SPARC_PCPLT22		28
+#define R_SPARC_PCPLT10		29
+#define R_SPARC_10		30
+#define R_SPARC_11		31
+#define R_SPARC_64		32
+#define R_SPARC_OLO10           33
+#define R_SPARC_WDISP16		40
+#define R_SPARC_WDISP19		41
+#define R_SPARC_7		43
+#define R_SPARC_5		44
+#define R_SPARC_6		45
+
+/* Bits present in AT_HWCAP, primarily for Sparc32.  */
+
+#define HWCAP_SPARC_FLUSH       1    /* CPU supports flush instruction. */
+#define HWCAP_SPARC_STBAR       2
+#define HWCAP_SPARC_SWAP        4
+#define HWCAP_SPARC_MULDIV      8
+#define HWCAP_SPARC_V9		16
+#define HWCAP_SPARC_ULTRA3	32
+
+/*
+ * 68k ELF relocation types
+ */
+#define R_68K_NONE	0
+#define R_68K_32	1
+#define R_68K_16	2
+#define R_68K_8		3
+#define R_68K_PC32	4
+#define R_68K_PC16	5
+#define R_68K_PC8	6
+#define R_68K_GOT32	7
+#define R_68K_GOT16	8
+#define R_68K_GOT8	9
+#define R_68K_GOT32O	10
+#define R_68K_GOT16O	11
+#define R_68K_GOT8O	12
+#define R_68K_PLT32	13
+#define R_68K_PLT16	14
+#define R_68K_PLT8	15
+#define R_68K_PLT32O	16
+#define R_68K_PLT16O	17
+#define R_68K_PLT8O	18
+#define R_68K_COPY	19
+#define R_68K_GLOB_DAT	20
+#define R_68K_JMP_SLOT	21
+#define R_68K_RELATIVE	22
+
+/*
+ * Alpha ELF relocation types
+ */
+#define R_ALPHA_NONE            0       /* No reloc */
+#define R_ALPHA_REFLONG         1       /* Direct 32 bit */
+#define R_ALPHA_REFQUAD         2       /* Direct 64 bit */
+#define R_ALPHA_GPREL32         3       /* GP relative 32 bit */
+#define R_ALPHA_LITERAL         4       /* GP relative 16 bit w/optimization */
+#define R_ALPHA_LITUSE          5       /* Optimization hint for LITERAL */
+#define R_ALPHA_GPDISP          6       /* Add displacement to GP */
+#define R_ALPHA_BRADDR          7       /* PC+4 relative 23 bit shifted */
+#define R_ALPHA_HINT            8       /* PC+4 relative 16 bit shifted */
+#define R_ALPHA_SREL16          9       /* PC relative 16 bit */
+#define R_ALPHA_SREL32          10      /* PC relative 32 bit */
+#define R_ALPHA_SREL64          11      /* PC relative 64 bit */
+#define R_ALPHA_GPRELHIGH       17      /* GP relative 32 bit, high 16 bits */
+#define R_ALPHA_GPRELLOW        18      /* GP relative 32 bit, low 16 bits */
+#define R_ALPHA_GPREL16         19      /* GP relative 16 bit */
+#define R_ALPHA_COPY            24      /* Copy symbol at runtime */
+#define R_ALPHA_GLOB_DAT        25      /* Create GOT entry */
+#define R_ALPHA_JMP_SLOT        26      /* Create PLT entry */
+#define R_ALPHA_RELATIVE        27      /* Adjust by program base */
+#define R_ALPHA_BRSGP		28
+#define R_ALPHA_TLSGD           29
+#define R_ALPHA_TLS_LDM         30
+#define R_ALPHA_DTPMOD64        31
+#define R_ALPHA_GOTDTPREL       32
+#define R_ALPHA_DTPREL64        33
+#define R_ALPHA_DTPRELHI        34
+#define R_ALPHA_DTPRELLO        35
+#define R_ALPHA_DTPREL16        36
+#define R_ALPHA_GOTTPREL        37
+#define R_ALPHA_TPREL64         38
+#define R_ALPHA_TPRELHI         39
+#define R_ALPHA_TPRELLO         40
+#define R_ALPHA_TPREL16         41
+
+#define SHF_ALPHA_GPREL		0x10000000
+
+
+/* PowerPC relocations defined by the ABIs */
+#define R_PPC_NONE		0
+#define R_PPC_ADDR32		1	/* 32bit absolute address */
+#define R_PPC_ADDR24		2	/* 26bit address, 2 bits ignored.  */
+#define R_PPC_ADDR16		3	/* 16bit absolute address */
+#define R_PPC_ADDR16_LO		4	/* lower 16bit of absolute address */
+#define R_PPC_ADDR16_HI		5	/* high 16bit of absolute address */
+#define R_PPC_ADDR16_HA		6	/* adjusted high 16bit */
+#define R_PPC_ADDR14		7	/* 16bit address, 2 bits ignored */
+#define R_PPC_ADDR14_BRTAKEN	8
+#define R_PPC_ADDR14_BRNTAKEN	9
+#define R_PPC_REL24		10	/* PC relative 26 bit */
+#define R_PPC_REL14		11	/* PC relative 16 bit */
+#define R_PPC_REL14_BRTAKEN	12
+#define R_PPC_REL14_BRNTAKEN	13
+#define R_PPC_GOT16		14
+#define R_PPC_GOT16_LO		15
+#define R_PPC_GOT16_HI		16
+#define R_PPC_GOT16_HA		17
+#define R_PPC_PLTREL24		18
+#define R_PPC_COPY		19
+#define R_PPC_GLOB_DAT		20
+#define R_PPC_JMP_SLOT		21
+#define R_PPC_RELATIVE		22
+#define R_PPC_LOCAL24PC		23
+#define R_PPC_UADDR32		24
+#define R_PPC_UADDR16		25
+#define R_PPC_REL32		26
+#define R_PPC_PLT32		27
+#define R_PPC_PLTREL32		28
+#define R_PPC_PLT16_LO		29
+#define R_PPC_PLT16_HI		30
+#define R_PPC_PLT16_HA		31
+#define R_PPC_SDAREL16		32
+#define R_PPC_SECTOFF		33
+#define R_PPC_SECTOFF_LO	34
+#define R_PPC_SECTOFF_HI	35
+#define R_PPC_SECTOFF_HA	36
+/* Keep this the last entry.  */
+#define R_PPC_NUM		37
+
+/* ARM specific declarations */
+
+/* Processor specific flags for the ELF header e_flags field.  */
+#define EF_ARM_RELEXEC     0x01
+#define EF_ARM_HASENTRY    0x02
+#define EF_ARM_INTERWORK   0x04
+#define EF_ARM_APCS_26     0x08
+#define EF_ARM_APCS_FLOAT  0x10
+#define EF_ARM_PIC         0x20
+#define EF_ALIGN8          0x40		/* 8-bit structure alignment is in use */
+#define EF_NEW_ABI         0x80
+#define EF_OLD_ABI         0x100
+
+/* Additional symbol types for Thumb */
+#define STT_ARM_TFUNC      0xd
+
+/* ARM-specific values for sh_flags */
+#define SHF_ARM_ENTRYSECT  0x10000000   /* Section contains an entry point */
+#define SHF_ARM_COMDEF     0x80000000   /* Section may be multiply defined
+					   in the input to a link step */
+
+/* ARM-specific program header flags */
+#define PF_ARM_SB          0x10000000   /* Segment contains the location
+					   addressed by the static base */
+
+/* ARM relocs.  */
+#define R_ARM_NONE		0	/* No reloc */
+#define R_ARM_PC24		1	/* PC relative 26 bit branch */
+#define R_ARM_ABS32		2	/* Direct 32 bit  */
+#define R_ARM_REL32		3	/* PC relative 32 bit */
+#define R_ARM_PC13		4
+#define R_ARM_ABS16		5	/* Direct 16 bit */
+#define R_ARM_ABS12		6	/* Direct 12 bit */
+#define R_ARM_THM_ABS5		7
+#define R_ARM_ABS8		8	/* Direct 8 bit */
+#define R_ARM_SBREL32		9
+#define R_ARM_THM_PC22		10
+#define R_ARM_THM_PC8		11
+#define R_ARM_AMP_VCALL9	12
+#define R_ARM_SWI24		13
+#define R_ARM_THM_SWI8		14
+#define R_ARM_XPC25		15
+#define R_ARM_THM_XPC22		16
+#define R_ARM_COPY		20	/* Copy symbol at runtime */
+#define R_ARM_GLOB_DAT		21	/* Create GOT entry */
+#define R_ARM_JUMP_SLOT		22	/* Create PLT entry */
+#define R_ARM_RELATIVE		23	/* Adjust by program base */
+#define R_ARM_GOTOFF		24	/* 32 bit offset to GOT */
+#define R_ARM_GOTPC		25	/* 32 bit PC relative offset to GOT */
+#define R_ARM_GOT32		26	/* 32 bit GOT entry */
+#define R_ARM_PLT32		27	/* 32 bit PLT address */
+#define R_ARM_CALL              28
+#define R_ARM_JUMP24            29
+#define R_ARM_GNU_VTENTRY	100
+#define R_ARM_GNU_VTINHERIT	101
+#define R_ARM_THM_PC11		102	/* thumb unconditional branch */
+#define R_ARM_THM_PC9		103	/* thumb conditional branch */
+#define R_ARM_RXPC25		249
+#define R_ARM_RSBREL32		250
+#define R_ARM_THM_RPC22		251
+#define R_ARM_RREL32		252
+#define R_ARM_RABS22		253
+#define R_ARM_RPC24		254
+#define R_ARM_RBASE		255
+/* Keep this the last entry.  */
+#define R_ARM_NUM		256
+
+/* s390 relocations defined by the ABIs */
+#define R_390_NONE		0	/* No reloc.  */
+#define R_390_8			1	/* Direct 8 bit.  */
+#define R_390_12		2	/* Direct 12 bit.  */
+#define R_390_16		3	/* Direct 16 bit.  */
+#define R_390_32		4	/* Direct 32 bit.  */
+#define R_390_PC32		5	/* PC relative 32 bit.	*/
+#define R_390_GOT12		6	/* 12 bit GOT offset.  */
+#define R_390_GOT32		7	/* 32 bit GOT offset.  */
+#define R_390_PLT32		8	/* 32 bit PC relative PLT address.  */
+#define R_390_COPY		9	/* Copy symbol at runtime.  */
+#define R_390_GLOB_DAT		10	/* Create GOT entry.  */
+#define R_390_JMP_SLOT		11	/* Create PLT entry.  */
+#define R_390_RELATIVE		12	/* Adjust by program base.  */
+#define R_390_GOTOFF32		13	/* 32 bit offset to GOT.	 */
+#define R_390_GOTPC		14	/* 32 bit PC rel. offset to GOT.  */
+#define R_390_GOT16		15	/* 16 bit GOT offset.  */
+#define R_390_PC16		16	/* PC relative 16 bit.	*/
+#define R_390_PC16DBL		17	/* PC relative 16 bit shifted by 1.  */
+#define R_390_PLT16DBL		18	/* 16 bit PC rel. PLT shifted by 1.  */
+#define R_390_PC32DBL		19	/* PC relative 32 bit shifted by 1.  */
+#define R_390_PLT32DBL		20	/* 32 bit PC rel. PLT shifted by 1.  */
+#define R_390_GOTPCDBL		21	/* 32 bit PC rel. GOT shifted by 1.  */
+#define R_390_64		22	/* Direct 64 bit.  */
+#define R_390_PC64		23	/* PC relative 64 bit.	*/
+#define R_390_GOT64		24	/* 64 bit GOT offset.  */
+#define R_390_PLT64		25	/* 64 bit PC relative PLT address.  */
+#define R_390_GOTENT		26	/* 32 bit PC rel. to GOT entry >> 1. */
+#define R_390_GOTOFF16		27	/* 16 bit offset to GOT. */
+#define R_390_GOTOFF64		28	/* 64 bit offset to GOT. */
+#define R_390_GOTPLT12		29	/* 12 bit offset to jump slot.	*/
+#define R_390_GOTPLT16		30	/* 16 bit offset to jump slot.	*/
+#define R_390_GOTPLT32		31	/* 32 bit offset to jump slot.	*/
+#define R_390_GOTPLT64		32	/* 64 bit offset to jump slot.	*/
+#define R_390_GOTPLTENT		33	/* 32 bit rel. offset to jump slot.  */
+#define R_390_PLTOFF16		34	/* 16 bit offset from GOT to PLT. */
+#define R_390_PLTOFF32		35	/* 32 bit offset from GOT to PLT. */
+#define R_390_PLTOFF64		36	/* 64 bit offset from GOT to PLT. */
+#define R_390_TLS_LOAD		37	/* Tag for load insn in TLS code. */
+#define R_390_TLS_GDCALL	38	/* Tag for function call in general
+                                           dynamic TLS code.  */
+#define R_390_TLS_LDCALL	39	/* Tag for function call in local
+                                           dynamic TLS code.  */
+#define R_390_TLS_GD32		40	/* Direct 32 bit for general dynamic
+                                           thread local data.  */
+#define R_390_TLS_GD64		41	/* Direct 64 bit for general dynamic
+                                           thread local data.  */
+#define R_390_TLS_GOTIE12	42	/* 12 bit GOT offset for static TLS
+                                           block offset.  */
+#define R_390_TLS_GOTIE32	43	/* 32 bit GOT offset for static TLS
+                                           block offset.  */
+#define R_390_TLS_GOTIE64	44	/* 64 bit GOT offset for static TLS
+                                           block offset.  */
+#define R_390_TLS_LDM32		45	/* Direct 32 bit for local dynamic
+                                           thread local data in LD code.  */
+#define R_390_TLS_LDM64		46	/* Direct 64 bit for local dynamic
+                                           thread local data in LD code.  */
+#define R_390_TLS_IE32		47	/* 32 bit address of GOT entry for
+                                           negated static TLS block offset.  */
+#define R_390_TLS_IE64		48	/* 64 bit address of GOT entry for
+                                           negated static TLS block offset.  */
+#define R_390_TLS_IEENT		49	/* 32 bit rel. offset to GOT entry for
+                                           negated static TLS block offset.  */
+#define R_390_TLS_LE32		50	/* 32 bit negated offset relative to
+                                           static TLS block.  */
+#define R_390_TLS_LE64		51	/* 64 bit negated offset relative to
+                                           static TLS block.  */
+#define R_390_TLS_LDO32		52	/* 32 bit offset relative to TLS
+                                           block.  */
+#define R_390_TLS_LDO64		53	/* 64 bit offset relative to TLS
+                                           block.  */
+#define R_390_TLS_DTPMOD	54	/* ID of module containing symbol.  */
+#define R_390_TLS_DTPOFF	55	/* Offset in TLS block.  */
+#define R_390_TLS_TPOFF		56	/* Negated offset in static TLS
+                                           block.  */
+/* Keep this the last entry.  */
+#define R_390_NUM	57
+
+/* x86-64 relocation types */
+#define R_X86_64_NONE		0	/* No reloc */
+#define R_X86_64_64		1	/* Direct 64 bit  */
+#define R_X86_64_PC32		2	/* PC relative 32 bit signed */
+#define R_X86_64_GOT32		3	/* 32 bit GOT entry */
+#define R_X86_64_PLT32		4	/* 32 bit PLT address */
+#define R_X86_64_COPY		5	/* Copy symbol at runtime */
+#define R_X86_64_GLOB_DAT	6	/* Create GOT entry */
+#define R_X86_64_JUMP_SLOT	7	/* Create PLT entry */
+#define R_X86_64_RELATIVE	8	/* Adjust by program base */
+#define R_X86_64_GOTPCREL	9	/* 32 bit signed pc relative
+					   offset to GOT */
+#define R_X86_64_32		10	/* Direct 32 bit zero extended */
+#define R_X86_64_32S		11	/* Direct 32 bit sign extended */
+#define R_X86_64_16		12	/* Direct 16 bit zero extended */
+#define R_X86_64_PC16		13	/* 16 bit sign extended pc relative */
+#define R_X86_64_8		14	/* Direct 8 bit sign extended  */
+#define R_X86_64_PC8		15	/* 8 bit sign extended pc relative */
+
+#define R_X86_64_NUM		16
+
+/* Legal values for e_flags field of Elf64_Ehdr.  */
+
+#define EF_ALPHA_32BIT		1	/* All addresses are below 2GB */
+
+/* HPPA specific definitions.  */
+
+/* Legal values for e_flags field of Elf32_Ehdr.  */
+
+#define EF_PARISC_TRAPNIL	0x00010000 /* Trap nil pointer dereference.  */
+#define EF_PARISC_EXT		0x00020000 /* Program uses arch. extensions. */
+#define EF_PARISC_LSB		0x00040000 /* Program expects little endian. */
+#define EF_PARISC_WIDE		0x00080000 /* Program expects wide mode.  */
+#define EF_PARISC_NO_KABP	0x00100000 /* No kernel assisted branch
+					      prediction.  */
+#define EF_PARISC_LAZYSWAP	0x00400000 /* Allow lazy swapping.  */
+#define EF_PARISC_ARCH		0x0000ffff /* Architecture version.  */
+
+/* Defined values for `e_flags & EF_PARISC_ARCH' are:  */
+
+#define EFA_PARISC_1_0		    0x020b /* PA-RISC 1.0 big-endian.  */
+#define EFA_PARISC_1_1		    0x0210 /* PA-RISC 1.1 big-endian.  */
+#define EFA_PARISC_2_0		    0x0214 /* PA-RISC 2.0 big-endian.  */
+
+/* Additional section indices.  */
+
+#define SHN_PARISC_ANSI_COMMON	0xff00	   /* Section for tentatively declared
+					      symbols in ANSI C.  */
+#define SHN_PARISC_HUGE_COMMON	0xff01	   /* Common blocks in huge model.  */
+
+/* Legal values for sh_type field of Elf32_Shdr.  */
+
+#define SHT_PARISC_EXT		0x70000000 /* Contains product specific ext. */
+#define SHT_PARISC_UNWIND	0x70000001 /* Unwind information.  */
+#define SHT_PARISC_DOC		0x70000002 /* Debug info for optimized code. */
+
+/* Legal values for sh_flags field of Elf32_Shdr.  */
+
+#define SHF_PARISC_SHORT	0x20000000 /* Section with short addressing. */
+#define SHF_PARISC_HUGE		0x40000000 /* Section far from gp.  */
+#define SHF_PARISC_SBP		0x80000000 /* Static branch prediction code. */
+
+/* Legal values for ST_TYPE subfield of st_info (symbol type).  */
+
+#define STT_PARISC_MILLICODE	13	/* Millicode function entry point.  */
+
+#define STT_HP_OPAQUE		(STT_LOOS + 0x1)
+#define STT_HP_STUB		(STT_LOOS + 0x2)
+
+/* HPPA relocs.  */
+
+#define R_PARISC_NONE		0	/* No reloc.  */
+#define R_PARISC_DIR32		1	/* Direct 32-bit reference.  */
+#define R_PARISC_DIR21L		2	/* Left 21 bits of eff. address.  */
+#define R_PARISC_DIR17R		3	/* Right 17 bits of eff. address.  */
+#define R_PARISC_DIR17F		4	/* 17 bits of eff. address.  */
+#define R_PARISC_DIR14R		6	/* Right 14 bits of eff. address.  */
+#define R_PARISC_PCREL32	9	/* 32-bit rel. address.  */
+#define R_PARISC_PCREL21L	10	/* Left 21 bits of rel. address.  */
+#define R_PARISC_PCREL17R	11	/* Right 17 bits of rel. address.  */
+#define R_PARISC_PCREL17F	12	/* 17 bits of rel. address.  */
+#define R_PARISC_PCREL14R	14	/* Right 14 bits of rel. address.  */
+#define R_PARISC_DPREL21L	18	/* Left 21 bits of rel. address.  */
+#define R_PARISC_DPREL14R	22	/* Right 14 bits of rel. address.  */
+#define R_PARISC_GPREL21L	26	/* GP-relative, left 21 bits.  */
+#define R_PARISC_GPREL14R	30	/* GP-relative, right 14 bits.  */
+#define R_PARISC_LTOFF21L	34	/* LT-relative, left 21 bits.  */
+#define R_PARISC_LTOFF14R	38	/* LT-relative, right 14 bits.  */
+#define R_PARISC_SECREL32	41	/* 32 bits section rel. address.  */
+#define R_PARISC_SEGBASE	48	/* No relocation, set segment base.  */
+#define R_PARISC_SEGREL32	49	/* 32 bits segment rel. address.  */
+#define R_PARISC_PLTOFF21L	50	/* PLT rel. address, left 21 bits.  */
+#define R_PARISC_PLTOFF14R	54	/* PLT rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF_FPTR32	57	/* 32 bits LT-rel. function pointer. */
+#define R_PARISC_LTOFF_FPTR21L	58	/* LT-rel. fct ptr, left 21 bits. */
+#define R_PARISC_LTOFF_FPTR14R	62	/* LT-rel. fct ptr, right 14 bits. */
+#define R_PARISC_FPTR64		64	/* 64 bits function address.  */
+#define R_PARISC_PLABEL32	65	/* 32 bits function address.  */
+#define R_PARISC_PCREL64	72	/* 64 bits PC-rel. address.  */
+#define R_PARISC_PCREL22F	74	/* 22 bits PC-rel. address.  */
+#define R_PARISC_PCREL14WR	75	/* PC-rel. address, right 14 bits.  */
+#define R_PARISC_PCREL14DR	76	/* PC rel. address, right 14 bits.  */
+#define R_PARISC_PCREL16F	77	/* 16 bits PC-rel. address.  */
+#define R_PARISC_PCREL16WF	78	/* 16 bits PC-rel. address.  */
+#define R_PARISC_PCREL16DF	79	/* 16 bits PC-rel. address.  */
+#define R_PARISC_DIR64		80	/* 64 bits of eff. address.  */
+#define R_PARISC_DIR14WR	83	/* 14 bits of eff. address.  */
+#define R_PARISC_DIR14DR	84	/* 14 bits of eff. address.  */
+#define R_PARISC_DIR16F		85	/* 16 bits of eff. address.  */
+#define R_PARISC_DIR16WF	86	/* 16 bits of eff. address.  */
+#define R_PARISC_DIR16DF	87	/* 16 bits of eff. address.  */
+#define R_PARISC_GPREL64	88	/* 64 bits of GP-rel. address.  */
+#define R_PARISC_GPREL14WR	91	/* GP-rel. address, right 14 bits.  */
+#define R_PARISC_GPREL14DR	92	/* GP-rel. address, right 14 bits.  */
+#define R_PARISC_GPREL16F	93	/* 16 bits GP-rel. address.  */
+#define R_PARISC_GPREL16WF	94	/* 16 bits GP-rel. address.  */
+#define R_PARISC_GPREL16DF	95	/* 16 bits GP-rel. address.  */
+#define R_PARISC_LTOFF64	96	/* 64 bits LT-rel. address.  */
+#define R_PARISC_LTOFF14WR	99	/* LT-rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF14DR	100	/* LT-rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF16F	101	/* 16 bits LT-rel. address.  */
+#define R_PARISC_LTOFF16WF	102	/* 16 bits LT-rel. address.  */
+#define R_PARISC_LTOFF16DF	103	/* 16 bits LT-rel. address.  */
+#define R_PARISC_SECREL64	104	/* 64 bits section rel. address.  */
+#define R_PARISC_SEGREL64	112	/* 64 bits segment rel. address.  */
+#define R_PARISC_PLTOFF14WR	115	/* PLT-rel. address, right 14 bits.  */
+#define R_PARISC_PLTOFF14DR	116	/* PLT-rel. address, right 14 bits.  */
+#define R_PARISC_PLTOFF16F	117	/* 16 bits LT-rel. address.  */
+#define R_PARISC_PLTOFF16WF	118	/* 16 bits PLT-rel. address.  */
+#define R_PARISC_PLTOFF16DF	119	/* 16 bits PLT-rel. address.  */
+#define R_PARISC_LTOFF_FPTR64	120	/* 64 bits LT-rel. function ptr.  */
+#define R_PARISC_LTOFF_FPTR14WR	123	/* LT-rel. fct. ptr., right 14 bits. */
+#define R_PARISC_LTOFF_FPTR14DR	124	/* LT-rel. fct. ptr., right 14 bits. */
+#define R_PARISC_LTOFF_FPTR16F	125	/* 16 bits LT-rel. function ptr.  */
+#define R_PARISC_LTOFF_FPTR16WF	126	/* 16 bits LT-rel. function ptr.  */
+#define R_PARISC_LTOFF_FPTR16DF	127	/* 16 bits LT-rel. function ptr.  */
+#define R_PARISC_LORESERVE	128
+#define R_PARISC_COPY		128	/* Copy relocation.  */
+#define R_PARISC_IPLT		129	/* Dynamic reloc, imported PLT */
+#define R_PARISC_EPLT		130	/* Dynamic reloc, exported PLT */
+#define R_PARISC_TPREL32	153	/* 32 bits TP-rel. address.  */
+#define R_PARISC_TPREL21L	154	/* TP-rel. address, left 21 bits.  */
+#define R_PARISC_TPREL14R	158	/* TP-rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF_TP21L	162	/* LT-TP-rel. address, left 21 bits. */
+#define R_PARISC_LTOFF_TP14R	166	/* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP14F	167	/* 14 bits LT-TP-rel. address.  */
+#define R_PARISC_TPREL64	216	/* 64 bits TP-rel. address.  */
+#define R_PARISC_TPREL14WR	219	/* TP-rel. address, right 14 bits.  */
+#define R_PARISC_TPREL14DR	220	/* TP-rel. address, right 14 bits.  */
+#define R_PARISC_TPREL16F	221	/* 16 bits TP-rel. address.  */
+#define R_PARISC_TPREL16WF	222	/* 16 bits TP-rel. address.  */
+#define R_PARISC_TPREL16DF	223	/* 16 bits TP-rel. address.  */
+#define R_PARISC_LTOFF_TP64	224	/* 64 bits LT-TP-rel. address.  */
+#define R_PARISC_LTOFF_TP14WR	227	/* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP14DR	228	/* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP16F	229	/* 16 bits LT-TP-rel. address.  */
+#define R_PARISC_LTOFF_TP16WF	230	/* 16 bits LT-TP-rel. address.  */
+#define R_PARISC_LTOFF_TP16DF	231	/* 16 bits LT-TP-rel. address.  */
+#define R_PARISC_HIRESERVE	255
+
+/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr.  */
+
+#define PT_HP_TLS		(PT_LOOS + 0x0)
+#define PT_HP_CORE_NONE		(PT_LOOS + 0x1)
+#define PT_HP_CORE_VERSION	(PT_LOOS + 0x2)
+#define PT_HP_CORE_KERNEL	(PT_LOOS + 0x3)
+#define PT_HP_CORE_COMM		(PT_LOOS + 0x4)
+#define PT_HP_CORE_PROC		(PT_LOOS + 0x5)
+#define PT_HP_CORE_LOADABLE	(PT_LOOS + 0x6)
+#define PT_HP_CORE_STACK	(PT_LOOS + 0x7)
+#define PT_HP_CORE_SHM		(PT_LOOS + 0x8)
+#define PT_HP_CORE_MMF		(PT_LOOS + 0x9)
+#define PT_HP_PARALLEL		(PT_LOOS + 0x10)
+#define PT_HP_FASTBIND		(PT_LOOS + 0x11)
+#define PT_HP_OPT_ANNOT		(PT_LOOS + 0x12)
+#define PT_HP_HSL_ANNOT		(PT_LOOS + 0x13)
+#define PT_HP_STACK		(PT_LOOS + 0x14)
+
+#define PT_PARISC_ARCHEXT	0x70000000
+#define PT_PARISC_UNWIND	0x70000001
+
+/* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr.  */
+
+#define PF_PARISC_SBP		0x08000000
+
+#define PF_HP_PAGE_SIZE		0x00100000
+#define PF_HP_FAR_SHARED	0x00200000
+#define PF_HP_NEAR_SHARED	0x00400000
+#define PF_HP_CODE		0x01000000
+#define PF_HP_MODIFY		0x02000000
+#define PF_HP_LAZYSWAP		0x04000000
+#define PF_HP_SBP		0x08000000
+
+/* IA-64 specific declarations.  */
+
+/* Processor specific flags for the Ehdr e_flags field.  */
+#define EF_IA_64_MASKOS		0x0000000f	/* os-specific flags */
+#define EF_IA_64_ABI64		0x00000010	/* 64-bit ABI */
+#define EF_IA_64_ARCH		0xff000000	/* arch. version mask */
+
+/* Processor specific values for the Phdr p_type field.  */
+#define PT_IA_64_ARCHEXT	(PT_LOPROC + 0)	/* arch extension bits */
+#define PT_IA_64_UNWIND		(PT_LOPROC + 1)	/* ia64 unwind bits */
+
+/* Processor specific flags for the Phdr p_flags field.  */
+#define PF_IA_64_NORECOV	0x80000000	/* spec insns w/o recovery */
+
+/* Processor specific values for the Shdr sh_type field.  */
+#define SHT_IA_64_EXT		(SHT_LOPROC + 0) /* extension bits */
+#define SHT_IA_64_UNWIND	(SHT_LOPROC + 1) /* unwind bits */
+
+/* Processor specific flags for the Shdr sh_flags field.  */
+#define SHF_IA_64_SHORT		0x10000000	/* section near gp */
+#define SHF_IA_64_NORECOV	0x20000000	/* spec insns w/o recovery */
+
+/* Processor specific values for the Dyn d_tag field.  */
+#define DT_IA_64_PLT_RESERVE	(DT_LOPROC + 0)
+#define DT_IA_64_NUM		1
+
+/* IA-64 relocations.  */
+#define R_IA64_NONE		0x00	/* none */
+#define R_IA64_IMM14		0x21	/* symbol + addend, add imm14 */
+#define R_IA64_IMM22		0x22	/* symbol + addend, add imm22 */
+#define R_IA64_IMM64		0x23	/* symbol + addend, mov imm64 */
+#define R_IA64_DIR32MSB		0x24	/* symbol + addend, data4 MSB */
+#define R_IA64_DIR32LSB		0x25	/* symbol + addend, data4 LSB */
+#define R_IA64_DIR64MSB		0x26	/* symbol + addend, data8 MSB */
+#define R_IA64_DIR64LSB		0x27	/* symbol + addend, data8 LSB */
+#define R_IA64_GPREL22		0x2a	/* @gprel(sym + add), add imm22 */
+#define R_IA64_GPREL64I		0x2b	/* @gprel(sym + add), mov imm64 */
+#define R_IA64_GPREL32MSB	0x2c	/* @gprel(sym + add), data4 MSB */
+#define R_IA64_GPREL32LSB	0x2d	/* @gprel(sym + add), data4 LSB */
+#define R_IA64_GPREL64MSB	0x2e	/* @gprel(sym + add), data8 MSB */
+#define R_IA64_GPREL64LSB	0x2f	/* @gprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF22		0x32	/* @ltoff(sym + add), add imm22 */
+#define R_IA64_LTOFF64I		0x33	/* @ltoff(sym + add), mov imm64 */
+#define R_IA64_PLTOFF22		0x3a	/* @pltoff(sym + add), add imm22 */
+#define R_IA64_PLTOFF64I	0x3b	/* @pltoff(sym + add), mov imm64 */
+#define R_IA64_PLTOFF64MSB	0x3e	/* @pltoff(sym + add), data8 MSB */
+#define R_IA64_PLTOFF64LSB	0x3f	/* @pltoff(sym + add), data8 LSB */
+#define R_IA64_FPTR64I		0x43	/* @fptr(sym + add), mov imm64 */
+#define R_IA64_FPTR32MSB	0x44	/* @fptr(sym + add), data4 MSB */
+#define R_IA64_FPTR32LSB	0x45	/* @fptr(sym + add), data4 LSB */
+#define R_IA64_FPTR64MSB	0x46	/* @fptr(sym + add), data8 MSB */
+#define R_IA64_FPTR64LSB	0x47	/* @fptr(sym + add), data8 LSB */
+#define R_IA64_PCREL60B		0x48	/* @pcrel(sym + add), brl */
+#define R_IA64_PCREL21B		0x49	/* @pcrel(sym + add), ptb, call */
+#define R_IA64_PCREL21M		0x4a	/* @pcrel(sym + add), chk.s */
+#define R_IA64_PCREL21F		0x4b	/* @pcrel(sym + add), fchkf */
+#define R_IA64_PCREL32MSB	0x4c	/* @pcrel(sym + add), data4 MSB */
+#define R_IA64_PCREL32LSB	0x4d	/* @pcrel(sym + add), data4 LSB */
+#define R_IA64_PCREL64MSB	0x4e	/* @pcrel(sym + add), data8 MSB */
+#define R_IA64_PCREL64LSB	0x4f	/* @pcrel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_FPTR22	0x52	/* @ltoff(@fptr(s+a)), imm22 */
+#define R_IA64_LTOFF_FPTR64I	0x53	/* @ltoff(@fptr(s+a)), imm64 */
+#define R_IA64_LTOFF_FPTR32MSB	0x54	/* @ltoff(@fptr(s+a)), data4 MSB */
+#define R_IA64_LTOFF_FPTR32LSB	0x55	/* @ltoff(@fptr(s+a)), data4 LSB */
+#define R_IA64_LTOFF_FPTR64MSB	0x56	/* @ltoff(@fptr(s+a)), data8 MSB */
+#define R_IA64_LTOFF_FPTR64LSB	0x57	/* @ltoff(@fptr(s+a)), data8 LSB */
+#define R_IA64_SEGREL32MSB	0x5c	/* @segrel(sym + add), data4 MSB */
+#define R_IA64_SEGREL32LSB	0x5d	/* @segrel(sym + add), data4 LSB */
+#define R_IA64_SEGREL64MSB	0x5e	/* @segrel(sym + add), data8 MSB */
+#define R_IA64_SEGREL64LSB	0x5f	/* @segrel(sym + add), data8 LSB */
+#define R_IA64_SECREL32MSB	0x64	/* @secrel(sym + add), data4 MSB */
+#define R_IA64_SECREL32LSB	0x65	/* @secrel(sym + add), data4 LSB */
+#define R_IA64_SECREL64MSB	0x66	/* @secrel(sym + add), data8 MSB */
+#define R_IA64_SECREL64LSB	0x67	/* @secrel(sym + add), data8 LSB */
+#define R_IA64_REL32MSB		0x6c	/* data 4 + REL */
+#define R_IA64_REL32LSB		0x6d	/* data 4 + REL */
+#define R_IA64_REL64MSB		0x6e	/* data 8 + REL */
+#define R_IA64_REL64LSB		0x6f	/* data 8 + REL */
+#define R_IA64_LTV32MSB		0x74	/* symbol + addend, data4 MSB */
+#define R_IA64_LTV32LSB		0x75	/* symbol + addend, data4 LSB */
+#define R_IA64_LTV64MSB		0x76	/* symbol + addend, data8 MSB */
+#define R_IA64_LTV64LSB		0x77	/* symbol + addend, data8 LSB */
+#define R_IA64_PCREL21BI	0x79	/* @pcrel(sym + add), 21bit inst */
+#define R_IA64_PCREL22		0x7a	/* @pcrel(sym + add), 22bit inst */
+#define R_IA64_PCREL64I		0x7b	/* @pcrel(sym + add), 64bit inst */
+#define R_IA64_IPLTMSB		0x80	/* dynamic reloc, imported PLT, MSB */
+#define R_IA64_IPLTLSB		0x81	/* dynamic reloc, imported PLT, LSB */
+#define R_IA64_COPY		0x84	/* copy relocation */
+#define R_IA64_SUB		0x85	/* Addend and symbol difference */
+#define R_IA64_LTOFF22X		0x86	/* LTOFF22, relaxable.  */
+#define R_IA64_LDXMOV		0x87	/* Use of LTOFF22X.  */
+#define R_IA64_TPREL14		0x91	/* @tprel(sym + add), imm14 */
+#define R_IA64_TPREL22		0x92	/* @tprel(sym + add), imm22 */
+#define R_IA64_TPREL64I		0x93	/* @tprel(sym + add), imm64 */
+#define R_IA64_TPREL64MSB	0x96	/* @tprel(sym + add), data8 MSB */
+#define R_IA64_TPREL64LSB	0x97	/* @tprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_TPREL22	0x9a	/* @ltoff(@tprel(s+a)), imm22 */
+#define R_IA64_DTPMOD64MSB	0xa6	/* @dtpmod(sym + add), data8 MSB */
+#define R_IA64_DTPMOD64LSB	0xa7	/* @dtpmod(sym + add), data8 LSB */
+#define R_IA64_LTOFF_DTPMOD22	0xaa	/* @ltoff(@dtpmod(sym + add)), imm22 */
+#define R_IA64_DTPREL14		0xb1	/* @dtprel(sym + add), imm14 */
+#define R_IA64_DTPREL22		0xb2	/* @dtprel(sym + add), imm22 */
+#define R_IA64_DTPREL64I	0xb3	/* @dtprel(sym + add), imm64 */
+#define R_IA64_DTPREL32MSB	0xb4	/* @dtprel(sym + add), data4 MSB */
+#define R_IA64_DTPREL32LSB	0xb5	/* @dtprel(sym + add), data4 LSB */
+#define R_IA64_DTPREL64MSB	0xb6	/* @dtprel(sym + add), data8 MSB */
+#define R_IA64_DTPREL64LSB	0xb7	/* @dtprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_DTPREL22	0xba	/* @ltoff(@dtprel(s+a)), imm22 */
+
+typedef struct elf32_rel {
+  Elf32_Addr	r_offset;
+  Elf32_Word	r_info;
+} Elf32_Rel;
+
+typedef struct elf64_rel {
+  Elf64_Addr r_offset;	/* Location at which to apply the action */
+  Elf64_Xword r_info;	/* index and type of relocation */
+} Elf64_Rel;
+
+typedef struct elf32_rela{
+  Elf32_Addr	r_offset;
+  Elf32_Word	r_info;
+  Elf32_Sword	r_addend;
+} Elf32_Rela;
+
+typedef struct elf64_rela {
+  Elf64_Addr r_offset;	/* Location at which to apply the action */
+  Elf64_Xword r_info;	/* index and type of relocation */
+  Elf64_Sxword r_addend;	/* Constant addend used to compute value */
+} Elf64_Rela;
+
+typedef struct elf32_sym{
+  Elf32_Word	st_name;
+  Elf32_Addr	st_value;
+  Elf32_Word	st_size;
+  unsigned char	st_info;
+  unsigned char	st_other;
+  Elf32_Half	st_shndx;
+} Elf32_Sym;
+
+typedef struct elf64_sym {
+  Elf64_Word st_name;		/* Symbol name, index in string tbl */
+  unsigned char	st_info;	/* Type and binding attributes */
+  unsigned char	st_other;	/* No defined meaning, 0 */
+  Elf64_Half st_shndx;		/* Associated section index */
+  Elf64_Addr st_value;		/* Value of the symbol */
+  Elf64_Xword st_size;		/* Associated symbol size */
+} Elf64_Sym;
+
+
+#define EI_NIDENT	16
+
+typedef struct elf32_hdr{
+  unsigned char	e_ident[EI_NIDENT];
+  Elf32_Half	e_type;
+  Elf32_Half	e_machine;
+  Elf32_Word	e_version;
+  Elf32_Addr	e_entry;  /* Entry point */
+  Elf32_Off	e_phoff;
+  Elf32_Off	e_shoff;
+  Elf32_Word	e_flags;
+  Elf32_Half	e_ehsize;
+  Elf32_Half	e_phentsize;
+  Elf32_Half	e_phnum;
+  Elf32_Half	e_shentsize;
+  Elf32_Half	e_shnum;
+  Elf32_Half	e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+  unsigned char	e_ident[16];		/* ELF "magic number" */
+  Elf64_Half e_type;
+  Elf64_Half e_machine;
+  Elf64_Word e_version;
+  Elf64_Addr e_entry;		/* Entry point virtual address */
+  Elf64_Off e_phoff;		/* Program header table file offset */
+  Elf64_Off e_shoff;		/* Section header table file offset */
+  Elf64_Word e_flags;
+  Elf64_Half e_ehsize;
+  Elf64_Half e_phentsize;
+  Elf64_Half e_phnum;
+  Elf64_Half e_shentsize;
+  Elf64_Half e_shnum;
+  Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on sections in the program
+   header, p_flags. */
+#define PF_R		0x4
+#define PF_W		0x2
+#define PF_X		0x1
+
+typedef struct elf32_phdr{
+  Elf32_Word	p_type;
+  Elf32_Off	p_offset;
+  Elf32_Addr	p_vaddr;
+  Elf32_Addr	p_paddr;
+  Elf32_Word	p_filesz;
+  Elf32_Word	p_memsz;
+  Elf32_Word	p_flags;
+  Elf32_Word	p_align;
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+  Elf64_Word p_type;
+  Elf64_Word p_flags;
+  Elf64_Off p_offset;		/* Segment file offset */
+  Elf64_Addr p_vaddr;		/* Segment virtual address */
+  Elf64_Addr p_paddr;		/* Segment physical address */
+  Elf64_Xword p_filesz;		/* Segment size in file */
+  Elf64_Xword p_memsz;		/* Segment size in memory */
+  Elf64_Xword p_align;		/* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+/* sh_type */
+#define SHT_NULL	0
+#define SHT_PROGBITS	1
+#define SHT_SYMTAB	2
+#define SHT_STRTAB	3
+#define SHT_RELA	4
+#define SHT_HASH	5
+#define SHT_DYNAMIC	6
+#define SHT_NOTE	7
+#define SHT_NOBITS	8
+#define SHT_REL		9
+#define SHT_SHLIB	10
+#define SHT_DYNSYM	11
+#define SHT_NUM		12
+#define SHT_LOPROC	0x70000000
+#define SHT_HIPROC	0x7fffffff
+#define SHT_LOUSER	0x80000000
+#define SHT_HIUSER	0xffffffff
+#define SHT_MIPS_LIST		0x70000000
+#define SHT_MIPS_CONFLICT	0x70000002
+#define SHT_MIPS_GPTAB		0x70000003
+#define SHT_MIPS_UCODE		0x70000004
+
+/* sh_flags */
+#define SHF_WRITE	0x1
+#define SHF_ALLOC	0x2
+#define SHF_EXECINSTR	0x4
+#define SHF_MASKPROC	0xf0000000
+#define SHF_MIPS_GPREL	0x10000000
+
+/* special section indexes */
+#define SHN_UNDEF	0
+#define SHN_LORESERVE	0xff00
+#define SHN_LOPROC	0xff00
+#define SHN_HIPROC	0xff1f
+#define SHN_ABS		0xfff1
+#define SHN_COMMON	0xfff2
+#define SHN_HIRESERVE	0xffff
+#define SHN_MIPS_ACCOMON	0xff00
+ 
+typedef struct elf32_shdr {
+  Elf32_Word	sh_name;
+  Elf32_Word	sh_type;
+  Elf32_Word	sh_flags;
+  Elf32_Addr	sh_addr;
+  Elf32_Off	sh_offset;
+  Elf32_Word	sh_size;
+  Elf32_Word	sh_link;
+  Elf32_Word	sh_info;
+  Elf32_Word	sh_addralign;
+  Elf32_Word	sh_entsize;
+} Elf32_Shdr;
+
+typedef struct elf64_shdr {
+  Elf64_Word sh_name;		/* Section name, index in string tbl */
+  Elf64_Word sh_type;		/* Type of section */
+  Elf64_Xword sh_flags;		/* Miscellaneous section attributes */
+  Elf64_Addr sh_addr;		/* Section virtual addr at execution */
+  Elf64_Off sh_offset;		/* Section file offset */
+  Elf64_Xword sh_size;		/* Size of section in bytes */
+  Elf64_Word sh_link;		/* Index of another section */
+  Elf64_Word sh_info;		/* Additional section information */
+  Elf64_Xword sh_addralign;	/* Section alignment */
+  Elf64_Xword sh_entsize;	/* Entry size if section holds table */
+} Elf64_Shdr;
+
+#define	EI_MAG0		0		/* e_ident[] indexes */
+#define	EI_MAG1		1
+#define	EI_MAG2		2
+#define	EI_MAG3		3
+#define	EI_CLASS	4
+#define	EI_DATA		5
+#define	EI_VERSION	6
+#define	EI_PAD		7
+
+#define	ELFMAG0		0x7f		/* EI_MAG */
+#define	ELFMAG1		'E'
+#define	ELFMAG2		'L'
+#define	ELFMAG3		'F'
+#define	ELFMAG		"\177ELF"
+#define	SELFMAG		4
+
+#define	ELFCLASSNONE	0		/* EI_CLASS */
+#define	ELFCLASS32	1
+#define	ELFCLASS64	2
+#define	ELFCLASSNUM	3
+
+#define ELFDATANONE	0		/* e_ident[EI_DATA] */
+#define ELFDATA2LSB	1
+#define ELFDATA2MSB	2
+
+#define EV_NONE		0		/* e_version, EI_VERSION */
+#define EV_CURRENT	1
+#define EV_NUM		2
+
+/* Notes used in ET_CORE */
+#define NT_PRSTATUS	1
+#define NT_PRFPREG	2
+#define NT_PRPSINFO	3
+#define NT_TASKSTRUCT	4
+#define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
+
+
+/* Note header in a PT_NOTE section */
+typedef struct elf32_note {
+  Elf32_Word	n_namesz;	/* Name size */
+  Elf32_Word	n_descsz;	/* Content size */
+  Elf32_Word	n_type;		/* Content type */
+} Elf32_Nhdr;
+
+/* Note header in a PT_NOTE section */
+typedef struct elf64_note {
+  Elf64_Word n_namesz;	/* Name size */
+  Elf64_Word n_descsz;	/* Content size */
+  Elf64_Word n_type;	/* Content type */
+} Elf64_Nhdr;
+
+#if ELF_CLASS == ELFCLASS32
+
+#define elfhdr		elf32_hdr
+#define elf_phdr	elf32_phdr
+#define elf_note	elf32_note
+#define elf_shdr	elf32_shdr
+#define elf_sym		elf32_sym
+
+#ifdef ELF_USES_RELOCA
+# define ELF_RELOC      Elf32_Rela
+#else
+# define ELF_RELOC      Elf32_Rel
+#endif
+
+#else
+
+#define elfhdr		elf64_hdr
+#define elf_phdr	elf64_phdr
+#define elf_note	elf64_note
+#define elf_shdr	elf64_shdr
+#define elf_sym		elf64_sym
+
+#ifdef ELF_USES_RELOCA
+# define ELF_RELOC      Elf64_Rela
+#else
+# define ELF_RELOC      Elf64_Rel
+#endif
+
+#endif /* ELF_CLASS */
+
+#ifndef ElfW
+# if ELF_CLASS == ELFCLASS32
+#  define ElfW(x)  Elf32_ ## x
+#  define ELFW(x)  ELF32_ ## x
+# else
+#  define ElfW(x)  Elf64_ ## x
+#  define ELFW(x)  ELF64_ ## x
+# endif
+#endif
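+
+/* Illustrative note (added, not part of the upstream header): with ELF_CLASS
+   set to ELFCLASS32 the wrapper macros above expand as, e.g.
+
+       ElfW(Ehdr)   ->  Elf32_Ehdr
+       ELFW(R_SYM)  ->  ELF32_R_SYM
+
+   assuming the usual ELF32_R_SYM/ELF64_R_SYM helpers defined earlier in this
+   header, so loader code can be written once against the ElfW(...) names. */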
+
+
+#endif /* _QEMU_ELF_H */
Index: /trunk/src/recompiler_new/exec-all.h
===================================================================
--- /trunk/src/recompiler_new/exec-all.h	(revision 13168)
+++ /trunk/src/recompiler_new/exec-all.h	(revision 13168)
@@ -0,0 +1,690 @@
+/*
+ * internal execution defines for qemu
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+/* allow seeing the translation results - the slowdown should be negligible, so we leave it */
+#ifndef VBOX
+#define DEBUG_DISAS
+#endif
+
+#ifdef VBOX
+# include <VBox/tm.h>
+# include <VBox/pgm.h> /* PGM_DYNAMIC_RAM_ALLOC */
+# ifndef LOG_GROUP
+#  define LOG_GROUP LOG_GROUP_REM
+# endif
+# include <VBox/log.h>
+# include "REMInternal.h"
+# include <VBox/vm.h>
+#endif /* VBOX */
+
+#ifndef glue
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s)	tostring(s)
+#define tostring(s)	#s
+#endif
+
+#if __GNUC__ < 3
+#define __builtin_expect(x, n) (x)
+#endif
+
+#ifdef __i386__
+#define REGPARM(n) __attribute((regparm(n)))
+#else
+#define REGPARM(n)
+#endif
+
+/* is_jmp field values */
+#define DISAS_NEXT    0 /* next instruction can be analyzed */
+#define DISAS_JUMP    1 /* only pc was modified dynamically */
+#define DISAS_UPDATE  2 /* cpu state was modified dynamically */
+#define DISAS_TB_JUMP 3 /* only pc was modified statically */
+
+struct TranslationBlock;
+
+/* XXX: make a safe guess about the sizes */
+#define MAX_OP_PER_INSTR 32
+#define OPC_BUF_SIZE 512
+#define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR)
+
+#define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * 3)
+
+extern uint16_t gen_opc_buf[OPC_BUF_SIZE];
+extern uint32_t gen_opparam_buf[OPPARAM_BUF_SIZE];
+extern long gen_labels[OPC_BUF_SIZE];
+extern int nb_gen_labels;
+extern target_ulong gen_opc_pc[OPC_BUF_SIZE];
+extern target_ulong gen_opc_npc[OPC_BUF_SIZE];
+extern uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
+extern uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+extern target_ulong gen_opc_jump_pc[2];
+extern uint32_t gen_opc_hflags[OPC_BUF_SIZE];
+
+typedef void (GenOpFunc)(void);
+typedef void (GenOpFunc1)(long);
+typedef void (GenOpFunc2)(long, long);
+typedef void (GenOpFunc3)(long, long, long);
+
+#if defined(TARGET_I386)
+
+void optimize_flags_init(void);
+
+#endif
+
+extern FILE *logfile;
+extern int loglevel;
+
+int gen_intermediate_code(CPUState *env, struct TranslationBlock *tb);
+int gen_intermediate_code_pc(CPUState *env, struct TranslationBlock *tb);
+void dump_ops(const uint16_t *opc_buf, const uint32_t *opparam_buf);
+int cpu_gen_code(CPUState *env, struct TranslationBlock *tb,
+                 int max_code_size, int *gen_code_size_ptr);
+int cpu_restore_state(struct TranslationBlock *tb,
+                      CPUState *env, unsigned long searched_pc,
+                      void *puc);
+int cpu_gen_code_copy(CPUState *env, struct TranslationBlock *tb,
+                      int max_code_size, int *gen_code_size_ptr);
+int cpu_restore_state_copy(struct TranslationBlock *tb,
+                           CPUState *env, unsigned long searched_pc,
+                           void *puc);
+void cpu_resume_from_signal(CPUState *env1, void *puc);
+void cpu_exec_init(CPUState *env);
+int page_unprotect(target_ulong address, unsigned long pc, void *puc);
+void tb_invalidate_phys_page_range(target_ulong start, target_ulong end,
+                                   int is_cpu_write_access);
+void tb_invalidate_page_range(target_ulong start, target_ulong end);
+void tlb_flush_page(CPUState *env, target_ulong addr);
+void tlb_flush(CPUState *env, int flush_global);
+int tlb_set_page_exec(CPUState *env, target_ulong vaddr,
+                      target_phys_addr_t paddr, int prot,
+                      int is_user, int is_softmmu);
+static inline int tlb_set_page(CPUState *env, target_ulong vaddr,
+                               target_phys_addr_t paddr, int prot,
+                               int is_user, int is_softmmu)
+{
+    if (prot & PAGE_READ)
+        prot |= PAGE_EXEC;
+    return tlb_set_page_exec(env, vaddr, paddr, prot, is_user, is_softmmu);
+}
+
+#define CODE_GEN_MAX_SIZE        65536
+#define CODE_GEN_ALIGN           16 /* must be >= the size of an icache line */
+
+#define CODE_GEN_PHYS_HASH_BITS     15
+#define CODE_GEN_PHYS_HASH_SIZE     (1 << CODE_GEN_PHYS_HASH_BITS)
+
+/* maximum total translated code allocated */
+
+/* NOTE: the translated code area cannot be too big because on some
+   archs the range of "fast" function calls is limited. Here is a
+   summary of the ranges:
+
+   i386  : signed 32 bits
+   arm   : signed 26 bits
+   ppc   : signed 24 bits
+   sparc : signed 32 bits
+   alpha : signed 23 bits
+*/
+
+#if defined(__alpha__)
+#define CODE_GEN_BUFFER_SIZE     (2 * 1024 * 1024)
+#elif defined(__ia64)
+#define CODE_GEN_BUFFER_SIZE     (4 * 1024 * 1024)	/* range of addl */
+#elif defined(__powerpc__)
+#define CODE_GEN_BUFFER_SIZE     (6 * 1024 * 1024)
+#else
+#define CODE_GEN_BUFFER_SIZE     (16 * 1024 * 1024)
+#endif
+
+//#define CODE_GEN_BUFFER_SIZE     (128 * 1024)
+
+/* estimated block size for TB allocation */
+/* XXX: use a per code average code fragment size and modulate it
+   according to the host CPU */
+#if defined(CONFIG_SOFTMMU)
+#define CODE_GEN_AVG_BLOCK_SIZE 128
+#else
+#define CODE_GEN_AVG_BLOCK_SIZE 64
+#endif
+
+#define CODE_GEN_MAX_BLOCKS    (CODE_GEN_BUFFER_SIZE / CODE_GEN_AVG_BLOCK_SIZE)
+
+#if defined(__powerpc__)
+#define USE_DIRECT_JUMP
+#endif
+#if defined(__i386__) && !defined(_WIN32)
+#define USE_DIRECT_JUMP
+#endif
+#ifdef VBOX /* bird: not safe in next step because of threading & cpu_interrupt. */
+#undef USE_DIRECT_JUMP
+#endif /* VBOX */
+
+typedef struct TranslationBlock {
+    target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
+    target_ulong cs_base; /* CS base for this block */
+    unsigned int flags; /* flags defining in which context the code was generated */
+    uint16_t size;      /* size of target code for this block (1 <=
+                           size <= TARGET_PAGE_SIZE) */
+    uint16_t cflags;    /* compile flags */
+#define CF_CODE_COPY   0x0001 /* block was generated in code copy mode */
+#define CF_TB_FP_USED  0x0002 /* fp ops are used in the TB */
+#define CF_FP_USED     0x0004 /* fp ops are used in the TB or in a chained TB */
+#define CF_SINGLE_INSN 0x0008 /* compile only a single instruction */
+#ifdef VBOX
+#define CF_RAW_MODE    0x0010 /* block was generated in raw mode */
+#endif
+
+    uint8_t *tc_ptr;    /* pointer to the translated code */
+    /* next matching tb for physical address. */
+    struct TranslationBlock *phys_hash_next;
+    /* first and second physical page containing code. The lower bit
+       of the pointer tells the index in page_next[] */
+    struct TranslationBlock *page_next[2];
+    target_ulong page_addr[2];
+
+    /* the following data are used to directly call another TB from
+       the code of this one. */
+    uint16_t tb_next_offset[2]; /* offset of original jump target */
+#ifdef USE_DIRECT_JUMP
+    uint16_t tb_jmp_offset[4]; /* offset of jump instruction */
+#else
+# if defined(VBOX) && defined(RT_OS_DARWIN) && defined(RT_ARCH_AMD64)
+#  error "First 4GB aren't reachable. jmp dword [tb_next] wont work."
+# endif
+    uint32_t tb_next[2]; /* address of jump generated code */
+#endif
+    /* list of TBs jumping to this one. This is a circular list using
+       the two least significant bits of the pointers to tell what is
+       the next pointer: 0 = jmp_next[0], 1 = jmp_next[1], 2 =
+       jmp_first */
+    struct TranslationBlock *jmp_next[2];
+    struct TranslationBlock *jmp_first;
+} TranslationBlock;
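+
+/* Sketch (added note, not from the original source): the page_next[] and
+   jmp_first/jmp_next[] lists keep a 2-bit tag in the low bits of each pointer
+   (0 = jmp_next[0], 1 = jmp_next[1], 2 = jmp_first).  The list walkers in
+   exec.c decode a link as:
+
+       n   = (long)tb1 & 3;                           -- which slot the link came from
+       tb1 = (TranslationBlock *)((long)tb1 & ~3);    -- strip the tag
+
+   which assumes TranslationBlock objects are at least 4-byte aligned. */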
+
+static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
+{
+    target_ulong tmp;
+    tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
+    return (tmp >> TB_JMP_PAGE_BITS) & TB_JMP_PAGE_MASK;
+}
+
+static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
+{
+    target_ulong tmp;
+    tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
+    return (((tmp >> TB_JMP_PAGE_BITS) & TB_JMP_PAGE_MASK) |
+	    (tmp & TB_JMP_ADDR_MASK));
+}
+
+static inline unsigned int tb_phys_hash_func(unsigned long pc)
+{
+    return pc & (CODE_GEN_PHYS_HASH_SIZE - 1);
+}
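+
+/* Worked example (illustrative, assuming the usual TB_JMP_* constants from
+   cpu-defs.h): tb_jmp_cache_hash_func() keeps the in-page offset bits in the
+   result, so two PCs on the same page, e.g. 0x1000 and 0x1004, land in
+   different tb_jmp_cache slots, while tb_phys_hash_func() is a plain
+   modulo-2^15 bucket index:
+
+       tb_phys_hash_func(0x12345678) == (0x12345678 & 0x7fff) == 0x5678
+ */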
+
+TranslationBlock *tb_alloc(target_ulong pc);
+void tb_flush(CPUState *env);
+void tb_link_phys(TranslationBlock *tb,
+                  target_ulong phys_pc, target_ulong phys_page2);
+
+extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+
+extern uint8_t code_gen_buffer[CODE_GEN_BUFFER_SIZE];
+extern uint8_t *code_gen_ptr;
+
+#if defined(USE_DIRECT_JUMP)
+
+#if defined(__powerpc__)
+static inline void tb_set_jmp_target1(unsigned long jmp_addr, unsigned long addr)
+{
+    uint32_t val, *ptr;
+
+    /* patch the branch destination */
+    ptr = (uint32_t *)jmp_addr;
+    val = *ptr;
+    val = (val & ~0x03fffffc) | ((addr - jmp_addr) & 0x03fffffc);
+    *ptr = val;
+    /* flush icache */
+    asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory");
+    asm volatile ("sync" : : : "memory");
+    asm volatile ("icbi 0,%0" : : "r"(ptr) : "memory");
+    asm volatile ("sync" : : : "memory");
+    asm volatile ("isync" : : : "memory");
+}
+#elif defined(__i386__)
+static inline void tb_set_jmp_target1(unsigned long jmp_addr, unsigned long addr)
+{
+    /* patch the branch destination */
+    *(uint32_t *)jmp_addr = addr - (jmp_addr + 4);
+    /* no need to flush icache explicitly */
+}
+#endif
+
+static inline void tb_set_jmp_target(TranslationBlock *tb,
+                                     int n, unsigned long addr)
+{
+    unsigned long offset;
+
+    offset = tb->tb_jmp_offset[n];
+    tb_set_jmp_target1((unsigned long)(tb->tc_ptr + offset), addr);
+    offset = tb->tb_jmp_offset[n + 2];
+    if (offset != 0xffff)
+        tb_set_jmp_target1((unsigned long)(tb->tc_ptr + offset), addr);
+}
+
+#else
+
+/* set the jump target */
+static inline void tb_set_jmp_target(TranslationBlock *tb,
+                                     int n, unsigned long addr)
+{
+    tb->tb_next[n] = addr;
+}
+
+#endif
+
+static inline void tb_add_jump(TranslationBlock *tb, int n,
+                               TranslationBlock *tb_next)
+{
+    /* NOTE: this test is only needed for thread safety */
+    if (!tb->jmp_next[n]) {
+        /* patch the native jump address */
+        tb_set_jmp_target(tb, n, (unsigned long)tb_next->tc_ptr);
+
+        /* add in TB jmp circular list */
+        tb->jmp_next[n] = tb_next->jmp_first;
+        tb_next->jmp_first = (TranslationBlock *)((long)(tb) | (n));
+    }
+}
+
+TranslationBlock *tb_find_pc(unsigned long pc_ptr);
+
+#ifndef offsetof
+#define offsetof(type, field) ((size_t) &((type *)0)->field)
+#endif
+
+#if defined(_WIN32)
+#define ASM_DATA_SECTION ".section \".data\"\n"
+#define ASM_PREVIOUS_SECTION ".section .text\n"
+#elif defined(__APPLE__)
+#define ASM_DATA_SECTION ".data\n"
+#define ASM_PREVIOUS_SECTION ".text\n"
+#else
+#define ASM_DATA_SECTION ".section \".data\"\n"
+#define ASM_PREVIOUS_SECTION ".previous\n"
+#endif
+
+#define ASM_OP_LABEL_NAME(n, opname) \
+    ASM_NAME(__op_label) #n "." ASM_NAME(opname)
+
+#if defined(__powerpc__)
+
+/* we patch the jump instruction directly */
+#define GOTO_TB(opname, tbparam, n)\
+do {\
+    asm volatile (ASM_DATA_SECTION\
+		  ASM_OP_LABEL_NAME(n, opname) ":\n"\
+		  ".long 1f\n"\
+		  ASM_PREVIOUS_SECTION \
+                  "b " ASM_NAME(__op_jmp) #n "\n"\
+		  "1:\n");\
+} while (0)
+
+#elif defined(__i386__) && defined(USE_DIRECT_JUMP)
+
+/* we patch the jump instruction directly */
+#define GOTO_TB(opname, tbparam, n)\
+do {\
+    asm volatile (".section .data\n"\
+		  ASM_OP_LABEL_NAME(n, opname) ":\n"\
+		  ".long 1f\n"\
+		  ASM_PREVIOUS_SECTION \
+                  "jmp " ASM_NAME(__op_jmp) #n "\n"\
+		  "1:\n");\
+} while (0)
+
+#else
+
+/* jump to next block operations (more portable code, does not need
+   cache flushing, but slower because of indirect jump) */
+# ifdef VBOX /* bird: GCC4 (and MinGW 3.4.x?) will remove the two unused static
+                variables. I've added a dummy __asm__ statement which references
+                the two variables to prevent this. */
+#  if __GNUC__ >= 4
+#   define GOTO_TB(opname, tbparam, n)\
+    do {\
+        static void __attribute__((unused)) *dummy ## n = &&dummy_label ## n;\
+        static void __attribute__((unused)) *__op_label ## n \
+            __asm__(ASM_OP_LABEL_NAME(n, opname)) = &&label ## n;\
+        __asm__ ("" : : "m" (__op_label ## n), "m" (dummy ## n));\
+        goto *(void *)(uintptr_t)(((TranslationBlock *)tbparam)->tb_next[n]);\
+    label ## n: ;\
+    dummy_label ## n: ;\
+    } while (0)
+#  else
+#   define GOTO_TB(opname, tbparam, n)\
+    do {\
+        static void __attribute__((unused)) *dummy ## n = &&dummy_label ## n;\
+        static void __attribute__((unused)) *__op_label ## n \
+            __asm__(ASM_OP_LABEL_NAME(n, opname)) = &&label ## n;\
+        goto *(void *)(uintptr_t)(((TranslationBlock *)tbparam)->tb_next[n]);\
+    label ## n: ;\
+    dummy_label ## n: ;\
+    } while (0)
+#  endif
+# else /* !VBOX */
+#define GOTO_TB(opname, tbparam, n)\
+do {\
+    static void __attribute__((unused)) *dummy ## n = &&dummy_label ## n;\
+    static void __attribute__((unused)) *__op_label ## n \
+        __asm__(ASM_OP_LABEL_NAME(n, opname)) = &&label ## n;\
+    goto *(void *)(((TranslationBlock *)tbparam)->tb_next[n]);\
+label ## n: ;\
+dummy_label ## n: ;\
+} while (0)
+# endif /* !VBOX */
+
+#endif
+
+extern CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
+extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
+extern void *io_mem_opaque[IO_MEM_NB_ENTRIES];
+
+#ifdef __powerpc__
+static inline int testandset (int *p)
+{
+    int ret;
+    __asm__ __volatile__ (
+                          "0:    lwarx %0,0,%1\n"
+                          "      xor. %0,%3,%0\n"
+                          "      bne 1f\n"
+                          "      stwcx. %2,0,%1\n"
+                          "      bne- 0b\n"
+                          "1:    "
+                          : "=&r" (ret)
+                          : "r" (p), "r" (1), "r" (0)
+                          : "cr0", "memory");
+    return ret;
+}
+#endif
+
+#ifdef __i386__
+static inline int testandset (int *p)
+{
+    long int readval = 0;
+
+    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+                          : "+m" (*p), "+a" (readval)
+                          : "r" (1)
+                          : "cc");
+    return readval;
+}
+#endif
+
+#ifdef __x86_64__
+static inline int testandset (int *p)
+{
+    long int readval = 0;
+
+    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+                          : "+m" (*p), "+a" (readval)
+                          : "r" (1)
+                          : "cc");
+    return readval;
+}
+#endif
+
+#ifdef __s390__
+static inline int testandset (int *p)
+{
+    int ret;
+
+    __asm__ __volatile__ ("0: cs    %0,%1,0(%2)\n"
+			  "   jl    0b"
+			  : "=&d" (ret)
+			  : "r" (1), "a" (p), "0" (*p)
+			  : "cc", "memory" );
+    return ret;
+}
+#endif
+
+#ifdef __alpha__
+static inline int testandset (int *p)
+{
+    int ret;
+    unsigned long one;
+
+    __asm__ __volatile__ ("0:	mov 1,%2\n"
+			  "	ldl_l %0,%1\n"
+			  "	stl_c %2,%1\n"
+			  "	beq %2,1f\n"
+			  ".subsection 2\n"
+			  "1:	br 0b\n"
+			  ".previous"
+			  : "=r" (ret), "=m" (*p), "=r" (one)
+			  : "m" (*p));
+    return ret;
+}
+#endif
+
+#ifdef __sparc__
+static inline int testandset (int *p)
+{
+	int ret;
+
+	__asm__ __volatile__("ldstub	[%1], %0"
+			     : "=r" (ret)
+			     : "r" (p)
+			     : "memory");
+
+	return (ret ? 1 : 0);
+}
+#endif
+
+#ifdef __arm__
+static inline int testandset (int *spinlock)
+{
+    register unsigned int ret;
+    __asm__ __volatile__("swp %0, %1, [%2]"
+                         : "=r"(ret)
+                         : "0"(1), "r"(spinlock));
+
+    return ret;
+}
+#endif
+
+#ifdef __mc68000
+static inline int testandset (int *p)
+{
+    char ret;
+    __asm__ __volatile__("tas %1; sne %0"
+                         : "=r" (ret)
+                         : "m" (p)
+                         : "cc","memory");
+    return ret;
+}
+#endif
+
+#ifdef __ia64
+#include <ia64intrin.h>
+
+static inline int testandset (int *p)
+{
+    return __sync_lock_test_and_set (p, 1);
+}
+#endif
+
+typedef int spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED 0
+
+#if defined(CONFIG_USER_ONLY)
+static inline void spin_lock(spinlock_t *lock)
+{
+    while (testandset(lock));
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+    *lock = 0;
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+    return !testandset(lock);
+}
+#else
+static inline void spin_lock(spinlock_t *lock)
+{
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+    return 1;
+}
+#endif
+
+extern spinlock_t tb_lock;
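+
+/* Usage sketch (added note, not from the original source): updates to the TB
+   structures are expected to be bracketed by this lock, e.g.
+
+       spin_lock(&tb_lock);
+       tb = tb_alloc(pc);
+       ...
+       spin_unlock(&tb_lock);
+
+   Outside CONFIG_USER_ONLY builds the spin_lock/spin_unlock helpers above
+   compile to no-ops, since system emulation drives the translator from a
+   single thread. */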
+
+extern int tb_invalidated_flag;
+
+#if !defined(CONFIG_USER_ONLY)
+
+void tlb_fill(target_ulong addr, int is_write, int is_user,
+              void *retaddr);
+
+#define ACCESS_TYPE 3
+#define MEMSUFFIX _code
+#define env cpu_single_env
+
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+#undef env
+
+#endif
+
+#if defined(CONFIG_USER_ONLY)
+static inline target_ulong get_phys_addr_code(CPUState *env, target_ulong addr)
+{
+    return addr;
+}
+#else
+# ifdef VBOX
+target_ulong remR3PhysGetPhysicalAddressCode(CPUState *env, target_ulong addr, CPUTLBEntry *pTLBEntry);
+#  if !defined(REM_PHYS_ADDR_IN_TLB)
+target_ulong remR3HCVirt2GCPhys(void *env, void *addr);
+#  endif
+# endif
+/* NOTE: this function can trigger an exception */
+/* NOTE2: the returned address is not exactly the physical address: it
+   is the offset relative to phys_ram_base */
+static inline target_ulong get_phys_addr_code(CPUState *env, target_ulong addr)
+{
+    int is_user, index, pd;
+
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+#if defined(TARGET_I386)
+    is_user = ((env->hflags & HF_CPL_MASK) == 3);
+#elif defined (TARGET_PPC)
+    is_user = msr_pr;
+#elif defined (TARGET_MIPS)
+    is_user = ((env->hflags & MIPS_HFLAG_MODE) == MIPS_HFLAG_UM);
+#elif defined (TARGET_SPARC)
+    is_user = (env->psrs == 0);
+#elif defined (TARGET_ARM)
+    is_user = ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR);
+#elif defined (TARGET_SH4)
+    is_user = ((env->sr & SR_MD) == 0);
+#else
+#error unimplemented CPU
+#endif
+    if (__builtin_expect(env->tlb_table[is_user][index].addr_code !=
+                         (addr & TARGET_PAGE_MASK), 0)) {
+        ldub_code(addr);
+    }
+    pd = env->tlb_table[is_user][index].addr_code & ~TARGET_PAGE_MASK;
+    if (pd > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
+# ifdef VBOX
+        /* deal with non-MMIO access handlers. */
+        return remR3PhysGetPhysicalAddressCode(env, addr, &env->tlb_table[is_user][index]);
+# else
+        cpu_abort(env, "Trying to execute code outside RAM or ROM at 0x%08lx\n", addr);
+# endif
+    }
+# if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+    return addr + env->tlb_table[is_user][index].addend;
+# elif defined(VBOX)
+    return remR3HCVirt2GCPhys(env, (void *)(addr + env->tlb_table[is_user][index].addend));
+# else
+    return addr + env->tlb_table[is_user][index].addend - (unsigned long)phys_ram_base;
+# endif
+}
+#endif
+
+
+#ifdef USE_KQEMU
+#define KQEMU_MODIFY_PAGE_MASK (0xff & ~(VGA_DIRTY_FLAG | CODE_DIRTY_FLAG))
+
+int kqemu_init(CPUState *env);
+int kqemu_cpu_exec(CPUState *env);
+void kqemu_flush_page(CPUState *env, target_ulong addr);
+void kqemu_flush(CPUState *env, int global);
+void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr);
+void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr);
+void kqemu_cpu_interrupt(CPUState *env);
+void kqemu_record_dump(void);
+
+static inline int kqemu_is_ok(CPUState *env)
+{
+    return(env->kqemu_enabled &&
+           (env->cr[0] & CR0_PE_MASK) &&
+           !(env->hflags & HF_INHIBIT_IRQ_MASK) &&
+           (env->eflags & IF_MASK) &&
+           !(env->eflags & VM_MASK) &&
+           (env->kqemu_enabled == 2 ||
+            ((env->hflags & HF_CPL_MASK) == 3 &&
+             (env->eflags & IOPL_MASK) != IOPL_MASK)));
+}
+
+#endif
Index: /trunk/src/recompiler_new/exec.c
===================================================================
--- /trunk/src/recompiler_new/exec.c	(revision 13168)
+++ /trunk/src/recompiler_new/exec.c	(revision 13168)
@@ -0,0 +1,2713 @@
+/*
+ *  virtual page mapping and translated block handling
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#include "config.h"
+#ifndef VBOX
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <inttypes.h>
+#else /* VBOX */
+# include <stdlib.h>
+# include <stdio.h>
+# include <inttypes.h>
+# include <iprt/alloc.h>
+# include <iprt/string.h>
+# include <iprt/param.h>
+# include <VBox/pgm.h> /* PGM_DYNAMIC_RAM_ALLOC */
+#endif /* VBOX */
+
+#include "cpu.h"
+#include "exec-all.h"
+#if defined(CONFIG_USER_ONLY)
+#include <qemu.h>
+#endif
+
+//#define DEBUG_TB_INVALIDATE
+//#define DEBUG_FLUSH
+//#define DEBUG_TLB
+//#define DEBUG_UNASSIGNED
+
+/* make various TB consistency checks */
+//#define DEBUG_TB_CHECK
+//#define DEBUG_TLB_CHECK
+
+#if !defined(CONFIG_USER_ONLY)
+/* TB consistency checks only implemented for usermode emulation.  */
+#undef DEBUG_TB_CHECK
+#endif
+
+/* threshold to flush the translated code buffer */
+#define CODE_GEN_BUFFER_MAX_SIZE (CODE_GEN_BUFFER_SIZE - CODE_GEN_MAX_SIZE)
+
+#define SMC_BITMAP_USE_THRESHOLD 10
+
+#define MMAP_AREA_START        0x00000000
+#define MMAP_AREA_END          0xa8000000
+
+#if defined(TARGET_SPARC64)
+#define TARGET_PHYS_ADDR_SPACE_BITS 41
+#elif defined(TARGET_PPC64)
+#define TARGET_PHYS_ADDR_SPACE_BITS 42
+#else
+/* Note: for compatibility with kqemu, we use 32 bits for x86_64 */
+#define TARGET_PHYS_ADDR_SPACE_BITS 32
+#endif
+
+TranslationBlock tbs[CODE_GEN_MAX_BLOCKS];
+TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+int nb_tbs;
+/* any access to the tbs or the page table must use this lock */
+spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
+
+uint8_t code_gen_buffer[CODE_GEN_BUFFER_SIZE]
+#if defined(__MINGW32__)
+    __attribute__((aligned (16)));
+#else
+    __attribute__((aligned (32)));
+#endif
+uint8_t *code_gen_ptr;
+
+#ifndef VBOX
+int phys_ram_size;
+int phys_ram_fd;
+#else /* VBOX */
+RTGCPHYS phys_ram_size;
+/* we have memory ranges (the high PC-BIOS mapping) which
+   cause some pages to fall outside the dirty map here. */
+uint32_t phys_ram_dirty_size;
+#endif /* VBOX */
+#if !defined(VBOX)
+uint8_t *phys_ram_base;
+#endif
+uint8_t *phys_ram_dirty;
+
+CPUState *first_cpu;
+/* current CPU in the current thread. It is only valid inside
+   cpu_exec() */
+CPUState *cpu_single_env;
+
+typedef struct PageDesc {
+    /* list of TBs intersecting this ram page */
+    TranslationBlock *first_tb;
+    /* in order to optimize self modifying code, we count the number
+       of lookups we do to a given page to use a bitmap */
+    unsigned int code_write_count;
+    uint8_t *code_bitmap;
+#if defined(CONFIG_USER_ONLY)
+    unsigned long flags;
+#endif
+} PageDesc;
+
+typedef struct PhysPageDesc {
+    /* offset in host memory of the page + io_index in the low 12 bits */
+    uint32_t phys_offset;
+} PhysPageDesc;
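+
+/* Illustrative decode (added note): the low bits of phys_offset double as an
+   I/O index / flag field, so given a PhysPageDesc *pd one would typically
+   split it as
+
+       unsigned long io_index = pd->phys_offset & ~TARGET_PAGE_MASK;
+       unsigned long ram_off  = pd->phys_offset & TARGET_PAGE_MASK;
+
+   where io_index takes values such as IO_MEM_RAM, IO_MEM_ROM or
+   IO_MEM_UNASSIGNED (assumed to come from cpu-all.h). */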
+
+#define L2_BITS 10
+#define L1_BITS (32 - L2_BITS - TARGET_PAGE_BITS)
+
+#define L1_SIZE (1 << L1_BITS)
+#define L2_SIZE (1 << L2_BITS)
+
+static void io_mem_init(void);
+
+unsigned long qemu_real_host_page_size;
+unsigned long qemu_host_page_bits;
+unsigned long qemu_host_page_size;
+unsigned long qemu_host_page_mask;
+
+/* XXX: for system emulation, it could just be an array */
+static PageDesc *l1_map[L1_SIZE];
+PhysPageDesc **l1_phys_map;
+
+/* io memory support */
+CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
+CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
+void *io_mem_opaque[IO_MEM_NB_ENTRIES];
+static int io_mem_nb;
+
+#ifndef VBOX
+/* log support */
+char *logfilename = "/tmp/qemu.log";
+#endif /* !VBOX */
+FILE *logfile;
+int loglevel;
+
+/* statistics */
+static int tlb_flush_count;
+static int tb_flush_count;
+#ifndef VBOX
+static int tb_phys_invalidate_count;
+#endif /* !VBOX */
+
+static void page_init(void)
+{
+    /* NOTE: we can always suppose that qemu_host_page_size >=
+       TARGET_PAGE_SIZE */
+#ifdef VBOX
+    RTMemProtect(code_gen_buffer, sizeof(code_gen_buffer),
+                 RTMEM_PROT_EXEC | RTMEM_PROT_READ | RTMEM_PROT_WRITE);
+    qemu_real_host_page_size = PAGE_SIZE;
+#else /* !VBOX */
+#ifdef _WIN32
+    {
+        SYSTEM_INFO system_info;
+        DWORD old_protect;
+
+        GetSystemInfo(&system_info);
+        qemu_real_host_page_size = system_info.dwPageSize;
+
+        VirtualProtect(code_gen_buffer, sizeof(code_gen_buffer),
+                       PAGE_EXECUTE_READWRITE, &old_protect);
+    }
+#else
+    qemu_real_host_page_size = getpagesize();
+    {
+        unsigned long start, end;
+
+        start = (unsigned long)code_gen_buffer;
+        start &= ~(qemu_real_host_page_size - 1);
+
+        end = (unsigned long)code_gen_buffer + sizeof(code_gen_buffer);
+        end += qemu_real_host_page_size - 1;
+        end &= ~(qemu_real_host_page_size - 1);
+
+        mprotect((void *)start, end - start,
+                 PROT_READ | PROT_WRITE | PROT_EXEC);
+    }
+#endif
+#endif /* !VBOX */
+
+    if (qemu_host_page_size == 0)
+        qemu_host_page_size = qemu_real_host_page_size;
+    if (qemu_host_page_size < TARGET_PAGE_SIZE)
+        qemu_host_page_size = TARGET_PAGE_SIZE;
+    qemu_host_page_bits = 0;
+    while ((1 << qemu_host_page_bits) < qemu_host_page_size)
+        qemu_host_page_bits++;
+    qemu_host_page_mask = ~(qemu_host_page_size - 1);
+    l1_phys_map = qemu_vmalloc(L1_SIZE * sizeof(void *));
+    memset(l1_phys_map, 0, L1_SIZE * sizeof(void *));
+}
+
+static inline PageDesc *page_find_alloc(unsigned int index)
+{
+    PageDesc **lp, *p;
+
+    lp = &l1_map[index >> L2_BITS];
+    p = *lp;
+    if (!p) {
+        /* allocate if not found */
+        p = qemu_malloc(sizeof(PageDesc) * L2_SIZE);
+        memset(p, 0, sizeof(PageDesc) * L2_SIZE);
+        *lp = p;
+    }
+    return p + (index & (L2_SIZE - 1));
+}
+
+static inline PageDesc *page_find(unsigned int index)
+{
+    PageDesc *p;
+
+    p = l1_map[index >> L2_BITS];
+    if (!p)
+        return 0;
+    return p + (index & (L2_SIZE - 1));
+}
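+
+/* Worked example (illustrative, assuming TARGET_PAGE_BITS == 12, hence
+   L1_BITS == 10 and L2_BITS == 10): a 32-bit address 0xABCD1234 has page
+   index 0xABCD1, which the two-level table splits as
+
+       l1_map[0xABCD1 >> 10]        -- L1 slot 0x2AF
+       ... + (0xABCD1 & 0x3FF)      -- L2 slot 0x0D1
+
+   page_find() returns 0 if the corresponding L2 block was never allocated. */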
+
+static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
+{
+    void **lp, **p;
+    PhysPageDesc *pd;
+
+    p = (void **)l1_phys_map;
+#if TARGET_PHYS_ADDR_SPACE_BITS > 32
+
+#if TARGET_PHYS_ADDR_SPACE_BITS > (32 + L1_BITS)
+#error unsupported TARGET_PHYS_ADDR_SPACE_BITS
+#endif
+    lp = p + ((index >> (L1_BITS + L2_BITS)) & (L1_SIZE - 1));
+    p = *lp;
+    if (!p) {
+        /* allocate if not found */
+        if (!alloc)
+            return NULL;
+        p = qemu_vmalloc(sizeof(void *) * L1_SIZE);
+        memset(p, 0, sizeof(void *) * L1_SIZE);
+        *lp = p;
+    }
+#endif
+    lp = p + ((index >> L2_BITS) & (L1_SIZE - 1));
+    pd = *lp;
+    if (!pd) {
+        int i;
+        /* allocate if not found */
+        if (!alloc)
+            return NULL;
+        pd = qemu_vmalloc(sizeof(PhysPageDesc) * L2_SIZE);
+        *lp = pd;
+        for (i = 0; i < L2_SIZE; i++)
+          pd[i].phys_offset = IO_MEM_UNASSIGNED;
+    }
+#if defined(VBOX) && !defined(VBOX_WITH_NEW_PHYS_CODE)
+    pd = ((PhysPageDesc *)pd) + (index & (L2_SIZE - 1));
+    if (RT_UNLIKELY((pd->phys_offset & ~TARGET_PAGE_MASK) == IO_MEM_RAM_MISSING))
+        remR3GrowDynRange(pd->phys_offset & TARGET_PAGE_MASK);
+    return pd;
+#else
+    return ((PhysPageDesc *)pd) + (index & (L2_SIZE - 1));
+#endif
+}
+
+static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
+{
+    return phys_page_find_alloc(index, 0);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+static void tlb_protect_code(ram_addr_t ram_addr);
+static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
+                                    target_ulong vaddr);
+#endif
+
+void cpu_exec_init(CPUState *env)
+{
+    CPUState **penv;
+    int cpu_index;
+
+    if (!code_gen_ptr) {
+        code_gen_ptr = code_gen_buffer;
+        page_init();
+        io_mem_init();
+    }
+    env->next_cpu = NULL;
+    penv = &first_cpu;
+    cpu_index = 0;
+    while (*penv != NULL) {
+        penv = (CPUState **)&(*penv)->next_cpu;
+        cpu_index++;
+    }
+    env->cpu_index = cpu_index;
+    *penv = env;
+}
+
+static inline void invalidate_page_bitmap(PageDesc *p)
+{
+    if (p->code_bitmap) {
+        qemu_free(p->code_bitmap);
+        p->code_bitmap = NULL;
+    }
+    p->code_write_count = 0;
+}
+
+/* set to NULL all the 'first_tb' fields in all PageDescs */
+static void page_flush_tb(void)
+{
+    int i, j;
+    PageDesc *p;
+
+    for(i = 0; i < L1_SIZE; i++) {
+        p = l1_map[i];
+        if (p) {
+            for(j = 0; j < L2_SIZE; j++) {
+                p->first_tb = NULL;
+                invalidate_page_bitmap(p);
+                p++;
+            }
+        }
+    }
+}
+
+/* flush all the translation blocks */
+/* XXX: tb_flush is currently not thread safe */
+void tb_flush(CPUState *env1)
+{
+    CPUState *env;
+#if defined(DEBUG_FLUSH)
+    printf("qemu: flush code_size=%d nb_tbs=%d avg_tb_size=%d\n",
+           code_gen_ptr - code_gen_buffer,
+           nb_tbs,
+           nb_tbs > 0 ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0);
+#endif
+    nb_tbs = 0;
+
+    for(env = first_cpu; env != NULL; env = env->next_cpu) {
+        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
+    }
+
+    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
+    page_flush_tb();
+
+    code_gen_ptr = code_gen_buffer;
+    /* XXX: flush processor icache at this point if cache flush is
+       expensive */
+    tb_flush_count++;
+}
+
+#ifdef DEBUG_TB_CHECK
+
+static void tb_invalidate_check(unsigned long address)
+{
+    TranslationBlock *tb;
+    int i;
+    address &= TARGET_PAGE_MASK;
+    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
+                  address >= tb->pc + tb->size)) {
+                printf("ERROR invalidate: address=%08lx PC=%08lx size=%04x\n",
+                       address, (long)tb->pc, tb->size);
+            }
+        }
+    }
+}
+
+/* verify that all the pages have correct rights for code */
+static void tb_page_check(void)
+{
+    TranslationBlock *tb;
+    int i, flags1, flags2;
+
+    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+            flags1 = page_get_flags(tb->pc);
+            flags2 = page_get_flags(tb->pc + tb->size - 1);
+            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
+                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
+                       (long)tb->pc, tb->size, flags1, flags2);
+            }
+        }
+    }
+}
+
+void tb_jmp_check(TranslationBlock *tb)
+{
+    TranslationBlock *tb1;
+    unsigned int n1;
+
+    /* suppress any remaining jumps to this TB */
+    tb1 = tb->jmp_first;
+    for(;;) {
+        n1 = (long)tb1 & 3;
+        tb1 = (TranslationBlock *)((long)tb1 & ~3);
+        if (n1 == 2)
+            break;
+        tb1 = tb1->jmp_next[n1];
+    }
+    /* check end of list */
+    if (tb1 != tb) {
+        printf("ERROR: jmp_list from 0x%08lx\n", (long)tb);
+    }
+}
+
+#endif
+
+/* invalidate one TB */
+static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
+                             int next_offset)
+{
+    TranslationBlock *tb1;
+    for(;;) {
+        tb1 = *ptb;
+        if (tb1 == tb) {
+            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
+            break;
+        }
+        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
+    }
+}
+
+static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
+{
+    TranslationBlock *tb1;
+    unsigned int n1;
+
+    for(;;) {
+        tb1 = *ptb;
+        n1 = (long)tb1 & 3;
+        tb1 = (TranslationBlock *)((long)tb1 & ~3);
+        if (tb1 == tb) {
+            *ptb = tb1->page_next[n1];
+            break;
+        }
+        ptb = &tb1->page_next[n1];
+    }
+}
+
+static inline void tb_jmp_remove(TranslationBlock *tb, int n)
+{
+    TranslationBlock *tb1, **ptb;
+    unsigned int n1;
+
+    ptb = &tb->jmp_next[n];
+    tb1 = *ptb;
+    if (tb1) {
+        /* find tb(n) in circular list */
+        for(;;) {
+            tb1 = *ptb;
+            n1 = (long)tb1 & 3;
+            tb1 = (TranslationBlock *)((long)tb1 & ~3);
+            if (n1 == n && tb1 == tb)
+                break;
+            if (n1 == 2) {
+                ptb = &tb1->jmp_first;
+            } else {
+                ptb = &tb1->jmp_next[n1];
+            }
+        }
+        /* now we can suppress tb(n) from the list */
+        *ptb = tb->jmp_next[n];
+
+        tb->jmp_next[n] = NULL;
+    }
+}
+
+/* reset the jump entry 'n' of a TB so that it is not chained to
+   another TB */
+static inline void tb_reset_jump(TranslationBlock *tb, int n)
+{
+    tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
+}
+
+static inline void tb_phys_invalidate(TranslationBlock *tb, unsigned int page_addr)
+{
+    CPUState *env;
+    PageDesc *p;
+    unsigned int h, n1;
+    target_ulong phys_pc;
+    TranslationBlock *tb1, *tb2;
+
+    /* remove the TB from the hash list */
+    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+    h = tb_phys_hash_func(phys_pc);
+    tb_remove(&tb_phys_hash[h], tb,
+              offsetof(TranslationBlock, phys_hash_next));
+
+    /* remove the TB from the page list */
+    if (tb->page_addr[0] != page_addr) {
+        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
+        tb_page_remove(&p->first_tb, tb);
+        invalidate_page_bitmap(p);
+    }
+    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
+        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
+        tb_page_remove(&p->first_tb, tb);
+        invalidate_page_bitmap(p);
+    }
+
+    tb_invalidated_flag = 1;
+
+    /* remove the TB from the hash list */
+    h = tb_jmp_cache_hash_func(tb->pc);
+    for(env = first_cpu; env != NULL; env = env->next_cpu) {
+        if (env->tb_jmp_cache[h] == tb)
+            env->tb_jmp_cache[h] = NULL;
+    }
+
+    /* suppress this TB from the two jump lists */
+    tb_jmp_remove(tb, 0);
+    tb_jmp_remove(tb, 1);
+
+    /* suppress any remaining jumps to this TB */
+    tb1 = tb->jmp_first;
+    for(;;) {
+        n1 = (long)tb1 & 3;
+        if (n1 == 2)
+            break;
+        tb1 = (TranslationBlock *)((long)tb1 & ~3);
+        tb2 = tb1->jmp_next[n1];
+        tb_reset_jump(tb1, n1);
+        tb1->jmp_next[n1] = NULL;
+        tb1 = tb2;
+    }
+    tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
+
+#ifndef VBOX
+    tb_phys_invalidate_count++;
+#endif /* !VBOX */
+}
+
+#ifdef VBOX
+void tb_invalidate_virt(CPUState *env, uint32_t eip)
+{
+# if 1
+    tb_flush(env);
+# else
+    uint8_t *cs_base, *pc;
+    unsigned int flags, h, phys_pc;
+    TranslationBlock *tb, **ptb;
+
+    flags = env->hflags;
+    flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
+    cs_base = env->segs[R_CS].base;
+    pc = cs_base + eip;
+
+    tb = tb_find(&ptb, (unsigned long)pc, (unsigned long)cs_base,
+                 flags);
+
+    if(tb)
+    {
+#  ifdef DEBUG
+        printf("invalidating TB (%08X) at %08X\n", tb, eip);
+#  endif
+        tb_invalidate(tb);
+        //Note: this will leak TBs, but the whole cache will be flushed
+        //      when it happens too often
+        tb->pc = 0;
+        tb->cs_base = 0;
+        tb->flags = 0;
+    }
+# endif
+}
+
+# ifdef VBOX_STRICT
+/**
+ * Gets the page offset.
+ */
+unsigned long get_phys_page_offset(target_ulong addr)
+{
+    PhysPageDesc *p = phys_page_find(addr >> TARGET_PAGE_BITS);
+    return p ? p->phys_offset : 0;
+}
+# endif /* VBOX_STRICT */
+#endif /* VBOX */
+
+static inline void set_bits(uint8_t *tab, int start, int len)
+{
+    int end, mask, end1;
+
+    end = start + len;
+    tab += start >> 3;
+    mask = 0xff << (start & 7);
+    if ((start & ~7) == (end & ~7)) {
+        if (start < end) {
+            mask &= ~(0xff << (end & 7));
+            *tab |= mask;
+        }
+    } else {
+        *tab++ |= mask;
+        start = (start + 8) & ~7;
+        end1 = end & ~7;
+        while (start < end1) {
+            *tab++ = 0xff;
+            start += 8;
+        }
+        if (start < end) {
+            mask = ~(0xff << (end & 7));
+            *tab |= mask;
+        }
+    }
+}
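+
+/* Worked example (added for clarity): set_bits(bitmap, 3, 10) marks bits
+   3..12 inclusive.  The leading partial byte gets mask 0xf8 (bits 3..7),
+   there is no full byte in between, and the trailing partial byte gets mask
+   0x1f (bits 8..12), matching the start/end arithmetic above. */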
+
+static void build_page_bitmap(PageDesc *p)
+{
+    int n, tb_start, tb_end;
+    TranslationBlock *tb;
+
+    p->code_bitmap = qemu_malloc(TARGET_PAGE_SIZE / 8);
+    if (!p->code_bitmap)
+        return;
+    memset(p->code_bitmap, 0, TARGET_PAGE_SIZE / 8);
+
+    tb = p->first_tb;
+    while (tb != NULL) {
+        n = (long)tb & 3;
+        tb = (TranslationBlock *)((long)tb & ~3);
+        /* NOTE: this is subtle as a TB may span two physical pages */
+        if (n == 0) {
+            /* NOTE: tb_end may be after the end of the page, but
+               it is not a problem */
+            tb_start = tb->pc & ~TARGET_PAGE_MASK;
+            tb_end = tb_start + tb->size;
+            if (tb_end > TARGET_PAGE_SIZE)
+                tb_end = TARGET_PAGE_SIZE;
+        } else {
+            tb_start = 0;
+            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+        }
+        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
+        tb = tb->page_next[n];
+    }
+}
+
+#ifdef TARGET_HAS_PRECISE_SMC
+
+static void tb_gen_code(CPUState *env,
+                        target_ulong pc, target_ulong cs_base, int flags,
+                        int cflags)
+{
+    TranslationBlock *tb;
+    uint8_t *tc_ptr;
+    target_ulong phys_pc, phys_page2, virt_page2;
+    int code_gen_size;
+
+    phys_pc = get_phys_addr_code(env, pc);
+    tb = tb_alloc(pc);
+    if (!tb) {
+        /* flush must be done */
+        tb_flush(env);
+        /* cannot fail at this point */
+        tb = tb_alloc(pc);
+    }
+    tc_ptr = code_gen_ptr;
+    tb->tc_ptr = tc_ptr;
+    tb->cs_base = cs_base;
+    tb->flags = flags;
+    tb->cflags = cflags;
+    cpu_gen_code(env, tb, CODE_GEN_MAX_SIZE, &code_gen_size);
+    code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+
+    /* check next page if needed */
+    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
+    phys_page2 = -1;
+    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
+        phys_page2 = get_phys_addr_code(env, virt_page2);
+    }
+    tb_link_phys(tb, phys_pc, phys_page2);
+}
+#endif
+
+/* invalidate all TBs which intersect with the target physical page
+   starting in range [start;end[. NOTE: start and end must refer to
+   the same physical page. 'is_cpu_write_access' should be true if called
+   from a real cpu write access: the virtual CPU will exit the current
+   TB if code is modified inside this TB. */
+void tb_invalidate_phys_page_range(target_ulong start, target_ulong end,
+                                   int is_cpu_write_access)
+{
+    int n, current_tb_modified, current_tb_not_found, current_flags;
+    CPUState *env = cpu_single_env;
+    PageDesc *p;
+    TranslationBlock *tb, *tb_next, *current_tb, *saved_tb;
+    target_ulong tb_start, tb_end;
+    target_ulong current_pc, current_cs_base;
+
+    p = page_find(start >> TARGET_PAGE_BITS);
+    if (!p)
+        return;
+    if (!p->code_bitmap &&
+        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
+        is_cpu_write_access) {
+        /* build code bitmap */
+        build_page_bitmap(p);
+    }
+
+    /* we remove all the TBs in the range [start, end[ */
+    /* XXX: see if in some cases it could be faster to invalidate all the code */
+    current_tb_not_found = is_cpu_write_access;
+    current_tb_modified = 0;
+    current_tb = NULL; /* avoid warning */
+    current_pc = 0; /* avoid warning */
+    current_cs_base = 0; /* avoid warning */
+    current_flags = 0; /* avoid warning */
+    tb = p->first_tb;
+    while (tb != NULL) {
+        n = (long)tb & 3;
+        tb = (TranslationBlock *)((long)tb & ~3);
+        tb_next = tb->page_next[n];
+        /* NOTE: this is subtle as a TB may span two physical pages */
+        if (n == 0) {
+            /* NOTE: tb_end may be after the end of the page, but
+               it is not a problem */
+            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+            tb_end = tb_start + tb->size;
+        } else {
+            tb_start = tb->page_addr[1];
+            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+        }
+        if (!(tb_end <= start || tb_start >= end)) {
+#ifdef TARGET_HAS_PRECISE_SMC
+            if (current_tb_not_found) {
+                current_tb_not_found = 0;
+                current_tb = NULL;
+                if (env->mem_write_pc) {
+                    /* now we have a real cpu fault */
+                    current_tb = tb_find_pc(env->mem_write_pc);
+                }
+            }
+            if (current_tb == tb &&
+                !(current_tb->cflags & CF_SINGLE_INSN)) {
+                /* If we are modifying the current TB, we must stop
+                its execution. We could be more precise by checking
+                that the modification is after the current PC, but it
+                would require a specialized function to partially
+                restore the CPU state */
+
+                current_tb_modified = 1;
+                cpu_restore_state(current_tb, env,
+                                  env->mem_write_pc, NULL);
+#if defined(TARGET_I386)
+                current_flags = env->hflags;
+                current_flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
+                current_cs_base = (target_ulong)env->segs[R_CS].base;
+                current_pc = current_cs_base + env->eip;
+#else
+#error unsupported CPU
+#endif
+            }
+#endif /* TARGET_HAS_PRECISE_SMC */
+            /* we need to do that to handle the case where a signal
+               occurs while doing tb_phys_invalidate() */
+            saved_tb = NULL;
+            if (env) {
+                saved_tb = env->current_tb;
+                env->current_tb = NULL;
+            }
+            tb_phys_invalidate(tb, -1);
+            if (env) {
+                env->current_tb = saved_tb;
+                if (env->interrupt_request && env->current_tb)
+                    cpu_interrupt(env, env->interrupt_request);
+            }
+        }
+        tb = tb_next;
+    }
+#if !defined(CONFIG_USER_ONLY)
+    /* if no code remaining, no need to continue to use slow writes */
+    if (!p->first_tb) {
+        invalidate_page_bitmap(p);
+        if (is_cpu_write_access) {
+            tlb_unprotect_code_phys(env, start, env->mem_write_vaddr);
+        }
+    }
+#endif
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (current_tb_modified) {
+        /* we generate a block containing just the instruction
+           modifying the memory. It will ensure that it cannot modify
+           itself */
+        env->current_tb = NULL;
+        tb_gen_code(env, current_pc, current_cs_base, current_flags,
+                    CF_SINGLE_INSN);
+        cpu_resume_from_signal(env, NULL);
+    }
+#endif
+}
+
+/* len must be <= 8 and start must be a multiple of len */
+static inline void tb_invalidate_phys_page_fast(target_ulong start, int len)
+{
+    PageDesc *p;
+    int offset, b;
+#if 0
+    if (1) {
+        if (loglevel) {
+            fprintf(logfile, "modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
+                   cpu_single_env->mem_write_vaddr, len,
+                   cpu_single_env->eip,
+                   cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
+        }
+    }
+#endif
+    p = page_find(start >> TARGET_PAGE_BITS);
+    if (!p)
+        return;
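+    /* note: p->code_bitmap, when present, holds one bit per byte of the
+       page, set where translated code lives; the test below checks whether
+       any of the 'len' bytes starting at 'start' hit code and only then
+       falls back to the full range invalidation */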
+    if (p->code_bitmap) {
+        offset = start & ~TARGET_PAGE_MASK;
+        b = p->code_bitmap[offset >> 3] >> (offset & 7);
+        if (b & ((1 << len) - 1))
+            goto do_invalidate;
+    } else {
+    do_invalidate:
+        tb_invalidate_phys_page_range(start, start + len, 1);
+    }
+}
+
+#if !defined(CONFIG_SOFTMMU)
+static void tb_invalidate_phys_page(target_ulong addr,
+                                    unsigned long pc, void *puc)
+{
+    int n, current_flags, current_tb_modified;
+    target_ulong current_pc, current_cs_base;
+    PageDesc *p;
+    TranslationBlock *tb, *current_tb;
+#ifdef TARGET_HAS_PRECISE_SMC
+    CPUState *env = cpu_single_env;
+#endif
+
+    addr &= TARGET_PAGE_MASK;
+    p = page_find(addr >> TARGET_PAGE_BITS);
+    if (!p)
+        return;
+    tb = p->first_tb;
+    current_tb_modified = 0;
+    current_tb = NULL;
+    current_pc = 0; /* avoid warning */
+    current_cs_base = 0; /* avoid warning */
+    current_flags = 0; /* avoid warning */
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (tb && pc != 0) {
+        current_tb = tb_find_pc(pc);
+    }
+#endif
+    while (tb != NULL) {
+        n = (long)tb & 3;
+        tb = (TranslationBlock *)((long)tb & ~3);
+#ifdef TARGET_HAS_PRECISE_SMC
+        if (current_tb == tb &&
+            !(current_tb->cflags & CF_SINGLE_INSN)) {
+                /* If we are modifying the current TB, we must stop
+                   its execution. We could be more precise by checking
+                   that the modification is after the current PC, but it
+                   would require a specialized function to partially
+                   restore the CPU state */
+
+            current_tb_modified = 1;
+            cpu_restore_state(current_tb, env, pc, puc);
+#if defined(TARGET_I386)
+            current_flags = env->hflags;
+            current_flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
+            current_cs_base = (target_ulong)env->segs[R_CS].base;
+            current_pc = current_cs_base + env->eip;
+#else
+#error unsupported CPU
+#endif
+        }
+#endif /* TARGET_HAS_PRECISE_SMC */
+        tb_phys_invalidate(tb, addr);
+        tb = tb->page_next[n];
+    }
+    p->first_tb = NULL;
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (current_tb_modified) {
+        /* we generate a block containing just the instruction
+           modifying the memory; this ensures that it cannot modify
+           itself */
+        env->current_tb = NULL;
+        tb_gen_code(env, current_pc, current_cs_base, current_flags,
+                    CF_SINGLE_INSN);
+        cpu_resume_from_signal(env, puc);
+    }
+#endif
+}
+#endif
+
+/* add the tb in the target page and protect it if necessary */
+static inline void tb_alloc_page(TranslationBlock *tb,
+                                 unsigned int n, target_ulong page_addr)
+{
+    PageDesc *p;
+    TranslationBlock *last_first_tb;
+
+    tb->page_addr[n] = page_addr;
+    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS);
+    tb->page_next[n] = p->first_tb;
+    last_first_tb = p->first_tb;
+    p->first_tb = (TranslationBlock *)((long)tb | n);
+    invalidate_page_bitmap(p);
+
+#if defined(TARGET_HAS_SMC) || 1
+
+#if defined(CONFIG_USER_ONLY)
+    if (p->flags & PAGE_WRITE) {
+        target_ulong addr;
+        PageDesc *p2;
+        int prot;
+
+        /* force the host page as non writable (writes will have a
+           page fault + mprotect overhead) */
+        page_addr &= qemu_host_page_mask;
+        prot = 0;
+        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
+            addr += TARGET_PAGE_SIZE) {
+
+            p2 = page_find (addr >> TARGET_PAGE_BITS);
+            if (!p2)
+                continue;
+            prot |= p2->flags;
+            p2->flags &= ~PAGE_WRITE;
+            page_get_flags(addr);
+        }
+        mprotect(g2h(page_addr), qemu_host_page_size,
+                 (prot & PAGE_BITS) & ~PAGE_WRITE);
+#ifdef DEBUG_TB_INVALIDATE
+        printf("protecting code page: 0x%08lx\n",
+               page_addr);
+#endif
+    }
+#else
+    /* if some code is already present, the page is already protected,
+       so we only need to protect it when this is the first TB
+       allocated in the physical page */
+    if (!last_first_tb) {
+        tlb_protect_code(page_addr);
+    }
+#endif
+
+#endif /* TARGET_HAS_SMC */
+}
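+
+/* note: two self-modifying-code protection strategies are used above: in
+   user mode the whole host page is mprotect()ed read-only (the loop walks
+   every target page inside the host page since qemu_host_page_size may be
+   larger than TARGET_PAGE_SIZE), while in system mode tlb_protect_code()
+   clears CODE_DIRTY_FLAG for the page so that writes take the notdirty
+   slow path */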
+
+/* Allocate a new translation block. Flush the translation buffer if
+   too many translation blocks or too much generated code. */
+TranslationBlock *tb_alloc(target_ulong pc)
+{
+    TranslationBlock *tb;
+
+    if (nb_tbs >= CODE_GEN_MAX_BLOCKS ||
+        (code_gen_ptr - code_gen_buffer) >= CODE_GEN_BUFFER_MAX_SIZE)
+        return NULL;
+    tb = &tbs[nb_tbs++];
+    tb->pc = pc;
+    tb->cflags = 0;
+    return tb;
+}
+
+/* add a new TB and link it to the physical page tables. phys_page2 is
+   (-1) to indicate that only one page contains the TB. */
+void tb_link_phys(TranslationBlock *tb,
+                  target_ulong phys_pc, target_ulong phys_page2)
+{
+    unsigned int h;
+    TranslationBlock **ptb;
+
+    /* add in the physical hash table */
+    h = tb_phys_hash_func(phys_pc);
+    ptb = &tb_phys_hash[h];
+    tb->phys_hash_next = *ptb;
+    *ptb = tb;
+
+    /* add in the page list */
+    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
+    if (phys_page2 != -1)
+        tb_alloc_page(tb, 1, phys_page2);
+    else
+        tb->page_addr[1] = -1;
+
+    tb->jmp_first = (TranslationBlock *)((long)tb | 2);
+    tb->jmp_next[0] = NULL;
+    tb->jmp_next[1] = NULL;
+#ifdef USE_CODE_COPY
+    tb->cflags &= ~CF_FP_USED;
+    if (tb->cflags & CF_TB_FP_USED)
+        tb->cflags |= CF_FP_USED;
+#endif
+
+    /* init original jump addresses */
+    if (tb->tb_next_offset[0] != 0xffff)
+        tb_reset_jump(tb, 0);
+    if (tb->tb_next_offset[1] != 0xffff)
+        tb_reset_jump(tb, 1);
+
+#ifdef DEBUG_TB_CHECK
+    tb_page_check();
+#endif
+}
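+
+/* note: tb_phys_hash chains TBs by the hash of their physical PC, and
+   jmp_first is initialised to the TB itself tagged with 2, which marks the
+   head of the circular list of TBs jumping into this one (see
+   tb_reset_jump_recursive2() below, which stops its walk at tag 2) */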
+
+/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
+   tb[1].tc_ptr. Return NULL if not found */
+TranslationBlock *tb_find_pc(unsigned long tc_ptr)
+{
+    int m_min, m_max, m;
+    unsigned long v;
+    TranslationBlock *tb;
+
+    if (nb_tbs <= 0)
+        return NULL;
+    if (tc_ptr < (unsigned long)code_gen_buffer ||
+        tc_ptr >= (unsigned long)code_gen_ptr)
+        return NULL;
+    /* binary search (cf Knuth) */
+    m_min = 0;
+    m_max = nb_tbs - 1;
+    while (m_min <= m_max) {
+        m = (m_min + m_max) >> 1;
+        tb = &tbs[m];
+        v = (unsigned long)tb->tc_ptr;
+        if (v == tc_ptr)
+            return tb;
+        else if (tc_ptr < v) {
+            m_max = m - 1;
+        } else {
+            m_min = m + 1;
+        }
+    }
+    return &tbs[m_max];
+}
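+
+/* note: this relies on tbs[] being filled in code generation order from
+   code_gen_buffer, so the tc_ptr values are already sorted and a plain
+   binary search can return the enclosing TB (&tbs[m_max]) even when
+   tc_ptr points into the middle of its generated code */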
+
+static void tb_reset_jump_recursive(TranslationBlock *tb);
+
+static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
+{
+    TranslationBlock *tb1, *tb_next, **ptb;
+    unsigned int n1;
+
+    tb1 = tb->jmp_next[n];
+    if (tb1 != NULL) {
+        /* find head of list */
+        for(;;) {
+            n1 = (long)tb1 & 3;
+            tb1 = (TranslationBlock *)((long)tb1 & ~3);
+            if (n1 == 2)
+                break;
+            tb1 = tb1->jmp_next[n1];
+        }
+        /* we are now sure that tb jumps to tb1 */
+        tb_next = tb1;
+
+        /* remove tb from the jmp_first list */
+        ptb = &tb_next->jmp_first;
+        for(;;) {
+            tb1 = *ptb;
+            n1 = (long)tb1 & 3;
+            tb1 = (TranslationBlock *)((long)tb1 & ~3);
+            if (n1 == n && tb1 == tb)
+                break;
+            ptb = &tb1->jmp_next[n1];
+        }
+        *ptb = tb->jmp_next[n];
+        tb->jmp_next[n] = NULL;
+
+        /* suppress the jump to next tb in generated code */
+        tb_reset_jump(tb, n);
+
+        /* suppress jumps in the tb on which we could have jumped */
+        tb_reset_jump_recursive(tb_next);
+    }
+}
+
+static void tb_reset_jump_recursive(TranslationBlock *tb)
+{
+    tb_reset_jump_recursive2(tb, 0);
+    tb_reset_jump_recursive2(tb, 1);
+}
+
+#if defined(TARGET_HAS_ICE)
+static void breakpoint_invalidate(CPUState *env, target_ulong pc)
+{
+    target_ulong addr, pd;
+    ram_addr_t ram_addr;
+    PhysPageDesc *p;
+
+    addr = cpu_get_phys_page_debug(env, pc);
+    p = phys_page_find(addr >> TARGET_PAGE_BITS);
+    if (!p) {
+        pd = IO_MEM_UNASSIGNED;
+    } else {
+        pd = p->phys_offset;
+    }
+    ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
+    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
+}
+#endif
+
+/* add a breakpoint. EXCP_DEBUG is returned by the CPU loop if a
+   breakpoint is reached */
+int cpu_breakpoint_insert(CPUState *env, target_ulong pc)
+{
+#if defined(TARGET_HAS_ICE)
+    int i;
+
+    for(i = 0; i < env->nb_breakpoints; i++) {
+        if (env->breakpoints[i] == pc)
+            return 0;
+    }
+
+    if (env->nb_breakpoints >= MAX_BREAKPOINTS)
+        return -1;
+    env->breakpoints[env->nb_breakpoints++] = pc;
+
+    breakpoint_invalidate(env, pc);
+    return 0;
+#else
+    return -1;
+#endif
+}
+
+/* remove a breakpoint */
+int cpu_breakpoint_remove(CPUState *env, target_ulong pc)
+{
+#if defined(TARGET_HAS_ICE)
+    int i;
+    for(i = 0; i < env->nb_breakpoints; i++) {
+        if (env->breakpoints[i] == pc)
+            goto found;
+    }
+    return -1;
+ found:
+    env->nb_breakpoints--;
+    if (i < env->nb_breakpoints)
+        env->breakpoints[i] = env->breakpoints[env->nb_breakpoints];
+
+    breakpoint_invalidate(env, pc);
+    return 0;
+#else
+    return -1;
+#endif
+}
+
+/* enable or disable single step mode. EXCP_DEBUG is returned by the
+   CPU loop after each instruction */
+void cpu_single_step(CPUState *env, int enabled)
+{
+#if defined(TARGET_HAS_ICE)
+    if (env->singlestep_enabled != enabled) {
+        env->singlestep_enabled = enabled;
+        /* must flush all the translated code to avoid inconsistencies */
+        /* XXX: only flush what is necessary */
+        tb_flush(env);
+    }
+#endif
+}
+
+#ifndef VBOX
+/* enable or disable low level logging */
+void cpu_set_log(int log_flags)
+{
+    loglevel = log_flags;
+    if (loglevel && !logfile) {
+        logfile = fopen(logfilename, "w");
+        if (!logfile) {
+            perror(logfilename);
+            _exit(1);
+        }
+#if !defined(CONFIG_SOFTMMU)
+        /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
+        {
+            static uint8_t logfile_buf[4096];
+            setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
+        }
+#else
+        setvbuf(logfile, NULL, _IOLBF, 0);
+#endif
+    }
+}
+
+void cpu_set_log_filename(const char *filename)
+{
+    logfilename = strdup(filename);
+}
+#endif /* !VBOX */
+
+/* mask must never be zero, except for A20 change call */
+void cpu_interrupt(CPUState *env, int mask)
+{
+    TranslationBlock *tb;
+    static int interrupt_lock;
+
+#ifdef VBOX
+    VM_ASSERT_EMT(env->pVM);
+    ASMAtomicOrS32(&env->interrupt_request, mask);
+#else /* !VBOX */
+    env->interrupt_request |= mask;
+#endif /* !VBOX */
+    /* if the cpu is currently executing code, we must unlink it and
+       all the potentially executing TBs */
+    tb = env->current_tb;
+    if (tb && !testandset(&interrupt_lock)) {
+        env->current_tb = NULL;
+        tb_reset_jump_recursive(tb);
+        interrupt_lock = 0;
+    }
+}
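+
+/* note: the static interrupt_lock above only guards the TB jump-list
+   unlinking; if testandset() fails the caller simply skips the unlink.
+   The interrupt_request update itself is done atomically in the VBOX
+   build (ASMAtomicOrS32) and with a plain OR otherwise */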
+
+void cpu_reset_interrupt(CPUState *env, int mask)
+{
+#ifdef VBOX
+    /*
+     * Note: the current implementation can be executed by another thread without problems; make sure this remains true
+     *       for future changes!
+     */
+    ASMAtomicAndS32(&env->interrupt_request, ~mask);
+#else /* !VBOX */
+    env->interrupt_request &= ~mask;
+#endif /* !VBOX */
+}
+
+#ifndef VBOX
+CPULogItem cpu_log_items[] = {
+    { CPU_LOG_TB_OUT_ASM, "out_asm",
+      "show generated host assembly code for each compiled TB" },
+    { CPU_LOG_TB_IN_ASM, "in_asm",
+      "show target assembly code for each compiled TB" },
+    { CPU_LOG_TB_OP, "op",
+      "show micro ops for each compiled TB (only usable if 'in_asm' used)" },
+#ifdef TARGET_I386
+    { CPU_LOG_TB_OP_OPT, "op_opt",
+      "show micro ops after optimization for each compiled TB" },
+#endif
+    { CPU_LOG_INT, "int",
+      "show interrupts/exceptions in short format" },
+    { CPU_LOG_EXEC, "exec",
+      "show trace before each executed TB (lots of logs)" },
+    { CPU_LOG_TB_CPU, "cpu",
+      "show CPU state before bloc translation" },
+#ifdef TARGET_I386
+    { CPU_LOG_PCALL, "pcall",
+      "show protected mode far calls/returns/exceptions" },
+#endif
+#ifdef DEBUG_IOPORT
+    { CPU_LOG_IOPORT, "ioport",
+      "show all i/o ports accesses" },
+#endif
+    { 0, NULL, NULL },
+};
+
+static int cmp1(const char *s1, int n, const char *s2)
+{
+    if (strlen(s2) != n)
+        return 0;
+    return memcmp(s1, s2, n) == 0;
+}
+
+/* takes a comma-separated list of log masks. Returns 0 on error. */
+int cpu_str_to_log_mask(const char *str)
+{
+    CPULogItem *item;
+    int mask;
+    const char *p, *p1;
+
+    p = str;
+    mask = 0;
+    for(;;) {
+        p1 = strchr(p, ',');
+        if (!p1)
+            p1 = p + strlen(p);
+	if(cmp1(p,p1-p,"all")) {
+		for(item = cpu_log_items; item->mask != 0; item++) {
+			mask |= item->mask;
+		}
+	} else {
+        for(item = cpu_log_items; item->mask != 0; item++) {
+            if (cmp1(p, p1 - p, item->name))
+                goto found;
+        }
+        return 0;
+	}
+    found:
+        mask |= item->mask;
+        if (*p1 != ',')
+            break;
+        p = p1 + 1;
+    }
+    return mask;
+}
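+
+/* illustrative use (hypothetical caller, sketch only): parsing a log
+   option string could look roughly like
+       mask = cpu_str_to_log_mask("in_asm,int");
+       if (!mask) { / * unknown item * / } else cpu_set_log(mask);
+   where the item names come from cpu_log_items[] above and "all" selects
+   every mask */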
+#endif /* !VBOX */
+
+#ifndef VBOX /* VBOX: we have our own routine. */
+void cpu_abort(CPUState *env, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    fprintf(stderr, "qemu: fatal: ");
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, "\n");
+#ifdef TARGET_I386
+    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
+#else
+    cpu_dump_state(env, stderr, fprintf, 0);
+#endif
+    va_end(ap);
+    abort();
+}
+#endif /* !VBOX */
+
+#if !defined(CONFIG_USER_ONLY)
+
+/* NOTE: if flush_global is true, also flush global entries (not
+   implemented yet) */
+void tlb_flush(CPUState *env, int flush_global)
+{
+    int i;
+
+#if defined(DEBUG_TLB)
+    printf("tlb_flush:\n");
+#endif
+    /* must reset current TB so that interrupts cannot modify the
+       links while we are modifying them */
+    env->current_tb = NULL;
+
+    for(i = 0; i < CPU_TLB_SIZE; i++) {
+        env->tlb_table[0][i].addr_read = -1;
+        env->tlb_table[0][i].addr_write = -1;
+        env->tlb_table[0][i].addr_code = -1;
+        env->tlb_table[1][i].addr_read = -1;
+        env->tlb_table[1][i].addr_write = -1;
+        env->tlb_table[1][i].addr_code = -1;
+    }
+
+    memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
+
+#if !defined(CONFIG_SOFTMMU)
+    munmap((void *)MMAP_AREA_START, MMAP_AREA_END - MMAP_AREA_START);
+#endif
+#ifdef VBOX
+    /* inform raw mode about TLB flush */
+    remR3FlushTLB(env, flush_global);
+#endif
+#ifdef USE_KQEMU
+    if (env->kqemu_enabled) {
+        kqemu_flush(env, flush_global);
+    }
+#endif
+    tlb_flush_count++;
+}
+
+static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
+{
+    if (addr == (tlb_entry->addr_read &
+                 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
+        addr == (tlb_entry->addr_write &
+                 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
+        addr == (tlb_entry->addr_code &
+                 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        tlb_entry->addr_read = -1;
+        tlb_entry->addr_write = -1;
+        tlb_entry->addr_code = -1;
+    }
+}
+
+void tlb_flush_page(CPUState *env, target_ulong addr)
+{
+    int i;
+    TranslationBlock *tb;
+
+#if defined(DEBUG_TLB)
+    printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
+#endif
+    /* must reset current TB so that interrupts cannot modify the
+       links while we are modifying them */
+    env->current_tb = NULL;
+
+    addr &= TARGET_PAGE_MASK;
+    i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    tlb_flush_entry(&env->tlb_table[0][i], addr);
+    tlb_flush_entry(&env->tlb_table[1][i], addr);
+
+    /* Discard jump cache entries for any tb which might potentially
+       overlap the flushed page.  */
+    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
+    memset (&env->tb_jmp_cache[i], 0, TB_JMP_PAGE_SIZE * sizeof(tb));
+
+    i = tb_jmp_cache_hash_page(addr);
+    memset (&env->tb_jmp_cache[i], 0, TB_JMP_PAGE_SIZE * sizeof(tb));
+
+#if !defined(CONFIG_SOFTMMU)
+    if (addr < MMAP_AREA_END)
+        munmap((void *)addr, TARGET_PAGE_SIZE);
+#endif
+#ifdef VBOX
+    /* inform raw mode about TLB page flush */
+    remR3FlushPage(env, addr);
+#endif /* VBOX */
+#ifdef USE_KQEMU
+    if (env->kqemu_enabled) {
+        kqemu_flush_page(env, addr);
+    }
+#endif
+}
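+
+/* note: two tb_jmp_cache pages are cleared above (addr and the page
+   before it) because a TB may span two pages, so a TB starting on the
+   preceding page can still overlap the flushed one */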
+
+/* update the TLBs so that writes to code in the virtual page 'addr'
+   can be detected */
+static void tlb_protect_code(ram_addr_t ram_addr)
+{
+    cpu_physical_memory_reset_dirty(ram_addr,
+                                    ram_addr + TARGET_PAGE_SIZE,
+                                    CODE_DIRTY_FLAG);
+#if defined(VBOX) && defined(REM_MONITOR_CODE_PAGES)
+    /** @todo Retest this? This function has changed... */
+    remR3ProtectCode(cpu_single_env, ram_addr);
+#endif
+}
+
+/* update the TLB so that writes in physical page 'phys_addr' are no longer
+   tested for self-modifying code */
+static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
+                                    target_ulong vaddr)
+{
+#ifdef VBOX
+    if (RT_LIKELY((ram_addr >> TARGET_PAGE_BITS) < phys_ram_dirty_size))
+#endif
+    phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] |= CODE_DIRTY_FLAG;
+}
+
+static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
+                                         unsigned long start, unsigned long length)
+{
+    unsigned long addr;
+    if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
+        addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
+        if ((addr - start) < length) {
+            tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | IO_MEM_NOTDIRTY;
+        }
+    }
+}
+
+void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
+                                     int dirty_flags)
+{
+    CPUState *env;
+    unsigned long length, start1;
+    int i, mask, len;
+    uint8_t *p;
+
+    start &= TARGET_PAGE_MASK;
+    end = TARGET_PAGE_ALIGN(end);
+
+    length = end - start;
+    if (length == 0)
+        return;
+    len = length >> TARGET_PAGE_BITS;
+#ifdef USE_KQEMU
+    /* XXX: should not depend on cpu context */
+    env = first_cpu;
+    if (env->kqemu_enabled) {
+        ram_addr_t addr;
+        addr = start;
+        for(i = 0; i < len; i++) {
+            kqemu_set_notdirty(env, addr);
+            addr += TARGET_PAGE_SIZE;
+        }
+    }
+#endif
+    mask = ~dirty_flags;
+    p = phys_ram_dirty + (start >> TARGET_PAGE_BITS);
+#ifdef VBOX
+    if (RT_LIKELY((start >> TARGET_PAGE_BITS) < phys_ram_dirty_size))
+#endif
+    for(i = 0; i < len; i++)
+        p[i] &= mask;
+
+    /* we modify the TLB cache so that the dirty bit will be set again
+       when accessing the range */
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+    start1 = start;
+#elif !defined(VBOX)
+    start1 = start + (unsigned long)phys_ram_base;
+#else
+    start1 = (unsigned long)remR3GCPhys2HCVirt(first_cpu, start);
+#endif
+    for(env = first_cpu; env != NULL; env = env->next_cpu) {
+        for(i = 0; i < CPU_TLB_SIZE; i++)
+            tlb_reset_dirty_range(&env->tlb_table[0][i], start1, length);
+        for(i = 0; i < CPU_TLB_SIZE; i++)
+            tlb_reset_dirty_range(&env->tlb_table[1][i], start1, length);
+    }
+
+#if !defined(CONFIG_SOFTMMU)
+#ifdef VBOX /**@todo remove this check */
+# error "We shouldn't get here..."
+#endif
+    /* XXX: this is expensive */
+    {
+        VirtPageDesc *p;
+        int j;
+        target_ulong addr;
+
+        for(i = 0; i < L1_SIZE; i++) {
+            p = l1_virt_map[i];
+            if (p) {
+                addr = i << (TARGET_PAGE_BITS + L2_BITS);
+                for(j = 0; j < L2_SIZE; j++) {
+                    if (p->valid_tag == virt_valid_tag &&
+                        p->phys_addr >= start && p->phys_addr < end &&
+                        (p->prot & PROT_WRITE)) {
+                        if (addr < MMAP_AREA_END) {
+                            mprotect((void *)addr, TARGET_PAGE_SIZE,
+                                     p->prot & ~PROT_WRITE);
+                        }
+                    }
+                    addr += TARGET_PAGE_SIZE;
+                    p++;
+                }
+            }
+        }
+    }
+#endif
+}
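+
+/* note: start1 above is 'start' translated into the same address space the
+   TLB addend uses (host virtual normally, guest physical when
+   REM_PHYS_ADDR_IN_TLB is defined), so tlb_reset_dirty_range() can compare
+   it directly against addr_write + addend */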
+
+static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
+{
+    ram_addr_t ram_addr;
+
+    if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
+        /* RAM case */
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+        ram_addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
+#elif !defined(VBOX)
+        ram_addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) +
+            tlb_entry->addend - (unsigned long)phys_ram_base;
+#else
+        ram_addr = remR3HCVirt2GCPhys(first_cpu, (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend);
+#endif
+        if (!cpu_physical_memory_is_dirty(ram_addr)) {
+            tlb_entry->addr_write |= IO_MEM_NOTDIRTY;
+        }
+    }
+}
+
+/* update the TLB according to the current state of the dirty bits */
+void cpu_tlb_update_dirty(CPUState *env)
+{
+    int i;
+    for(i = 0; i < CPU_TLB_SIZE; i++)
+        tlb_update_dirty(&env->tlb_table[0][i]);
+    for(i = 0; i < CPU_TLB_SIZE; i++)
+        tlb_update_dirty(&env->tlb_table[1][i]);
+}
+
+static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry,
+                                  unsigned long start)
+{
+    unsigned long addr;
+    if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_NOTDIRTY) {
+        addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
+        if (addr == start) {
+            tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | IO_MEM_RAM;
+        }
+    }
+}
+
+/* update the TLB corresponding to virtual page vaddr and phys addr
+   addr so that it is no longer dirty */
+static inline void tlb_set_dirty(CPUState *env,
+                                 unsigned long addr, target_ulong vaddr)
+{
+    int i;
+
+    addr &= TARGET_PAGE_MASK;
+    i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    tlb_set_dirty1(&env->tlb_table[0][i], addr);
+    tlb_set_dirty1(&env->tlb_table[1][i], addr);
+}
+
+/* add a new TLB entry. At most one entry for a given virtual address
+   is permitted. Return 0 if OK or 2 if the page could not be mapped
+   (can only happen in non SOFTMMU mode for I/O pages or pages
+   conflicting with the host address space). */
+int tlb_set_page_exec(CPUState *env, target_ulong vaddr,
+                      target_phys_addr_t paddr, int prot,
+                      int is_user, int is_softmmu)
+{
+    PhysPageDesc *p;
+    unsigned long pd;
+    unsigned int index;
+    target_ulong address;
+    target_phys_addr_t addend;
+    int ret;
+    CPUTLBEntry *te;
+
+    p = phys_page_find(paddr >> TARGET_PAGE_BITS);
+    if (!p) {
+        pd = IO_MEM_UNASSIGNED;
+    } else {
+        pd = p->phys_offset;
+    }
+#if defined(DEBUG_TLB)
+    printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x%08x prot=%x u=%d smmu=%d pd=0x%08lx\n",
+           vaddr, (int)paddr, prot, is_user, is_softmmu, pd);
+#endif
+
+    ret = 0;
+#if !defined(CONFIG_SOFTMMU)
+    if (is_softmmu)
+#endif
+    {
+        if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
+            /* IO memory case */
+            address = vaddr | pd;
+            addend = paddr;
+        } else {
+            /* standard memory */
+            address = vaddr;
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+            addend = pd & TARGET_PAGE_MASK;
+#elif !defined(VBOX)
+            addend = (unsigned long)phys_ram_base + (pd & TARGET_PAGE_MASK);
+#else
+            addend = (unsigned long)remR3GCPhys2HCVirt(env, pd & TARGET_PAGE_MASK);
+#endif
+        }
+
+        index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+        addend -= vaddr;
+        te = &env->tlb_table[is_user][index];
+        te->addend = addend;
+        if (prot & PAGE_READ) {
+            te->addr_read = address;
+        } else {
+            te->addr_read = -1;
+        }
+        if (prot & PAGE_EXEC) {
+            te->addr_code = address;
+        } else {
+            te->addr_code = -1;
+        }
+        if (prot & PAGE_WRITE) {
+            if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
+                (pd & IO_MEM_ROMD)) {
+                /* write access calls the I/O callback */
+                te->addr_write = vaddr |
+                    (pd & ~(TARGET_PAGE_MASK | IO_MEM_ROMD));
+            } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
+                       !cpu_physical_memory_is_dirty(pd)) {
+                te->addr_write = vaddr | IO_MEM_NOTDIRTY;
+            } else {
+                te->addr_write = address;
+            }
+        } else {
+            te->addr_write = -1;
+        }
+#ifdef VBOX
+        /* inform raw mode about TLB page change */
+        remR3FlushPage(env, vaddr);
+#endif
+    }
+#if !defined(CONFIG_SOFTMMU)
+    else {
+        if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM) {
+            /* IO access: no mapping is done as it will be handled by the
+               soft MMU */
+            if (!(env->hflags & HF_SOFTMMU_MASK))
+                ret = 2;
+        } else {
+            void *map_addr;
+
+            if (vaddr >= MMAP_AREA_END) {
+                ret = 2;
+            } else {
+                if (prot & PROT_WRITE) {
+                    if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
+#if defined(TARGET_HAS_SMC) || 1
+                        first_tb ||
+#endif
+                        ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
+                         !cpu_physical_memory_is_dirty(pd))) {
+                        /* ROM: we behave as if code were inside */
+                        /* if code is present, we only map it read-only and
+                           save the original mapping */
+                        VirtPageDesc *vp;
+
+                        vp = virt_page_find_alloc(vaddr >> TARGET_PAGE_BITS, 1);
+                        vp->phys_addr = pd;
+                        vp->prot = prot;
+                        vp->valid_tag = virt_valid_tag;
+                        prot &= ~PAGE_WRITE;
+                    }
+                }
+                map_addr = mmap((void *)vaddr, TARGET_PAGE_SIZE, prot,
+                                MAP_SHARED | MAP_FIXED, phys_ram_fd, (pd & TARGET_PAGE_MASK));
+                if (map_addr == MAP_FAILED) {
+                    cpu_abort(env, "mmap failed when mapped physical address 0x%08x to virtual address 0x%08x\n",
+                              paddr, vaddr);
+                }
+            }
+        }
+    }
+#endif
+    return ret;
+}
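+
+/* note on the addend convention used above: for RAM pages the TLB stores
+   addend = page base (host virtual, or guest physical with
+   REM_PHYS_ADDR_IN_TLB) minus vaddr, so a TLB hit turns a guest virtual
+   address into a usable pointer with a single add (ptr = addr + addend);
+   for I/O pages addr_read/addr_write/addr_code carry the handler index in
+   their low bits (address = vaddr | pd) so the slow path is taken instead */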
+
+/* called from signal handler: invalidate the code and unprotect the
+   page. Return TRUE if the fault was successfully handled. */
+int page_unprotect(target_ulong addr, unsigned long pc, void *puc)
+{
+#if !defined(CONFIG_SOFTMMU)
+    VirtPageDesc *vp;
+
+#if defined(DEBUG_TLB)
+    printf("page_unprotect: addr=0x%08x\n", addr);
+#endif
+    addr &= TARGET_PAGE_MASK;
+
+    /* if it is not mapped, no need to worry here */
+    if (addr >= MMAP_AREA_END)
+        return 0;
+    vp = virt_page_find(addr >> TARGET_PAGE_BITS);
+    if (!vp)
+        return 0;
+    /* NOTE: in this case, validate_tag is _not_ tested as it
+       validates only the code TLB */
+    if (vp->valid_tag != virt_valid_tag)
+        return 0;
+    if (!(vp->prot & PAGE_WRITE))
+        return 0;
+#if defined(DEBUG_TLB)
+    printf("page_unprotect: addr=0x%08x phys_addr=0x%08x prot=%x\n",
+           addr, vp->phys_addr, vp->prot);
+#endif
+    if (mprotect((void *)addr, TARGET_PAGE_SIZE, vp->prot) < 0)
+        cpu_abort(cpu_single_env, "error mprotect addr=0x%lx prot=%d\n",
+                  (unsigned long)addr, vp->prot);
+    /* set the dirty bit */
+    phys_ram_dirty[vp->phys_addr >> TARGET_PAGE_BITS] = 0xff;
+    /* flush the code inside */
+    tb_invalidate_phys_page(vp->phys_addr, pc, puc);
+    return 1;
+#elif defined(VBOX)
+    addr &= TARGET_PAGE_MASK;
+
+    /* if it is not mapped, no need to worry here */
+    if (addr >= MMAP_AREA_END)
+        return 0;
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+#else
+
+void tlb_flush(CPUState *env, int flush_global)
+{
+}
+
+void tlb_flush_page(CPUState *env, target_ulong addr)
+{
+}
+
+int tlb_set_page_exec(CPUState *env, target_ulong vaddr,
+                      target_phys_addr_t paddr, int prot,
+                      int is_user, int is_softmmu)
+{
+    return 0;
+}
+
+#ifndef VBOX
+/* dump memory mappings */
+void page_dump(FILE *f)
+{
+    unsigned long start, end;
+    int i, j, prot, prot1;
+    PageDesc *p;
+
+    fprintf(f, "%-8s %-8s %-8s %s\n",
+            "start", "end", "size", "prot");
+    start = -1;
+    end = -1;
+    prot = 0;
+    for(i = 0; i <= L1_SIZE; i++) {
+        if (i < L1_SIZE)
+            p = l1_map[i];
+        else
+            p = NULL;
+        for(j = 0;j < L2_SIZE; j++) {
+            if (!p)
+                prot1 = 0;
+            else
+                prot1 = p[j].flags;
+            if (prot1 != prot) {
+                end = (i << (32 - L1_BITS)) | (j << TARGET_PAGE_BITS);
+                if (start != -1) {
+                    fprintf(f, "%08lx-%08lx %08lx %c%c%c\n",
+                            start, end, end - start,
+                            prot & PAGE_READ ? 'r' : '-',
+                            prot & PAGE_WRITE ? 'w' : '-',
+                            prot & PAGE_EXEC ? 'x' : '-');
+                }
+                if (prot1 != 0)
+                    start = end;
+                else
+                    start = -1;
+                prot = prot1;
+            }
+            if (!p)
+                break;
+        }
+    }
+}
+#endif /* !VBOX */
+
+int page_get_flags(target_ulong address)
+{
+    PageDesc *p;
+
+    p = page_find(address >> TARGET_PAGE_BITS);
+    if (!p)
+        return 0;
+    return p->flags;
+}
+
+/* modify the flags of a page and invalidate the code if
+   necessary. The flag PAGE_WRITE_ORG is set automatically
+   depending on PAGE_WRITE */
+void page_set_flags(target_ulong start, target_ulong end, int flags)
+{
+    PageDesc *p;
+    target_ulong addr;
+
+    start = start & TARGET_PAGE_MASK;
+    end = TARGET_PAGE_ALIGN(end);
+    if (flags & PAGE_WRITE)
+        flags |= PAGE_WRITE_ORG;
+#ifdef VBOX
+    AssertMsgFailed(("We shouldn't be here, and if we should, we must have an env to do the proper locking!\n"));
+#endif
+    spin_lock(&tb_lock);
+    for(addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
+        p = page_find_alloc(addr >> TARGET_PAGE_BITS);
+        /* if the write protection is set, then we invalidate the code
+           inside */
+        if (!(p->flags & PAGE_WRITE) &&
+            (flags & PAGE_WRITE) &&
+            p->first_tb) {
+            tb_invalidate_phys_page(addr, 0, NULL);
+        }
+        p->flags = flags;
+    }
+    spin_unlock(&tb_lock);
+}
+
+/* called from signal handler: invalidate the code and unprotect the
+   page. Return TRUE if the fault was successfully handled. */
+int page_unprotect(target_ulong address, unsigned long pc, void *puc)
+{
+    unsigned int page_index, prot, pindex;
+    PageDesc *p, *p1;
+    target_ulong host_start, host_end, addr;
+
+    host_start = address & qemu_host_page_mask;
+    page_index = host_start >> TARGET_PAGE_BITS;
+    p1 = page_find(page_index);
+    if (!p1)
+        return 0;
+    host_end = host_start + qemu_host_page_size;
+    p = p1;
+    prot = 0;
+    for(addr = host_start;addr < host_end; addr += TARGET_PAGE_SIZE) {
+        prot |= p->flags;
+        p++;
+    }
+    /* if the page was really writable, then we change its
+       protection back to writable */
+    if (prot & PAGE_WRITE_ORG) {
+        pindex = (address - host_start) >> TARGET_PAGE_BITS;
+        if (!(p1[pindex].flags & PAGE_WRITE)) {
+            mprotect((void *)g2h(host_start), qemu_host_page_size,
+                     (prot & PAGE_BITS) | PAGE_WRITE);
+            p1[pindex].flags |= PAGE_WRITE;
+            /* and since the content will be modified, we must invalidate
+               the corresponding translated code. */
+            tb_invalidate_phys_page(address, pc, puc);
+#ifdef DEBUG_TB_CHECK
+            tb_invalidate_check(address);
+#endif
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* call this function when system calls directly modify a memory area */
+/* ??? This should be redundant now we have lock_user.  */
+void page_unprotect_range(target_ulong data, target_ulong data_size)
+{
+    target_ulong start, end, addr;
+
+    start = data;
+    end = start + data_size;
+    start &= TARGET_PAGE_MASK;
+    end = TARGET_PAGE_ALIGN(end);
+    for(addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
+        page_unprotect(addr, 0, NULL);
+    }
+}
+
+static inline void tlb_set_dirty(CPUState *env,
+                                 unsigned long addr, target_ulong vaddr)
+{
+}
+#endif /* defined(CONFIG_USER_ONLY) */
+
+/* register physical memory. 'size' must be a multiple of the target
+   page size. If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
+   io memory page */
+void cpu_register_physical_memory(target_phys_addr_t start_addr,
+                                  unsigned long size,
+                                  unsigned long phys_offset)
+{
+    target_phys_addr_t addr, end_addr;
+    PhysPageDesc *p;
+    CPUState *env;
+
+    size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
+    end_addr = start_addr + size;
+    for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) {
+        p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
+        p->phys_offset = phys_offset;
+#if !defined(VBOX) || defined(VBOX_WITH_NEW_PHYS_CODE)
+        if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
+            (phys_offset & IO_MEM_ROMD))
+#else
+        if (   (phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM
+            || (phys_offset & IO_MEM_ROMD)
+            || (phys_offset & ~TARGET_PAGE_MASK) == IO_MEM_RAM_MISSING)
+#endif
+
+            phys_offset += TARGET_PAGE_SIZE;
+    }
+
+    /* since each CPU stores ram addresses in its TLB cache, we must
+       reset the modified entries */
+    /* XXX: slow ! */
+    for(env = first_cpu; env != NULL; env = env->next_cpu) {
+        tlb_flush(env, 1);
+    }
+}
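+
+/* note: for RAM, ROM and ROMD pages the loop above advances phys_offset by
+   one page per iteration, so each target page records its own offset into
+   guest RAM/ROM; plain MMIO ranges keep the same phys_offset, which is just
+   the I/O handler index */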
+
+/* XXX: temporary until new memory mapping API */
+uint32_t cpu_get_physical_page_desc(target_phys_addr_t addr)
+{
+    PhysPageDesc *p;
+
+    p = phys_page_find(addr >> TARGET_PAGE_BITS);
+    if (!p)
+        return IO_MEM_UNASSIGNED;
+    return p->phys_offset;
+}
+
+static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
+{
+#ifdef DEBUG_UNASSIGNED
+    printf("Unassigned mem read  0x%08x\n", (int)addr);
+#endif
+    return 0;
+}
+
+static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+#ifdef DEBUG_UNASSIGNED
+    printf("Unassigned mem write 0x%08x = 0x%x\n", (int)addr, val);
+#endif
+}
+
+static CPUReadMemoryFunc *unassigned_mem_read[3] = {
+    unassigned_mem_readb,
+    unassigned_mem_readb,
+    unassigned_mem_readb,
+};
+
+static CPUWriteMemoryFunc *unassigned_mem_write[3] = {
+    unassigned_mem_writeb,
+    unassigned_mem_writeb,
+    unassigned_mem_writeb,
+};
+
+static void notdirty_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+    unsigned long ram_addr;
+    int dirty_flags;
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+    ram_addr = addr;
+#elif !defined(VBOX)
+    ram_addr = addr - (unsigned long)phys_ram_base;
+#else
+    ram_addr = remR3HCVirt2GCPhys(first_cpu, (void *)addr);
+#endif
+#ifdef VBOX
+    if (RT_UNLIKELY((ram_addr >> TARGET_PAGE_BITS) >= phys_ram_dirty_size))
+        dirty_flags = 0xff;
+    else
+#endif /* VBOX */
+    dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
+#if !defined(CONFIG_USER_ONLY)
+        tb_invalidate_phys_page_fast(ram_addr, 1);
+# ifdef VBOX
+        if (RT_UNLIKELY((ram_addr >> TARGET_PAGE_BITS) >= phys_ram_dirty_size))
+            dirty_flags = 0xff;
+        else
+# endif /* VBOX */
+        dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+#endif
+    }
+    stb_p((uint8_t *)(long)addr, val);
+#ifdef USE_KQEMU
+    if (cpu_single_env->kqemu_enabled &&
+        (dirty_flags & KQEMU_MODIFY_PAGE_MASK) != KQEMU_MODIFY_PAGE_MASK)
+        kqemu_modify_page(cpu_single_env, ram_addr);
+#endif
+    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
+#ifdef VBOX
+    if (RT_LIKELY((ram_addr >> TARGET_PAGE_BITS) < phys_ram_dirty_size))
+#endif /* !VBOX */
+    phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] = dirty_flags;
+    /* we remove the notdirty callback only if the code has been
+       flushed */
+    if (dirty_flags == 0xff)
+        tlb_set_dirty(cpu_single_env, addr, cpu_single_env->mem_write_vaddr);
+}
+
+static void notdirty_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+    unsigned long ram_addr;
+    int dirty_flags;
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+    ram_addr = addr;
+#elif !defined(VBOX)
+    ram_addr = addr - (unsigned long)phys_ram_base;
+#else
+    ram_addr = remR3HCVirt2GCPhys(first_cpu, (void *)addr);
+#endif
+#ifdef VBOX
+    if (RT_UNLIKELY((ram_addr >> TARGET_PAGE_BITS) >= phys_ram_dirty_size))
+        dirty_flags = 0xff;
+    else
+#endif /* VBOX */
+    dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
+#if !defined(CONFIG_USER_ONLY)
+        tb_invalidate_phys_page_fast(ram_addr, 2);
+# ifdef VBOX
+        if (RT_UNLIKELY((ram_addr >> TARGET_PAGE_BITS) >= phys_ram_dirty_size))
+            dirty_flags = 0xff;
+        else
+# endif /* VBOX */
+        dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+#endif
+    }
+    stw_p((uint8_t *)(long)addr, val);
+#ifdef USE_KQEMU
+    if (cpu_single_env->kqemu_enabled &&
+        (dirty_flags & KQEMU_MODIFY_PAGE_MASK) != KQEMU_MODIFY_PAGE_MASK)
+        kqemu_modify_page(cpu_single_env, ram_addr);
+#endif
+    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
+#ifdef VBOX
+    if (RT_LIKELY((ram_addr >> TARGET_PAGE_BITS) < phys_ram_dirty_size))
+#endif
+    phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] = dirty_flags;
+    /* we remove the notdirty callback only if the code has been
+       flushed */
+    if (dirty_flags == 0xff)
+        tlb_set_dirty(cpu_single_env, addr, cpu_single_env->mem_write_vaddr);
+}
+
+static void notdirty_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+    unsigned long ram_addr;
+    int dirty_flags;
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+    ram_addr = addr;
+#elif !defined(VBOX)
+    ram_addr = addr - (unsigned long)phys_ram_base;
+#else
+    ram_addr = remR3HCVirt2GCPhys(first_cpu, (void *)addr);
+#endif
+#ifdef VBOX
+    if (RT_UNLIKELY((ram_addr >> TARGET_PAGE_BITS) >= phys_ram_dirty_size))
+        dirty_flags = 0xff;
+    else
+#endif /* VBOX */
+    dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
+#if !defined(CONFIG_USER_ONLY)
+        tb_invalidate_phys_page_fast(ram_addr, 4);
+# ifdef VBOX
+        if (RT_UNLIKELY((ram_addr >> TARGET_PAGE_BITS) >= phys_ram_dirty_size))
+            dirty_flags = 0xff;
+        else
+# endif /* VBOX */
+        dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+#endif
+    }
+    stl_p((uint8_t *)(long)addr, val);
+#ifdef USE_KQEMU
+    if (cpu_single_env->kqemu_enabled &&
+        (dirty_flags & KQEMU_MODIFY_PAGE_MASK) != KQEMU_MODIFY_PAGE_MASK)
+        kqemu_modify_page(cpu_single_env, ram_addr);
+#endif
+    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
+#ifdef VBOX
+    if (RT_LIKELY((ram_addr >> TARGET_PAGE_BITS) < phys_ram_dirty_size))
+#endif
+    phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] = dirty_flags;
+    /* we remove the notdirty callback only if the code has been
+       flushed */
+    if (dirty_flags == 0xff)
+        tlb_set_dirty(cpu_single_env, addr, cpu_single_env->mem_write_vaddr);
+}
+
+static CPUReadMemoryFunc *error_mem_read[3] = {
+    NULL, /* never used */
+    NULL, /* never used */
+    NULL, /* never used */
+};
+
+static CPUWriteMemoryFunc *notdirty_mem_write[3] = {
+    notdirty_mem_writeb,
+    notdirty_mem_writew,
+    notdirty_mem_writel,
+};
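+
+/* note: the notdirty_mem_write* handlers registered above implement the
+   slow write path for pages that still contain translated code: the TBs
+   covering the written bytes are invalidated first, the byte/word/dword is
+   then stored, and once the code dirty flag is set again (dirty_flags ==
+   0xff) the page is switched back to the fast RAM path via tlb_set_dirty() */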
+
+static void io_mem_init(void)
+{
+    cpu_register_io_memory(IO_MEM_ROM >> IO_MEM_SHIFT, error_mem_read, unassigned_mem_write, NULL);
+    cpu_register_io_memory(IO_MEM_UNASSIGNED >> IO_MEM_SHIFT, unassigned_mem_read, unassigned_mem_write, NULL);
+    cpu_register_io_memory(IO_MEM_NOTDIRTY >> IO_MEM_SHIFT, error_mem_read, notdirty_mem_write, NULL);
+#if defined(VBOX) && !defined(VBOX_WITH_NEW_PHYS_CODE)
+    cpu_register_io_memory(IO_MEM_RAM_MISSING >> IO_MEM_SHIFT, unassigned_mem_read, unassigned_mem_write, NULL);
+    io_mem_nb = 6;
+#else
+    io_mem_nb = 5;
+#endif
+
+#ifndef VBOX /* VBOX: we do this later when the RAM is allocated. */
+    /* alloc dirty bits array */
+    phys_ram_dirty = qemu_vmalloc(phys_ram_size >> TARGET_PAGE_BITS);
+    memset(phys_ram_dirty, 0xff, phys_ram_size >> TARGET_PAGE_BITS);
+#endif /* !VBOX */
+}
+
+/* mem_read and mem_write are arrays of functions for byte (index 0),
+   word (index 1) and dword (index 2) accesses. All functions must be
+   supplied. If io_index is non-zero, the corresponding I/O zone is
+   modified; if it is zero, a new I/O zone is allocated. The return value
+   can be used with cpu_register_physical_memory(). (-1) is returned
+   on error. */
+int cpu_register_io_memory(int io_index,
+                           CPUReadMemoryFunc **mem_read,
+                           CPUWriteMemoryFunc **mem_write,
+                           void *opaque)
+{
+    int i;
+
+    if (io_index <= 0) {
+        if (io_mem_nb >= IO_MEM_NB_ENTRIES)
+            return -1;
+        io_index = io_mem_nb++;
+    } else {
+        if (io_index >= IO_MEM_NB_ENTRIES)
+            return -1;
+    }
+
+    for(i = 0;i < 3; i++) {
+        io_mem_read[io_index][i] = mem_read[i];
+        io_mem_write[io_index][i] = mem_write[i];
+    }
+    io_mem_opaque[io_index] = opaque;
+    return io_index << IO_MEM_SHIFT;
+}
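+
+/* illustrative registration (hypothetical device, sketch only):
+       static CPUReadMemoryFunc  *mydev_read[3]  = { rd_b, rd_w, rd_l };
+       static CPUWriteMemoryFunc *mydev_write[3] = { wr_b, wr_w, wr_l };
+       int iomem = cpu_register_io_memory(0, mydev_read, mydev_write, s);
+       cpu_register_physical_memory(base, size, iomem);
+   all six callbacks must be provided, and the returned value is already
+   shifted so it can be passed straight to cpu_register_physical_memory() */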
+
+CPUWriteMemoryFunc **cpu_get_io_memory_write(int io_index)
+{
+    return io_mem_write[io_index >> IO_MEM_SHIFT];
+}
+
+CPUReadMemoryFunc **cpu_get_io_memory_read(int io_index)
+{
+    return io_mem_read[io_index >> IO_MEM_SHIFT];
+}
+
+/* physical memory access (slow version, mainly for debug) */
+#if defined(CONFIG_USER_ONLY)
+void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+                            int len, int is_write)
+{
+    int l, flags;
+    target_ulong page;
+    void * p;
+
+    while (len > 0) {
+        page = addr & TARGET_PAGE_MASK;
+        l = (page + TARGET_PAGE_SIZE) - addr;
+        if (l > len)
+            l = len;
+        flags = page_get_flags(page);
+        if (!(flags & PAGE_VALID))
+            return;
+        if (is_write) {
+            if (!(flags & PAGE_WRITE))
+                return;
+            p = lock_user(addr, len, 0);
+            memcpy(p, buf, len);
+            unlock_user(p, addr, len);
+        } else {
+            if (!(flags & PAGE_READ))
+                return;
+            p = lock_user(addr, len, 1);
+            memcpy(buf, p, len);
+            unlock_user(p, addr, 0);
+        }
+        len -= l;
+        buf += l;
+        addr += l;
+    }
+}
+
+#else
+void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+                            int len, int is_write)
+{
+    int l, io_index;
+    uint8_t *ptr;
+    uint32_t val;
+    target_phys_addr_t page;
+    unsigned long pd;
+    PhysPageDesc *p;
+
+    while (len > 0) {
+        page = addr & TARGET_PAGE_MASK;
+        l = (page + TARGET_PAGE_SIZE) - addr;
+        if (l > len)
+            l = len;
+        p = phys_page_find(page >> TARGET_PAGE_BITS);
+        if (!p) {
+            pd = IO_MEM_UNASSIGNED;
+        } else {
+            pd = p->phys_offset;
+        }
+
+        if (is_write) {
+            if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
+                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                /* XXX: could force cpu_single_env to NULL to avoid
+                   potential bugs */
+                if (l >= 4 && ((addr & 3) == 0)) {
+                    /* 32 bit write access */
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+                    val = ldl_p(buf);
+#else
+                    val = *(const uint32_t *)buf;
+#endif
+                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+                    l = 4;
+                } else if (l >= 2 && ((addr & 1) == 0)) {
+                    /* 16 bit write access */
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+                    val = lduw_p(buf);
+#else
+                    val = *(const uint16_t *)buf;
+#endif
+                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
+                    l = 2;
+                } else {
+                    /* 8 bit write access */
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+                    val = ldub_p(buf);
+#else
+                    val = *(const uint8_t *)buf;
+#endif
+                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
+                    l = 1;
+                }
+            } else {
+                unsigned long addr1;
+                addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+                /* RAM case */
+#ifdef VBOX
+                remR3PhysWrite(addr1, buf, l); NOREF(ptr);
+#else
+                ptr = phys_ram_base + addr1;
+                memcpy(ptr, buf, l);
+#endif
+                if (!cpu_physical_memory_is_dirty(addr1)) {
+                    /* invalidate code */
+                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
+                    /* set dirty bit */
+#ifdef VBOX
+                    if (RT_LIKELY((addr1 >> TARGET_PAGE_BITS) < phys_ram_dirty_size))
+#endif
+                    phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |=
+                        (0xff & ~CODE_DIRTY_FLAG);
+                }
+            }
+        } else {
+            if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
+                !(pd & IO_MEM_ROMD)) {
+                /* I/O case */
+                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                if (l >= 4 && ((addr & 3) == 0)) {
+                    /* 32 bit read access */
+                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+                    stl_p(buf, val);
+#else
+                    *(uint32_t *)buf = val;
+#endif
+                    l = 4;
+                } else if (l >= 2 && ((addr & 1) == 0)) {
+                    /* 16 bit read access */
+                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+                    stw_p(buf, val);
+#else
+                    *(uint16_t *)buf = val;
+#endif
+                    l = 2;
+                } else {
+                    /* 8 bit read access */
+                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+                    stb_p(buf, val);
+#else
+                    *(uint8_t *)buf = val;
+#endif
+                    l = 1;
+                }
+            } else {
+                /* RAM case */
+#ifdef VBOX
+                remR3PhysRead((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), buf, l); NOREF(ptr);
+#else
+                ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
+                    (addr & ~TARGET_PAGE_MASK);
+                memcpy(buf, ptr, l);
+#endif
+            }
+        }
+        len -= l;
+        buf += l;
+        addr += l;
+    }
+}
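+
+/* note: the loop above never crosses a target page boundary in one step
+   (l is clamped to the end of the current page) and, for I/O pages, picks
+   the widest naturally aligned access (4, 2 or 1 bytes) that still fits in
+   the remaining length */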
+
+#ifndef VBOX
+/* used for ROM loading : can write in RAM and ROM */
+void cpu_physical_memory_write_rom(target_phys_addr_t addr,
+                                   const uint8_t *buf, int len)
+{
+    int l;
+    uint8_t *ptr;
+    target_phys_addr_t page;
+    unsigned long pd;
+    PhysPageDesc *p;
+
+    while (len > 0) {
+        page = addr & TARGET_PAGE_MASK;
+        l = (page + TARGET_PAGE_SIZE) - addr;
+        if (l > len)
+            l = len;
+        p = phys_page_find(page >> TARGET_PAGE_BITS);
+        if (!p) {
+            pd = IO_MEM_UNASSIGNED;
+        } else {
+            pd = p->phys_offset;
+        }
+
+        if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
+            (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
+            !(pd & IO_MEM_ROMD)) {
+            /* do nothing */
+        } else {
+            unsigned long addr1;
+            addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+            /* ROM/RAM case */
+            ptr = phys_ram_base + addr1;
+            memcpy(ptr, buf, l);
+        }
+        len -= l;
+        buf += l;
+        addr += l;
+    }
+}
+#endif /* !VBOX */
+
+
+/* warning: addr must be aligned */
+uint32_t ldl_phys(target_phys_addr_t addr)
+{
+    int io_index;
+    uint8_t *ptr;
+    uint32_t val;
+    unsigned long pd;
+    PhysPageDesc *p;
+
+    p = phys_page_find(addr >> TARGET_PAGE_BITS);
+    if (!p) {
+        pd = IO_MEM_UNASSIGNED;
+    } else {
+        pd = p->phys_offset;
+    }
+
+    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
+        !(pd & IO_MEM_ROMD)) {
+        /* I/O case */
+        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
+    } else {
+        /* RAM case */
+#ifndef VBOX
+        ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
+            (addr & ~TARGET_PAGE_MASK);
+        val = ldl_p(ptr);
+#else
+        val = remR3PhysReadU32((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK)); NOREF(ptr);
+#endif
+    }
+    return val;
+}
+
+/* warning: addr must be aligned */
+uint64_t ldq_phys(target_phys_addr_t addr)
+{
+    int io_index;
+    uint8_t *ptr;
+    uint64_t val;
+    unsigned long pd;
+    PhysPageDesc *p;
+
+    p = phys_page_find(addr >> TARGET_PAGE_BITS);
+    if (!p) {
+        pd = IO_MEM_UNASSIGNED;
+    } else {
+        pd = p->phys_offset;
+    }
+
+    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
+        !(pd & IO_MEM_ROMD)) {
+        /* I/O case */
+        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+#ifdef TARGET_WORDS_BIGENDIAN
+        val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
+        val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
+#else
+        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
+        val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
+#endif
+    } else {
+        /* RAM case */
+#ifndef VBOX
+        ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
+            (addr & ~TARGET_PAGE_MASK);
+        val = ldq_p(ptr);
+#else
+        val = remR3PhysReadU64((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK)); NOREF(ptr);
+#endif
+    }
+    return val;
+}
+
+/* XXX: optimize */
+uint32_t ldub_phys(target_phys_addr_t addr)
+{
+    uint8_t val;
+    cpu_physical_memory_read(addr, &val, 1);
+    return val;
+}
+
+/* XXX: optimize */
+uint32_t lduw_phys(target_phys_addr_t addr)
+{
+    uint16_t val;
+    cpu_physical_memory_read(addr, (uint8_t *)&val, 2);
+    return tswap16(val);
+}
+
+/* warning: addr must be aligned. The RAM page is not marked as dirty
+   and the code inside is not invalidated. This is useful when the dirty
+   bits are used to track modified PTEs */
+void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
+{
+    int io_index;
+    uint8_t *ptr;
+    unsigned long pd;
+    PhysPageDesc *p;
+
+    p = phys_page_find(addr >> TARGET_PAGE_BITS);
+    if (!p) {
+        pd = IO_MEM_UNASSIGNED;
+    } else {
+        pd = p->phys_offset;
+    }
+
+    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
+        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+    } else {
+#ifndef VBOX
+        ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
+            (addr & ~TARGET_PAGE_MASK);
+        stl_p(ptr, val);
+#else
+        remR3PhysWriteU32((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), val); NOREF(ptr);
+#endif
+    }
+}
+
+/* warning: addr must be aligned */
+void stl_phys(target_phys_addr_t addr, uint32_t val)
+{
+    int io_index;
+    uint8_t *ptr;
+    unsigned long pd;
+    PhysPageDesc *p;
+
+    p = phys_page_find(addr >> TARGET_PAGE_BITS);
+    if (!p) {
+        pd = IO_MEM_UNASSIGNED;
+    } else {
+        pd = p->phys_offset;
+    }
+
+    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
+        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+    } else {
+        unsigned long addr1;
+        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+        /* RAM case */
+#ifndef VBOX
+        ptr = phys_ram_base + addr1;
+        stl_p(ptr, val);
+#else
+        remR3PhysWriteU32((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), val); NOREF(ptr);
+#endif
+        if (!cpu_physical_memory_is_dirty(addr1)) {
+            /* invalidate code */
+            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
+            /* set dirty bit */
+#ifdef VBOX
+            if (RT_LIKELY((addr1 >> TARGET_PAGE_BITS) < phys_ram_dirty_size))
+#endif
+            phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |=
+                (0xff & ~CODE_DIRTY_FLAG);
+        }
+    }
+}
+
+/* XXX: optimize */
+void stb_phys(target_phys_addr_t addr, uint32_t val)
+{
+    uint8_t v = val;
+    cpu_physical_memory_write(addr, &v, 1);
+}
+
+/* XXX: optimize */
+void stw_phys(target_phys_addr_t addr, uint32_t val)
+{
+    uint16_t v = tswap16(val);
+    cpu_physical_memory_write(addr, (const uint8_t *)&v, 2);
+}
+
+/* XXX: optimize */
+void stq_phys(target_phys_addr_t addr, uint64_t val)
+{
+    val = tswap64(val);
+    cpu_physical_memory_write(addr, (const uint8_t *)&val, 8);
+}
+
+#endif
+
+#ifndef VBOX
+/* virtual memory access for debug */
+int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
+                        uint8_t *buf, int len, int is_write)
+{
+    int l;
+    target_ulong page, phys_addr;
+
+    while (len > 0) {
+        page = addr & TARGET_PAGE_MASK;
+        phys_addr = cpu_get_phys_page_debug(env, page);
+        /* if no physical page mapped, return an error */
+        if (phys_addr == -1)
+            return -1;
+        l = (page + TARGET_PAGE_SIZE) - addr;
+        if (l > len)
+            l = len;
+        cpu_physical_memory_rw(phys_addr + (addr & ~TARGET_PAGE_MASK),
+                               buf, l, is_write);
+        len -= l;
+        buf += l;
+        addr += l;
+    }
+    return 0;
+}
+
+void dump_exec_info(FILE *f,
+                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+{
+    int i, target_code_size, max_target_code_size;
+    int direct_jmp_count, direct_jmp2_count, cross_page;
+    TranslationBlock *tb;
+
+    target_code_size = 0;
+    max_target_code_size = 0;
+    cross_page = 0;
+    direct_jmp_count = 0;
+    direct_jmp2_count = 0;
+    for(i = 0; i < nb_tbs; i++) {
+        tb = &tbs[i];
+        target_code_size += tb->size;
+        if (tb->size > max_target_code_size)
+            max_target_code_size = tb->size;
+        if (tb->page_addr[1] != -1)
+            cross_page++;
+        if (tb->tb_next_offset[0] != 0xffff) {
+            direct_jmp_count++;
+            if (tb->tb_next_offset[1] != 0xffff) {
+                direct_jmp2_count++;
+            }
+        }
+    }
+    /* XXX: avoid using doubles? */
+    cpu_fprintf(f, "TB count            %d\n", nb_tbs);
+    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
+                nb_tbs ? target_code_size / nb_tbs : 0,
+                max_target_code_size);
+    cpu_fprintf(f, "TB avg host size    %d bytes (expansion ratio: %0.1f)\n",
+                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
+                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
+    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
+            cross_page,
+            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
+    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
+                direct_jmp_count,
+                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
+                direct_jmp2_count,
+                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
+    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
+    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
+    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
+}
+#endif /* !VBOX */
+
+#if !defined(CONFIG_USER_ONLY)
+
+#define MMUSUFFIX _cmmu
+#define GETPC() NULL
+#define env cpu_single_env
+#define SOFTMMU_CODE_ACCESS
+
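+/* Each inclusion of softmmu_template.h below instantiates the slow-path
+   code-access helpers for one access size: SHIFT 0, 1, 2 and 3 select 1-,
+   2-, 4- and 8-byte accesses respectively. */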
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+#undef env
+
+#endif
Index: /trunk/src/recompiler_new/fpu/softfloat-macros.h
===================================================================
--- /trunk/src/recompiler_new/fpu/softfloat-macros.h	(revision 13168)
+++ /trunk/src/recompiler_new/fpu/softfloat-macros.h	(revision 13168)
@@ -0,0 +1,720 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 32, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+    bits32 z;
+
+    if ( count == 0 ) {
+        z = a;
+    }
+    else if ( count < 32 ) {
+        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
+    }
+    else {
+        z = ( a != 0 );
+    }
+    *zPtr = z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 64, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+{
+    bits64 z;
+
+    if ( count == 0 ) {
+        z = a;
+    }
+    else if ( count < 64 ) {
+        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
+    }
+    else {
+        z = ( a != 0 );
+    }
+    *zPtr = z;
+
+}
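+
+/*----------------------------------------------------------------------------
+| Worked example (values chosen purely for illustration):
+|     bits64 z;
+|     shift64RightJamming( LIT64( 0x8000000000000001 ), 4, &z );
+| leaves z equal to LIT64( 0x0800000000000001 ): the nonzero bit shifted off
+| is OR-ed back into the least significant bit, so a later rounding step can
+| still tell that the discarded part was nonzero.
+*----------------------------------------------------------------------------*/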
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+| _plus_ the number of bits given in `count'.  The shifted result is at most
+| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+| bits shifted off form a second 64-bit result as follows:  The _last_ bit
+| shifted off is the most-significant bit of the extra result, and the other
+| 63 bits of the extra result are all zero if and only if _all_but_the_last_
+| bits shifted off were all zero.  This extra result is stored in the location
+| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0' and `a1' are considered to form
+| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
+| point value is shifted right by the number of bits given in `count', and
+| the integer part of the result is returned at the location pointed to by
+| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
+| described above, and is returned at the location pointed to by `z1Ptr'.)
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift64ExtraRightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1 != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else {
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' can be arbitrarily large; in particular, if `count' is greater
+| than 128, the result will be 0.  The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128Right(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count );
+        z0 = a0>>count;
+    }
+    else {
+        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  If any nonzero bits are shifted off, they
+| are ``jammed'' into the least significant bit of the result by setting the
+| least significant bit to 1.  The value of `count' can be arbitrarily large;
+| in particular, if `count' is greater than 128, the result will be either
+| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+| nonzero.  The result is broken into two 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128RightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else if ( count < 128 ) {
+            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
+        }
+        else {
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'.  The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+| off form a third 64-bit result as follows:  The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero.  This extra result is stored in the location pointed to by
+| `z2Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'.  This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128ExtraRightJamming(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if ( count < 64 ) {
+            z2 = a1<<negCount;
+            z1 = ( a0<<negCount ) | ( a1>>count );
+            z0 = a0>>count;
+        }
+        else {
+            if ( count == 64 ) {
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                a2 |= a1;
+                if ( count < 128 ) {
+                    z2 = a0<<negCount;
+                    z1 = a0>>( count & 63 );
+                }
+                else {
+                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        z2 |= ( a2 != 0 );
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' must be less than 64.  The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shortShift128Left(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1<<count;
+    *z0Ptr =
+        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+| by the number of bits given in `count'.  Any bits shifted off are lost.
+| The value of `count' must be less than 64.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shortShift192Left(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount;
+
+    z2 = a2<<count;
+    z1 = a1<<count;
+    z0 = a0<<count;
+    if ( 0 < count ) {
+        negCount = ( ( - count ) & 63 );
+        z1 |= a2>>negCount;
+        z0 |= a1>>negCount;
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+| any carry out is lost.  The result is broken into two 64-bit pieces which
+| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ add128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z1;
+
+    z1 = a1 + b1;
+    *z1Ptr = z1;
+    *z0Ptr = a0 + b0 + ( z1 < a1 );
+
+}
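+
+/*----------------------------------------------------------------------------
+| Worked example (values chosen purely for illustration):
+|     add128( 0, LIT64( 0xFFFFFFFFFFFFFFFF ), 0, 1, &z0, &z1 );
+| yields z0 = 1 and z1 = 0: the low halves wrap around to zero, the test
+| ( z1 < a1 ) detects the wrap, and a carry of 1 propagates into the high
+| half.
+*----------------------------------------------------------------------------*/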
+
+/*----------------------------------------------------------------------------
+| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+| modulo 2^192, so any carry out is lost.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ add192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 carry0, carry1;
+
+    z2 = a2 + b2;
+    carry1 = ( z2 < a2 );
+    z1 = a1 + b1;
+    carry0 = ( z1 < a1 );
+    z0 = a0 + b0;
+    z1 += carry1;
+    z0 += ( z1 < carry1 );
+    z0 += carry0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+| `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ sub128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - ( a1 < b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+| result is broken into three 64-bit pieces which are stored at the locations
+| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ sub192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 borrow0, borrow1;
+
+    z2 = a2 - b2;
+    borrow1 = ( a2 < b2 );
+    z1 = a1 - b1;
+    borrow0 = ( a1 < b1 );
+    z0 = a0 - b0;
+    z0 -= ( z1 < borrow1 );
+    z1 -= borrow1;
+    z0 -= borrow0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+| into two 64-bit pieces which are stored at the locations pointed to by
+| `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits32 aHigh, aLow, bHigh, bLow;
+    bits64 z0, zMiddleA, zMiddleB, z1;
+
+    aLow = a;
+    aHigh = a>>32;
+    bLow = b;
+    bHigh = b>>32;
+    z1 = ( (bits64) aLow ) * bLow;
+    zMiddleA = ( (bits64) aLow ) * bHigh;
+    zMiddleB = ( (bits64) aHigh ) * bLow;
+    z0 = ( (bits64) aHigh ) * bHigh;
+    zMiddleA += zMiddleB;
+    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
+    zMiddleA <<= 32;
+    z1 += zMiddleA;
+    z0 += ( z1 < zMiddleA );
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
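+
+/*----------------------------------------------------------------------------
+| Worked example (values chosen purely for illustration):
+|     mul64To128( LIT64( 0xFFFFFFFFFFFFFFFF ), 2, &z0, &z1 );
+| yields z0 = 1 and z1 = LIT64( 0xFFFFFFFFFFFFFFFE ), i.e. the 128-bit
+| product 2^65 - 2 split into its high and low 64-bit halves.
+*----------------------------------------------------------------------------*/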
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
+| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+| `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ mul128By64To192(
+     bits64 a0,
+     bits64 a1,
+     bits64 b,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2, more1;
+
+    mul64To128( a1, b, &z1, &z2 );
+    mul64To128( a0, b, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+| product.  The product is broken into four 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ mul128To256(
+     bits64 a0,
+     bits64 a1,
+     bits64 b0,
+     bits64 b1,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr,
+     bits64 *z3Ptr
+ )
+{
+    bits64 z0, z1, z2, z3;
+    bits64 more1, more2;
+
+    mul64To128( a1, b1, &z2, &z3 );
+    mul64To128( a1, b0, &z1, &more2 );
+    add128( z1, more2, 0, z2, &z1, &z2 );
+    mul64To128( a0, b0, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    mul64To128( a0, b1, &more1, &more2 );
+    add128( more1, more2, 0, z2, &more1, &z2 );
+    add128( z0, z1, 0, more1, &z0, &z1 );
+    *z3Ptr = z3;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+| toward zero, the approximation returned lies between q and q + 2 inclusive.
+| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+| unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+{
+    bits64 b0, b1;
+    bits64 rem0, rem1, term0, term1;
+    bits64 z;
+
+    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
+    b0 = b>>32;
+    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
+    mul64To128( b, z, &term0, &term1 );
+    sub128( a0, a1, term0, term1, &rem0, &rem1 );
+    while ( ( (sbits64) rem0 ) < 0 ) {
+        z -= LIT64( 0x100000000 );
+        b1 = b<<32;
+        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
+    }
+    rem0 = ( rem0<<32 ) | ( rem1>>32 );
+    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
+    return z;
+
+}
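+
+/*----------------------------------------------------------------------------
+| Worked example (values chosen purely for illustration):
+|     estimateDiv128To64( 1, 0, LIT64( 0x8000000000000000 ) );
+| returns 2, which here happens to be the exact quotient of 2^64 by 2^63.
+| In general the estimate may exceed the true quotient by 1 or 2, so callers
+| typically multiply the estimate back and correct the result.
+*----------------------------------------------------------------------------*/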
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+| `aExp' (the least significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+
+static bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+    static const bits16 sqrtOddAdjustments[] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const bits16 sqrtEvenAdjustments[] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    int8 index;
+    bits32 z;
+
+    index = ( a>>27 ) & 15;
+    if ( aExp & 1 ) {
+        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
+        z = ( ( a / z )<<14 ) + ( z<<15 );
+        a >>= 1;
+    }
+    else {
+        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
+        z = a / z + z;
+        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
+        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+    }
+    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 32 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros32( bits32 a )
+{
+    static const int8 countLeadingZerosHigh[] = {
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < 0x10000 ) {
+        shiftCount += 16;
+        a <<= 16;
+    }
+    if ( a < 0x1000000 ) {
+        shiftCount += 8;
+        a <<= 8;
+    }
+    shiftCount += countLeadingZerosHigh[ a>>24 ];
+    return shiftCount;
+
+}
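+
+/*----------------------------------------------------------------------------
+| Worked example (values chosen purely for illustration):
+|     countLeadingZeros32( 0x00010000 );
+| returns 15: the argument is first shifted left by 8 (adding 8 to the
+| count), and the lookup table then contributes 7 for the resulting high
+| byte 0x01.
+*----------------------------------------------------------------------------*/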
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 64 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros64( bits64 a )
+{
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < ( (bits64) 1 )<<32 ) {
+        shiftCount += 32;
+    }
+    else {
+        a >>= 32;
+    }
+    shiftCount += countLeadingZeros32( a );
+    return shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 == b0 ) && ( a1 == b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+| returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
+| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 != b0 ) || ( a1 != b1 );
+
+}
+
Index: /trunk/src/recompiler_new/fpu/softfloat-native.c
===================================================================
--- /trunk/src/recompiler_new/fpu/softfloat-native.c	(revision 13168)
+++ /trunk/src/recompiler_new/fpu/softfloat-native.c	(revision 13168)
@@ -0,0 +1,384 @@
+/* Native implementation of soft float functions. Only a single status
+   context is supported */
+#include "softfloat.h"
+#include <math.h>
+
+void set_float_rounding_mode(int val STATUS_PARAM)
+{
+    STATUS(float_rounding_mode) = val;
+#if defined(_BSD) && !defined(__APPLE__) || (defined(HOST_SOLARIS) && (HOST_SOLARIS < 10 || HOST_SOLARIS == 11))
+    fpsetround(val);
+#elif defined(__arm__)
+    /* nothing to do */
+#else
+    fesetround(val);
+#endif
+}
+
+#ifdef FLOATX80
+void set_floatx80_rounding_precision(int val STATUS_PARAM)
+{
+    STATUS(floatx80_rounding_precision) = val;
+}
+#endif
+
+#if defined(_BSD) || (defined(HOST_SOLARIS) && HOST_SOLARIS < 10)
+#define lrint(d)		((int32_t)rint(d))
+#define llrint(d)		((int64_t)rint(d))
+#define lrintf(f)		((int32_t)rint(f))
+#define llrintf(f)		((int64_t)rint(f))
+#define sqrtf(f)		((float)sqrt(f))
+#define remainderf(fa, fb)	((float)remainder(fa, fb))
+#define rintf(f)		((float)rint(f))
+#endif
+
+#if defined(__powerpc__)
+
+/* correct (but slow) PowerPC rint() (glibc version is incorrect) */
+double qemu_rint(double x)
+{
+    double y = 4503599627370496.0;
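+    /* 2^52: adding and then subtracting this constant forces x to be
+       rounded to an integer in the current rounding mode, because doubles
+       with magnitude >= 2^52 have no fractional bits. */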
+    if (fabs(x) >= y)
+        return x;
+    if (x < 0) 
+        y = -y;
+    y = (x + y) - y;
+    if (y == 0.0)
+        y = copysign(y, x);
+    return y;
+}
+
+#define rint qemu_rint
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32(int v STATUS_PARAM)
+{
+    return (float32)v;
+}
+
+float64 int32_to_float64(int v STATUS_PARAM)
+{
+    return (float64)v;
+}
+
+#ifdef FLOATX80
+floatx80 int32_to_floatx80(int v STATUS_PARAM)
+{
+    return (floatx80)v;
+}
+#endif
+float32 int64_to_float32( int64_t v STATUS_PARAM)
+{
+    return (float32)v;
+}
+float64 int64_to_float64( int64_t v STATUS_PARAM)
+{
+    return (float64)v;
+}
+#ifdef FLOATX80
+floatx80 int64_to_floatx80( int64_t v STATUS_PARAM)
+{
+    return (floatx80)v;
+}
+#endif
+
+/* XXX: this code implements the x86 behaviour, not the IEEE one.  */
+#if HOST_LONG_BITS == 32
+static inline int long_to_int32(long a)
+{
+    return a;
+}
+#else
+static inline int long_to_int32(long a)
+{
+    if (a != (int32_t)a) 
+        a = 0x80000000;
+    return a;
+}
+#endif
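+
+/* Note: on overflow long_to_int32() returns 0x80000000, the x86 "integer
+   indefinite" value produced by a float-to-int conversion that does not
+   fit, rather than the IEEE-prescribed behaviour. */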
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float32_to_int32( float32 a STATUS_PARAM)
+{
+    return long_to_int32(lrintf(a));
+}
+int float32_to_int32_round_to_zero( float32 a STATUS_PARAM)
+{
+    return (int)a;
+}
+int64_t float32_to_int64( float32 a STATUS_PARAM)
+{
+    return llrintf(a);
+}
+
+int64_t float32_to_int64_round_to_zero( float32 a STATUS_PARAM)
+{
+    return (int64_t)a;
+}
+
+float64 float32_to_float64( float32 a STATUS_PARAM)
+{
+    return a;
+}
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 a STATUS_PARAM)
+{
+    return a;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 a STATUS_PARAM)
+{
+    return rintf(a);
+}
+
+float32 float32_rem( float32 a, float32 b STATUS_PARAM)
+{
+    return remainderf(a, b);
+}
+
+float32 float32_sqrt( float32 a STATUS_PARAM)
+{
+    return sqrtf(a);
+}
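+/* The compare helpers below follow the usual softfloat result convention:
+   -1 when a < b, 0 when the operands are equal, 1 when a > b, and 2 when
+   they are unordered (at least one operand is a NaN). */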
+int float32_compare( float32 a, float32 b STATUS_PARAM )
+{
+    if (a < b) {
+        return -1;
+    } else if (a == b) {
+        return 0;
+    } else if (a > b) {
+        return 1;
+    } else {
+        return 2;
+    }
+}
+int float32_compare_quiet( float32 a, float32 b STATUS_PARAM )
+{
+    if (isless(a, b)) {
+        return -1;
+    } else if (a == b) {
+        return 0;
+    } else if (isgreater(a, b)) {
+        return 1;
+    } else {
+        return 2;
+    }
+}
+int float32_is_signaling_nan( float32 a1)
+{
+    float32u u;
+    uint32_t a;
+    u.f = a1;
+    a = u.i;
+    return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float64_to_int32( float64 a STATUS_PARAM)
+{
+    return long_to_int32(lrint(a));
+}
+int float64_to_int32_round_to_zero( float64 a STATUS_PARAM)
+{
+    return (int)a;
+}
+int64_t float64_to_int64( float64 a STATUS_PARAM)
+{
+    return llrint(a);
+}
+int64_t float64_to_int64_round_to_zero( float64 a STATUS_PARAM)
+{
+    return (int64_t)a;
+}
+float32 float64_to_float32( float64 a STATUS_PARAM)
+{
+    return a;
+}
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 a STATUS_PARAM)
+{
+    return a;
+}
+#endif
+#ifdef FLOAT128
+float128 float64_to_float128( float64 a STATUS_PARAM)
+{
+    return a;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_trunc_to_int( float64 a STATUS_PARAM )
+{
+    return trunc(a);
+}
+
+float64 float64_round_to_int( float64 a STATUS_PARAM )
+{
+#if defined(__arm__)
+    switch(STATUS(float_rounding_mode)) {
+    default:
+    case float_round_nearest_even:
+        asm("rndd %0, %1" : "=f" (a) : "f"(a));
+        break;
+    case float_round_down:
+        asm("rnddm %0, %1" : "=f" (a) : "f"(a));
+        break;
+    case float_round_up:
+        asm("rnddp %0, %1" : "=f" (a) : "f"(a));
+        break;
+    case float_round_to_zero:
+        asm("rnddz %0, %1" : "=f" (a) : "f"(a));
+        break;
+    }
+    return a;
+#else
+    return rint(a);
+#endif
+}
+
+float64 float64_rem( float64 a, float64 b STATUS_PARAM)
+{
+    return remainder(a, b);
+}
+
+float64 float64_sqrt( float64 a STATUS_PARAM)
+{
+    return sqrt(a);
+}
+int float64_compare( float64 a, float64 b STATUS_PARAM )
+{
+    if (a < b) {
+        return -1;
+    } else if (a == b) {
+        return 0;
+    } else if (a > b) {
+        return 1;
+    } else {
+        return 2;
+    }
+}
+int float64_compare_quiet( float64 a, float64 b STATUS_PARAM )
+{
+    if (isless(a, b)) {
+        return -1;
+    } else if (a == b) {
+        return 0;
+    } else if (isgreater(a, b)) {
+        return 1;
+    } else {
+        return 2;
+    }
+}
+int float64_is_signaling_nan( float64 a1)
+{
+    float64u u;
+    uint64_t a;
+    u.f = a1;
+    a = u.i;
+    return
+           ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+        && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+
+}
+
+int float64_is_nan( float64 a1 )
+{
+    float64u u;
+    uint64_t a;
+    u.f = a1;
+    a = u.i;
+
+    return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int floatx80_to_int32( floatx80 a STATUS_PARAM)
+{
+    return long_to_int32(lrintl(a));
+}
+int floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM)
+{
+    return (int)a;
+}
+int64_t floatx80_to_int64( floatx80 a STATUS_PARAM)
+{
+    return llrintl(a);
+}
+int64_t floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM)
+{
+    return (int64_t)a;
+}
+float32 floatx80_to_float32( floatx80 a STATUS_PARAM)
+{
+    return a;
+}
+float64 floatx80_to_float64( floatx80 a STATUS_PARAM)
+{
+    return a;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM)
+{
+    return rintl(a);
+}
+floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return remainderl(a, b);
+}
+floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM)
+{
+    return sqrtl(a);
+}
+int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    if (a < b) {
+        return -1;
+    } else if (a == b) {
+        return 0;
+    } else if (a > b) {
+        return 1;
+    } else {
+        return 2;
+    }
+}
+int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    if (isless(a, b)) {
+        return -1;
+    } else if (a == b) {
+        return 0;
+    } else if (isgreater(a, b)) {
+        return 1;
+    } else {
+        return 2;
+    }
+}
+int floatx80_is_signaling_nan( floatx80 a1)
+{
+    floatx80u u;
+    u.f = a1;
+    return ( ( u.i.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( u.i.low<<1 );
+}
+
+#endif
Index: /trunk/src/recompiler_new/fpu/softfloat-native.h
===================================================================
--- /trunk/src/recompiler_new/fpu/softfloat-native.h	(revision 13168)
+++ /trunk/src/recompiler_new/fpu/softfloat-native.h	(revision 13168)
@@ -0,0 +1,361 @@
+/* Native implementation of soft float functions */
+#include <math.h>
+
+#if (defined(_BSD) && !defined(__APPLE__)) || defined(HOST_SOLARIS)
+#include <ieeefp.h>
+#define fabsf(f) ((float)fabs(f))
+#else
+#include <fenv.h>
+#endif
+
+/*
+ * Define some C99 7.12.3 classification macros and
+ *        some C99 7.12.14 comparison macros for Solaris OS versions
+ *        less than 10, or Solaris 10 systems running GCC 3.x or less.
+ *   Solaris 10 with GCC 4 does not need these macros, as they
+ *   are defined in <iso/math_c99.h> with a compiler directive.
+ */
+#if defined(HOST_SOLARIS) && (( HOST_SOLARIS <= 9 ) || ( ( HOST_SOLARIS >= 10 ) && ( __GNUC__ <= 4) ))
+/*
+ * C99 7.12.3 classification macros
+ * and
+ * C99 7.12.14 comparison macros
+ *
+ * ... do not work on Solaris 10 using GNU CC 3.4.x.
+ * Try to work around the missing / broken C99 math macros.
+ */
+
+#define isnormal(x)             (fpclass(x) >= FP_NZERO)
+#define isgreater(x, y)         ((!unordered(x, y)) && ((x) > (y)))
+#define isgreaterequal(x, y)    ((!unordered(x, y)) && ((x) >= (y)))
+#define isless(x, y)            ((!unordered(x, y)) && ((x) < (y)))
+#define islessequal(x, y)       ((!unordered(x, y)) && ((x) <= (y)))
+#define isunordered(x,y)        unordered(x, y)
+#endif
+
+typedef float float32;
+typedef double float64;
+#ifdef FLOATX80
+typedef long double floatx80;
+#endif
+
+typedef union {
+    float32 f;
+    uint32_t i;
+} float32u;
+typedef union {
+    float64 f;
+    uint64_t i;
+} float64u;
+#ifdef FLOATX80
+typedef union {
+    floatx80 f;
+    struct {
+        uint64_t low;
+        uint16_t high;
+    } i;
+} floatx80u;
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+#if (defined(_BSD) && !defined(__APPLE__)) || defined(HOST_SOLARIS)
+enum {
+    float_round_nearest_even = FP_RN,
+    float_round_down         = FP_RM,
+    float_round_up           = FP_RP,
+    float_round_to_zero      = FP_RZ
+};
+#elif defined(__arm__)
+enum {
+    float_round_nearest_even = 0,
+    float_round_down         = 1,
+    float_round_up           = 2,
+    float_round_to_zero      = 3
+};
+#else
+enum {
+    float_round_nearest_even = FE_TONEAREST,
+    float_round_down         = FE_DOWNWARD,
+    float_round_up           = FE_UPWARD,
+    float_round_to_zero      = FE_TOWARDZERO
+};
+#endif
+
+typedef struct float_status {
+    signed char float_rounding_mode;
+#ifdef FLOATX80
+    signed char floatx80_rounding_precision;
+#endif
+} float_status;
+
+void set_float_rounding_mode(int val STATUS_PARAM);
+#ifdef FLOATX80
+void set_floatx80_rounding_precision(int val STATUS_PARAM);
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int STATUS_PARAM);
+float64 int32_to_float64( int STATUS_PARAM);
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( int STATUS_PARAM);
+#endif
+#ifdef FLOAT128
+float128 int32_to_float128( int STATUS_PARAM);
+#endif
+float32 int64_to_float32( int64_t STATUS_PARAM);
+float64 int64_to_float64( int64_t STATUS_PARAM);
+#ifdef FLOATX80
+floatx80 int64_to_floatx80( int64_t STATUS_PARAM);
+#endif
+#ifdef FLOAT128
+float128 int64_to_float128( int64_t STATUS_PARAM);
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float32_to_int32( float32  STATUS_PARAM);
+int float32_to_int32_round_to_zero( float32  STATUS_PARAM);
+int64_t float32_to_int64( float32  STATUS_PARAM);
+int64_t float32_to_int64_round_to_zero( float32  STATUS_PARAM);
+float64 float32_to_float64( float32  STATUS_PARAM);
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32  STATUS_PARAM);
+#endif
+#ifdef FLOAT128
+float128 float32_to_float128( float32  STATUS_PARAM);
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32  STATUS_PARAM);
+INLINE float32 float32_add( float32 a, float32 b STATUS_PARAM)
+{
+    return a + b;
+}
+INLINE float32 float32_sub( float32 a, float32 b STATUS_PARAM)
+{
+    return a - b;
+}
+INLINE float32 float32_mul( float32 a, float32 b STATUS_PARAM)
+{
+    return a * b;
+}
+INLINE float32 float32_div( float32 a, float32 b STATUS_PARAM)
+{
+    return a / b;
+}
+float32 float32_rem( float32, float32  STATUS_PARAM);
+float32 float32_sqrt( float32  STATUS_PARAM);
+INLINE int float32_eq( float32 a, float32 b STATUS_PARAM)
+{
+    return a == b;
+}
+INLINE int float32_le( float32 a, float32 b STATUS_PARAM)
+{
+    return a <= b;
+}
+INLINE int float32_lt( float32 a, float32 b STATUS_PARAM)
+{
+    return a < b;
+}
+INLINE int float32_eq_signaling( float32 a, float32 b STATUS_PARAM)
+{
+    return a <= b && a >= b;
+}
+INLINE int float32_le_quiet( float32 a, float32 b STATUS_PARAM)
+{
+    return islessequal(a, b);
+}
+INLINE int float32_lt_quiet( float32 a, float32 b STATUS_PARAM)
+{
+    return isless(a, b);
+}
+INLINE int float32_unordered( float32 a, float32 b STATUS_PARAM)
+{
+    return isunordered(a, b);
+
+}
+int float32_compare( float32, float32 STATUS_PARAM );
+int float32_compare_quiet( float32, float32 STATUS_PARAM );
+int float32_is_signaling_nan( float32 );
+
+INLINE float32 float32_abs(float32 a)
+{
+    return fabsf(a);
+}
+
+INLINE float32 float32_chs(float32 a)
+{
+    return -a;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float64_to_int32( float64 STATUS_PARAM );
+int float64_to_int32_round_to_zero( float64 STATUS_PARAM );
+int64_t float64_to_int64( float64 STATUS_PARAM );
+int64_t float64_to_int64_round_to_zero( float64 STATUS_PARAM );
+float32 float64_to_float32( float64 STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 STATUS_PARAM );
+#endif
+#ifdef FLOAT128
+float128 float64_to_float128( float64 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 STATUS_PARAM );
+float64 float64_trunc_to_int( float64 STATUS_PARAM );
+INLINE float64 float64_add( float64 a, float64 b STATUS_PARAM)
+{
+    return a + b;
+}
+INLINE float64 float64_sub( float64 a, float64 b STATUS_PARAM)
+{
+    return a - b;
+}
+INLINE float64 float64_mul( float64 a, float64 b STATUS_PARAM)
+{
+    return a * b;
+}
+INLINE float64 float64_div( float64 a, float64 b STATUS_PARAM)
+{
+    return a / b;
+}
+float64 float64_rem( float64, float64 STATUS_PARAM );
+float64 float64_sqrt( float64 STATUS_PARAM );
+INLINE int float64_eq( float64 a, float64 b STATUS_PARAM)
+{
+    return a == b;
+}
+INLINE int float64_le( float64 a, float64 b STATUS_PARAM)
+{
+    return a <= b;
+}
+INLINE int float64_lt( float64 a, float64 b STATUS_PARAM)
+{
+    return a < b;
+}
+INLINE int float64_eq_signaling( float64 a, float64 b STATUS_PARAM)
+{
+    return a <= b && a >= b;
+}
+INLINE int float64_le_quiet( float64 a, float64 b STATUS_PARAM)
+{
+    return islessequal(a, b);
+}
+INLINE int float64_lt_quiet( float64 a, float64 b STATUS_PARAM)
+{
+    return isless(a, b);
+
+}
+INLINE int float64_unordered( float64 a, float64 b STATUS_PARAM)
+{
+    return isunordered(a, b);
+
+}
+int float64_compare( float64, float64 STATUS_PARAM );
+int float64_compare_quiet( float64, float64 STATUS_PARAM );
+int float64_is_signaling_nan( float64 );
+int float64_is_nan( float64 );
+
+INLINE float64 float64_abs(float64 a)
+{
+    return fabs(a);
+}
+
+INLINE float64 float64_chs(float64 a)
+{
+    return -a;
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int floatx80_to_int32( floatx80 STATUS_PARAM );
+int floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
+int64_t floatx80_to_int64( floatx80 STATUS_PARAM);
+int64_t floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM);
+float32 floatx80_to_float32( floatx80 STATUS_PARAM );
+float64 floatx80_to_float64( floatx80 STATUS_PARAM );
+#ifdef FLOAT128
+float128 floatx80_to_float128( floatx80 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM );
+INLINE floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return a + b;
+}
+INLINE floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return a - b;
+}
+INLINE floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return a * b;
+}
+INLINE floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return a / b;
+}
+floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
+INLINE int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return a == b;
+}
+INLINE int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return a <= b;
+}
+INLINE int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return a < b;
+}
+INLINE int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return a <= b && a >= b;
+}
+INLINE int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return islessequal(a, b);
+}
+INLINE int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return isless(a, b);
+
+}
+INLINE int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return isunordered(a, b);
+
+}
+int floatx80_compare( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_is_signaling_nan( floatx80 );
+
+INLINE floatx80 floatx80_abs(floatx80 a)
+{
+    return fabsl(a);
+}
+
+INLINE floatx80 floatx80_chs(floatx80 a)
+{
+    return -a;
+}
+#endif
Index: /trunk/src/recompiler_new/fpu/softfloat-specialize.h
===================================================================
--- /trunk/src/recompiler_new/fpu/softfloat-specialize.h	(revision 13168)
+++ /trunk/src/recompiler_new/fpu/softfloat-specialize.h	(revision 13168)
@@ -0,0 +1,464 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Underflow tininess-detection mode, statically initialized to default value.
+| (The declaration in `softfloat.h' must match the `int8' type here.)
+*----------------------------------------------------------------------------*/
+int8 float_detect_tininess = float_tininess_after_rounding;
+
+/*----------------------------------------------------------------------------
+| Raises the exceptions specified by `flags'.  Floating-point traps can be
+| defined here if desired.  It is currently not possible for such a trap
+| to substitute a result value.  If traps are not implemented, this routine
+| should be simply `float_exception_flags |= flags;'.
+*----------------------------------------------------------------------------*/
+
+void float_raise( int8 flags STATUS_PARAM )
+{
+
+    STATUS(float_exception_flags) |= flags;
+
+}
+
+/*----------------------------------------------------------------------------
+| Internal canonical NaN format.
+*----------------------------------------------------------------------------*/
+typedef struct {
+    flag sign;
+    bits64 high, low;
+} commonNaNT;
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float32_default_nan 0xFFC00000
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float32_is_nan( float32 a )
+{
+
+    return ( 0xFF000000 < (bits32) ( a<<1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float32_is_signaling_nan( float32 a )
+{
+
+    return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float32ToCommonNaN( float32 a STATUS_PARAM )
+{
+    commonNaNT z;
+
+    if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR );
+    z.sign = a>>31;
+    z.low = 0;
+    z.high = ( (bits64) a )<<41;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the single-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float32 commonNaNToFloat32( commonNaNT a )
+{
+
+    return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float32 propagateFloat32NaN( float32 a, float32 b STATUS_PARAM)
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float32_is_nan( a );
+    aIsSignalingNaN = float32_is_signaling_nan( a );
+    bIsNaN = float32_is_nan( b );
+    bIsSignalingNaN = float32_is_signaling_nan( b );
+    a |= 0x00400000;
+    b |= 0x00400000;
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+    if ( aIsSignalingNaN ) {
+        if ( bIsSignalingNaN ) goto returnLargerSignificand;
+        return bIsNaN ? b : a;
+    }
+    else if ( aIsNaN ) {
+        if ( bIsSignalingNaN | ! bIsNaN ) return a;
+ returnLargerSignificand:
+        if ( (bits32) ( a<<1 ) < (bits32) ( b<<1 ) ) return b;
+        if ( (bits32) ( b<<1 ) < (bits32) ( a<<1 ) ) return a;
+        return ( a < b ) ? a : b;
+    }
+    else {
+        return b;
+    }
+
+}
+
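[Editorial illustration with made-up operands, not part of the diff: when both inputs are quiet NaNs the routine falls through to the larger-significand rule, which ignores the sign bit.  A standalone reproduction of that comparison:]

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t a = 0x7FC00001;   /* +qNaN, payload 1 */
        uint32_t b = 0xFFC00002;   /* -qNaN, payload 2 (larger significand) */

        /* shift out the sign bit and compare, as propagateFloat32NaN does */
        uint32_t r = ( (uint32_t) ( a<<1 ) < (uint32_t) ( b<<1 ) ) ? b : a;
        printf( "0x%08X\n", r );   /* 0xFFC00002: the larger payload wins */
        return 0;
    }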
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float64_default_nan LIT64( 0xFFF8000000000000 )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float64_is_nan( float64 a )
+{
+
+    return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float64_is_signaling_nan( float64 a )
+{
+
+    return
+           ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+        && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float64ToCommonNaN( float64 a STATUS_PARAM)
+{
+    commonNaNT z;
+
+    if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR);
+    z.sign = a>>63;
+    z.low = 0;
+    z.high = a<<12;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the double-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+
+    return
+          ( ( (bits64) a.sign )<<63 )
+        | LIT64( 0x7FF8000000000000 )
+        | ( a.high>>12 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float64 propagateFloat64NaN( float64 a, float64 b STATUS_PARAM)
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float64_is_nan( a );
+    aIsSignalingNaN = float64_is_signaling_nan( a );
+    bIsNaN = float64_is_nan( b );
+    bIsSignalingNaN = float64_is_signaling_nan( b );
+    a |= LIT64( 0x0008000000000000 );
+    b |= LIT64( 0x0008000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+    if ( aIsSignalingNaN ) {
+        if ( bIsSignalingNaN ) goto returnLargerSignificand;
+        return bIsNaN ? b : a;
+    }
+    else if ( aIsNaN ) {
+        if ( bIsSignalingNaN | ! bIsNaN ) return a;
+ returnLargerSignificand:
+        if ( (bits64) ( a<<1 ) < (bits64) ( b<<1 ) ) return b;
+        if ( (bits64) ( b<<1 ) < (bits64) ( a<<1 ) ) return a;
+        return ( a < b ) ? a : b;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.  The
+| `high' and `low' values hold the most- and least-significant bits,
+| respectively.
+*----------------------------------------------------------------------------*/
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low  LIT64( 0xC000000000000000 )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int floatx80_is_nan( floatx80 a )
+{
+
+    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int floatx80_is_signaling_nan( floatx80 a )
+{
+    bits64 aLow;
+
+    aLow = a.low & ~ LIT64( 0x4000000000000000 );
+    return
+           ( ( a.high & 0x7FFF ) == 0x7FFF )
+        && (bits64) ( aLow<<1 )
+        && ( a.low == aLow );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
+| invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT floatx80ToCommonNaN( floatx80 a STATUS_PARAM)
+{
+    commonNaNT z;
+
+    if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR);
+    z.sign = a.high>>15;
+    z.low = 0;
+    z.high = a.low<<1;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the extended
+| double-precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+    floatx80 z;
+
+    z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
+    z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = floatx80_is_nan( a );
+    aIsSignalingNaN = floatx80_is_signaling_nan( a );
+    bIsNaN = floatx80_is_nan( b );
+    bIsSignalingNaN = floatx80_is_signaling_nan( b );
+    a.low |= LIT64( 0xC000000000000000 );
+    b.low |= LIT64( 0xC000000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+    if ( aIsSignalingNaN ) {
+        if ( bIsSignalingNaN ) goto returnLargerSignificand;
+        return bIsNaN ? b : a;
+    }
+    else if ( aIsNaN ) {
+        if ( bIsSignalingNaN | ! bIsNaN ) return a;
+ returnLargerSignificand:
+        if ( a.low < b.low ) return b;
+        if ( b.low < a.low ) return a;
+        return ( a.high < b.high ) ? a : b;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.  The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*/
+#define float128_default_nan_high LIT64( 0xFFFF800000000000 )
+#define float128_default_nan_low  LIT64( 0x0000000000000000 )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float128_is_nan( float128 a )
+{
+
+    return
+           ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
+        && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float128_is_signaling_nan( float128 a )
+{
+
+    return
+           ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
+        && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float128ToCommonNaN( float128 a STATUS_PARAM)
+{
+    commonNaNT z;
+
+    if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR);
+    z.sign = a.high>>63;
+    shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the quadruple-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float128 commonNaNToFloat128( commonNaNT a )
+{
+    float128 z;
+
+    shift128Right( a.high, a.low, 16, &z.high, &z.low );
+    z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float128 propagateFloat128NaN( float128 a, float128 b STATUS_PARAM)
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float128_is_nan( a );
+    aIsSignalingNaN = float128_is_signaling_nan( a );
+    bIsNaN = float128_is_nan( b );
+    bIsSignalingNaN = float128_is_signaling_nan( b );
+    a.high |= LIT64( 0x0000800000000000 );
+    b.high |= LIT64( 0x0000800000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+    if ( aIsSignalingNaN ) {
+        if ( bIsSignalingNaN ) goto returnLargerSignificand;
+        return bIsNaN ? b : a;
+    }
+    else if ( aIsNaN ) {
+        if ( bIsSignalingNaN | ! bIsNaN ) return a;
+ returnLargerSignificand:
+        if ( lt128( a.high<<1, a.low, b.high<<1, b.low ) ) return b;
+        if ( lt128( b.high<<1, b.low, a.high<<1, a.low ) ) return a;
+        return ( a.high < b.high ) ? a : b;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#endif
+
Index: /trunk/src/recompiler_new/fpu/softfloat.c
===================================================================
--- /trunk/src/recompiler_new/fpu/softfloat.c	(revision 13168)
+++ /trunk/src/recompiler_new/fpu/softfloat.c	(revision 13168)
@@ -0,0 +1,5331 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations.  (Can be specialized to target if
+| desired.)
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.h"
+
+void set_float_rounding_mode(int val STATUS_PARAM)
+{
+    STATUS(float_rounding_mode) = val;
+}
+
+void set_float_exception_flags(int val STATUS_PARAM)
+{
+    STATUS(float_exception_flags) = val;
+}
+
+#ifdef FLOATX80
+void set_floatx80_rounding_precision(int val STATUS_PARAM)
+{
+    STATUS(floatx80_rounding_precision) = val;
+}
+#endif
+
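[Editorial usage sketch, not part of the diff.  It assumes the conventional softfloat.h in which STATUS_PARAM expands to a trailing pointer to a status structure (called float_status here by assumption) and in which float_round_to_zero is one of the rounding-mode constants used below:]

    float_status fp_status = {0};                          /* assumed type name  */

    set_float_rounding_mode( float_round_to_zero, &fp_status );
    set_float_exception_flags( 0, &fp_status );
    #ifdef FLOATX80
    set_floatx80_rounding_precision( 64, &fp_status );     /* round as if double */
    #endif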
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input.  If `zSign' is 1, the input is negated before being converted to an
+| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer.  However, if the fixed-
+| point input is too large, the invalid exception is raised and the largest
+| positive or negative integer is returned.
+*----------------------------------------------------------------------------*/
+
+static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int8 roundIncrement, roundBits;
+    int32 z;
+
+    roundingMode = STATUS(float_rounding_mode);
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    roundIncrement = 0x40;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = 0x7F;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = absZ & 0x7F;
+    absZ = ( absZ + roundIncrement )>>7;
+    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+    z = absZ;
+    if ( zSign ) z = - z;
+    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+    return z;
+
+}
+
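[Editorial worked example with made-up values, not part of the diff: with seven fraction bits a roundBits value of exactly 0x40 is a halfway case, and the final mask clears bit 0 of the provisional result so ties go to the even integer.]

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int roundNearestEven = 1;
        uint64_t absZ = ( 6ULL<<7 ) | 0x40;       /* 6.5, binary point between bits 6 and 7 */
        uint64_t roundBits = absZ & 0x7F;

        absZ = ( absZ + 0x40 )>>7;                /* provisional result: 7 */
        absZ &= ~ (uint64_t) ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
        printf( "%llu\n", (unsigned long long) absZ );   /* prints 6 */
        return 0;
    }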
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest positive or negative integer is
+| returned.
+*----------------------------------------------------------------------------*/
+
+static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment;
+    int64 z;
+
+    roundingMode = STATUS(float_rounding_mode);
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    increment = ( (sbits64) absZ1 < 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            increment = 0;
+        }
+        else {
+            if ( zSign ) {
+                increment = ( roundingMode == float_round_down ) && absZ1;
+            }
+            else {
+                increment = ( roundingMode == float_round_up ) && absZ1;
+            }
+        }
+    }
+    if ( increment ) {
+        ++absZ0;
+        if ( absZ0 == 0 ) goto overflow;
+        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
+    }
+    z = absZ0;
+    if ( zSign ) z = - z;
+    if ( z && ( ( z < 0 ) ^ zSign ) ) {
+ overflow:
+        float_raise( float_flag_invalid STATUS_VAR);
+        return
+              zSign ? (sbits64) LIT64( 0x8000000000000000 )
+            : LIT64( 0x7FFFFFFFFFFFFFFF );
+    }
+    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits32 extractFloat32Frac( float32 a )
+{
+
+    return a & 0x007FFFFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int16 extractFloat32Exp( float32 a )
+{
+
+    return ( a>>23 ) & 0xFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat32Sign( float32 a )
+{
+
+    return a>>31;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros32( aSig ) - 8;
+    *zSigPtr = aSig<<shiftCount;
+    *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| single-precision floating-point value, returning the result.  After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result.  This means that any integer portion of `zSig'
+| will be added into the exponent.  Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+
+    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
+
+}
+
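[Editorial check, not part of the diff: the remark about the integer portion means a significand with its integer bit still set simply carries into the exponent field, which is why callers pass an exponent that is 1 too small.  The same arithmetic in standalone C:]

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t pack_float32( uint32_t sign, uint32_t exp, uint32_t sig )
    {
        return ( sign<<31 ) + ( exp<<23 ) + sig;                  /* as packFloat32 above */
    }

    int main(void)
    {
        /* 1.5f encoded directly: biased exponent 0x7F, fraction 0x400000 */
        printf( "0x%08X\n", pack_float32( 0, 0x7F, 0x400000 ) );             /* 0x3FC00000 */
        /* same value, integer bit left in the significand, exponent one less */
        printf( "0x%08X\n", pack_float32( 0, 0x7E, 0x800000 | 0x400000 ) );  /* 0x3FC00000 */
        return 0;
    }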
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int8 roundIncrement, roundBits;
+    flag isTiny;
+
+    roundingMode = STATUS(float_rounding_mode);
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    roundIncrement = 0x40;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = 0x7F;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & 0x7F;
+    if ( 0xFD <= (bits16) zExp ) {
+        if (    ( 0xFD < zExp )
+             || (    ( zExp == 0xFD )
+                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
+           ) {
+            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
+        }
+        if ( zExp < 0 ) {
+            isTiny =
+                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ( zSig + roundIncrement < 0x80000000 );
+            shift32RightJamming( zSig, - zExp, &zSig );
+            zExp = 0;
+            roundBits = zSig & 0x7F;
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
+        }
+    }
+    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+    zSig = ( zSig + roundIncrement )>>7;
+    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+    if ( zSig == 0 ) zExp = 0;
+    return packFloat32( zSign, zExp, zSig );
+
+}
+
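[Editorial worked example, not part of the diff: to produce 1.0f under the calling convention above, a caller supplies the significand with its integer bit at bit 30 and an exponent one below the true biased exponent; the >>7 shift and the carry inside packFloat32 then put everything in place.]

    zSign = 0,  zExp = 0x7E,  zSig = 0x40000000      (binary point between bits 30 and 29)
    roundBits = zSig & 0x7F = 0
    zSig = ( zSig + 0x40 )>>7 = 0x00800000           (integer bit now at bit 23)
    packFloat32( 0, 0x7E, 0x00800000 ) = 0x3F800000  i.e. 1.0f (the carry bumps the exponent to 0x7F)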
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float32
+ normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros32( zSig ) - 1;
+    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat64Frac( float64 a )
+{
+
+    return a & LIT64( 0x000FFFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int16 extractFloat64Exp( float64 a )
+{
+
+    return ( a>>52 ) & 0x7FF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat64Sign( float64 a )
+{
+
+    return a>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros64( aSig ) - 11;
+    *zSigPtr = aSig<<shiftCount;
+    *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| double-precision floating-point value, returning the result.  After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result.  This means that any integer portion of `zSig'
+| will be added into the exponent.  Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+
+    return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int16 roundIncrement, roundBits;
+    flag isTiny;
+
+    roundingMode = STATUS(float_rounding_mode);
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    roundIncrement = 0x200;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = 0x3FF;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & 0x3FF;
+    if ( 0x7FD <= (bits16) zExp ) {
+        if (    ( 0x7FD < zExp )
+             || (    ( zExp == 0x7FD )
+                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
+           ) {
+            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+            return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
+        }
+        if ( zExp < 0 ) {
+            isTiny =
+                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
+            shift64RightJamming( zSig, - zExp, &zSig );
+            zExp = 0;
+            roundBits = zSig & 0x3FF;
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
+        }
+    }
+    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+    zSig = ( zSig + roundIncrement )>>10;
+    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
+    if ( zSig == 0 ) zExp = 0;
+    return packFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float64
+ normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros64( zSig ) - 1;
+    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloatx80Frac( floatx80 a )
+{
+
+    return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloatx80Exp( floatx80 a )
+{
+
+    return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+
+    return a.high>>15;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros64( aSig );
+    *zSigPtr = aSig<<shiftCount;
+    *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+| extended double-precision floating-point value, returning the result.
+*----------------------------------------------------------------------------*/
+
+INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+{
+    floatx80 z;
+
+    z.low = zSig;
+    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+ roundAndPackFloatx80(
+     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ STATUS_PARAM)
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment, isTiny;
+    int64 roundIncrement, roundMask, roundBits;
+
+    roundingMode = STATUS(float_rounding_mode);
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    if ( roundingPrecision == 80 ) goto precision80;
+    if ( roundingPrecision == 64 ) {
+        roundIncrement = LIT64( 0x0000000000000400 );
+        roundMask = LIT64( 0x00000000000007FF );
+    }
+    else if ( roundingPrecision == 32 ) {
+        roundIncrement = LIT64( 0x0000008000000000 );
+        roundMask = LIT64( 0x000000FFFFFFFFFF );
+    }
+    else {
+        goto precision80;
+    }
+    zSig0 |= ( zSig1 != 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = roundMask;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig0 & roundMask;
+    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+        if (    ( 0x7FFE < zExp )
+             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
+           ) {
+            goto overflow;
+        }
+        if ( zExp <= 0 ) {
+            isTiny =
+                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+                || ( zExp < 0 )
+                || ( zSig0 <= zSig0 + roundIncrement );
+            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
+            zExp = 0;
+            roundBits = zSig0 & roundMask;
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
+            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+            zSig0 += roundIncrement;
+            if ( (sbits64) zSig0 < 0 ) zExp = 1;
+            roundIncrement = roundMask + 1;
+            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+                roundMask |= roundIncrement;
+            }
+            zSig0 &= ~ roundMask;
+            return packFloatx80( zSign, zExp, zSig0 );
+        }
+    }
+    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+    zSig0 += roundIncrement;
+    if ( zSig0 < roundIncrement ) {
+        ++zExp;
+        zSig0 = LIT64( 0x8000000000000000 );
+    }
+    roundIncrement = roundMask + 1;
+    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+        roundMask |= roundIncrement;
+    }
+    zSig0 &= ~ roundMask;
+    if ( zSig0 == 0 ) zExp = 0;
+    return packFloatx80( zSign, zExp, zSig0 );
+ precision80:
+    increment = ( (sbits64) zSig1 < 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            increment = 0;
+        }
+        else {
+            if ( zSign ) {
+                increment = ( roundingMode == float_round_down ) && zSig1;
+            }
+            else {
+                increment = ( roundingMode == float_round_up ) && zSig1;
+            }
+        }
+    }
+    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+        if (    ( 0x7FFE < zExp )
+             || (    ( zExp == 0x7FFE )
+                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+                  && increment
+                )
+           ) {
+            roundMask = 0;
+ overflow:
+            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+            if (    ( roundingMode == float_round_to_zero )
+                 || ( zSign && ( roundingMode == float_round_up ) )
+                 || ( ! zSign && ( roundingMode == float_round_down ) )
+               ) {
+                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
+            }
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        if ( zExp <= 0 ) {
+            isTiny =
+                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+                || ( zExp < 0 )
+                || ! increment
+                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
+            zExp = 0;
+            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
+            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+            if ( roundNearestEven ) {
+                increment = ( (sbits64) zSig1 < 0 );
+            }
+            else {
+                if ( zSign ) {
+                    increment = ( roundingMode == float_round_down ) && zSig1;
+                }
+                else {
+                    increment = ( roundingMode == float_round_up ) && zSig1;
+                }
+            }
+            if ( increment ) {
+                ++zSig0;
+                zSig0 &=
+                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+                if ( (sbits64) zSig0 < 0 ) zExp = 1;
+            }
+            return packFloatx80( zSign, zExp, zSig0 );
+        }
+    }
+    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+    if ( increment ) {
+        ++zSig0;
+        if ( zSig0 == 0 ) {
+            ++zExp;
+            zSig0 = LIT64( 0x8000000000000000 );
+        }
+        else {
+            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+        }
+    }
+    else {
+        if ( zSig0 == 0 ) zExp = 0;
+    }
+    return packFloatx80( zSign, zExp, zSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+ normalizeRoundAndPackFloatx80(
+     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ STATUS_PARAM)
+{
+    int8 shiftCount;
+
+    if ( zSig0 == 0 ) {
+        zSig0 = zSig1;
+        zSig1 = 0;
+        zExp -= 64;
+    }
+    shiftCount = countLeadingZeros64( zSig0 );
+    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+    zExp -= shiftCount;
+    return
+        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the least-significant 64 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat128Frac1( float128 a )
+{
+
+    return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the most-significant 48 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat128Frac0( float128 a )
+{
+
+    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the quadruple-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloat128Exp( float128 a )
+{
+
+    return ( a.high>>48 ) & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the quadruple-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat128Sign( float128 a )
+{
+
+    return a.high>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
+| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloat128Subnormal(
+     bits64 aSig0,
+     bits64 aSig1,
+     int32 *zExpPtr,
+     bits64 *zSig0Ptr,
+     bits64 *zSig1Ptr
+ )
+{
+    int8 shiftCount;
+
+    if ( aSig0 == 0 ) {
+        shiftCount = countLeadingZeros64( aSig1 ) - 15;
+        if ( shiftCount < 0 ) {
+            *zSig0Ptr = aSig1>>( - shiftCount );
+            *zSig1Ptr = aSig1<<( shiftCount & 63 );
+        }
+        else {
+            *zSig0Ptr = aSig1<<shiftCount;
+            *zSig1Ptr = 0;
+        }
+        *zExpPtr = - shiftCount - 63;
+    }
+    else {
+        shiftCount = countLeadingZeros64( aSig0 ) - 15;
+        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
+        *zExpPtr = 1 - shiftCount;
+    }
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', the exponent `zExp', and the significand formed
+| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
+| floating-point value, returning the result.  After being shifted into the
+| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
+| added together to form the most significant 32 bits of the result.  This
+| means that any integer portion of `zSig0' will be added into the exponent.
+| Since a properly normalized significand will have an integer portion equal
+| to 1, the `zExp' input should be 1 less than the desired result exponent
+| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float128
+ packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+    float128 z;
+
+    z.low = zSig1;
+    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128
+ roundAndPackFloat128(
+     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment, isTiny;
+
+    roundingMode = STATUS(float_rounding_mode);
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    increment = ( (sbits64) zSig2 < 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            increment = 0;
+        }
+        else {
+            if ( zSign ) {
+                increment = ( roundingMode == float_round_down ) && zSig2;
+            }
+            else {
+                increment = ( roundingMode == float_round_up ) && zSig2;
+            }
+        }
+    }
+    if ( 0x7FFD <= (bits32) zExp ) {
+        if (    ( 0x7FFD < zExp )
+             || (    ( zExp == 0x7FFD )
+                  && eq128(
+                         LIT64( 0x0001FFFFFFFFFFFF ),
+                         LIT64( 0xFFFFFFFFFFFFFFFF ),
+                         zSig0,
+                         zSig1
+                     )
+                  && increment
+                )
+           ) {
+            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+            if (    ( roundingMode == float_round_to_zero )
+                 || ( zSign && ( roundingMode == float_round_up ) )
+                 || ( ! zSign && ( roundingMode == float_round_down ) )
+               ) {
+                return
+                    packFloat128(
+                        zSign,
+                        0x7FFE,
+                        LIT64( 0x0000FFFFFFFFFFFF ),
+                        LIT64( 0xFFFFFFFFFFFFFFFF )
+                    );
+            }
+            return packFloat128( zSign, 0x7FFF, 0, 0 );
+        }
+        if ( zExp < 0 ) {
+            isTiny =
+                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ! increment
+                || lt128(
+                       zSig0,
+                       zSig1,
+                       LIT64( 0x0001FFFFFFFFFFFF ),
+                       LIT64( 0xFFFFFFFFFFFFFFFF )
+                   );
+            shift128ExtraRightJamming(
+                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
+            zExp = 0;
+            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
+            if ( roundNearestEven ) {
+                increment = ( (sbits64) zSig2 < 0 );
+            }
+            else {
+                if ( zSign ) {
+                    increment = ( roundingMode == float_round_down ) && zSig2;
+                }
+                else {
+                    increment = ( roundingMode == float_round_up ) && zSig2;
+                }
+            }
+        }
+    }
+    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
+    if ( increment ) {
+        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
+        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
+    }
+    else {
+        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
+    }
+    return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input.  This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+static float128
+ normalizeRoundAndPackFloat128(
+     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
+{
+    int8 shiftCount;
+    bits64 zSig2;
+
+    if ( zSig0 == 0 ) {
+        zSig0 = zSig1;
+        zSig1 = 0;
+        zExp -= 64;
+    }
+    shiftCount = countLeadingZeros64( zSig0 ) - 15;
+    if ( 0 <= shiftCount ) {
+        zSig2 = 0;
+        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+    }
+    else {
+        shift128ExtraRightJamming(
+            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
+    }
+    zExp -= shiftCount;
+    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int32_to_float32( int32 a STATUS_PARAM )
+{
+    flag zSign;
+
+    if ( a == 0 ) return 0;
+    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
+    zSign = ( a < 0 );
+    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int32_to_float64( int32 a STATUS_PARAM )
+{
+    flag zSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig;
+
+    if ( a == 0 ) return 0;
+    zSign = ( a < 0 );
+    absA = zSign ? - a : a;
+    shiftCount = countLeadingZeros32( absA ) + 21;
+    zSig = absA;
+    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
+
+}
+
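[Editorial sanity check of the constants above, not part of the diff: countLeadingZeros32(1) is 31, so shiftCount is 52 for a == 1 and the packed result is the encoding of 1.0.]

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t absA = 1;
        int shiftCount = 31 + 21;                           /* countLeadingZeros32(1) + 21 */
        uint64_t bits = ( (uint64_t) ( 0x432 - shiftCount )<<52 )
                      + ( (uint64_t) absA<<shiftCount );    /* packFloat64( 0, 0x3FE, 1<<52 ) */
        printf( "0x%016llX\n", (unsigned long long) bits ); /* 0x3FF0000000000000, i.e. 1.0 */
        return 0;
    }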
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
+{
+    flag zSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig;
+
+    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+    zSign = ( a < 0 );
+    absA = zSign ? - a : a;
+    shiftCount = countLeadingZeros32( absA ) + 32;
+    zSig = absA;
+    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int32_to_float128( int32 a STATUS_PARAM )
+{
+    flag zSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig0;
+
+    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+    zSign = ( a < 0 );
+    absA = zSign ? - a : a;
+    shiftCount = countLeadingZeros32( absA ) + 17;
+    zSig0 = absA;
+    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int64_to_float32( int64 a STATUS_PARAM )
+{
+    flag zSign;
+    uint64 absA;
+    int8 shiftCount;
+
+    if ( a == 0 ) return 0;
+    zSign = ( a < 0 );
+    absA = zSign ? - a : a;
+    shiftCount = countLeadingZeros64( absA ) - 40;
+    if ( 0 <= shiftCount ) {
+        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
+    }
+    else {
+        shiftCount += 7;
+        if ( shiftCount < 0 ) {
+            shift64RightJamming( absA, - shiftCount, &absA );
+        }
+        else {
+            absA <<= shiftCount;
+        }
+        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
+    }
+
+}
+
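+/*----------------------------------------------------------------------------
+| Note on the two branches above: when `absA' has at most 24 significant bits
+| ( 0 <= shiftCount ), the value fits the single-precision significand and is
+| packed exactly.  Otherwise the magnitude is repositioned so that its leading
+| bit lands at bit 30, which is the form roundAndPackFloat32() expects (7 low-
+| order rounding bits below the 23-bit fraction), and shift64RightJamming()
+| ORs any discarded bits into the sticky bit so that a single rounding step
+| gives the correctly rounded result.
+*----------------------------------------------------------------------------*/
+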
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int64_to_float64( int64 a STATUS_PARAM )
+{
+    flag zSign;
+
+    if ( a == 0 ) return 0;
+    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
+        return packFloat64( 1, 0x43E, 0 );
+    }
+    zSign = ( a < 0 );
+    return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
+{
+    flag zSign;
+    uint64 absA;
+    int8 shiftCount;
+
+    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+    zSign = ( a < 0 );
+    absA = zSign ? - a : a;
+    shiftCount = countLeadingZeros64( absA );
+    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int64_to_float128( int64 a STATUS_PARAM )
+{
+    flag zSign;
+    uint64 absA;
+    int8 shiftCount;
+    int32 zExp;
+    bits64 zSig0, zSig1;
+
+    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+    zSign = ( a < 0 );
+    absA = zSign ? - a : a;
+    shiftCount = countLeadingZeros64( absA ) + 49;
+    zExp = 0x406E - shiftCount;
+    if ( 64 <= shiftCount ) {
+        zSig1 = 0;
+        zSig0 = absA;
+        shiftCount -= 64;
+    }
+    else {
+        zSig1 = absA;
+        zSig0 = 0;
+    }
+    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+    return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32( float32 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig;
+    bits64 aSig64;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
+    if ( aExp ) aSig |= 0x00800000;
+    shiftCount = 0xAF - aExp;
+    aSig64 = aSig;
+    aSig64 <<= 32;
+    if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
+    return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig;
+    int32 z;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    shiftCount = aExp - 0x9E;
+    if ( 0 <= shiftCount ) {
+        if ( a != 0xCF000000 ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
+        }
+        return (sbits32) 0x80000000;
+    }
+    else if ( aExp <= 0x7E ) {
+        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+        return 0;
+    }
+    aSig = ( aSig | 0x00800000 )<<8;
+    z = aSig>>( - shiftCount );
+    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    if ( aSign ) z = - z;
+    return z;
+
+}
+
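+/*----------------------------------------------------------------------------
+| Note on the overflow test above: 0xCF000000 is the single-precision encoding
+| of -2^31 (sign bit set, biased exponent 0x9E, zero fraction), the only value
+| with aExp >= 0x9E that is representable as a 32-bit integer.  Every other
+| input reaching that branch, including NaNs (whose exponent field is 0xFF),
+| raises the invalid exception and saturates.  The `aExp <= 0x7E' branch
+| handles magnitudes below 1, which truncate to 0 and are inexact unless the
+| input is a zero.
+*----------------------------------------------------------------------------*/
+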
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64( float32 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig;
+    bits64 aSig64, aSigExtra;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    shiftCount = 0xBE - aExp;
+    if ( shiftCount < 0 ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+            return LIT64( 0x7FFFFFFFFFFFFFFF );
+        }
+        return (sbits64) LIT64( 0x8000000000000000 );
+    }
+    if ( aExp ) aSig |= 0x00800000;
+    aSig64 = aSig;
+    aSig64 <<= 40;
+    shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
+    return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.  If
+| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig;
+    bits64 aSig64;
+    int64 z;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    shiftCount = aExp - 0xBE;
+    if ( 0 <= shiftCount ) {
+        if ( a != 0xDF000000 ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+                return LIT64( 0x7FFFFFFFFFFFFFFF );
+            }
+        }
+        return (sbits64) LIT64( 0x8000000000000000 );
+    }
+    else if ( aExp <= 0x7E ) {
+        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+        return 0;
+    }
+    aSig64 = aSig | 0x00800000;
+    aSig64 <<= 40;
+    z = aSig64>>( - shiftCount );
+    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    if ( aSign ) z = - z;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float32_to_float64( float32 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
+        return packFloat64( aSign, 0x7FF, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+        --aExp;
+    }
+    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
+        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    aSig |= 0x00800000;
+    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the quadruple-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float32_to_float128( float32 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
+        return packFloat128( aSign, 0x7FFF, 0, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+        --aExp;
+    }
+    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the single-precision floating-point value `a' to an integer, and
+| returns the result as a single-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_round_to_int( float32 a STATUS_PARAM)
+{
+    flag aSign;
+    int16 aExp;
+    bits32 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float32 z;
+
+    aExp = extractFloat32Exp( a );
+    if ( 0x96 <= aExp ) {
+        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
+            return propagateFloat32NaN( a, a STATUS_VAR );
+        }
+        return a;
+    }
+    if ( aExp <= 0x7E ) {
+        if ( (bits32) ( a<<1 ) == 0 ) return a;
+        STATUS(float_exception_flags) |= float_flag_inexact;
+        aSign = extractFloat32Sign( a );
+        switch ( STATUS(float_rounding_mode) ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
+                return packFloat32( aSign, 0x7F, 0 );
+            }
+            break;
+         case float_round_down:
+            return aSign ? 0xBF800000 : 0;
+         case float_round_up:
+            return aSign ? 0x80000000 : 0x3F800000;
+        }
+        return packFloat32( aSign, 0, 0 );
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x96 - aExp;
+    roundBitsMask = lastBitMask - 1;
+    z = a;
+    roundingMode = STATUS(float_rounding_mode);
+    if ( roundingMode == float_round_nearest_even ) {
+        z += lastBitMask>>1;
+        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z += roundBitsMask;
+        }
+    }
+    z &= ~ roundBitsMask;
+    if ( z != a ) STATUS(float_exception_flags) |= float_flag_inexact;
+    return z;
+
+}
+
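+/*----------------------------------------------------------------------------
+| Note on the masks above: 0x96 = 150 = 127 (the exponent bias) + 23 (the
+| fraction width), so for 1 <= |a| < 2^23 the bit of the encoding with weight
+| 1 is bit ( 0x96 - aExp ).  `lastBitMask' selects that bit and
+| `roundBitsMask' the fractional bits below it, so the rounding is carried out
+| with integer arithmetic directly on the bit pattern; for instance, when
+| aExp = 0x80 (values in [2,4)) the low 22 bits are fractional.  Adding
+| `lastBitMask>>1' and then clearing `roundBitsMask' rounds to nearest, and
+| clearing `lastBitMask' on an exact tie gives ties-to-even.  Exponents of
+| 0x96 and above describe values of magnitude 2^23 or more, which are already
+| integers and are returned unchanged.
+*----------------------------------------------------------------------------*/
+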
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the single-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 6;
+    bSig <<= 6;
+    if ( 0 < expDiff ) {
+        if ( aExp == 0xFF ) {
+            if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+            return a;
+        }
+        if ( bExp == 0 ) {
+            --expDiff;
+        }
+        else {
+            bSig |= 0x20000000;
+        }
+        shift32RightJamming( bSig, expDiff, &bSig );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        if ( bExp == 0xFF ) {
+            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+            return packFloat32( zSign, 0xFF, 0 );
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;
+        }
+        else {
+            aSig |= 0x20000000;
+        }
+        shift32RightJamming( aSig, - expDiff, &aSig );
+        zExp = bExp;
+    }
+    else {
+        if ( aExp == 0xFF ) {
+            if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+            return a;
+        }
+        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+        zSig = 0x40000000 + aSig + bSig;
+        zExp = aExp;
+        goto roundAndPack;
+    }
+    aSig |= 0x20000000;
+    zSig = ( aSig + bSig )<<1;
+    --zExp;
+    if ( (sbits32) zSig < 0 ) {
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+ roundAndPack:
+    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
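+/*----------------------------------------------------------------------------
+| Note on the scaling above: the significands are pre-shifted left by 6, so
+| the implicit leading bit sits at bit 29 and six low-order bits are free to
+| receive whatever the alignment shift pushes down; shift32RightJamming() ORs
+| anything shifted beyond bit 0 into the sticky bit, so one final rounding is
+| enough.  In the equal-exponent branch the constant 0x40000000 is simply the
+| sum of the two implicit bits ( 2 * 0x20000000 ), which already leaves the
+| leading bit of the sum at bit 30; in the other branches the sum is
+| normalized to bit 30 (shifting left once and decrementing zExp when needed)
+| before roundAndPackFloat32().
+*----------------------------------------------------------------------------*/
+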
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the single-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 7;
+    bSig <<= 7;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0xFF ) {
+        if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+        float_raise( float_flag_invalid STATUS_VAR);
+        return float32_default_nan;
+    }
+    if ( aExp == 0 ) {
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+        return packFloat32( zSign ^ 1, 0xFF, 0 );
+    }
+    if ( aExp == 0 ) {
+        ++expDiff;
+    }
+    else {
+        aSig |= 0x40000000;
+    }
+    shift32RightJamming( aSig, - expDiff, &aSig );
+    bSig |= 0x40000000;
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        --expDiff;
+    }
+    else {
+        bSig |= 0x40000000;
+    }
+    shift32RightJamming( bSig, expDiff, &bSig );
+    aSig |= 0x40000000;
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the single-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_add( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign == bSign ) {
+        return addFloat32Sigs( a, b, aSign STATUS_VAR);
+    }
+    else {
+        return subFloat32Sigs( a, b, aSign STATUS_VAR );
+    }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the single-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sub( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign == bSign ) {
+        return subFloat32Sigs( a, b, aSign STATUS_VAR );
+    }
+    else {
+        return addFloat32Sigs( a, b, aSign STATUS_VAR );
+    }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the single-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_mul( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig;
+    bits64 zSig64;
+    bits32 zSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0xFF ) {
+        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+            return propagateFloat32NaN( a, b STATUS_VAR );
+        }
+        if ( ( bExp | bSig ) == 0 ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            return float32_default_nan;
+        }
+        return packFloat32( zSign, 0xFF, 0 );
+    }
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+        if ( ( aExp | aSig ) == 0 ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            return float32_default_nan;
+        }
+        return packFloat32( zSign, 0xFF, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x7F;
+    aSig = ( aSig | 0x00800000 )<<7;
+    bSig = ( bSig | 0x00800000 )<<8;
+    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
+    zSig = zSig64;
+    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
+        zSig <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
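+/*----------------------------------------------------------------------------
+| Note on the scaling above: after the shifts, aSig lies in [2^30,2^31) and
+| bSig in [2^31,2^32), so their 64-bit product lies in [2^61,2^63).
+| shift64RightJamming( ..., 32, ... ) keeps the high 32 bits and ORs every
+| discarded bit into bit 0, leaving zSig in [2^29,2^31); the conditional left
+| shift then normalizes the leading bit to bit 30 (adjusting zExp) before
+| roundAndPackFloat32().  The exponent `aExp + bExp - 0x7F' removes one copy
+| of the bias so that the packed result carries a single bias.
+*----------------------------------------------------------------------------*/
+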
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the single-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_div( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+        if ( bExp == 0xFF ) {
+            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+            float_raise( float_flag_invalid STATUS_VAR);
+            return float32_default_nan;
+        }
+        return packFloat32( zSign, 0xFF, 0 );
+    }
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+        return packFloat32( zSign, 0, 0 );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            if ( ( aExp | aSig ) == 0 ) {
+                float_raise( float_flag_invalid STATUS_VAR);
+                return float32_default_nan;
+            }
+            float_raise( float_flag_divbyzero STATUS_VAR);
+            return packFloat32( zSign, 0xFF, 0 );
+        }
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x7D;
+    aSig = ( aSig | 0x00800000 )<<7;
+    bSig = ( bSig | 0x00800000 )<<8;
+    if ( bSig <= ( aSig + aSig ) ) {
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
+    if ( ( zSig & 0x3F ) == 0 ) {
+        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
+    }
+    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
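+/*----------------------------------------------------------------------------
+| Note on the quotient scaling above: with aSig in [2^30,2^31) and bSig in
+| [2^31,2^32), the 32-bit quotient of ( aSig<<32 ) / bSig would reach bit 31
+| whenever bSig <= 2*aSig; halving aSig and bumping zExp in that case keeps
+| the quotient's leading bit at bit 30, the position roundAndPackFloat32()
+| expects.  The final fix-up only matters when the quotient's low six bits are
+| all zero, because only then could a truncated quotient be mistaken for an
+| exact result or an exact halfway case; only then is the quotient multiplied
+| back, and bit 0 is set as a sticky bit if the division was inexact.
+*----------------------------------------------------------------------------*/
+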
+/*----------------------------------------------------------------------------
+| Returns the remainder of the single-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_rem( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, expDiff;
+    bits32 aSig, bSig;
+    bits32 q;
+    bits64 aSig64, bSig64, q64;
+    bits32 alternateASig;
+    sbits32 sigMean;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );
+    if ( aExp == 0xFF ) {
+        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+            return propagateFloat32NaN( a, b STATUS_VAR );
+        }
+        float_raise( float_flag_invalid STATUS_VAR);
+        return float32_default_nan;
+    }
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            return float32_default_nan;
+        }
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return a;
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    expDiff = aExp - bExp;
+    aSig |= 0x00800000;
+    bSig |= 0x00800000;
+    if ( expDiff < 32 ) {
+        aSig <<= 8;
+        bSig <<= 8;
+        if ( expDiff < 0 ) {
+            if ( expDiff < -1 ) return a;
+            aSig >>= 1;
+        }
+        q = ( bSig <= aSig );
+        if ( q ) aSig -= bSig;
+        if ( 0 < expDiff ) {
+            q = ( ( (bits64) aSig )<<32 ) / bSig;
+            q >>= 32 - expDiff;
+            bSig >>= 2;
+            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+        }
+        else {
+            aSig >>= 2;
+            bSig >>= 2;
+        }
+    }
+    else {
+        if ( bSig <= aSig ) aSig -= bSig;
+        aSig64 = ( (bits64) aSig )<<40;
+        bSig64 = ( (bits64) bSig )<<40;
+        expDiff -= 64;
+        while ( 0 < expDiff ) {
+            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+            aSig64 = - ( ( bSig * q64 )<<38 );
+            expDiff -= 62;
+        }
+        expDiff += 64;
+        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+        q = q64>>( 64 - expDiff );
+        bSig <<= 6;
+        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
+    }
+    do {
+        alternateASig = aSig;
+        ++q;
+        aSig -= bSig;
+    } while ( 0 <= (sbits32) aSig );
+    sigMean = aSig + alternateASig;
+    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+        aSig = alternateASig;
+    }
+    zSign = ( (sbits32) aSig < 0 );
+    if ( zSign ) aSig = - aSig;
+    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sqrt( float32 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, zExp;
+    bits32 aSig, zSig;
+    bits64 rem, term;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return propagateFloat32NaN( a, 0 STATUS_VAR );
+        if ( ! aSign ) return a;
+        float_raise( float_flag_invalid STATUS_VAR);
+        return float32_default_nan;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig ) == 0 ) return a;
+        float_raise( float_flag_invalid STATUS_VAR);
+        return float32_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return 0;
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
+    aSig = ( aSig | 0x00800000 )<<8;
+    zSig = estimateSqrt32( aExp, aSig ) + 2;
+    if ( ( zSig & 0x7F ) <= 5 ) {
+        if ( zSig < 2 ) {
+            zSig = 0x7FFFFFFF;
+            goto roundAndPack;
+        }
+        aSig >>= aExp & 1;
+        term = ( (bits64) zSig ) * zSig;
+        rem = ( ( (bits64) aSig )<<32 ) - term;
+        while ( (sbits64) rem < 0 ) {
+            --zSig;
+            rem += ( ( (bits64) zSig )<<1 ) | 1;
+        }
+        zSig |= ( rem != 0 );
+    }
+    shift32RightJamming( zSig, 1, &zSig );
+ roundAndPack:
+    return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
+
+}
+
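+/*----------------------------------------------------------------------------
+| Note on the square root above: the result exponent is half the unbiased
+| input exponent, computed as ( ( aExp - 0x7F )>>1 ) + 0x7E, with the parity
+| of the exponent folded into the significand by `aSig >>= aExp & 1'.
+| estimateSqrt32() supplies an approximation that is corrected only when its
+| low bits ( ( zSig & 0x7F ) <= 5 ) leave the rounding decision in doubt: the
+| loop decrements zSig until zSig*zSig no longer exceeds aSig*2^32, using the
+| identity ( zSig + 1 )^2 - zSig^2 = 2*zSig + 1 to update the remainder, and
+| any nonzero remainder is ORed into the result as a sticky bit.
+*----------------------------------------------------------------------------*/
+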
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_eq( float32 a, float32 b STATUS_PARAM )
+{
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+
+}
+
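+/*----------------------------------------------------------------------------
+| Note on the zero test above: `(bits32) ( ( a | b )<<1 ) == 0' shifts the
+| sign bits out, so it holds exactly when both operands are zeros of either
+| sign.  This makes +0 and -0 compare equal even though their bit patterns
+| differ; the ordering predicates below use the same idiom for the mixed-sign
+| case.
+*----------------------------------------------------------------------------*/
+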
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_le( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
+    return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_lt( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
+    return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
+{
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
+    return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
+    return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32( float64 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+    shiftCount = 0x42C - aExp;
+    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
+    return roundAndPackInt32( aSign, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig, savedASig;
+    int32 z;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( 0x41E < aExp ) {
+        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+        goto invalid;
+    }
+    else if ( aExp < 0x3FF ) {
+        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+        return 0;
+    }
+    aSig |= LIT64( 0x0010000000000000 );
+    shiftCount = 0x433 - aExp;
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    z = aSig;
+    if ( aSign ) z = - z;
+    if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+        float_raise( float_flag_invalid STATUS_VAR);
+        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    return z;
+
+}
+
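+/*----------------------------------------------------------------------------
+| Note on the constants above: 0x433 = 1075 = 1023 (the exponent bias) + 52
+| (the fraction width), so `0x433 - aExp' is the number of fraction bits to
+| drop when truncating toward zero.  Inputs with aExp above 0x41E have a
+| magnitude of at least 2^32 and go straight to the invalid case; smaller
+| inputs may still overflow the 32-bit range after truncation, which the test
+| `( z < 0 ) ^ aSign' detects, since a correctly converted value keeps the
+| sign of the input (the lone exception, -2^31, satisfies both sides of the
+| test and is accepted).  Comparing `aSig<<shiftCount' with the saved
+| significand afterwards recovers whether any bits were discarded, i.e.
+| whether the result is inexact.
+*----------------------------------------------------------------------------*/
+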
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64( float64 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig, aSigExtra;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+    shiftCount = 0x433 - aExp;
+    if ( shiftCount <= 0 ) {
+        if ( 0x43E < aExp ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            if (    ! aSign
+                 || (    ( aExp == 0x7FF )
+                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
+               ) {
+                return LIT64( 0x7FFFFFFFFFFFFFFF );
+            }
+            return (sbits64) LIT64( 0x8000000000000000 );
+        }
+        aSigExtra = 0;
+        aSig <<= - shiftCount;
+    }
+    else {
+        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+    }
+    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig;
+    int64 z;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+    shiftCount = aExp - 0x433;
+    if ( 0 <= shiftCount ) {
+        if ( 0x43E <= aExp ) {
+            if ( a != LIT64( 0xC3E0000000000000 ) ) {
+                float_raise( float_flag_invalid STATUS_VAR);
+                if (    ! aSign
+                     || (    ( aExp == 0x7FF )
+                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
+                   ) {
+                    return LIT64( 0x7FFFFFFFFFFFFFFF );
+                }
+            }
+            return (sbits64) LIT64( 0x8000000000000000 );
+        }
+        z = aSig<<shiftCount;
+    }
+    else {
+        if ( aExp < 0x3FE ) {
+            if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+            return 0;
+        }
+        z = aSig>>( - shiftCount );
+        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+            STATUS(float_exception_flags) |= float_flag_inexact;
+        }
+    }
+    if ( aSign ) z = - z;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the single-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float64_to_float32( float64 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig;
+    bits32 zSig;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
+        return packFloat32( aSign, 0xFF, 0 );
+    }
+    shift64RightJamming( aSig, 22, &aSig );
+    zSig = aSig;
+    if ( aExp || zSig ) {
+        zSig |= 0x40000000;
+        aExp -= 0x381;
+    }
+    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
+        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    return
+        packFloatx80(
+            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the quadruple-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float64_to_float128( float64 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig, zSig0, zSig1;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
+        return packFloat128( aSign, 0x7FFF, 0, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+        --aExp;
+    }
+    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
+    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the double-precision floating-point value `a' to an integer, and
+| returns the result as a double-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_round_to_int( float64 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float64 z;
+
+    aExp = extractFloat64Exp( a );
+    if ( 0x433 <= aExp ) {
+        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
+            return propagateFloat64NaN( a, a STATUS_VAR );
+        }
+        return a;
+    }
+    if ( aExp < 0x3FF ) {
+        if ( (bits64) ( a<<1 ) == 0 ) return a;
+        STATUS(float_exception_flags) |= float_flag_inexact;
+        aSign = extractFloat64Sign( a );
+        switch ( STATUS(float_rounding_mode) ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
+                return packFloat64( aSign, 0x3FF, 0 );
+            }
+            break;
+         case float_round_down:
+            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
+         case float_round_up:
+            return
+            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
+        }
+        return packFloat64( aSign, 0, 0 );
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x433 - aExp;
+    roundBitsMask = lastBitMask - 1;
+    z = a;
+    roundingMode = STATUS(float_rounding_mode);
+    if ( roundingMode == float_round_nearest_even ) {
+        z += lastBitMask>>1;
+        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z += roundBitsMask;
+        }
+    }
+    z &= ~ roundBitsMask;
+    if ( z != a ) STATUS(float_exception_flags) |= float_flag_inexact;
+    return z;
+
+}
+
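+/*----------------------------------------------------------------------------
+| Rounds the double-precision floating-point value `a' to an integer using
+| round-to-zero (truncation) regardless of the current rounding mode, and
+| returns the result as a double-precision floating-point value.  The current
+| rounding mode is saved, forced to `float_round_to_zero' around
+| float64_round_to_int(), and then restored.
+*----------------------------------------------------------------------------*/
+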
+float64 float64_trunc_to_int( float64 a STATUS_PARAM)
+{
+    int oldmode;
+    float64 res;
+    oldmode = STATUS(float_rounding_mode);
+    STATUS(float_rounding_mode) = float_round_to_zero;
+    res = float64_round_to_int(a STATUS_VAR);
+    STATUS(float_rounding_mode) = oldmode;
+    return res;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the double-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
+{
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 9;
+    bSig <<= 9;
+    if ( 0 < expDiff ) {
+        if ( aExp == 0x7FF ) {
+            if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+            return a;
+        }
+        if ( bExp == 0 ) {
+            --expDiff;
+        }
+        else {
+            bSig |= LIT64( 0x2000000000000000 );
+        }
+        shift64RightJamming( bSig, expDiff, &bSig );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        if ( bExp == 0x7FF ) {
+            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+            return packFloat64( zSign, 0x7FF, 0 );
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;
+        }
+        else {
+            aSig |= LIT64( 0x2000000000000000 );
+        }
+        shift64RightJamming( aSig, - expDiff, &aSig );
+        zExp = bExp;
+    }
+    else {
+        if ( aExp == 0x7FF ) {
+            if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+            return a;
+        }
+        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
+        zExp = aExp;
+        goto roundAndPack;
+    }
+    aSig |= LIT64( 0x2000000000000000 );
+    zSig = ( aSig + bSig )<<1;
+    --zExp;
+    if ( (sbits64) zSig < 0 ) {
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+ roundAndPack:
+    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
+{
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 10;
+    bSig <<= 10;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FF ) {
+        if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+        float_raise( float_flag_invalid STATUS_VAR);
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FF ) {
+        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+        return packFloat64( zSign ^ 1, 0x7FF, 0 );
+    }
+    if ( aExp == 0 ) {
+        ++expDiff;
+    }
+    else {
+        aSig |= LIT64( 0x4000000000000000 );
+    }
+    shift64RightJamming( aSig, - expDiff, &aSig );
+    bSig |= LIT64( 0x4000000000000000 );
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        --expDiff;
+    }
+    else {
+        bSig |= LIT64( 0x4000000000000000 );
+    }
+    shift64RightJamming( bSig, expDiff, &bSig );
+    aSig |= LIT64( 0x4000000000000000 );
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the double-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_add( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign == bSign ) {
+        return addFloat64Sigs( a, b, aSign STATUS_VAR );
+    }
+    else {
+        return subFloat64Sigs( a, b, aSign STATUS_VAR );
+    }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sub( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign == bSign ) {
+        return subFloat64Sigs( a, b, aSign STATUS_VAR );
+    }
+    else {
+        return addFloat64Sigs( a, b, aSign STATUS_VAR );
+    }
+
+}
+
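+/*----------------------------------------------------------------------------
+| Note: float64_add and float64_sub dispatch purely on the operand signs.
+| When the effective signs agree, the magnitudes are added (addFloat64Sigs);
+| otherwise they are subtracted (subFloat64Sigs), which flips the result sign
+| itself when the second magnitude is the larger.  For example, adding +2.0
+| and -3.0 takes the subFloat64Sigs path and yields -1.0.
+*----------------------------------------------------------------------------*/
+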
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_mul( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FF ) {
+        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+            return propagateFloat64NaN( a, b STATUS_VAR );
+        }
+        if ( ( bExp | bSig ) == 0 ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0 );
+    }
+    if ( bExp == 0x7FF ) {
+        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+        if ( ( aExp | aSig ) == 0 ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x3FF;
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+    mul64To128( aSig, bSig, &zSig0, &zSig1 );
+    zSig0 |= ( zSig1 != 0 );
+    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
+        zSig0 <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
+
+}
+
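+/*----------------------------------------------------------------------------
+| Note: in float64_mul, the biased product exponent is aExp + bExp minus one
+| bias (0x3FF).  The significands, with hidden bits made explicit, are
+| pre-shifted by 10 and 11 bits so that the high half of the 128-bit product
+| is either already positioned for roundAndPackFloat64 or exactly one bit
+| low, in which case it is shifted left once and the exponent decremented.
+| All 64 low-order product bits are collapsed into the sticky bit of zSig0.
+*----------------------------------------------------------------------------*/
+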
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the double-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_div( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    bits64 rem0, rem1;
+    bits64 term0, term1;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+        if ( bExp == 0x7FF ) {
+            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+            float_raise( float_flag_invalid STATUS_VAR);
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0 );
+    }
+    if ( bExp == 0x7FF ) {
+        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+        return packFloat64( zSign, 0, 0 );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            if ( ( aExp | aSig ) == 0 ) {
+                float_raise( float_flag_invalid STATUS_VAR);
+                return float64_default_nan;
+            }
+            float_raise( float_flag_divbyzero STATUS_VAR);
+            return packFloat64( zSign, 0x7FF, 0 );
+        }
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x3FD;
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+    if ( bSig <= ( aSig + aSig ) ) {
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = estimateDiv128To64( aSig, 0, bSig );
+    if ( ( zSig & 0x1FF ) <= 2 ) {
+        mul64To128( bSig, zSig, &term0, &term1 );
+        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+        while ( (sbits64) rem0 < 0 ) {
+            --zSig;
+            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+        }
+        zSig |= ( rem1 != 0 );
+    }
+    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
+
+}
+
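+/*----------------------------------------------------------------------------
+| Note: float64_div rebiases with 0x3FD rather than 0x3FF to compensate for
+| the operand pre-scaling and for the possibility that the significand
+| quotient falls below 1; the `bSig <= ( aSig + aSig )' test renormalizes by
+| halving the dividend and bumping the exponent.  estimateDiv128To64 returns
+| a 64-bit quotient estimate that can be up to 2 too high, so a correction
+| loop runs when the low 9 bits are small enough for that error to affect
+| rounding, and any nonzero remainder is jammed into the sticky bit.
+*----------------------------------------------------------------------------*/
+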
+/*----------------------------------------------------------------------------
+| Returns the remainder of the double-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_rem( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, expDiff;
+    bits64 aSig, bSig;
+    bits64 q, alternateASig;
+    sbits64 sigMean;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    if ( aExp == 0x7FF ) {
+        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+            return propagateFloat64NaN( a, b STATUS_VAR );
+        }
+        float_raise( float_flag_invalid STATUS_VAR);
+        return float64_default_nan;
+    }
+    if ( bExp == 0x7FF ) {
+        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            return float64_default_nan;
+        }
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return a;
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    expDiff = aExp - bExp;
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+    if ( expDiff < 0 ) {
+        if ( expDiff < -1 ) return a;
+        aSig >>= 1;
+    }
+    q = ( bSig <= aSig );
+    if ( q ) aSig -= bSig;
+    expDiff -= 64;
+    while ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig, 0, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        aSig = - ( ( bSig>>2 ) * q );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig, 0, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;
+        bSig >>= 2;
+        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+    }
+    else {
+        aSig >>= 2;
+        bSig >>= 2;
+    }
+    do {
+        alternateASig = aSig;
+        ++q;
+        aSig -= bSig;
+    } while ( 0 <= (sbits64) aSig );
+    sigMean = aSig + alternateASig;
+    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+        aSig = alternateASig;
+    }
+    zSign = ( (sbits64) aSig < 0 );
+    if ( zSign ) aSig = - aSig;
+    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
+
+}
+
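+/*----------------------------------------------------------------------------
+| Note: float64_rem reduces the dividend by multiples of bSig using a
+| deliberately low quotient estimate (q - 2 keeps the partial remainder
+| non-negative), then finishes with the small subtraction loop above.  The
+| last non-negative partial remainder and the first negative one straddle
+| zero; the one of smaller magnitude is kept (ties go to the even quotient),
+| giving the IEEE remainder rather than a truncated remainder.
+*----------------------------------------------------------------------------*/
+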
+/*----------------------------------------------------------------------------
+| Returns the square root of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sqrt( float64 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, zExp;
+    bits64 aSig, zSig, doubleZSig;
+    bits64 rem0, rem1, term0, term1;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
+        if ( ! aSign ) return a;
+        float_raise( float_flag_invalid STATUS_VAR);
+        return float64_default_nan;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig ) == 0 ) return a;
+        float_raise( float_flag_invalid STATUS_VAR);
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return 0;
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
+    aSig |= LIT64( 0x0010000000000000 );
+    zSig = estimateSqrt32( aExp, aSig>>21 );
+    aSig <<= 9 - ( aExp & 1 );
+    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
+    if ( ( zSig & 0x1FF ) <= 5 ) {
+        doubleZSig = zSig<<1;
+        mul64To128( zSig, zSig, &term0, &term1 );
+        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+        while ( (sbits64) rem0 < 0 ) {
+            --zSig;
+            doubleZSig -= 2;
+            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
+        }
+        zSig |= ( ( rem0 | rem1 ) != 0 );
+    }
+    return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
+
+}
+
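+/*----------------------------------------------------------------------------
+| Note: float64_sqrt halves the unbiased exponent, with the exponent's parity
+| absorbed into the significand by the `9 - ( aExp & 1 )' shift.  An initial
+| root estimate from estimateSqrt32 is refined to 64 bits with a Newton-
+| Raphson-style step built from estimateDiv128To64; only when the result
+| lands near a rounding boundary is the exact remainder aSig - zSig*zSig used
+| to correct the final bits and set the sticky bit.
+*----------------------------------------------------------------------------*/
+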
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_eq( float64 a, float64 b STATUS_PARAM )
+{
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_le( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
+    return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_lt( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
+    return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
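+/*----------------------------------------------------------------------------
+| Note: in these comparisons `a' and `b' are the raw 64-bit encodings (the
+| float64 type is an integer container here), so bit tricks apply:
+| `(bits64) ( ( a | b )<<1 ) == 0' holds exactly when both operands are zeros
+| of either sign, making +0 and -0 compare equal; and for operands of the
+| same sign the encodings order like unsigned integers, reversed when
+| negative, hence the `aSign ^ ( a < b )' form.
+*----------------------------------------------------------------------------*/
+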
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise.  The invalid exception is raised
+| if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
+{
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
+    return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
+    return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN, the
+| largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+    shiftCount = 0x4037 - aExp;
+    if ( shiftCount <= 0 ) shiftCount = 1;
+    shift64RightJamming( aSig, shiftCount, &aSig );
+    return roundAndPackInt32( aSign, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig, savedASig;
+    int32 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( 0x401E < aExp ) {
+        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+        goto invalid;
+    }
+    else if ( aExp < 0x3FFF ) {
+        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+        return 0;
+    }
+    shiftCount = 0x403E - aExp;
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    z = aSig;
+    if ( aSign ) z = - z;
+    if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+        float_raise( float_flag_invalid STATUS_VAR);
+        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    return z;
+
+}
+
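+/*----------------------------------------------------------------------------
+| Note: the extended-format significand carries an explicit integer bit, and
+| the value equals aSig * 2^(aExp - 0x403E), so shifting right by
+| 0x403E - aExp yields the truncated integer directly.  Overflow is detected
+| when the sign of the truncated result disagrees with aSign, and inexactness
+| by shifting back and comparing against the saved significand.
+*----------------------------------------------------------------------------*/
+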
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN,
+| the largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig, aSigExtra;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    shiftCount = 0x403E - aExp;
+    if ( shiftCount <= 0 ) {
+        if ( shiftCount ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            if (    ! aSign
+                 || (    ( aExp == 0x7FFF )
+                      && ( aSig != LIT64( 0x8000000000000000 ) ) )
+               ) {
+                return LIT64( 0x7FFFFFFFFFFFFFFF );
+            }
+            return (sbits64) LIT64( 0x8000000000000000 );
+        }
+        aSigExtra = 0;
+    }
+    else {
+        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+    }
+    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig;
+    int64 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    shiftCount = aExp - 0x403E;
+    if ( 0 <= shiftCount ) {
+        aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
+        if ( ( a.high != 0xC03E ) || aSig ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
+                return LIT64( 0x7FFFFFFFFFFFFFFF );
+            }
+        }
+        return (sbits64) LIT64( 0x8000000000000000 );
+    }
+    else if ( aExp < 0x3FFF ) {
+        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+        return 0;
+    }
+    z = aSig>>( - shiftCount );
+    if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    if ( aSign ) z = - z;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the single-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) {
+            return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
+        }
+        return packFloat32( aSign, 0xFF, 0 );
+    }
+    shift64RightJamming( aSig, 33, &aSig );
+    if ( aExp || aSig ) aExp -= 0x3F81;
+    return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig, zSig;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) {
+            return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
+        }
+        return packFloat64( aSign, 0x7FF, 0 );
+    }
+    shift64RightJamming( aSig, 1, &zSig );
+    if ( aExp || aSig ) aExp -= 0x3C01;
+    return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
+
+}
+
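+/*----------------------------------------------------------------------------
+| Note: the right shift by 1 (with jamming) moves the extended format's
+| explicit integer bit down to the position roundAndPackFloat64 expects.  The
+| rebias constant 0x3C01 is the difference between the extended and double
+| biases (0x3FFF - 0x3FF = 0x3C00) plus one, the extra one matching the
+| exponent convention used for the pre-rounded significand; the analogous
+| constant in the single-precision conversion above is 0x3F81.
+*----------------------------------------------------------------------------*/
+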
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the quadruple-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig, zSig0, zSig1;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
+        return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
+    }
+    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
+    return packFloat128( aSign, aExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and returns the result as an extended double-precision floating-point
+| value.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    floatx80 z;
+
+    aExp = extractFloatx80Exp( a );
+    if ( 0x403E <= aExp ) {
+        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+            return propagateFloatx80NaN( a, a STATUS_VAR );
+        }
+        return a;
+    }
+    if ( aExp < 0x3FFF ) {
+        if (    ( aExp == 0 )
+             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+            return a;
+        }
+        STATUS(float_exception_flags) |= float_flag_inexact;
+        aSign = extractFloatx80Sign( a );
+        switch ( STATUS(float_rounding_mode) ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
+               ) {
+                return
+                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+            }
+            break;
+         case float_round_down:
+            return
+                  aSign ?
+                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+                : packFloatx80( 0, 0, 0 );
+         case float_round_up:
+            return
+                  aSign ? packFloatx80( 1, 0, 0 )
+                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+        }
+        return packFloatx80( aSign, 0, 0 );
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x403E - aExp;
+    roundBitsMask = lastBitMask - 1;
+    z = a;
+    roundingMode = STATUS(float_rounding_mode);
+    if ( roundingMode == float_round_nearest_even ) {
+        z.low += lastBitMask>>1;
+        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z.low += roundBitsMask;
+        }
+    }
+    z.low &= ~ roundBitsMask;
+    if ( z.low == 0 ) {
+        ++z.high;
+        z.low = LIT64( 0x8000000000000000 );
+    }
+    if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
+    return z;
+
+}
+
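+/*----------------------------------------------------------------------------
+| Note: in the general case above, lastBitMask is the weight of one integer
+| unit within the 64-bit significand and roundBitsMask selects the fraction
+| bits below it.  Nearest-even adds half a unit and clears the last kept bit
+| on exact ties; the directed modes add roundBitsMask when rounding away from
+| zero in the chosen direction; the fraction bits are then cleared.  A carry
+| out of the significand (z.low becoming 0) bumps the exponent and restores
+| the explicit integer bit.
+*----------------------------------------------------------------------------*/
+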
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the extended double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
+| negated before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) {
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+            return a;
+        }
+        if ( bExp == 0 ) --expDiff;
+        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        if ( aExp == 0 ) ++expDiff;
+        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+        zExp = bExp;
+    }
+    else {
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+                return propagateFloatx80NaN( a, b STATUS_VAR );
+            }
+            return a;
+        }
+        zSig1 = 0;
+        zSig0 = aSig + bSig;
+        if ( aExp == 0 ) {
+            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+            goto roundAndPack;
+        }
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    zSig0 = aSig + bSig;
+    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+ shiftRight1:
+    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+    zSig0 |= LIT64( 0x8000000000000000 );
+    ++zExp;
+ roundAndPack:
+    return
+        roundAndPackFloatx80(
+            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
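+/*----------------------------------------------------------------------------
+| Note: extended-precision significands store their leading integer bit
+| explicitly, so no hidden bit is OR'd in here.  In the unequal-exponent
+| paths a carry out of the 64-bit addition shows up as bit 63 of the wrapped
+| sum being clear (the equal-exponent case always carries and goes straight
+| to the shift); the sum is then shifted right one place with the lost bit
+| jammed into zSig1, bit 63 is restored, and the exponent is incremented.
+*----------------------------------------------------------------------------*/
+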
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the extended
+| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+            return propagateFloatx80NaN( a, b STATUS_VAR );
+        }
+        float_raise( float_flag_invalid STATUS_VAR);
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        aExp = 1;
+        bExp = 1;
+    }
+    zSig1 = 0;
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) ++expDiff;
+    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ bBigger:
+    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+    zExp = bExp;
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        return a;
+    }
+    if ( bExp == 0 ) --expDiff;
+    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ aBigger:
+    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+    zExp = aExp;
+ normalizeRoundAndPack:
+    return
+        normalizeRoundAndPackFloatx80(
+            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the extended double-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign == bSign ) {
+        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
+    }
+    else {
+        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
+    }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign == bSign ) {
+        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
+    }
+    else {
+        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
+    }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {
+        if (    (bits64) ( aSig<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b STATUS_VAR );
+        }
+        if ( ( bExp | bSig ) == 0 ) goto invalid;
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+            float_raise( float_flag_invalid STATUS_VAR);
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x3FFE;
+    mul64To128( aSig, bSig, &zSig0, &zSig1 );
+    if ( 0 < (sbits64) zSig0 ) {
+        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+        --zExp;
+    }
+    return
+        roundAndPackFloatx80(
+            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the extended double-precision floating-point
+| value `a' by the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    bits64 rem0, rem1, rem2, term0, term1, term2;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+            goto invalid;
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        return packFloatx80( zSign, 0, 0 );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+                float_raise( float_flag_invalid STATUS_VAR);
+                z.low = floatx80_default_nan_low;
+                z.high = floatx80_default_nan_high;
+                return z;
+            }
+            float_raise( float_flag_divbyzero STATUS_VAR);
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x3FFE;
+    rem1 = 0;
+    if ( bSig <= aSig ) {
+        shift128Right( aSig, 0, 1, &aSig, &rem1 );
+        ++zExp;
+    }
+    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
+    mul64To128( bSig, zSig0, &term0, &term1 );
+    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) {
+        --zSig0;
+        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+    }
+    zSig1 = estimateDiv128To64( rem1, 0, bSig );
+    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
+        mul64To128( bSig, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+        }
+        zSig1 |= ( ( rem1 | rem2 ) != 0 );
+    }
+    return
+        roundAndPackFloatx80(
+            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
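+/*----------------------------------------------------------------------------
+| Note: the quotient significand is built in two 64-bit halves.  The first
+| half starts from an estimateDiv128To64 estimate (which can be up to 2 too
+| high) and is corrected unconditionally with an exact multiply/subtract
+| loop; the second half is corrected only when its low bits sit near a
+| rounding boundary, and any leftover remainder is jammed into the
+| least-significant (sticky) bit of zSig1.
+*----------------------------------------------------------------------------*/
+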
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig;
+    bits64 q, term0, term1, alternateASig0, alternateASig1;
+    floatx80 z;
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    if ( aExp == 0x7FFF ) {
+        if (    (bits64) ( aSig0<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b STATUS_VAR );
+        }
+        goto invalid;
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+ invalid:
+            float_raise( float_flag_invalid STATUS_VAR);
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    bSig |= LIT64( 0x8000000000000000 );
+    zSign = aSign;
+    expDiff = aExp - bExp;
+    aSig1 = 0;
+    if ( expDiff < 0 ) {
+        if ( expDiff < -1 ) return a;
+        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+        expDiff = 0;
+    }
+    q = ( bSig <= aSig0 );
+    if ( q ) aSig0 -= bSig;
+    expDiff -= 64;
+    while ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        mul64To128( bSig, q, &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;
+        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+        while ( le128( term0, term1, aSig0, aSig1 ) ) {
+            ++q;
+            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        }
+    }
+    else {
+        term1 = 0;
+        term0 = bSig;
+    }
+    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+              && ( q & 1 ) )
+       ) {
+        aSig0 = alternateASig0;
+        aSig1 = alternateASig1;
+        zSign = ! zSign;
+    }
+    return
+        normalizeRoundAndPackFloatx80(
+            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
+
+}
+
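+/*----------------------------------------------------------------------------
+| Note: as in float64_rem, the two candidate remainders straddling zero are
+| compared and the smaller-magnitude one kept (ties go to the even quotient),
+| with zSign flipped when the negated candidate wins.  The literal `80'
+| passed to normalizeRoundAndPackFloatx80 selects full 80-bit rounding
+| precision; the remainder is computed exactly, so the currently configured
+| rounding precision is not applied here.
+*----------------------------------------------------------------------------*/
+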
+/*----------------------------------------------------------------------------
+| Returns the square root of the extended double-precision floating-point
+| value `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp, zExp;
+    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    floatx80 z;
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
+        if ( ! aSign ) return a;
+        goto invalid;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig0 ) == 0 ) return a;
+ invalid:
+        float_raise( float_flag_invalid STATUS_VAR);
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
+    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
+    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
+    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+    doubleZSig0 = zSig0<<1;
+    mul64To128( zSig0, zSig0, &term0, &term1 );
+    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) {
+        --zSig0;
+        doubleZSig0 -= 2;
+        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+    }
+    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
+        if ( zSig1 == 0 ) zSig1 = 1;
+        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul64To128( zSig1, zSig1, &term2, &term3 );
+        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+            term3 |= 1;
+            term2 |= doubleZSig0;
+            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+    }
+    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
+    zSig0 |= doubleZSig0;
+    return
+        roundAndPackFloatx80(
+            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
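+/*----------------------------------------------------------------------------
+| Note: the root is developed as a 128-bit significand (zSig0:zSig1) using
+| the same estimate-and-correct pattern as float64_sqrt.  doubleZSig0 tracks
+| 2*zSig0 so that each downward correction of the root updates the remainder
+| incrementally by 2*zSig0 + 1 instead of repeating the squaring.
+*----------------------------------------------------------------------------*/
+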
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| equal to the corresponding value `b', and 0 otherwise.  The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
+{
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        if (    floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    return
+           ( a.low == b.low )
+        && (    ( a.high == b.high )
+             || (    ( a.low == 0 )
+                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+           );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than or equal to the corresponding value `b', and 0 otherwise.  The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 == 0 );
+    }
+    return
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 != 0 );
+    }
+    return
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is equal
+| to the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
+{
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    return
+           ( a.low == b.low )
+        && (    ( a.high == b.high )
+             || (    ( a.low == 0 )
+                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+           );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
+| do not cause an exception.  Otherwise, the comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        if (    floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 == 0 );
+    }
+    return
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
+| an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        if (    floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 != 0 );
+    }
+    return
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32( float128 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig0, aSig1;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
+    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+    aSig0 |= ( aSig1 != 0 );
+    shiftCount = 0x4028 - aExp;
+    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
+    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
+
+}
+
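+/*----------------------------------------------------------------------------
+| Note: the quadruple-precision hidden bit sits at bit 48 of aSig0
+| ( LIT64( 0x0001000000000000 ) ); the 64 low-order fraction bits in aSig1
+| are collapsed into a sticky bit before the right-jamming shift, so
+| roundAndPackInt32 still sees everything needed to round correctly and to
+| raise the inexact flag.
+*----------------------------------------------------------------------------*/
+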
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.  If
+| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig0, aSig1, savedASig;
+    int32 z;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    aSig0 |= ( aSig1 != 0 );
+    if ( 0x401E < aExp ) {
+        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
+        goto invalid;
+    }
+    else if ( aExp < 0x3FFF ) {
+        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
+        return 0;
+    }
+    aSig0 |= LIT64( 0x0001000000000000 );
+    shiftCount = 0x402F - aExp;
+    savedASig = aSig0;
+    aSig0 >>= shiftCount;
+    z = aSig0;
+    if ( aSign ) z = - z;
+    if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+        float_raise( float_flag_invalid STATUS_VAR);
+        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ( aSig0<<shiftCount ) != savedASig ) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64( float128 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig0, aSig1;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+    shiftCount = 0x402F - aExp;
+    if ( shiftCount <= 0 ) {
+        if ( 0x403E < aExp ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            if (    ! aSign
+                 || (    ( aExp == 0x7FFF )
+                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
+                    )
+               ) {
+                return LIT64( 0x7FFFFFFFFFFFFFFF );
+            }
+            return (sbits64) LIT64( 0x8000000000000000 );
+        }
+        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
+    }
+    else {
+        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
+    }
+    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig0, aSig1;
+    int64 z;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+    shiftCount = aExp - 0x402F;
+    if ( 0 < shiftCount ) {
+        if ( 0x403E <= aExp ) {
+            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
+            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
+                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
+                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+            }
+            else {
+                float_raise( float_flag_invalid STATUS_VAR);
+                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
+                    return LIT64( 0x7FFFFFFFFFFFFFFF );
+                }
+            }
+            return (sbits64) LIT64( 0x8000000000000000 );
+        }
+        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
+        if ( (bits64) ( aSig1<<shiftCount ) ) {
+            STATUS(float_exception_flags) |= float_flag_inexact;
+        }
+    }
+    else {
+        if ( aExp < 0x3FFF ) {
+            if ( aExp | aSig0 | aSig1 ) {
+                STATUS(float_exception_flags) |= float_flag_inexact;
+            }
+            return 0;
+        }
+        z = aSig0>>( - shiftCount );
+        if (    aSig1
+             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
+            STATUS(float_exception_flags) |= float_flag_inexact;
+        }
+    }
+    if ( aSign ) z = - z;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the single-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float128_to_float32( float128 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig0, aSig1;
+    bits32 zSig;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( aSig0 | aSig1 ) {
+            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
+        }
+        return packFloat32( aSign, 0xFF, 0 );
+    }
+    aSig0 |= ( aSig1 != 0 );
+    shift64RightJamming( aSig0, 18, &aSig0 );
+    zSig = aSig0;
+    if ( aExp || zSig ) {
+        zSig |= 0x40000000;
+        aExp -= 0x3F81;
+    }
+    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float128_to_float64( float128 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig0, aSig1;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( aSig0 | aSig1 ) {
+            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
+        }
+        return packFloat64( aSign, 0x7FF, 0 );
+    }
+    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+    aSig0 |= ( aSig1 != 0 );
+    if ( aExp || aSig0 ) {
+        aSig0 |= LIT64( 0x4000000000000000 );
+        aExp -= 0x3C01;
+    }
+    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the extended double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig0, aSig1;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( aSig0 | aSig1 ) {
+            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
+        }
+        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    else {
+        aSig0 |= LIT64( 0x0001000000000000 );
+    }
+    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
+    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the quadruple-precision floating-point value `a' to an integer, and
+| returns the result as a quadruple-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_round_to_int( float128 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float128 z;
+
+    aExp = extractFloat128Exp( a );
+    if ( 0x402F <= aExp ) {
+        if ( 0x406F <= aExp ) {
+            if (    ( aExp == 0x7FFF )
+                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
+               ) {
+                return propagateFloat128NaN( a, a STATUS_VAR );
+            }
+            return a;
+        }
+        lastBitMask = 1;
+        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
+        roundBitsMask = lastBitMask - 1;
+        z = a;
+        roundingMode = STATUS(float_rounding_mode);
+        if ( roundingMode == float_round_nearest_even ) {
+            if ( lastBitMask ) {
+                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
+                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+            }
+            else {
+                if ( (sbits64) z.low < 0 ) {
+                    ++z.high;
+                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
+                }
+            }
+        }
+        else if ( roundingMode != float_round_to_zero ) {
+            if (   extractFloat128Sign( z )
+                 ^ ( roundingMode == float_round_up ) ) {
+                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+            }
+        }
+        z.low &= ~ roundBitsMask;
+    }
+    else {
+        if ( aExp < 0x3FFF ) {
+            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
+            STATUS(float_exception_flags) |= float_flag_inexact;
+            aSign = extractFloat128Sign( a );
+            switch ( STATUS(float_rounding_mode) ) {
+             case float_round_nearest_even:
+                if (    ( aExp == 0x3FFE )
+                     && (   extractFloat128Frac0( a )
+                          | extractFloat128Frac1( a ) )
+                   ) {
+                    return packFloat128( aSign, 0x3FFF, 0, 0 );
+                }
+                break;
+             case float_round_down:
+                return
+                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
+                    : packFloat128( 0, 0, 0, 0 );
+             case float_round_up:
+                return
+                      aSign ? packFloat128( 1, 0, 0, 0 )
+                    : packFloat128( 0, 0x3FFF, 0, 0 );
+            }
+            return packFloat128( aSign, 0, 0, 0 );
+        }
+        lastBitMask = 1;
+        lastBitMask <<= 0x402F - aExp;
+        roundBitsMask = lastBitMask - 1;
+        z.low = 0;
+        z.high = a.high;
+        roundingMode = STATUS(float_rounding_mode);
+        if ( roundingMode == float_round_nearest_even ) {
+            z.high += lastBitMask>>1;
+            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
+                z.high &= ~ lastBitMask;
+            }
+        }
+        else if ( roundingMode != float_round_to_zero ) {
+            if (   extractFloat128Sign( z )
+                 ^ ( roundingMode == float_round_up ) ) {
+                z.high |= ( a.low != 0 );
+                z.high += roundBitsMask;
+            }
+        }
+        z.high &= ~ roundBitsMask;
+    }
+    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    return z;
+
+}
+
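+/* Illustrative sketch, not part of the original SoftFloat sources: rounding
+   1.25 to an integral quadruple-precision value with the round-up mode set
+   through the status word yields 2.0 and raises the inexact flag. */
+static float128 example_float128_round_to_int( void )
+{
+    float_status status = { 0 };
+    float128 x;
+
+    status.float_rounding_mode = float_round_up;
+    x.high = LIT64( 0x3FFF400000000000 );  /* sign 0, exp 0x3FFF, fraction .25 => 1.25 */
+    x.low  = 0;
+    return float128_round_to_int( x, &status );  /* 2.0, i.e. high = 0x4000000000000000 */
+}
+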
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the quadruple-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    int32 expDiff;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) {
+        if ( aExp == 0x7FFF ) {
+            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+            return a;
+        }
+        if ( bExp == 0 ) {
+            --expDiff;
+        }
+        else {
+            bSig0 |= LIT64( 0x0001000000000000 );
+        }
+        shift128ExtraRightJamming(
+            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        if ( bExp == 0x7FFF ) {
+            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+            return packFloat128( zSign, 0x7FFF, 0, 0 );
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;
+        }
+        else {
+            aSig0 |= LIT64( 0x0001000000000000 );
+        }
+        shift128ExtraRightJamming(
+            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+        zExp = bExp;
+    }
+    else {
+        if ( aExp == 0x7FFF ) {
+            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+                return propagateFloat128NaN( a, b STATUS_VAR );
+            }
+            return a;
+        }
+        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
+        zSig2 = 0;
+        zSig0 |= LIT64( 0x0002000000000000 );
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    aSig0 |= LIT64( 0x0001000000000000 );
+    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+    --zExp;
+    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
+    ++zExp;
+ shiftRight1:
+    shift128ExtraRightJamming(
+        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ roundAndPack:
+    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the quadruple-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+    int32 expDiff;
+    float128 z;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    expDiff = aExp - bExp;
+    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FFF ) {
+        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+            return propagateFloat128NaN( a, b STATUS_VAR );
+        }
+        float_raise( float_flag_invalid STATUS_VAR);
+        z.low = float128_default_nan_low;
+        z.high = float128_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig0 < aSig0 ) goto aBigger;
+    if ( aSig0 < bSig0 ) goto bBigger;
+    if ( bSig1 < aSig1 ) goto aBigger;
+    if ( aSig1 < bSig1 ) goto bBigger;
+    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FFF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
+    }
+    if ( aExp == 0 ) {
+        ++expDiff;
+    }
+    else {
+        aSig0 |= LIT64( 0x4000000000000000 );
+    }
+    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+    bSig0 |= LIT64( 0x4000000000000000 );
+ bBigger:
+    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+    zExp = bExp;
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FFF ) {
+        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        --expDiff;
+    }
+    else {
+        bSig0 |= LIT64( 0x4000000000000000 );
+    }
+    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+    aSig0 |= LIT64( 0x4000000000000000 );
+ aBigger:
+    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the quadruple-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_add( float128 a, float128 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign == bSign ) {
+        return addFloat128Sigs( a, b, aSign STATUS_VAR );
+    }
+    else {
+        return subFloat128Sigs( a, b, aSign STATUS_VAR );
+    }
+
+}
+
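+/* Illustrative sketch, not part of the original SoftFloat sources: because
+   the sign bits differ, adding 1.0 and -1.0 is routed through
+   subFloat128Sigs, which returns +0 here (and would return -0 in round-down
+   mode, as the exact-cancellation case above shows). */
+static float128 example_float128_add( void )
+{
+    float_status status = { 0 };
+    float128 one, minus_one;
+
+    one.high       = LIT64( 0x3FFF000000000000 );  /* +1.0 */
+    one.low        = 0;
+    minus_one.high = LIT64( 0xBFFF000000000000 );  /* -1.0 */
+    minus_one.low  = 0;
+    return float128_add( one, minus_one, &status );  /* +0 under nearest-even */
+}
+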
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sub( float128 a, float128 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign == bSign ) {
+        return subFloat128Sigs( a, b, aSign STATUS_VAR );
+    }
+    else {
+        return addFloat128Sigs( a, b, aSign STATUS_VAR );
+    }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_mul( float128 a, float128 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+    float128 z;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    bSign = extractFloat128Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {
+        if (    ( aSig0 | aSig1 )
+             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+            return propagateFloat128NaN( a, b STATUS_VAR );
+        }
+        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
+        return packFloat128( zSign, 0x7FFF, 0, 0 );
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+ invalid:
+            float_raise( float_flag_invalid STATUS_VAR);
+            z.low = float128_default_nan_low;
+            z.high = float128_default_nan_high;
+            return z;
+        }
+        return packFloat128( zSign, 0x7FFF, 0, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    if ( bExp == 0 ) {
+        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    zExp = aExp + bExp - 0x4000;
+    aSig0 |= LIT64( 0x0001000000000000 );
+    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
+    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
+    zSig2 |= ( zSig3 != 0 );
+    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
+        shift128ExtraRightJamming(
+            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+        ++zExp;
+    }
+    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the quadruple-precision floating-point value
+| `a' by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_div( float128 a, float128 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    float128 z;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    bSign = extractFloat128Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {
+        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+        if ( bExp == 0x7FFF ) {
+            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+            goto invalid;
+        }
+        return packFloat128( zSign, 0x7FFF, 0, 0 );
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+        return packFloat128( zSign, 0, 0, 0 );
+    }
+    if ( bExp == 0 ) {
+        if ( ( bSig0 | bSig1 ) == 0 ) {
+            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+ invalid:
+                float_raise( float_flag_invalid STATUS_VAR);
+                z.low = float128_default_nan_low;
+                z.high = float128_default_nan_high;
+                return z;
+            }
+            float_raise( float_flag_divbyzero STATUS_VAR);
+            return packFloat128( zSign, 0x7FFF, 0, 0 );
+        }
+        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    zExp = aExp - bExp + 0x3FFD;
+    shortShift128Left(
+        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
+    shortShift128Left(
+        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
+        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+        ++zExp;
+    }
+    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
+    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
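+    /* estimateDiv128To64 may slightly overestimate the quotient digit; back
+       it off until the partial remainder becomes non-negative again. */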
+    while ( (sbits64) rem0 < 0 ) {
+        --zSig0;
+        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+    }
+    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
+    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
+        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+    }
+    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
+    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the quadruple-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_rem( float128 a, float128 b STATUS_PARAM )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
+    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
+    sbits64 sigMean0;
+    float128 z;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    bSign = extractFloat128Sign( b );
+    if ( aExp == 0x7FFF ) {
+        if (    ( aSig0 | aSig1 )
+             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+            return propagateFloat128NaN( a, b STATUS_VAR );
+        }
+        goto invalid;
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( ( bSig0 | bSig1 ) == 0 ) {
+ invalid:
+            float_raise( float_flag_invalid STATUS_VAR);
+            z.low = float128_default_nan_low;
+            z.high = float128_default_nan_high;
+            return z;
+        }
+        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return a;
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    expDiff = aExp - bExp;
+    if ( expDiff < -1 ) return a;
+    shortShift128Left(
+        aSig0 | LIT64( 0x0001000000000000 ),
+        aSig1,
+        15 - ( expDiff < 0 ),
+        &aSig0,
+        &aSig1
+    );
+    shortShift128Left(
+        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+    q = le128( bSig0, bSig1, aSig0, aSig1 );
+    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+    expDiff -= 64;
+    while ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+        q = ( 4 < q ) ? q - 4 : 0;
+        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
+        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
+        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
+        expDiff -= 61;
+    }
+    if ( -64 < expDiff ) {
+        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+        q = ( 4 < q ) ? q - 4 : 0;
+        q >>= - expDiff;
+        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+        expDiff += 52;
+        if ( expDiff < 0 ) {
+            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+        }
+        else {
+            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
+        }
+        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
+    }
+    else {
+        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
+        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+    }
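+    /* Repeatedly subtract the divisor; of the last non-negative remainder and
+       the first negative one, keep whichever is closer to zero (ties go to
+       the even quotient), as the IEEE remainder operation requires. */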
+    do {
+        alternateASig0 = aSig0;
+        alternateASig1 = aSig1;
+        ++q;
+        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+    } while ( 0 <= (sbits64) aSig0 );
+    add128(
+        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
+    if (    ( sigMean0 < 0 )
+         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
+        aSig0 = alternateASig0;
+        aSig1 = alternateASig1;
+    }
+    zSign = ( (sbits64) aSig0 < 0 );
+    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
+    return
+        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the quadruple-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sqrt( float128 a STATUS_PARAM )
+{
+    flag aSign;
+    int32 aExp, zExp;
+    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    float128 z;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
+        if ( ! aSign ) return a;
+        goto invalid;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
+ invalid:
+        float_raise( float_flag_invalid STATUS_VAR);
+        z.low = float128_default_nan_low;
+        z.high = float128_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
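+    /* Halve the unbiased exponent; its low bit is absorbed by the variable
+       shift applied to the significand just below. */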
+    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
+    aSig0 |= LIT64( 0x0001000000000000 );
+    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
+    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
+    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+    doubleZSig0 = zSig0<<1;
+    mul64To128( zSig0, zSig0, &term0, &term1 );
+    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) {
+        --zSig0;
+        doubleZSig0 -= 2;
+        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+    }
+    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
+        if ( zSig1 == 0 ) zSig1 = 1;
+        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul64To128( zSig1, zSig1, &term2, &term3 );
+        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+            term3 |= 1;
+            term2 |= doubleZSig0;
+            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+    }
+    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
+    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_eq( float128 a, float128 b STATUS_PARAM )
+{
+
+    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+         || (    ( extractFloat128Exp( b ) == 0x7FFF )
+              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+       ) {
+        if (    float128_is_signaling_nan( a )
+             || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
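+    /* Bit-for-bit equality, except that +0 and -0 compare equal: when a.low
+       equals b.low equals zero and shifting the sign bits out of
+       ( a.high | b.high ) leaves zero, both operands are zeros of some sign. */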
+    return
+           ( a.low == b.low )
+        && (    ( a.high == b.high )
+             || (    ( a.low == 0 )
+                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+           );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_le( float128 a, float128 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+         || (    ( extractFloat128Exp( b ) == 0x7FFF )
+              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 == 0 );
+    }
+    return
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_lt( float128 a, float128 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+         || (    ( extractFloat128Exp( b ) == 0x7FFF )
+              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 != 0 );
+    }
+    return
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
+{
+
+    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+         || (    ( extractFloat128Exp( b ) == 0x7FFF )
+              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    return
+           ( a.low == b.low )
+        && (    ( a.high == b.high )
+             || (    ( a.low == 0 )
+                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+           );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+         || (    ( extractFloat128Exp( b ) == 0x7FFF )
+              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+       ) {
+        if (    float128_is_signaling_nan( a )
+             || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 == 0 );
+    }
+    return
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+         || (    ( extractFloat128Exp( b ) == 0x7FFF )
+              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+       ) {
+        if (    float128_is_signaling_nan( a )
+             || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 0;
+    }
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 != 0 );
+    }
+    return
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
+
+/* misc functions */
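+/* The unsigned 32-bit helpers below are layered on the signed 64-bit
+   routines: every uint32 value is exactly representable as an int64, and the
+   float-to-uint32 direction clamps the 64-bit result to [0, 0xffffffff],
+   raising the invalid flag whenever it had to clamp. */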
+float32 uint32_to_float32( unsigned int a STATUS_PARAM )
+{
+    return int64_to_float32(a STATUS_VAR);
+}
+
+float64 uint32_to_float64( unsigned int a STATUS_PARAM )
+{
+    return int64_to_float64(a STATUS_VAR);
+}
+
+unsigned int float32_to_uint32( float32 a STATUS_PARAM )
+{
+    int64_t v;
+    unsigned int res;
+
+    v = float32_to_int64(a STATUS_VAR);
+    if (v < 0) {
+        res = 0;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else if (v > 0xffffffff) {
+        res = 0xffffffff;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else {
+        res = v;
+    }
+    return res;
+}
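+
+/* Illustrative sketch, not part of the original SoftFloat sources: a negative
+   input saturates to 0 and raises the invalid flag (float32 is a raw bit
+   pattern here, so -1.0f is written as 0xBF800000). */
+static unsigned int example_float32_to_uint32( void )
+{
+    float_status status = { 0 };
+    float32 minus_one = 0xBF800000;  /* -1.0f */
+
+    return float32_to_uint32( minus_one, &status );  /* 0, with float_flag_invalid set */
+}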
+
+unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
+{
+    int64_t v;
+    unsigned int res;
+
+    v = float32_to_int64_round_to_zero(a STATUS_VAR);
+    if (v < 0) {
+        res = 0;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else if (v > 0xffffffff) {
+        res = 0xffffffff;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else {
+        res = v;
+    }
+    return res;
+}
+
+unsigned int float64_to_uint32( float64 a STATUS_PARAM )
+{
+    int64_t v;
+    unsigned int res;
+
+    v = float64_to_int64(a STATUS_VAR);
+    if (v < 0) {
+        res = 0;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else if (v > 0xffffffff) {
+        res = 0xffffffff;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else {
+        res = v;
+    }
+    return res;
+}
+
+unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
+{
+    int64_t v;
+    unsigned int res;
+
+    v = float64_to_int64_round_to_zero(a STATUS_VAR);
+    if (v < 0) {
+        res = 0;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else if (v > 0xffffffff) {
+        res = 0xffffffff;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else {
+        res = v;
+    }
+    return res;
+}
+
+#define COMPARE(s, nan_exp)                                                  \
+INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
+                                      int is_quiet STATUS_PARAM )            \
+{                                                                            \
+    flag aSign, bSign;                                                       \
+                                                                             \
+    if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
+         extractFloat ## s ## Frac( a ) ) ||                                 \
+        ( ( extractFloat ## s ## Exp( b ) == nan_exp ) &&                    \
+          extractFloat ## s ## Frac( b ) )) {                                \
+        if (!is_quiet ||                                                     \
+            float ## s ## _is_signaling_nan( a ) ||                          \
+            float ## s ## _is_signaling_nan( b ) ) {                         \
+            float_raise( float_flag_invalid STATUS_VAR);                     \
+        }                                                                    \
+        return float_relation_unordered;                                     \
+    }                                                                        \
+    aSign = extractFloat ## s ## Sign( a );                                  \
+    bSign = extractFloat ## s ## Sign( b );                                  \
+    if ( aSign != bSign ) {                                                  \
+        if ( (bits ## s) ( ( a | b )<<1 ) == 0 ) {                           \
+            /* zero case */                                                  \
+            return float_relation_equal;                                     \
+        } else {                                                             \
+            return 1 - (2 * aSign);                                          \
+        }                                                                    \
+    } else {                                                                 \
+        if (a == b) {                                                        \
+            return float_relation_equal;                                     \
+        } else {                                                             \
+            return 1 - 2 * (aSign ^ ( a < b ));                              \
+        }                                                                    \
+    }                                                                        \
+}                                                                            \
+                                                                             \
+int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )        \
+{                                                                            \
+    return float ## s ## _compare_internal(a, b, 0 STATUS_VAR);              \
+}                                                                            \
+                                                                             \
+int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
+{                                                                            \
+    return float ## s ## _compare_internal(a, b, 1 STATUS_VAR);              \
+}
+
+COMPARE(32, 0xff)
+COMPARE(64, 0x7ff)
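+
+/* Illustrative sketch, not part of the original SoftFloat sources: the
+   COMPARE macro above expands to float32_compare/float64_compare and their
+   _quiet variants, each returning one of the float_relation_* constants. */
+static int example_float64_compare( void )
+{
+    float_status status = { 0 };
+    float64 one = LIT64( 0x3FF0000000000000 );  /* 1.0 */
+    float64 two = LIT64( 0x4000000000000000 );  /* 2.0 */
+
+    return float64_compare( one, two, &status );  /* float_relation_less (-1) */
+}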
Index: /trunk/src/recompiler_new/fpu/softfloat.h
===================================================================
--- /trunk/src/recompiler_new/fpu/softfloat.h	(revision 13168)
+++ /trunk/src/recompiler_new/fpu/softfloat.h	(revision 13168)
@@ -0,0 +1,403 @@
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#ifndef SOFTFLOAT_H
+#define SOFTFLOAT_H
+
+#include <inttypes.h>
+#include "config.h"
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines the most convenient type that holds
+| integers of at least as many bits as specified.  For example, `uint8' should
+| be the most convenient type that can hold unsigned integers of as many as
+| 8 bits.  The `flag' type must be able to hold either a 0 or 1.  For most
+| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+| to the same as `int'.
+*----------------------------------------------------------------------------*/
+typedef uint8_t flag;
+typedef uint8_t uint8;
+typedef int8_t int8;
+typedef int uint16;
+typedef int int16;
+typedef unsigned int uint32;
+typedef signed int int32;
+typedef uint64_t uint64;
+typedef int64_t int64;
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines a type that holds integers
+| of _exactly_ the number of bits specified.  For instance, for most
+| implementations of C, `bits16' and `sbits16' should be `typedef'ed to
+| `unsigned short int' and `signed short int' (or `short int'), respectively.
+*----------------------------------------------------------------------------*/
+typedef uint8_t bits8;
+typedef int8_t sbits8;
+typedef uint16_t bits16;
+typedef int16_t sbits16;
+typedef uint32_t bits32;
+typedef int32_t sbits32;
+typedef uint64_t bits64;
+typedef int64_t sbits64;
+
+#define LIT64( a ) a##LL
+#define INLINE static inline
+
+/*----------------------------------------------------------------------------
+| The macro `FLOATX80' must be defined to enable the extended double-precision
+| floating-point format `floatx80'.  If this macro is not defined, the
+| `floatx80' type will not be defined, and none of the functions that either
+| input or output the `floatx80' type will be defined.  The same applies to
+| the `FLOAT128' macro and the quadruple-precision format `float128'.
+*----------------------------------------------------------------------------*/
+#ifdef CONFIG_SOFTFLOAT
+/* bit exact soft float support */
+#define FLOATX80
+#define FLOAT128
+#else
+/* native float support */
+#if (defined(__i386__) || defined(__x86_64__)) && (!defined(_BSD) || defined(VBOX))
+#define FLOATX80
+#endif
+#endif /* !CONFIG_SOFTFLOAT */
+#if defined(VBOX) && (!defined(FLOATX80) || defined(CONFIG_SOFTFLOAT))
+# error misconfigured
+#endif
+
+#define STATUS_PARAM , float_status *status
+#define STATUS(field) status->field
+#define STATUS_VAR , status
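+
+/* For illustration (not part of the original header): a prototype written as
+   `float32 float32_add( float32, float32 STATUS_PARAM )' expands to
+   `float32 float32_add( float32, float32 , float_status *status )'; inside a
+   definition, STATUS(float_rounding_mode) reads status->float_rounding_mode,
+   and STATUS_VAR forwards `, status' to callees. */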
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point ordering relations
+*----------------------------------------------------------------------------*/
+enum {
+    float_relation_less      = -1,
+    float_relation_equal     =  0,
+    float_relation_greater   =  1,
+    float_relation_unordered =  2
+};
+
+#ifdef CONFIG_SOFTFLOAT
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+typedef uint32_t float32;
+typedef uint64_t float64;
+#ifdef FLOATX80
+typedef struct {
+    uint64_t low;
+    uint16_t high;
+} floatx80;
+#endif
+#ifdef FLOAT128
+typedef struct {
+#ifdef WORDS_BIGENDIAN
+    uint64_t high, low;
+#else
+    uint64_t low, high;
+#endif
+} float128;
+#endif
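+
+/* For illustration (not part of the original header): float32 and float64 are
+   raw IEEE bit patterns (sign | biased exponent | fraction), floatx80 keeps
+   the 64-bit significand in `low' and the sign plus 15-bit exponent in
+   `high', and float128 packs the sign, 15-bit exponent and top 48 fraction
+   bits into `high' with the remaining 64 fraction bits in `low'. */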
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+*----------------------------------------------------------------------------*/
+enum {
+    float_tininess_after_rounding  = 0,
+    float_tininess_before_rounding = 1
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+enum {
+    float_round_nearest_even = 0,
+    float_round_down         = 1,
+    float_round_up           = 2,
+    float_round_to_zero      = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+enum {
+    float_flag_invalid   =  1,
+    float_flag_divbyzero =  4,
+    float_flag_overflow  =  8,
+    float_flag_underflow = 16,
+    float_flag_inexact   = 32
+};
+
+typedef struct float_status {
+    signed char float_detect_tininess;
+    signed char float_rounding_mode;
+    signed char float_exception_flags;
+#ifdef FLOATX80
+    signed char floatx80_rounding_precision;
+#endif
+} float_status;
+
+void set_float_rounding_mode(int val STATUS_PARAM);
+void set_float_exception_flags(int val STATUS_PARAM);
+INLINE int get_float_exception_flags(float_status *status)
+{
+    return STATUS(float_exception_flags);
+}
+#ifdef FLOATX80
+void set_floatx80_rounding_precision(int val STATUS_PARAM);
+#endif
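+
+/* For illustration (not part of the original header): callers typically
+   zero-initialise a float_status, which selects nearest-even rounding,
+   tininess detection after rounding and no raised flags, e.g.
+
+       float_status status = { 0 };
+       set_float_rounding_mode( float_round_to_zero, &status );
+
+   and later inspect get_float_exception_flags( &status ). */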
+
+/*----------------------------------------------------------------------------
+| Routine to raise any or all of the software IEC/IEEE floating-point
+| exception flags.
+*----------------------------------------------------------------------------*/
+void float_raise( int8 flags STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int STATUS_PARAM );
+float64 int32_to_float64( int STATUS_PARAM );
+float32 uint32_to_float32( unsigned int STATUS_PARAM );
+float64 uint32_to_float64( unsigned int STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( int STATUS_PARAM );
+#endif
+#ifdef FLOAT128
+float128 int32_to_float128( int STATUS_PARAM );
+#endif
+float32 int64_to_float32( int64_t STATUS_PARAM );
+float64 int64_to_float64( int64_t STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 int64_to_floatx80( int64_t STATUS_PARAM );
+#endif
+#ifdef FLOAT128
+float128 int64_to_float128( int64_t STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float32_to_int32( float32 STATUS_PARAM );
+int float32_to_int32_round_to_zero( float32 STATUS_PARAM );
+unsigned int float32_to_uint32( float32 STATUS_PARAM );
+unsigned int float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
+int64_t float32_to_int64( float32 STATUS_PARAM );
+int64_t float32_to_int64_round_to_zero( float32 STATUS_PARAM );
+float64 float32_to_float64( float32 STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 STATUS_PARAM );
+#endif
+#ifdef FLOAT128
+float128 float32_to_float128( float32 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 STATUS_PARAM );
+float32 float32_add( float32, float32 STATUS_PARAM );
+float32 float32_sub( float32, float32 STATUS_PARAM );
+float32 float32_mul( float32, float32 STATUS_PARAM );
+float32 float32_div( float32, float32 STATUS_PARAM );
+float32 float32_rem( float32, float32 STATUS_PARAM );
+float32 float32_sqrt( float32 STATUS_PARAM );
+int float32_eq( float32, float32 STATUS_PARAM );
+int float32_le( float32, float32 STATUS_PARAM );
+int float32_lt( float32, float32 STATUS_PARAM );
+int float32_eq_signaling( float32, float32 STATUS_PARAM );
+int float32_le_quiet( float32, float32 STATUS_PARAM );
+int float32_lt_quiet( float32, float32 STATUS_PARAM );
+int float32_compare( float32, float32 STATUS_PARAM );
+int float32_compare_quiet( float32, float32 STATUS_PARAM );
+int float32_is_signaling_nan( float32 );
+int float64_is_nan( float64 a );
+
+INLINE float32 float32_abs(float32 a)
+{
+    return a & 0x7fffffff;
+}
+
+INLINE float32 float32_chs(float32 a)
+{
+    return a ^ 0x80000000;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float64_to_int32( float64 STATUS_PARAM );
+int float64_to_int32_round_to_zero( float64 STATUS_PARAM );
+unsigned int float64_to_uint32( float64 STATUS_PARAM );
+unsigned int float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
+int64_t float64_to_int64( float64 STATUS_PARAM );
+int64_t float64_to_int64_round_to_zero( float64 STATUS_PARAM );
+float32 float64_to_float32( float64 STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 STATUS_PARAM );
+#endif
+#ifdef FLOAT128
+float128 float64_to_float128( float64 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 STATUS_PARAM );
+float64 float64_trunc_to_int( float64 STATUS_PARAM );
+float64 float64_add( float64, float64 STATUS_PARAM );
+float64 float64_sub( float64, float64 STATUS_PARAM );
+float64 float64_mul( float64, float64 STATUS_PARAM );
+float64 float64_div( float64, float64 STATUS_PARAM );
+float64 float64_rem( float64, float64 STATUS_PARAM );
+float64 float64_sqrt( float64 STATUS_PARAM );
+int float64_eq( float64, float64 STATUS_PARAM );
+int float64_le( float64, float64 STATUS_PARAM );
+int float64_lt( float64, float64 STATUS_PARAM );
+int float64_eq_signaling( float64, float64 STATUS_PARAM );
+int float64_le_quiet( float64, float64 STATUS_PARAM );
+int float64_lt_quiet( float64, float64 STATUS_PARAM );
+int float64_compare( float64, float64 STATUS_PARAM );
+int float64_compare_quiet( float64, float64 STATUS_PARAM );
+int float64_is_signaling_nan( float64 );
+
+INLINE float64 float64_abs(float64 a)
+{
+    return a & 0x7fffffffffffffffLL;
+}
+
+INLINE float64 float64_chs(float64 a)
+{
+    return a ^ 0x8000000000000000LL;
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int floatx80_to_int32( floatx80 STATUS_PARAM );
+int floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
+int64_t floatx80_to_int64( floatx80 STATUS_PARAM );
+int64_t floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM );
+float32 floatx80_to_float32( floatx80 STATUS_PARAM );
+float64 floatx80_to_float64( floatx80 STATUS_PARAM );
+#ifdef FLOAT128
+float128 floatx80_to_float128( floatx80 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM );
+floatx80 floatx80_add( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sub( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_mul( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_div( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
+int floatx80_eq( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt( floatx80, floatx80 STATUS_PARAM );
+int floatx80_eq_signaling( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_is_signaling_nan( floatx80 );
+
+INLINE floatx80 floatx80_abs(floatx80 a)
+{
+    a.high &= 0x7fff;
+    return a;
+}
+
+INLINE floatx80 floatx80_chs(floatx80 a)
+{
+    a.high ^= 0x8000;
+    return a;
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float128_to_int32( float128 STATUS_PARAM );
+int float128_to_int32_round_to_zero( float128 STATUS_PARAM );
+int64_t float128_to_int64( float128 STATUS_PARAM );
+int64_t float128_to_int64_round_to_zero( float128 STATUS_PARAM );
+float32 float128_to_float32( float128 STATUS_PARAM );
+float64 float128_to_float64( float128 STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 float128_to_floatx80( float128 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_round_to_int( float128 STATUS_PARAM );
+float128 float128_add( float128, float128 STATUS_PARAM );
+float128 float128_sub( float128, float128 STATUS_PARAM );
+float128 float128_mul( float128, float128 STATUS_PARAM );
+float128 float128_div( float128, float128 STATUS_PARAM );
+float128 float128_rem( float128, float128 STATUS_PARAM );
+float128 float128_sqrt( float128 STATUS_PARAM );
+int float128_eq( float128, float128 STATUS_PARAM );
+int float128_le( float128, float128 STATUS_PARAM );
+int float128_lt( float128, float128 STATUS_PARAM );
+int float128_eq_signaling( float128, float128 STATUS_PARAM );
+int float128_le_quiet( float128, float128 STATUS_PARAM );
+int float128_lt_quiet( float128, float128 STATUS_PARAM );
+int float128_is_signaling_nan( float128 );
+
+INLINE float128 float128_abs(float128 a)
+{
+    a.high &= 0x7fffffffffffffffLL;
+    return a;
+}
+
+INLINE float128 float128_chs(float128 a)
+{
+    a.high ^= 0x8000000000000000LL;
+    return a;
+}
+
+#endif
+
+#else /* CONFIG_SOFTFLOAT */
+
+#include "softfloat-native.h"
+
+#endif /* !CONFIG_SOFTFLOAT */
+
+#endif /* !SOFTFLOAT_H */
Index: /trunk/src/recompiler_new/hostregs_helper.h
===================================================================
--- /trunk/src/recompiler_new/hostregs_helper.h	(revision 13168)
+++ /trunk/src/recompiler_new/hostregs_helper.h	(revision 13168)
@@ -0,0 +1,107 @@
+/*
+ *  Save/restore host registers.
+ *
+ *  Copyright (c) 2007 CodeSourcery
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+/* The GCC global register variable extension is used to reserve some
+   host registers for use by dyngen.  However, only the core parts of the
+   translation engine are compiled with these settings.  We must manually
+   save/restore these registers when called from regular code.
+   It is not sufficient to save/restore T0 et al., as these may be declared
+   with a data type smaller than the actual register.  */
+
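+/* Illustrative usage sketch (not part of the original header): the file is
+ * meant to be included several times by the execution loop, once per phase.
+ *
+ *   #define DECLARE_HOST_REGS
+ *   #include "hostregs_helper.h"   // declare reg_AREGn / saved_AREGn
+ *
+ *   #define SAVE_HOST_REGS
+ *   #include "hostregs_helper.h"   // copy the live registers aside
+ *
+ *   // ... run code that may clobber the reserved registers ...
+ *
+ *   #include "hostregs_helper.h"   // neither macro defined: restore
+ */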
+#if defined(DECLARE_HOST_REGS)
+
+#define DO_REG(REG)					\
+    register host_reg_t reg_AREG##REG asm(AREG##REG);	\
+    volatile host_reg_t saved_AREG##REG;
+
+#elif defined(SAVE_HOST_REGS)
+
+#define DO_REG(REG)					\
+    __asm__ __volatile__ ("" : "=r" (reg_AREG##REG));	\
+    saved_AREG##REG = reg_AREG##REG;
+
+#else
+
+#define DO_REG(REG)                                     \
+    reg_AREG##REG = saved_AREG##REG;		        \
+    __asm__ __volatile__ ("" : : "r" (reg_AREG##REG));
+
+#endif
+
+#ifdef AREG0
+DO_REG(0)
+#endif
+
+#ifdef AREG1
+DO_REG(1)
+#endif
+
+#ifdef AREG2
+DO_REG(2)
+#endif
+
+#ifdef AREG3
+DO_REG(3)
+#endif
+
+#ifdef AREG4
+DO_REG(4)
+#endif
+
+#ifdef AREG5
+DO_REG(5)
+#endif
+
+#ifdef AREG6
+DO_REG(6)
+#endif
+
+#ifdef AREG7
+DO_REG(7)
+#endif
+
+#ifdef AREG8
+DO_REG(8)
+#endif
+
+#ifdef AREG9
+DO_REG(9)
+#endif
+
+#ifdef AREG10
+DO_REG(10)
+#endif
+
+#ifdef AREG11
+DO_REG(11)
+#endif
+
+#undef SAVE_HOST_REGS
+#undef DECLARE_HOST_REGS
+#undef DO_REG
Index: /trunk/src/recompiler_new/osdep.h
===================================================================
--- /trunk/src/recompiler_new/osdep.h	(revision 13168)
+++ /trunk/src/recompiler_new/osdep.h	(revision 13168)
@@ -0,0 +1,50 @@
+#ifndef QEMU_OSDEP_H
+#define QEMU_OSDEP_H
+
+#ifdef VBOX
+
+#include <iprt/alloc.h>
+#include <iprt/stdarg.h>
+#include <iprt/string.h>
+
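+/* Under VBOX the QEMU memory and formatting primitives are thin wrappers
+   around the IPRT runtime (RTMemAlloc/RTMemPageAlloc, RTStrPrintf, RTLog),
+   so the recompiler shares its allocator and logging with the rest of
+   VirtualBox instead of using the C library based versions further below. */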
+#define qemu_snprintf(pszBuf, cbBuf, ...) RTStrPrintf((pszBuf), (cbBuf), __VA_ARGS__)
+#define qemu_vsnprintf(pszBuf, cbBuf, pszFormat, args) \
+                            RTStrPrintfV((pszBuf), (cbBuf), (pszFormat), (args))
+#define qemu_vprintf(pszFormat, args) \
+                            RTLogPrintfV((pszFormat), (args))
+#define qemu_printf         RTLogPrintf
+#define qemu_malloc(cb)     RTMemAlloc(cb)
+#define qemu_mallocz(cb)    RTMemAllocZ(cb)
+#define qemu_free(pv)       RTMemFree(pv)
+#define qemu_strdup(psz)    RTStrDup(psz)
+
+#define qemu_vmalloc(cb)    RTMemPageAlloc(cb)
+#define qemu_vfree(pv)      RTMemPageFree(pv)
+
+#ifndef NULL
+# define NULL 0
+#endif
+
+#else /* !VBOX */
+
+#include <stdarg.h>
+
+#define qemu_snprintf snprintf   /* bird */
+#define qemu_vsnprintf vsnprintf /* bird */
+#define qemu_vprintf vprintf     /* bird */
+
+#define qemu_printf printf
+
+void *qemu_malloc(size_t size);
+void *qemu_mallocz(size_t size);
+void qemu_free(void *ptr);
+char *qemu_strdup(const char *str);
+
+void *qemu_vmalloc(size_t size);
+void qemu_vfree(void *ptr);
+
+void *get_mmap_addr(unsigned long size);
+
+#endif /* !VBOX */
+
+#endif
Index: /trunk/src/recompiler_new/softmmu_exec.h
===================================================================
--- /trunk/src/recompiler_new/softmmu_exec.h	(revision 13168)
+++ /trunk/src/recompiler_new/softmmu_exec.h	(revision 13168)
@@ -0,0 +1,65 @@
+/* Common softmmu definitions and inline routines.  */
+
+#define ldul_user ldl_user
+#define ldul_kernel ldl_kernel
+
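+/* Each block below instantiates softmmu_header.h once per access width.
+   Every pass generates inline accessors named ld<width><MEMSUFFIX> and
+   st<width><MEMSUFFIX> (e.g. ldl_kernel, lduw_user, stq_data) that go
+   through the TLB entry set selected by ACCESS_TYPE. */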
+#define ACCESS_TYPE 0
+#define MEMSUFFIX _kernel
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+
+#define ACCESS_TYPE 1
+#define MEMSUFFIX _user
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+
+/* these accesses are slower; they must be as rare as possible */
+#define ACCESS_TYPE 2
+#define MEMSUFFIX _data
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+
+#define ldub(p) ldub_data(p)
+#define ldsb(p) ldsb_data(p)
+#define lduw(p) lduw_data(p)
+#define ldsw(p) ldsw_data(p)
+#define ldl(p) ldl_data(p)
+#define ldq(p) ldq_data(p)
+
+#define stb(p, v) stb_data(p, v)
+#define stw(p, v) stw_data(p, v)
+#define stl(p, v) stl_data(p, v)
+#define stq(p, v) stq_data(p, v)
Index: /trunk/src/recompiler_new/softmmu_header.h
===================================================================
--- /trunk/src/recompiler_new/softmmu_header.h	(revision 13168)
+++ /trunk/src/recompiler_new/softmmu_header.h	(revision 13168)
@@ -0,0 +1,419 @@
+/*
+ *  Software MMU support
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#if DATA_SIZE == 8
+#define SUFFIX q
+#define USUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX l
+#define USUFFIX l
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX w
+#define USUFFIX uw
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#elif DATA_SIZE == 1
+#define SUFFIX b
+#define USUFFIX ub
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#else
+#error unsupported data size
+#endif
+
+#if ACCESS_TYPE == 0
+
+#define CPU_MEM_INDEX 0
+#define MMUSUFFIX _mmu
+
+#elif ACCESS_TYPE == 1
+
+#define CPU_MEM_INDEX 1
+#define MMUSUFFIX _mmu
+
+#elif ACCESS_TYPE == 2
+
+#ifdef TARGET_I386
+#define CPU_MEM_INDEX ((env->hflags & HF_CPL_MASK) == 3)
+#elif defined (TARGET_PPC)
+#define CPU_MEM_INDEX (msr_pr)
+#elif defined (TARGET_MIPS)
+#define CPU_MEM_INDEX ((env->hflags & MIPS_HFLAG_MODE) == MIPS_HFLAG_UM)
+#elif defined (TARGET_SPARC)
+#define CPU_MEM_INDEX ((env->psrs) == 0)
+#elif defined (TARGET_ARM)
+#define CPU_MEM_INDEX ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR)
+#elif defined (TARGET_SH4)
+#define CPU_MEM_INDEX ((env->sr & SR_MD) == 0)
+#else
+#error unsupported CPU
+#endif
+#define MMUSUFFIX _mmu
+
+#elif ACCESS_TYPE == 3
+
+#ifdef TARGET_I386
+#define CPU_MEM_INDEX ((env->hflags & HF_CPL_MASK) == 3)
+#elif defined (TARGET_PPC)
+#define CPU_MEM_INDEX (msr_pr)
+#elif defined (TARGET_MIPS)
+#define CPU_MEM_INDEX ((env->hflags & MIPS_HFLAG_MODE) == MIPS_HFLAG_UM)
+#elif defined (TARGET_SPARC)
+#define CPU_MEM_INDEX ((env->psrs) == 0)
+#elif defined (TARGET_ARM)
+#define CPU_MEM_INDEX ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR)
+#elif defined (TARGET_SH4)
+#define CPU_MEM_INDEX ((env->sr & SR_MD) == 0)
+#else
+#error unsupported CPU
+#endif
+#define MMUSUFFIX _cmmu
+
+#else
+#error invalid ACCESS_TYPE
+#endif
+
+#if DATA_SIZE == 8
+#define RES_TYPE uint64_t
+#else
+#define RES_TYPE int
+#endif
+
+#if ACCESS_TYPE == 3
+#define ADDR_READ addr_code
+#else
+#define ADDR_READ addr_read
+#endif
+
+DATA_TYPE REGPARM(1) glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+                                                         int is_user);
+void REGPARM(2) glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, DATA_TYPE v, int is_user);
+
+#if (DATA_SIZE <= 4) && (TARGET_LONG_BITS == 32) && defined(__i386__) && \
+    (ACCESS_TYPE <= 1) && defined(ASM_SOFTMMU) && (!defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB))
+
+#define CPU_TLB_ENTRY_BITS 4
+
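+/* Fast-path accessors for 32-bit x86 hosts: the inline assembly below hashes
+   the virtual address into tlb_table (addressed relative to %ebp, which is
+   assumed here to hold the CPUState pointer, i.e. AREG0), compares the
+   stored tag against the page- and alignment-masked address, and on a hit
+   reads through the cached host address; on a mismatch it calls the
+   out-of-line __ld<size> MMU helper instead. */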
+static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    int res;
+
+    asm volatile ("movl %1, %%edx\n"
+                  "movl %1, %%eax\n"
+                  "shrl %3, %%edx\n"
+                  "andl %4, %%eax\n"
+                  "andl %2, %%edx\n"
+                  "leal %5(%%edx, %%ebp), %%edx\n"
+                  "cmpl (%%edx), %%eax\n"
+                  "movl %1, %%eax\n"
+                  "je 1f\n"
+                  "pushl %6\n"
+                  "call %7\n"
+                  "popl %%edx\n"
+                  "movl %%eax, %0\n"
+                  "jmp 2f\n"
+                  "1:\n"
+                  "addl 12(%%edx), %%eax\n"
+#if DATA_SIZE == 1
+                  "movzbl (%%eax), %0\n"
+#elif DATA_SIZE == 2
+                  "movzwl (%%eax), %0\n"
+#elif DATA_SIZE == 4
+                  "movl (%%eax), %0\n"
+#else
+#error unsupported size
+#endif
+                  "2:\n"
+                  : "=r" (res)
+                  : "r" (ptr), 
+                  "i" ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), 
+                  "i" (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), 
+                  "i" (TARGET_PAGE_MASK | (DATA_SIZE - 1)),
+                  "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MEM_INDEX][0].addr_read)),
+                  "i" (CPU_MEM_INDEX),
+                  "m" (*(uint8_t *)&glue(glue(__ld, SUFFIX), MMUSUFFIX))
+                  : "%eax", "%ecx", "%edx", "memory", "cc");
+    return res;
+}
+
+#if DATA_SIZE <= 2
+static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    int res;
+
+    asm volatile ("movl %1, %%edx\n"
+                  "movl %1, %%eax\n"
+                  "shrl %3, %%edx\n"
+                  "andl %4, %%eax\n"
+                  "andl %2, %%edx\n"
+                  "leal %5(%%edx, %%ebp), %%edx\n"
+                  "cmpl (%%edx), %%eax\n"
+                  "movl %1, %%eax\n"
+                  "je 1f\n"
+                  "pushl %6\n"
+                  "call %7\n"
+                  "popl %%edx\n"
+#if DATA_SIZE == 1
+                  "movsbl %%al, %0\n"
+#elif DATA_SIZE == 2
+                  "movswl %%ax, %0\n"
+#else
+#error unsupported size
+#endif
+                  "jmp 2f\n"
+                  "1:\n"
+                  "addl 12(%%edx), %%eax\n"
+#if DATA_SIZE == 1
+                  "movsbl (%%eax), %0\n"
+#elif DATA_SIZE == 2
+                  "movswl (%%eax), %0\n"
+#else
+#error unsupported size
+#endif
+                  "2:\n"
+                  : "=r" (res)
+                  : "r" (ptr), 
+                  "i" ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), 
+                  "i" (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), 
+                  "i" (TARGET_PAGE_MASK | (DATA_SIZE - 1)),
+                  "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MEM_INDEX][0].addr_read)),
+                  "i" (CPU_MEM_INDEX),
+                  "m" (*(uint8_t *)&glue(glue(__ld, SUFFIX), MMUSUFFIX))
+                  : "%eax", "%ecx", "%edx", "memory", "cc");
+    return res;
+}
+#endif
+
+#ifdef VBOX
+/* generic store macro */
+
+static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+{
+    int index;
+    target_ulong addr;
+    unsigned long physaddr;
+    int is_user;
+
+    addr = ptr;
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    is_user = CPU_MEM_INDEX;
+    if (__builtin_expect(env->tlb_table[is_user][index].addr_write != 
+                         (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))), 0)) {
+        glue(glue(__st, SUFFIX), MMUSUFFIX)(addr, v, is_user);
+    } else {
+        physaddr = addr + env->tlb_table[is_user][index].addend;
+        glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, v);
+    }
+}
+
+#else /* !VBOX */
+
+static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+{
+    asm volatile ("movl %0, %%edx\n"
+                  "movl %0, %%eax\n"
+                  "shrl %3, %%edx\n"
+                  "andl %4, %%eax\n"
+                  "andl %2, %%edx\n"
+                  "leal %5(%%edx, %%ebp), %%edx\n"
+                  "cmpl (%%edx), %%eax\n"
+                  "movl %0, %%eax\n"
+                  "je 1f\n"
+#if DATA_SIZE == 1
+                  "movzbl %b1, %%edx\n"
+#elif DATA_SIZE == 2
+                  "movzwl %w1, %%edx\n"
+#elif DATA_SIZE == 4
+                  "movl %1, %%edx\n"
+#else
+#error unsupported size
+#endif
+                  "pushl %6\n"
+                  "call %7\n"
+                  "popl %%eax\n"
+                  "jmp 2f\n"
+                  "1:\n"
+                  "addl 8(%%edx), %%eax\n"
+#if DATA_SIZE == 1
+                  "movb %b1, (%%eax)\n"
+#elif DATA_SIZE == 2
+                  "movw %w1, (%%eax)\n"
+#elif DATA_SIZE == 4
+                  "movl %1, (%%eax)\n"
+#else
+#error unsupported size
+#endif
+                  "2:\n"
+                  : 
+                  : "r" (ptr), 
+/* NOTE: 'q' would be needed as constraint, but we could not use it
+   with T1 ! */
+                  "r" (v), 
+                  "i" ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), 
+                  "i" (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), 
+                  "i" (TARGET_PAGE_MASK | (DATA_SIZE - 1)),
+                  "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MEM_INDEX][0].addr_write)),
+                  "i" (CPU_MEM_INDEX),
+                  "m" (*(uint8_t *)&glue(glue(__st, SUFFIX), MMUSUFFIX))
+                  : "%eax", "%ecx", "%edx", "memory", "cc");
+}
+#endif /* !VBOX */
+
+#else
+
+/* generic load/store macros */
+
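+/* Portable C fall-back: a TLB entry only matches when both the page number
+   and the low alignment bits agree, since the tag is compared against
+   (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))).  Unaligned accesses and
+   misses therefore take the out-of-line __ld/__st MMU helpers, while a hit
+   adds the per-entry addend to form a host pointer for a raw access. */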
+static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    int index;
+    RES_TYPE res;
+    target_ulong addr;
+    unsigned long physaddr;
+    int is_user;
+
+    addr = ptr;
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    is_user = CPU_MEM_INDEX;
+    if (__builtin_expect(env->tlb_table[is_user][index].ADDR_READ != 
+                         (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))), 0)) {
+        res = glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, is_user);
+    } else {
+        physaddr = addr + env->tlb_table[is_user][index].addend;
+        res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr);
+    }
+    return res;
+}
+
+#if DATA_SIZE <= 2
+static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    int res, index;
+    target_ulong addr;
+    unsigned long physaddr;
+    int is_user;
+
+    addr = ptr;
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    is_user = CPU_MEM_INDEX;
+    if (__builtin_expect(env->tlb_table[is_user][index].ADDR_READ != 
+                         (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))), 0)) {
+        res = (DATA_STYPE)glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, is_user);
+    } else {
+        physaddr = addr + env->tlb_table[is_user][index].addend;
+        res = glue(glue(lds, SUFFIX), _raw)((uint8_t *)physaddr);
+    }
+    return res;
+}
+#endif
+
+#if ACCESS_TYPE != 3
+
+/* generic store macro */
+
+static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+{
+    int index;
+    target_ulong addr;
+    unsigned long physaddr;
+    int is_user;
+
+    addr = ptr;
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    is_user = CPU_MEM_INDEX;
+    if (__builtin_expect(env->tlb_table[is_user][index].addr_write != 
+                         (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))), 0)) {
+        glue(glue(__st, SUFFIX), MMUSUFFIX)(addr, v, is_user);
+    } else {
+        physaddr = addr + env->tlb_table[is_user][index].addend;
+        glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, v);
+    }
+}
+
+#endif /* ACCESS_TYPE != 3 */
+
+#endif /* !asm */
+
+#if ACCESS_TYPE != 3
+
+#if DATA_SIZE == 8
+static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.i = glue(ldq, MEMSUFFIX)(ptr);
+    return u.d;
+}
+
+static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    glue(stq, MEMSUFFIX)(ptr, u.i);
+}
+#endif /* DATA_SIZE == 8 */
+
+#if DATA_SIZE == 4
+static inline float32 glue(ldfl, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    return u.f;
+}
+
+static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    glue(stl, MEMSUFFIX)(ptr, u.i);
+}
+#endif /* DATA_SIZE == 4 */
+
+#endif /* ACCESS_TYPE != 3 */
+
+#undef RES_TYPE
+#undef DATA_TYPE
+#undef DATA_STYPE
+#undef SUFFIX
+#undef USUFFIX
+#undef DATA_SIZE
+#undef CPU_MEM_INDEX
+#undef MMUSUFFIX
+#undef ADDR_READ
Index: /trunk/src/recompiler_new/softmmu_template.h
===================================================================
--- /trunk/src/recompiler_new/softmmu_template.h	(revision 13168)
+++ /trunk/src/recompiler_new/softmmu_template.h	(revision 13168)
@@ -0,0 +1,322 @@
+/*
+ *  Software MMU support
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#define DATA_SIZE (1 << SHIFT)
+
+#if DATA_SIZE == 8
+#define SUFFIX q
+#define USUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX l
+#define USUFFIX l
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX w
+#define USUFFIX uw
+#define DATA_TYPE uint16_t
+#elif DATA_SIZE == 1
+#define SUFFIX b
+#define USUFFIX ub
+#define DATA_TYPE uint8_t
+#else
+#error unsupported data size
+#endif
+
+#ifdef SOFTMMU_CODE_ACCESS
+#define READ_ACCESS_TYPE 2
+#define ADDR_READ addr_code
+#else
+#define READ_ACCESS_TYPE 0
+#define ADDR_READ addr_read
+#endif
+
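+/* When SOFTMMU_CODE_ACCESS is defined this template emits code-fetch
+   helpers: reads are classified as access type 2 and matched against the
+   addr_code TLB field instead of addr_read, and the store helpers are
+   omitted entirely (see the #ifndef SOFTMMU_CODE_ACCESS block below). */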
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr, 
+                                                        int is_user,
+                                                        void *retaddr);
+static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr, 
+                                              target_ulong tlb_addr)
+{
+    DATA_TYPE res;
+    int index;
+
+    index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+#if SHIFT <= 2
+    res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+#else
+#ifdef TARGET_WORDS_BIGENDIAN
+    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
+    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+#else
+    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+#endif
+#endif /* SHIFT > 2 */
+#ifdef USE_KQEMU
+    env->last_io_time = cpu_get_time_fast();
+#endif
+    return res;
+}
+
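+/* Overview of the generated load helper: a TLB hit on plain RAM reads the
+   value straight through the host address formed with the entry's addend;
+   a hit whose low bits mark an I/O page goes through io_read; an access
+   that straddles a page boundary is split by the slow_ld helper; and a TLB
+   miss calls tlb_fill() and retries the lookup. */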
+/* handle all cases except unaligned accesses which span two pages */
+DATA_TYPE REGPARM(1) glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+                                                         int is_user)
+{
+    DATA_TYPE res;
+    int index;
+    target_ulong tlb_addr;
+    target_phys_addr_t physaddr;
+    void *retaddr;
+    
+    /* test if there is a match for unaligned or IO access */
+    /* XXX: could be done more in the memory macro in a non-portable way */
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ redo:
+    tlb_addr = env->tlb_table[is_user][index].ADDR_READ;
+    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        physaddr = addr + env->tlb_table[is_user][index].addend;
+        if (tlb_addr & ~TARGET_PAGE_MASK) {
+            /* IO access */
+            if ((addr & (DATA_SIZE - 1)) != 0)
+                goto do_unaligned_access;
+            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+            /* slow unaligned access (it spans two pages or IO) */
+        do_unaligned_access:
+            retaddr = GETPC();
+#ifdef ALIGNED_ONLY
+            do_unaligned_access(addr, READ_ACCESS_TYPE, is_user, retaddr);
+#endif
+            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr, 
+                                                         is_user, retaddr);
+        } else {
+            /* unaligned/aligned access in the same page */
+#ifdef ALIGNED_ONLY
+            if ((addr & (DATA_SIZE - 1)) != 0) {
+                retaddr = GETPC();
+                do_unaligned_access(addr, READ_ACCESS_TYPE, is_user, retaddr);
+            }
+#endif
+            res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(long)physaddr);
+        }
+    } else {
+        /* the page is not in the TLB : fill it */
+        retaddr = GETPC();
+#ifdef ALIGNED_ONLY
+        if ((addr & (DATA_SIZE - 1)) != 0)
+            do_unaligned_access(addr, READ_ACCESS_TYPE, is_user, retaddr);
+#endif
+        tlb_fill(addr, READ_ACCESS_TYPE, is_user, retaddr);
+        goto redo;
+    }
+    return res;
+}
+
+/* handle all unaligned cases */
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr, 
+                                                        int is_user,
+                                                        void *retaddr)
+{
+    DATA_TYPE res, res1, res2;
+    int index, shift;
+    target_phys_addr_t physaddr;
+    target_ulong tlb_addr, addr1, addr2;
+
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ redo:
+    tlb_addr = env->tlb_table[is_user][index].ADDR_READ;
+    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        physaddr = addr + env->tlb_table[is_user][index].addend;
+        if (tlb_addr & ~TARGET_PAGE_MASK) {
+            /* IO access */
+            if ((addr & (DATA_SIZE - 1)) != 0)
+                goto do_unaligned_access;
+            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+        do_unaligned_access:
+            /* slow unaligned access (it spans two pages) */
+            addr1 = addr & ~(DATA_SIZE - 1);
+            addr2 = addr1 + DATA_SIZE;
+            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1, 
+                                                          is_user, retaddr);
+            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2, 
+                                                          is_user, retaddr);
+            shift = (addr & (DATA_SIZE - 1)) * 8;
+#ifdef TARGET_WORDS_BIGENDIAN
+            res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
+#else
+            res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
+#endif
+            res = (DATA_TYPE)res;
+        } else {
+            /* unaligned/aligned access in the same page */
+            res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(long)physaddr);
+        }
+    } else {
+        /* the page is not in the TLB : fill it */
+        tlb_fill(addr, READ_ACCESS_TYPE, is_user, retaddr);
+        goto redo;
+    }
+    return res;
+}
+
+#ifndef SOFTMMU_CODE_ACCESS
+
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr, 
+                                                   DATA_TYPE val, 
+                                                   int is_user,
+                                                   void *retaddr);
+
+static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr, 
+                                          DATA_TYPE val,
+                                          target_ulong tlb_addr,
+                                          void *retaddr)
+{
+    int index;
+
+    index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+    env->mem_write_vaddr = tlb_addr;
+    env->mem_write_pc = (unsigned long)retaddr;
+#if SHIFT <= 2
+    io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
+#else
+#ifdef TARGET_WORDS_BIGENDIAN
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+#else
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+#endif
+#endif /* SHIFT > 2 */
+#ifdef USE_KQEMU
+    env->last_io_time = cpu_get_time_fast();
+#endif
+}
+
+void REGPARM(2) glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, 
+                                                    DATA_TYPE val,
+                                                    int is_user)
+{
+    target_phys_addr_t physaddr;
+    target_ulong tlb_addr;
+    void *retaddr;
+    int index;
+    
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ redo:
+    tlb_addr = env->tlb_table[is_user][index].addr_write;
+    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        physaddr = addr + env->tlb_table[is_user][index].addend;
+        if (tlb_addr & ~TARGET_PAGE_MASK) {
+            /* IO access */
+            if ((addr & (DATA_SIZE - 1)) != 0)
+                goto do_unaligned_access;
+            retaddr = GETPC();
+            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+        do_unaligned_access:
+            retaddr = GETPC();
+#ifdef ALIGNED_ONLY
+            do_unaligned_access(addr, 1, is_user, retaddr);
+#endif
+            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val, 
+                                                   is_user, retaddr);
+        } else {
+            /* aligned/unaligned access in the same page */
+#ifdef ALIGNED_ONLY
+            if ((addr & (DATA_SIZE - 1)) != 0) {
+                retaddr = GETPC();
+                do_unaligned_access(addr, 1, is_user, retaddr);
+            }
+#endif
+            glue(glue(st, SUFFIX), _raw)((uint8_t *)(long)physaddr, val);
+        }
+    } else {
+        /* the page is not in the TLB : fill it */
+        retaddr = GETPC();
+#ifdef ALIGNED_ONLY
+        if ((addr & (DATA_SIZE - 1)) != 0)
+            do_unaligned_access(addr, 1, is_user, retaddr);
+#endif
+        tlb_fill(addr, 1, is_user, retaddr);
+        goto redo;
+    }
+}
+
+/* handles all unaligned cases */
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr, 
+                                                   DATA_TYPE val,
+                                                   int is_user,
+                                                   void *retaddr)
+{
+    target_phys_addr_t physaddr;
+    target_ulong tlb_addr;
+    int index, i;
+
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ redo:
+    tlb_addr = env->tlb_table[is_user][index].addr_write;
+    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        physaddr = addr + env->tlb_table[is_user][index].addend;
+        if (tlb_addr & ~TARGET_PAGE_MASK) {
+            /* IO access */
+            if ((addr & (DATA_SIZE - 1)) != 0)
+                goto do_unaligned_access;
+            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+        do_unaligned_access:
+            /* XXX: not efficient, but simple */
+            for(i = 0;i < DATA_SIZE; i++) {
+#ifdef TARGET_WORDS_BIGENDIAN
+                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)), 
+                                          is_user, retaddr);
+#else
+                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8), 
+                                          is_user, retaddr);
+#endif
+            }
+        } else {
+            /* aligned/unaligned access in the same page */
+            glue(glue(st, SUFFIX), _raw)((uint8_t *)(long)physaddr, val);
+        }
+    } else {
+        /* the page is not in the TLB : fill it */
+        tlb_fill(addr, 1, is_user, retaddr);
+        goto redo;
+    }
+}
+
+#endif /* !defined(SOFTMMU_CODE_ACCESS) */
+
+#undef READ_ACCESS_TYPE
+#undef SHIFT
+#undef DATA_TYPE
+#undef SUFFIX
+#undef USUFFIX
+#undef DATA_SIZE
+#undef ADDR_READ
Index: /trunk/src/recompiler_new/target-i386/cpu.h
===================================================================
--- /trunk/src/recompiler_new/target-i386/cpu.h	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/cpu.h	(revision 13168)
@@ -0,0 +1,897 @@
+/*
+ * i386 virtual CPU header
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#ifndef CPU_I386_H
+#define CPU_I386_H
+
+#include "config.h"
+
+#ifdef TARGET_X86_64
+#define TARGET_LONG_BITS 64
+#else
+#define TARGET_LONG_BITS 32
+#endif
+
+/* target supports implicit self modifying code */
+#define TARGET_HAS_SMC
+/* support for self modifying code even if the modified instruction is
+   close to the modifying instruction */
+#define TARGET_HAS_PRECISE_SMC
+
+#define TARGET_HAS_ICE 1
+
+#ifdef TARGET_X86_64
+#define ELF_MACHINE	EM_X86_64
+#else
+#define ELF_MACHINE	EM_386
+#endif
+
+#include "cpu-defs.h"
+
+#include "softfloat.h"
+
+#if defined(VBOX)
+# include <iprt/critsect.h>
+# include <iprt/thread.h>
+# include <iprt/assert.h>
+# include <iprt/asm.h>
+# include <VBox/vmm.h>
+#endif /* VBOX */
+
+#if defined(__i386__) && !defined(CONFIG_SOFTMMU)
+#define USE_CODE_COPY
+#endif
+
+#define R_EAX 0
+#define R_ECX 1
+#define R_EDX 2
+#define R_EBX 3
+#define R_ESP 4
+#define R_EBP 5
+#define R_ESI 6
+#define R_EDI 7
+
+#define R_AL 0
+#define R_CL 1
+#define R_DL 2
+#define R_BL 3
+#define R_AH 4
+#define R_CH 5
+#define R_DH 6
+#define R_BH 7
+
+#define R_ES 0
+#define R_CS 1
+#define R_SS 2
+#define R_DS 3
+#define R_FS 4
+#define R_GS 5
+
+/* segment descriptor fields */
+#define DESC_G_MASK     (1 << 23)
+#define DESC_B_SHIFT    22
+#define DESC_B_MASK     (1 << DESC_B_SHIFT)
+#define DESC_L_SHIFT    21 /* x86_64 only : 64 bit code segment */
+#define DESC_L_MASK     (1 << DESC_L_SHIFT)
+#define DESC_AVL_MASK   (1 << 20)
+#define DESC_P_MASK     (1 << 15)
+#define DESC_DPL_SHIFT  13
+#define DESC_S_MASK     (1 << 12)
+#define DESC_TYPE_SHIFT 8
+#define DESC_A_MASK     (1 << 8)
+
+#define DESC_CS_MASK    (1 << 11) /* 1=code segment 0=data segment */
+#define DESC_C_MASK     (1 << 10) /* code: conforming */
+#define DESC_R_MASK     (1 << 9)  /* code: readable */
+
+#define DESC_E_MASK     (1 << 10) /* data: expansion direction */
+#define DESC_W_MASK     (1 << 9)  /* data: writable */
+
+#define DESC_TSS_BUSY_MASK (1 << 9)
+
+/* eflags masks */
+#define CC_C   	0x0001
+#define CC_P 	0x0004
+#define CC_A	0x0010
+#define CC_Z	0x0040
+#define CC_S    0x0080
+#define CC_O    0x0800
+
+#define TF_SHIFT   8
+#define IOPL_SHIFT 12
+#define VM_SHIFT   17
+
+#define TF_MASK 		0x00000100
+#define IF_MASK 		0x00000200
+#define DF_MASK 		0x00000400
+#define IOPL_MASK		0x00003000
+#define NT_MASK	         	0x00004000
+#define RF_MASK			0x00010000
+#define VM_MASK			0x00020000
+#define AC_MASK			0x00040000 
+#define VIF_MASK                0x00080000
+#define VIP_MASK                0x00100000
+#define ID_MASK                 0x00200000
+
+/* hidden flags - used internally by qemu to represent additional cpu
+   states. Only the CPL, INHIBIT_IRQ and HALTED are not redundant. We avoid
+   using the IOPL_MASK, TF_MASK and VM_MASK bit positions to ease ORing
+   with eflags. */
+/* current cpl */
+#define HF_CPL_SHIFT         0
+/* true if soft mmu is being used */
+#define HF_SOFTMMU_SHIFT     2
+/* true if hardware interrupts must be disabled for next instruction */
+#define HF_INHIBIT_IRQ_SHIFT 3
+/* 16 or 32 segments */
+#define HF_CS32_SHIFT        4
+#define HF_SS32_SHIFT        5
+/* zero base for DS, ES and SS : can be '0' only in 32 bit CS segment */
+#define HF_ADDSEG_SHIFT      6
+/* copy of CR0.PE (protected mode) */
+#define HF_PE_SHIFT          7
+#define HF_TF_SHIFT          8 /* must be same as eflags */
+#define HF_MP_SHIFT          9 /* the order must be MP, EM, TS */
+#define HF_EM_SHIFT         10
+#define HF_TS_SHIFT         11
+#define HF_IOPL_SHIFT       12 /* must be same as eflags */
+#define HF_LMA_SHIFT        14 /* only used on x86_64: long mode active */
+#define HF_CS64_SHIFT       15 /* only used on x86_64: 64 bit code segment  */
+#define HF_OSFXSR_SHIFT     16 /* CR4.OSFXSR */
+#define HF_VM_SHIFT         17 /* must be same as eflags */
+#define HF_HALTED_SHIFT     18 /* CPU halted */
+#define HF_SMM_SHIFT        19 /* CPU in SMM mode */
+
+#define HF_CPL_MASK          (3 << HF_CPL_SHIFT)
+#define HF_SOFTMMU_MASK      (1 << HF_SOFTMMU_SHIFT)
+#define HF_INHIBIT_IRQ_MASK  (1 << HF_INHIBIT_IRQ_SHIFT)
+#define HF_CS32_MASK         (1 << HF_CS32_SHIFT)
+#define HF_SS32_MASK         (1 << HF_SS32_SHIFT)
+#define HF_ADDSEG_MASK       (1 << HF_ADDSEG_SHIFT)
+#define HF_PE_MASK           (1 << HF_PE_SHIFT)
+#define HF_TF_MASK           (1 << HF_TF_SHIFT)
+#define HF_MP_MASK           (1 << HF_MP_SHIFT)
+#define HF_EM_MASK           (1 << HF_EM_SHIFT)
+#define HF_TS_MASK           (1 << HF_TS_SHIFT)
+#define HF_LMA_MASK          (1 << HF_LMA_SHIFT)
+#define HF_CS64_MASK         (1 << HF_CS64_SHIFT)
+#define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
+#define HF_HALTED_MASK       (1 << HF_HALTED_SHIFT)
+#define HF_SMM_MASK          (1 << HF_SMM_SHIFT)
+
+#define CR0_PE_MASK  (1 << 0)
+#define CR0_MP_MASK  (1 << 1)
+#define CR0_EM_MASK  (1 << 2)
+#define CR0_TS_MASK  (1 << 3)
+#define CR0_ET_MASK  (1 << 4)
+#define CR0_NE_MASK  (1 << 5)
+#define CR0_WP_MASK  (1 << 16)
+#define CR0_AM_MASK  (1 << 18)
+#define CR0_PG_MASK  (1 << 31)
+
+#define CR4_VME_MASK  (1 << 0)
+#define CR4_PVI_MASK  (1 << 1)
+#define CR4_TSD_MASK  (1 << 2)
+#define CR4_DE_MASK   (1 << 3)
+#define CR4_PSE_MASK  (1 << 4)
+#define CR4_PAE_MASK  (1 << 5)
+#define CR4_PGE_MASK  (1 << 7)
+#define CR4_PCE_MASK  (1 << 8)
+#define CR4_OSFXSR_MASK (1 << 9)
+#define CR4_OSXMMEXCPT_MASK  (1 << 10)
+
+#define PG_PRESENT_BIT	0
+#define PG_RW_BIT	1
+#define PG_USER_BIT	2
+#define PG_PWT_BIT	3
+#define PG_PCD_BIT	4
+#define PG_ACCESSED_BIT	5
+#define PG_DIRTY_BIT	6
+#define PG_PSE_BIT	7
+#define PG_GLOBAL_BIT	8
+#define PG_NX_BIT	63
+
+#define PG_PRESENT_MASK  (1 << PG_PRESENT_BIT)
+#define PG_RW_MASK	 (1 << PG_RW_BIT)
+#define PG_USER_MASK	 (1 << PG_USER_BIT)
+#define PG_PWT_MASK	 (1 << PG_PWT_BIT)
+#define PG_PCD_MASK	 (1 << PG_PCD_BIT)
+#define PG_ACCESSED_MASK (1 << PG_ACCESSED_BIT)
+#define PG_DIRTY_MASK	 (1 << PG_DIRTY_BIT)
+#define PG_PSE_MASK	 (1 << PG_PSE_BIT)
+#define PG_GLOBAL_MASK	 (1 << PG_GLOBAL_BIT)
+#define PG_NX_MASK	 (1LL << PG_NX_BIT)
+
+#define PG_ERROR_W_BIT     1
+
+#define PG_ERROR_P_MASK    0x01
+#define PG_ERROR_W_MASK    (1 << PG_ERROR_W_BIT)
+#define PG_ERROR_U_MASK    0x04
+#define PG_ERROR_RSVD_MASK 0x08
+#define PG_ERROR_I_D_MASK  0x10
+
+#define MSR_IA32_APICBASE               0x1b
+#define MSR_IA32_APICBASE_BSP           (1<<8)
+#define MSR_IA32_APICBASE_ENABLE        (1<<11)
+#define MSR_IA32_APICBASE_BASE          (0xfffff<<12)
+
+#ifndef MSR_IA32_SYSENTER_CS /* VBox x86.h kludge */
+#define MSR_IA32_SYSENTER_CS            0x174
+#define MSR_IA32_SYSENTER_ESP           0x175
+#define MSR_IA32_SYSENTER_EIP           0x176
+#endif 
+
+#define MSR_MCG_CAP                     0x179
+#define MSR_MCG_STATUS                  0x17a
+#define MSR_MCG_CTL                     0x17b
+
+#define MSR_PAT                         0x277
+
+#define MSR_EFER                        0xc0000080
+
+#define MSR_EFER_SCE   (1 << 0)
+#define MSR_EFER_LME   (1 << 8)
+#define MSR_EFER_LMA   (1 << 10)
+#define MSR_EFER_NXE   (1 << 11)
+#define MSR_EFER_FFXSR (1 << 14)
+#define MSR_APIC_RANGE_START            0x800
+#define MSR_APIC_RANGE_END              0x900
+
+#define MSR_STAR                        0xc0000081
+#define MSR_LSTAR                       0xc0000082
+#define MSR_CSTAR                       0xc0000083
+#define MSR_FMASK                       0xc0000084
+#define MSR_FSBASE                      0xc0000100
+#define MSR_GSBASE                      0xc0000101
+#define MSR_KERNELGSBASE                0xc0000102
+
+/* cpuid_features bits */
+#define CPUID_FP87 (1 << 0)
+#define CPUID_VME  (1 << 1)
+#define CPUID_DE   (1 << 2)
+#define CPUID_PSE  (1 << 3)
+#define CPUID_TSC  (1 << 4)
+#define CPUID_MSR  (1 << 5)
+#define CPUID_PAE  (1 << 6)
+#define CPUID_MCE  (1 << 7)
+#define CPUID_CX8  (1 << 8)
+#define CPUID_APIC (1 << 9)
+#define CPUID_SEP  (1 << 11) /* sysenter/sysexit */
+#define CPUID_MTRR (1 << 12)
+#define CPUID_PGE  (1 << 13)
+#define CPUID_MCA  (1 << 14)
+#define CPUID_CMOV (1 << 15)
+#define CPUID_PAT  (1 << 16)
+#define CPUID_PSE36   (1 << 17)
+#define CPUID_CLFLUSH (1 << 19)
+/* ... */
+#define CPUID_MMX  (1 << 23)
+#define CPUID_FXSR (1 << 24)
+#define CPUID_SSE  (1 << 25)
+#define CPUID_SSE2 (1 << 26)
+
+#ifdef VBOX
+#define CPUID_HTT  (1 << 28)
+#endif
+
+#define CPUID_EXT_SSE3     (1 << 0)
+#define CPUID_EXT_MONITOR  (1 << 3)
+#define CPUID_EXT_DSCPL    (1 << 4)
+#define CPUID_EXT_VMX      (1 << 5)
+#define CPUID_EXT_SMX      (1 << 6)
+#define CPUID_EXT_EST      (1 << 7)
+#define CPUID_EXT_TM2      (1 << 8)
+#define CPUID_EXT_SSSE3    (1 << 9)
+#define CPUID_EXT_CID      (1 << 10)
+#define CPUID_EXT_CX16     (1 << 13)
+#define CPUID_EXT_XTPR     (1 << 14)
+#define CPUID_EXT_DCA      (1 << 17)
+#define CPUID_EXT_POPCNT   (1 << 22)
+
+#define CPUID_EXT2_SYSCALL (1 << 11)
+#define CPUID_EXT2_MP      (1 << 19)
+#define CPUID_EXT2_NX      (1 << 20)
+#define CPUID_EXT2_MMXEXT  (1 << 22)
+#define CPUID_EXT2_FFXSR   (1 << 25)
+#define CPUID_EXT2_PDPE1GB (1 << 26)
+#define CPUID_EXT2_RDTSCP  (1 << 27)
+#define CPUID_EXT2_LM      (1 << 29)
+#define CPUID_EXT2_3DNOWEXT (1 << 30)
+#define CPUID_EXT2_3DNOW   (1 << 31)
+
+#define CPUID_EXT3_LAHF_LM (1 << 0)
+#define CPUID_EXT3_CMP_LEG (1 << 1)
+#define CPUID_EXT3_SVM     (1 << 2)
+#define CPUID_EXT3_EXTAPIC (1 << 3)
+#define CPUID_EXT3_CR8LEG  (1 << 4)
+#define CPUID_EXT3_ABM     (1 << 5)
+#define CPUID_EXT3_SSE4A   (1 << 6)
+#define CPUID_EXT3_MISALIGNSSE (1 << 7)
+#define CPUID_EXT3_3DNOWPREFETCH (1 << 8)
+#define CPUID_EXT3_OSVW    (1 << 9)
+#define CPUID_EXT3_IBS     (1 << 10)
+
+#define EXCP00_DIVZ	0
+#define EXCP01_SSTP	1
+#define EXCP02_NMI	2
+#define EXCP03_INT3	3
+#define EXCP04_INTO	4
+#define EXCP05_BOUND	5
+#define EXCP06_ILLOP	6
+#define EXCP07_PREX	7
+#define EXCP08_DBLE	8
+#define EXCP09_XERR	9
+#define EXCP0A_TSS	10
+#define EXCP0B_NOSEG	11
+#define EXCP0C_STACK	12
+#define EXCP0D_GPF	13
+#define EXCP0E_PAGE	14
+#define EXCP10_COPR	16
+#define EXCP11_ALGN	17
+#define EXCP12_MCHK	18
+
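+/* Lazy condition-code evaluation: rather than updating EFLAGS after every
+   instruction, the translator records the last flag-setting operation in
+   cc_op and its operand/result in cc_src/cc_dst; the architectural flag
+   bits are only computed when something actually reads them. */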
+enum {
+    CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
+    CC_OP_EFLAGS,  /* all cc are explicitly computed, CC_SRC = flags */
+
+    CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */
+    CC_OP_MULW,
+    CC_OP_MULL,
+    CC_OP_MULQ,
+
+    CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+    CC_OP_ADDW,
+    CC_OP_ADDL,
+    CC_OP_ADDQ,
+
+    CC_OP_ADCB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+    CC_OP_ADCW,
+    CC_OP_ADCL,
+    CC_OP_ADCQ,
+
+    CC_OP_SUBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+    CC_OP_SUBW,
+    CC_OP_SUBL,
+    CC_OP_SUBQ,
+
+    CC_OP_SBBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+    CC_OP_SBBW,
+    CC_OP_SBBL,
+    CC_OP_SBBQ,
+
+    CC_OP_LOGICB, /* modify all flags, CC_DST = res */
+    CC_OP_LOGICW,
+    CC_OP_LOGICL,
+    CC_OP_LOGICQ,
+
+    CC_OP_INCB, /* modify all flags except C, CC_DST = res, CC_SRC = C */
+    CC_OP_INCW,
+    CC_OP_INCL,
+    CC_OP_INCQ,
+
+    CC_OP_DECB, /* modify all flags except C, CC_DST = res, CC_SRC = C */
+    CC_OP_DECW,
+    CC_OP_DECL,
+    CC_OP_DECQ,
+
+    CC_OP_SHLB, /* modify all flags, CC_DST = res, CC_SRC.msb = C */
+    CC_OP_SHLW,
+    CC_OP_SHLL,
+    CC_OP_SHLQ,
+
+    CC_OP_SARB, /* modify all flags, CC_DST = res, CC_SRC.lsb = C */
+    CC_OP_SARW,
+    CC_OP_SARL,
+    CC_OP_SARQ,
+
+    CC_OP_NB
+};
+
+#ifdef FLOATX80
+#define USE_X86LDOUBLE
+#endif
+
+#ifdef USE_X86LDOUBLE
+typedef floatx80 CPU86_LDouble;
+#else
+typedef float64 CPU86_LDouble;
+#endif
+
+typedef struct SegmentCache {
+    uint32_t selector;
+    target_ulong base;
+    uint32_t limit;
+    uint32_t flags;
+#ifdef VBOX
+    /** The new selector is saved here when we are unable to sync it before invoking the recompiled code. */
+    uint32_t newselector;
+#endif
+} SegmentCache;
+
+typedef union {
+    uint8_t _b[16];
+    uint16_t _w[8];
+    uint32_t _l[4];
+    uint64_t _q[2];
+    float32 _s[4];
+    float64 _d[2];
+} XMMReg;
+
+typedef union {
+    uint8_t _b[8];
+    uint16_t _w[2];
+    uint32_t _l[1];
+    uint64_t q;
+} MMXReg;
+
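+/* Element accessors for the XMM/MMX unions: on big-endian hosts they index
+   from the opposite end so that XMM_L(0), MMX_W(0), etc. always refer to the
+   architecturally lowest-order element, independent of host byte order. */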
+#ifdef WORDS_BIGENDIAN
+#define XMM_B(n) _b[15 - (n)]
+#define XMM_W(n) _w[7 - (n)]
+#define XMM_L(n) _l[3 - (n)]
+#define XMM_S(n) _s[3 - (n)]
+#define XMM_Q(n) _q[1 - (n)]
+#define XMM_D(n) _d[1 - (n)]
+
+#define MMX_B(n) _b[7 - (n)]
+#define MMX_W(n) _w[3 - (n)]
+#define MMX_L(n) _l[1 - (n)]
+#else
+#define XMM_B(n) _b[n]
+#define XMM_W(n) _w[n]
+#define XMM_L(n) _l[n]
+#define XMM_S(n) _s[n]
+#define XMM_Q(n) _q[n]
+#define XMM_D(n) _d[n]
+
+#define MMX_B(n) _b[n]
+#define MMX_W(n) _w[n]
+#define MMX_L(n) _l[n]
+#endif
+#define MMX_Q(n) q
+
+#ifdef TARGET_X86_64
+#define CPU_NB_REGS 16
+#else
+#define CPU_NB_REGS 8
+#endif
+
+typedef struct CPUX86State {
+#if TARGET_LONG_BITS > HOST_LONG_BITS
+    /* temporaries if we cannot store them in host registers */
+    target_ulong t0, t1, t2;
+#endif
+
+    /* standard registers */
+    target_ulong regs[CPU_NB_REGS];
+    target_ulong eip;
+    target_ulong eflags; /* eflags register. During CPU emulation, CC
+                        flags and DF are set to zero because they are
+                        stored elsewhere */
+
+    /* emulator internal eflags handling */
+    target_ulong cc_src;
+    target_ulong cc_dst;
+    uint32_t cc_op;
+    int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
+    uint32_t hflags; /* hidden flags, see HF_xxx constants */
+
+    /* segments */
+    SegmentCache segs[6]; /* selector values */
+    SegmentCache ldt;
+    SegmentCache tr;
+    SegmentCache gdt; /* only base and limit are used */
+    SegmentCache idt; /* only base and limit are used */
+
+    target_ulong cr[5]; /* NOTE: cr1 is unused */
+    uint32_t a20_mask;
+
+    /* FPU state */
+    unsigned int fpstt; /* top of stack index */
+    unsigned int fpus;
+    unsigned int fpuc;
+    uint8_t fptags[8];   /* 0 = valid, 1 = empty */
+    union {
+#ifdef USE_X86LDOUBLE
+        CPU86_LDouble d __attribute__((aligned(16)));
+#else
+        CPU86_LDouble d;
+#endif
+        MMXReg mmx;
+    } fpregs[8];
+
+    /* emulator internal variables */
+    float_status fp_status;
+#ifdef VBOX
+    uint32_t alignment3[3]; /* force the long double to start on a 16 byte boundary. */
+#endif 
+    CPU86_LDouble ft0;
+#if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN)
+    uint32_t alignment4; /* long double is 12 byte, pad it to 16. */
+#endif 
+    union {
+	float f;
+        double d;
+	int i32;
+        int64_t i64;
+    } fp_convert;
+    
+    float_status sse_status;
+    uint32_t mxcsr;
+    XMMReg xmm_regs[CPU_NB_REGS];
+    XMMReg xmm_t0;
+    MMXReg mmx_t0;
+
+    /* sysenter registers */
+    uint32_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+#ifdef VBOX
+    uint32_t alignment0;
+#endif 
+    uint64_t efer;
+    uint64_t star;
+#ifdef TARGET_X86_64
+    target_ulong lstar;
+    target_ulong cstar;
+    target_ulong fmask;
+    target_ulong kernelgsbase;
+#endif
+
+    uint64_t pat;
+
+    /* temporary data for USE_CODE_COPY mode */
+#ifdef USE_CODE_COPY
+    uint32_t tmp0;
+    uint32_t saved_esp;
+    int native_fp_regs; /* if true, the FPU state is in the native CPU regs */
+#endif
+    
+    /* exception/interrupt handling */
+    jmp_buf jmp_env;
+#if defined(VBOX) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_X86) 
+    /* This will be removed when switching to the no-crt code everywhere. */
+    uint32_t alignment1[23];
+#endif 
+    int exception_index;
+    int error_code;
+    int exception_is_int;
+    target_ulong exception_next_eip;
+    target_ulong dr[8]; /* debug registers */
+    uint32_t smbase;
+    int interrupt_request; 
+    int user_mode_only; /* user mode only simulation */
+
+    CPU_COMMON
+
+#ifdef VBOX
+    /** cpu state flags. (see defines below) */
+    uint32_t    state;
+    /** The VM handle. */
+    PVM         pVM;
+    /** code buffer for instruction emulation */
+    void       *pvCodeBuffer;
+    /** code buffer size */
+    uint32_t    cbCodeBuffer;
+#endif /* VBOX */
+
+    /* processor features (e.g. for CPUID insn) */
+#ifndef VBOX /* remR3CpuId deals with these */
+    uint32_t cpuid_level;
+    uint32_t cpuid_vendor1;
+    uint32_t cpuid_vendor2;
+    uint32_t cpuid_vendor3;
+    uint32_t cpuid_version;
+#endif /* !VBOX */
+    uint32_t cpuid_features;
+    uint32_t cpuid_ext_features;
+#ifndef VBOX
+    uint32_t cpuid_xlevel;
+    uint32_t cpuid_model[12];
+#endif /* !VBOX */
+    uint32_t cpuid_ext2_features;
+    uint32_t cpuid_ext3_features;
+
+#ifndef VBOX
+#ifdef USE_KQEMU
+    int kqemu_enabled;
+    int last_io_time;
+#endif
+    /* in order to simplify APIC support, we leave this pointer to the
+       user */
+    struct APICState *apic_state;
+#else
+    uint32_t alignment2[3];
+#endif 
+} CPUX86State;
+
+#ifdef VBOX
+
+/* Version 1.6 structure; just for loading the old saved state */
+typedef struct SegmentCache_Ver16 {
+    uint32_t selector;
+    uint32_t base;
+    uint32_t limit;
+    uint32_t flags;
+#ifdef VBOX
+    /** The new selector is saved here when we are unable to sync it before invoking the recompiled code. */
+    uint32_t newselector;
+#endif
+} SegmentCache_Ver16;
+
+#define CPU_NB_REGS_VER16 8
+
+/* Version 1.6 structure; just for loading the old saved state */
+typedef struct CPUX86State_Ver16 {
+#if TARGET_LONG_BITS > HOST_LONG_BITS
+    /* temporaries if we cannot store them in host registers */
+    uint32_t t0, t1, t2;
+#endif
+
+    /* standard registers */
+    uint32_t regs[CPU_NB_REGS_VER16];
+    uint32_t eip;
+    uint32_t eflags; /* eflags register. During CPU emulation, CC
+                        flags and DF are set to zero because they are
+                        stored elsewhere */
+
+    /* emulator internal eflags handling */
+    uint32_t cc_src;
+    uint32_t cc_dst;
+    uint32_t cc_op;
+    int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
+    uint32_t hflags; /* hidden flags, see HF_xxx constants */
+
+    /* segments */
+    SegmentCache_Ver16 segs[6]; /* selector values */
+    SegmentCache_Ver16 ldt;
+    SegmentCache_Ver16 tr;
+    SegmentCache_Ver16 gdt; /* only base and limit are used */
+    SegmentCache_Ver16 idt; /* only base and limit are used */
+
+    uint32_t cr[5]; /* NOTE: cr1 is unused */
+    uint32_t a20_mask;
+
+    /* FPU state */
+    unsigned int fpstt; /* top of stack index */
+    unsigned int fpus;
+    unsigned int fpuc;
+    uint8_t fptags[8];   /* 0 = valid, 1 = empty */
+    union {
+#ifdef USE_X86LDOUBLE
+        CPU86_LDouble d __attribute__((aligned(16)));
+#else
+        CPU86_LDouble d;
+#endif
+        MMXReg mmx;
+    } fpregs[8];
+
+    /* emulator internal variables */
+    float_status fp_status;
+#ifdef VBOX
+    uint32_t alignment3[3]; /* force the long double to start on a 16 byte boundary. */
+#endif 
+    CPU86_LDouble ft0;
+#if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN)
+    uint32_t alignment4; /* the long double is 12 bytes; pad it to 16. */
+#endif 
+    union {
+        float f;
+        double d;
+        int i32;
+        int64_t i64;
+    } fp_convert;
+    
+    float_status sse_status;
+    uint32_t mxcsr;
+    XMMReg xmm_regs[CPU_NB_REGS_VER16];
+    XMMReg xmm_t0;
+    MMXReg mmx_t0;
+
+    /* sysenter registers */
+    uint32_t sysenter_cs;
+    uint32_t sysenter_esp;
+    uint32_t sysenter_eip;
+#ifdef VBOX
+    uint32_t alignment0;
+#endif 
+    uint64_t efer;
+    uint64_t star;
+
+    uint64_t pat;
+
+    /* temporary data for USE_CODE_COPY mode */
+#ifdef USE_CODE_COPY
+    uint32_t tmp0;
+    uint32_t saved_esp;
+    int native_fp_regs; /* if true, the FPU state is in the native CPU regs */
+#endif
+    
+    /* exception/interrupt handling */
+    jmp_buf jmp_env;
+} CPUX86State_Ver16;
+
+/** CPUX86State state flags 
+ * @{ */
+#define CPU_RAW_RING0            0x0002 /* Set after first time RawR0 is executed, never cleared. */
+#define CPU_EMULATE_SINGLE_INSTR 0x0040 /* Execute a single instruction in emulation mode */
+#define CPU_EMULATE_SINGLE_STEP  0x0080 /* go into single step mode */
+#define CPU_RAW_HWACC            0x0100 /* Set after first time HWACC is executed, never cleared. */
+/** @} */
+#endif /* VBOX */
+
+#ifdef VBOX
+CPUX86State *cpu_x86_init(CPUX86State *env);
+#else  /* !VBOX */
+CPUX86State *cpu_x86_init(void);
+#endif /* !VBOX */
+int cpu_x86_exec(CPUX86State *s);
+void cpu_x86_close(CPUX86State *s);
+int cpu_get_pic_interrupt(CPUX86State *s);
+/* MSDOS compatibility mode FPU exception support */
+void cpu_set_ferr(CPUX86State *s);
+
+/* this function must always be used to load data in the segment
+   cache: it synchronizes the hflags with the segment cache values */
+static inline void cpu_x86_load_seg_cache(CPUX86State *env, 
+                                          int seg_reg, unsigned int selector,
+                                          target_ulong base,
+                                          unsigned int limit, 
+                                          unsigned int flags)
+{
+    SegmentCache *sc;
+    unsigned int new_hflags;
+    
+    sc = &env->segs[seg_reg];
+    sc->selector = selector;
+    sc->base = base;
+    sc->limit = limit;
+    sc->flags = flags;
+#ifdef VBOX
+    sc->newselector = 0;
+#endif
+
+    /* update the hidden flags */
+    {
+        if (seg_reg == R_CS) {
+#ifdef TARGET_X86_64
+            if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) {
+                /* long mode */
+                env->hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
+                env->hflags &= ~(HF_ADDSEG_MASK);
+            } else 
+#endif
+            {
+                /* legacy / compatibility case */
+                new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
+                    >> (DESC_B_SHIFT - HF_CS32_SHIFT);
+                env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) |
+                    new_hflags;
+            }
+        }
+        new_hflags = (env->segs[R_SS].flags & DESC_B_MASK)
+            >> (DESC_B_SHIFT - HF_SS32_SHIFT);
+        if (env->hflags & HF_CS64_MASK) {
+            /* zero base assumed for DS, ES and SS in long mode */
+        } else if (!(env->cr[0] & CR0_PE_MASK) || 
+                   (env->eflags & VM_MASK) ||
+                   !(env->hflags & HF_CS32_MASK)) {
+            /* XXX: try to avoid this test. The problem comes from the
+               fact that in real mode or vm86 mode we only modify the
+               'base' and 'selector' fields of the segment cache to go
+               faster. A solution may be to force addseg to one in
+               translate-i386.c. */
+            new_hflags |= HF_ADDSEG_MASK;
+        } else {
+            new_hflags |= ((env->segs[R_DS].base | 
+                            env->segs[R_ES].base |
+                            env->segs[R_SS].base) != 0) << 
+                HF_ADDSEG_SHIFT;
+        }
+        env->hflags = (env->hflags & 
+                       ~(HF_SS32_MASK | HF_ADDSEG_MASK)) | new_hflags;
+    }
+}
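+
+/* Illustrative sketch, not part of the original source: loading a flat
+   32 bit writable data segment through the helper above so that hflags
+   stay consistent.  The selector value and descriptor flags are made-up
+   example values. */
+#if 0
+static inline void example_load_flat_ds(CPUX86State *env)
+{
+    cpu_x86_load_seg_cache(env, R_DS, 0x10 /* example selector */,
+                           0 /* base */, 0xffffffff /* limit */,
+                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                           DESC_S_MASK | DESC_W_MASK);
+}
+#endif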
+
+/* wrapper, just in case memory mappings must be changed */
+static inline void cpu_x86_set_cpl(CPUX86State *s, int cpl)
+{
+#if HF_CPL_MASK == 3
+    s->hflags = (s->hflags & ~HF_CPL_MASK) | cpl;
+#else
+#error HF_CPL_MASK is hardcoded
+#endif
+}
+
+/* used for debug or cpu save/restore */
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f);
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper);
+
+/* the following helpers are only usable in user mode simulation as
+   they can trigger unexpected exceptions */
+void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
+void cpu_x86_fsave(CPUX86State *s, uint8_t *ptr, int data32);
+void cpu_x86_frstor(CPUX86State *s, uint8_t *ptr, int data32);
+
+/* you can call this signal handler from your SIGBUS and SIGSEGV
+   signal handlers to inform the virtual CPU of exceptions. A non-zero
+   value is returned if the signal was handled by the virtual CPU.  */
+int cpu_x86_signal_handler(int host_signum, void *pinfo, 
+                           void *puc);
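+
+/* Illustrative sketch, not part of the original source: a host SIGSEGV
+   handler installed with sigaction(SA_SIGINFO) that forwards the fault to
+   the virtual CPU; aborting on an unhandled fault is only an example
+   policy. */
+#if 0
+static void example_segv_handler(int host_signum, siginfo_t *info, void *puc)
+{
+    if (!cpu_x86_signal_handler(host_signum, info, puc))
+        abort(); /* the fault did not belong to the virtual CPU */
+}
+#endif
+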
+void cpu_x86_set_a20(CPUX86State *env, int a20_state);
+
+uint64_t cpu_get_tsc(CPUX86State *env);
+
+void cpu_set_apic_base(CPUX86State *env, uint64_t val);
+uint64_t cpu_get_apic_base(CPUX86State *env);
+void cpu_set_apic_tpr(CPUX86State *env, uint8_t val);
+#ifndef NO_CPU_IO_DEFS
+uint8_t cpu_get_apic_tpr(CPUX86State *env);
+#endif
+uint64_t cpu_apic_rdmsr(CPUX86State *env, uint32_t reg);
+void     cpu_apic_wrmsr(CPUX86State *env, uint32_t reg, uint64_t value);
+void cpu_smm_update(CPUX86State *env);
+
+/* will be suppressed */
+void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0);
+
+/* used to debug */
+#define X86_DUMP_FPU  0x0001 /* dump FPU state too */
+#define X86_DUMP_CCOP 0x0002 /* dump qemu flag cache */
+
+#ifdef USE_KQEMU
+static inline int cpu_get_time_fast(void)
+{
+    int low, high;
+    asm volatile("rdtsc" : "=a" (low), "=d" (high));
+    return low;
+}
+#endif
+
+#ifdef VBOX
+void cpu_trap_raw(CPUX86State *env1);
+
+/* in helper.c */
+uint8_t read_byte(CPUX86State *env1, target_ulong addr);
+uint16_t read_word(CPUX86State *env1, target_ulong addr);
+void write_byte(CPUX86State *env1, target_ulong addr, uint8_t val);
+uint32_t read_dword(CPUX86State *env1, target_ulong addr);
+void write_word(CPUX86State *env1, target_ulong addr, uint16_t val);
+void write_dword(CPUX86State *env1, target_ulong addr, uint32_t val);
+/* in helper.c */
+int emulate_single_instr(CPUX86State *env1);
+int get_ss_esp_from_tss_raw(CPUX86State *env1, uint32_t *ss_ptr, uint32_t *esp_ptr, int dpl);
+
+void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr);
+void save_raw_fp_state(CPUX86State *env, uint8_t *ptr);
+
+#endif
+
+#define TARGET_PAGE_BITS 12
+#include "cpu-all.h"
+
+#endif /* CPU_I386_H */
Index: /trunk/src/recompiler_new/target-i386/exec.h
===================================================================
--- /trunk/src/recompiler_new/target-i386/exec.h	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/exec.h	(revision 13168)
@@ -0,0 +1,616 @@
+/*
+ *  i386 execution defines
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#include "config.h"
+#include "dyngen-exec.h"
+
+/* XXX: factorize this mess */
+#ifdef TARGET_X86_64
+#define TARGET_LONG_BITS 64
+#else
+#define TARGET_LONG_BITS 32
+#endif
+
+#include "cpu-defs.h"
+
+/* at least 4 register variables are defined */
+register struct CPUX86State *env asm(AREG0);
+
+#if TARGET_LONG_BITS > HOST_LONG_BITS
+
+/* no registers can be used */
+#define T0 (env->t0)
+#define T1 (env->t1)
+#define T2 (env->t2)
+
+#else
+
+/* XXX: use unsigned long instead of target_ulong - better code will
+   be generated for 64 bit CPUs */
+register target_ulong T0 asm(AREG1);
+register target_ulong T1 asm(AREG2);
+register target_ulong T2 asm(AREG3);
+
+/* if more registers are available, we define some registers too */
+#ifdef AREG4
+register target_ulong EAX asm(AREG4);
+#define reg_EAX
+#endif
+
+#ifdef AREG5
+register target_ulong ESP asm(AREG5);
+#define reg_ESP
+#endif
+
+#ifdef AREG6
+register target_ulong EBP asm(AREG6);
+#define reg_EBP
+#endif
+
+#ifdef AREG7
+register target_ulong ECX asm(AREG7);
+#define reg_ECX
+#endif
+
+#ifdef AREG8
+register target_ulong EDX asm(AREG8);
+#define reg_EDX
+#endif
+
+#ifdef AREG9
+register target_ulong EBX asm(AREG9);
+#define reg_EBX
+#endif
+
+#ifdef AREG10
+register target_ulong ESI asm(AREG10);
+#define reg_ESI
+#endif
+
+#ifdef AREG11
+register target_ulong EDI asm(AREG11);
+#define reg_EDI
+#endif
+
+#endif /* ! (TARGET_LONG_BITS > HOST_LONG_BITS) */
+
+#define A0 T2
+
+extern FILE *logfile;
+extern int loglevel;
+
+#ifndef reg_EAX
+#define EAX (env->regs[R_EAX])
+#endif
+#ifndef reg_ECX
+#define ECX (env->regs[R_ECX])
+#endif
+#ifndef reg_EDX
+#define EDX (env->regs[R_EDX])
+#endif
+#ifndef reg_EBX
+#define EBX (env->regs[R_EBX])
+#endif
+#ifndef reg_ESP
+#define ESP (env->regs[R_ESP])
+#endif
+#ifndef reg_EBP
+#define EBP (env->regs[R_EBP])
+#endif
+#ifndef reg_ESI
+#define ESI (env->regs[R_ESI])
+#endif
+#ifndef reg_EDI
+#define EDI (env->regs[R_EDI])
+#endif
+#define EIP  (env->eip)
+#define DF  (env->df)
+
+#define CC_SRC (env->cc_src)
+#define CC_DST (env->cc_dst)
+#define CC_OP  (env->cc_op)
+
+/* float macros */
+#define FT0    (env->ft0)
+#define ST0    (env->fpregs[env->fpstt].d)
+#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
+#define ST1    ST(1)
+
+#ifdef USE_FP_CONVERT
+#define FP_CONVERT  (env->fp_convert)
+#endif
+
+#include "cpu.h"
+#include "exec-all.h"
+
+typedef struct CCTable {
+    int (*compute_all)(void); /* return all the flags */
+    int (*compute_c)(void);  /* return the C flag */
+} CCTable;
+
+extern CCTable cc_table[];
+
+void load_seg(int seg_reg, int selector);
+void helper_ljmp_protected_T0_T1(int next_eip);
+void helper_lcall_real_T0_T1(int shift, int next_eip);
+void helper_lcall_protected_T0_T1(int shift, int next_eip);
+void helper_iret_real(int shift);
+void helper_iret_protected(int shift, int next_eip);
+void helper_lret_protected(int shift, int addend);
+void helper_lldt_T0(void);
+void helper_ltr_T0(void);
+void helper_movl_crN_T0(int reg);
+void helper_movl_drN_T0(int reg);
+void helper_invlpg(target_ulong addr);
+void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0);
+void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3);
+void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);
+void cpu_x86_flush_tlb(CPUX86State *env, target_ulong addr);
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, 
+                             int is_write, int is_user, int is_softmmu);
+void tlb_fill(target_ulong addr, int is_write, int is_user, 
+              void *retaddr);
+void __hidden cpu_lock(void);
+void __hidden cpu_unlock(void);
+void do_interrupt(int intno, int is_int, int error_code, 
+                  target_ulong next_eip, int is_hw);
+void do_interrupt_user(int intno, int is_int, int error_code, 
+                       target_ulong next_eip);
+void raise_interrupt(int intno, int is_int, int error_code, 
+                     int next_eip_addend);
+void raise_exception_err(int exception_index, int error_code);
+void raise_exception(int exception_index);
+void do_smm_enter(void);
+void __hidden cpu_loop_exit(void);
+
+void OPPROTO op_movl_eflags_T0(void);
+void OPPROTO op_movl_T0_eflags(void);
+#ifdef VBOX
+void OPPROTO op_movl_T0_eflags_vme(void);
+void OPPROTO op_movw_eflags_T0_vme(void);
+void OPPROTO op_cli_vme(void);
+void OPPROTO op_sti_vme(void);
+#endif
+void helper_divl_EAX_T0(void);
+void helper_idivl_EAX_T0(void);
+void helper_mulq_EAX_T0(void);
+void helper_imulq_EAX_T0(void);
+void helper_imulq_T0_T1(void);
+void helper_divq_EAX_T0(void);
+void helper_idivq_EAX_T0(void);
+void helper_bswapq_T0(void);
+void helper_cmpxchg8b(void);
+void helper_single_step(void);
+void helper_cpuid(void);
+void helper_enter_level(int level, int data32);
+void helper_enter64_level(int level, int data64);
+void helper_sysenter(void);
+void helper_sysexit(void);
+void helper_syscall(int next_eip_addend);
+void helper_sysret(int dflag);
+void helper_rdtsc(void);
+void helper_rdmsr(void);
+void helper_wrmsr(void);
+void helper_lsl(void);
+void helper_lar(void);
+void helper_verr(void);
+void helper_verw(void);
+void helper_rsm(void);
+
+#ifdef VBOX
+void helper_external_event(void);
+void helper_record_call(void);
+
+/* in helper.c */
+void sync_seg(CPUX86State *env1, int seg_reg, int selector);
+void sync_ldtr(CPUX86State *env1, int selector);
+int  sync_tr(CPUX86State *env1, int selector);
+
+#endif
+
+void check_iob_T0(void);
+void check_iow_T0(void);
+void check_iol_T0(void);
+void check_iob_DX(void);
+void check_iow_DX(void);
+void check_iol_DX(void);
+
+#if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+
+static inline double ldfq(target_ulong ptr)
+{
+    union {
+        double d;
+        uint64_t i;
+    } u;
+    u.i = ldq(ptr);
+    return u.d;
+}
+
+static inline void stfq(target_ulong ptr, double v)
+{
+    union {
+        double d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    stq(ptr, u.i);
+}
+
+static inline float ldfl(target_ulong ptr)
+{
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.i = ldl(ptr);
+    return u.f;
+}
+
+static inline void stfl(target_ulong ptr, float v)
+{
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    stl(ptr, u.i);
+}
+
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+#ifdef USE_X86LDOUBLE
+/* use long double functions */
+#define floatx_to_int32 floatx80_to_int32
+#define floatx_to_int64 floatx80_to_int64
+#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero
+#define floatx_abs floatx80_abs
+#define floatx_chs floatx80_chs
+#define floatx_round_to_int floatx80_round_to_int
+#define floatx_compare floatx80_compare
+#define floatx_compare_quiet floatx80_compare_quiet
+#ifdef VBOX
+#undef sin
+#undef cos
+#undef sqrt
+#undef pow
+#undef log
+#undef tan
+#undef atan2
+#undef floor
+#undef ceil
+#undef ldexp
+#endif /* VBOX */
+#define sin sinl
+#define cos cosl
+#define sqrt sqrtl
+#define pow powl
+#define log logl
+#define tan tanl
+#define atan2 atan2l
+#define floor floorl
+#define ceil ceill
+#define ldexp ldexpl
+#else
+#define floatx_to_int32 float64_to_int32
+#define floatx_to_int64 float64_to_int64
+#define floatx_to_int32_round_to_zero float64_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero float64_to_int64_round_to_zero
+#define floatx_abs float64_abs
+#define floatx_chs float64_chs
+#define floatx_round_to_int float64_round_to_int
+#define floatx_compare float64_compare
+#define floatx_compare_quiet float64_compare_quiet
+#endif
+
+extern CPU86_LDouble sin(CPU86_LDouble x);
+extern CPU86_LDouble cos(CPU86_LDouble x);
+extern CPU86_LDouble sqrt(CPU86_LDouble x);
+extern CPU86_LDouble pow(CPU86_LDouble, CPU86_LDouble);
+extern CPU86_LDouble log(CPU86_LDouble x);
+extern CPU86_LDouble tan(CPU86_LDouble x);
+extern CPU86_LDouble atan2(CPU86_LDouble, CPU86_LDouble);
+extern CPU86_LDouble floor(CPU86_LDouble x);
+extern CPU86_LDouble ceil(CPU86_LDouble x);
+
+#define RC_MASK         0xc00
+#define RC_NEAR		0x000
+#define RC_DOWN		0x400
+#define RC_UP		0x800
+#define RC_CHOP		0xc00
+
+#define MAXTAN 9223372036854775808.0
+
+#ifdef USE_X86LDOUBLE
+
+/* only for x86 */
+typedef union {
+    long double d;
+    struct {
+        unsigned long long lower;
+        unsigned short upper;
+    } l;
+} CPU86_LDoubleU;
+
+/* the following deal with x86 long double-precision numbers */
+#define MAXEXPD 0x7fff
+#define EXPBIAS 16383
+#define EXPD(fp)	(fp.l.upper & 0x7fff)
+#define SIGND(fp)	((fp.l.upper) & 0x8000)
+#define MANTD(fp)       (fp.l.lower)
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
+
+#else
+
+/* NOTE: arm is awkward here as the two 32 bit words of a double are stored in big endian order ! */
+typedef union {
+    double d;
+#if !defined(WORDS_BIGENDIAN) && !defined(__arm__)
+    struct {
+        uint32_t lower;
+        int32_t upper;
+    } l;
+#else
+    struct {
+        int32_t upper;
+        uint32_t lower;
+    } l;
+#endif
+#ifndef __arm__
+    int64_t ll;
+#endif
+} CPU86_LDoubleU;
+
+/* the following deal with IEEE double-precision numbers */
+#define MAXEXPD 0x7ff
+#define EXPBIAS 1023
+#define EXPD(fp)	(((fp.l.upper) >> 20) & 0x7FF)
+#define SIGND(fp)	((fp.l.upper) & 0x80000000)
+#ifdef __arm__
+#define MANTD(fp)	(fp.l.lower | ((uint64_t)(fp.l.upper & ((1 << 20) - 1)) << 32))
+#else
+#define MANTD(fp)	(fp.ll & ((1LL << 52) - 1))
+#endif
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7ff << 20)) | (EXPBIAS << 20)
+#endif
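+
+/* Illustrative sketch, not part of the original source: the accessor
+   macros above split an FPU value into sign, unbiased exponent and
+   mantissa regardless of which CPU86_LDoubleU layout is in use. */
+#if 0
+static void example_decode_fp(CPU86_LDouble f)
+{
+    CPU86_LDoubleU u;
+    u.d = f;
+    printf("sign=%d exp=%d mant=%#llx\n",
+           SIGND(u) != 0, (int)(EXPD(u) - EXPBIAS),
+           (unsigned long long)MANTD(u));
+}
+#endif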
+
+static inline void fpush(void)
+{
+    env->fpstt = (env->fpstt - 1) & 7;
+    env->fptags[env->fpstt] = 0; /* validate stack entry */
+}
+
+static inline void fpop(void)
+{
+    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
+    env->fpstt = (env->fpstt + 1) & 7;
+}
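+
+/* Illustrative sketch, not part of the original source: an FPU constant
+   load typically pairs fpush() with the ST0 macro, in the style of the
+   constant-load handling in the op/helper code. */
+#if 0
+static void example_fld1(void)
+{
+    fpush();   /* allocate and validate a new top-of-stack entry */
+    ST0 = 1.0; /* then store the constant into it */
+}
+#endif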
+
+#ifndef USE_X86LDOUBLE
+static inline CPU86_LDouble helper_fldt(target_ulong ptr)
+{
+    CPU86_LDoubleU temp;
+    int upper, e;
+    uint64_t ll;
+
+    /* mantissa */
+    upper = lduw(ptr + 8);
+    /* XXX: handle overflow ? */
+    e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */
+    e |= (upper >> 4) & 0x800; /* sign */
+    ll = (ldq(ptr) >> 11) & ((1LL << 52) - 1);
+#ifdef __arm__
+    temp.l.upper = (e << 20) | (ll >> 32);
+    temp.l.lower = ll;
+#else
+    temp.ll = ll | ((uint64_t)e << 52);
+#endif
+    return temp.d;
+}
+
+static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
+{
+    CPU86_LDoubleU temp;
+    int e;
+
+    temp.d = f;
+    /* mantissa */
+    stq(ptr, (MANTD(temp) << 11) | (1LL << 63));
+    /* exponent + sign */
+    e = EXPD(temp) - EXPBIAS + 16383;
+    e |= SIGND(temp) >> 16;
+    stw(ptr + 8, e);
+}
+#else
+
+/* XXX: same endianness assumed */
+
+#ifdef CONFIG_USER_ONLY
+
+static inline CPU86_LDouble helper_fldt(target_ulong ptr)
+{
+    return *(CPU86_LDouble *)ptr;
+}
+
+static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
+{
+    *(CPU86_LDouble *)ptr = f;
+}
+
+#else
+
+/* we use memory access macros */
+
+static inline CPU86_LDouble helper_fldt(target_ulong ptr)
+{
+    CPU86_LDoubleU temp;
+
+    temp.l.lower = ldq(ptr);
+    temp.l.upper = lduw(ptr + 8);
+    return temp.d;
+}
+
+static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
+{
+    CPU86_LDoubleU temp;
+    
+    temp.d = f;
+    stq(ptr, temp.l.lower);
+    stw(ptr + 8, temp.l.upper);
+}
+
+#endif /* !CONFIG_USER_ONLY */
+
+#endif /* USE_X86LDOUBLE */
+
+#define FPUS_IE (1 << 0)
+#define FPUS_DE (1 << 1)
+#define FPUS_ZE (1 << 2)
+#define FPUS_OE (1 << 3)
+#define FPUS_UE (1 << 4)
+#define FPUS_PE (1 << 5)
+#define FPUS_SF (1 << 6)
+#define FPUS_SE (1 << 7)
+#define FPUS_B  (1 << 15)
+
+#define FPUC_EM 0x3f
+
+extern const CPU86_LDouble f15rk[7];
+
+void helper_fldt_ST0_A0(void);
+void helper_fstt_ST0_A0(void);
+void fpu_raise_exception(void);
+CPU86_LDouble helper_fdiv(CPU86_LDouble a, CPU86_LDouble b);
+void helper_fbld_ST0_A0(void);
+void helper_fbst_ST0_A0(void);
+void helper_f2xm1(void);
+void helper_fyl2x(void);
+void helper_fptan(void);
+void helper_fpatan(void);
+void helper_fxtract(void);
+void helper_fprem1(void);
+void helper_fprem(void);
+void helper_fyl2xp1(void);
+void helper_fsqrt(void);
+void helper_fsincos(void);
+void helper_frndint(void);
+void helper_fscale(void);
+void helper_fsin(void);
+void helper_fcos(void);
+void helper_fxam_ST0(void);
+void helper_fstenv(target_ulong ptr, int data32);
+void helper_fldenv(target_ulong ptr, int data32);
+void helper_fsave(target_ulong ptr, int data32);
+void helper_frstor(target_ulong ptr, int data32);
+void helper_fxsave(target_ulong ptr, int data64);
+void helper_fxrstor(target_ulong ptr, int data64);
+void restore_native_fp_state(CPUState *env);
+void save_native_fp_state(CPUState *env);
+float approx_rsqrt(float a);
+float approx_rcp(float a);
+void update_fp_status(void);
+void helper_hlt(void);
+void helper_monitor(void);
+void helper_mwait(void);
+
+extern const uint8_t parity_table[256];
+extern const uint8_t rclw_table[32];
+extern const uint8_t rclb_table[32];
+
+static inline uint32_t compute_eflags(void)
+{
+    return env->eflags | cc_table[CC_OP].compute_all() | (DF & DF_MASK);
+}
+
+/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */
+static inline void load_eflags(int eflags, int update_mask)
+{
+    CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    DF = 1 - (2 * ((eflags >> 10) & 1));
+    env->eflags = (env->eflags & ~update_mask) | 
+        (eflags & update_mask);
+}
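+
+/* Illustrative sketch, not part of the original source: per the note
+   above, a caller of load_eflags() is expected to switch the lazy flag
+   state to CC_OP_EFLAGS itself.  The update mask shown here is just an
+   example. */
+#if 0
+static void example_write_eflags(uint32_t val)
+{
+    load_eflags(val, TF_MASK | AC_MASK | ID_MASK | IF_MASK | RF_MASK);
+    CC_OP = CC_OP_EFLAGS;
+}
+#endif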
+
+static inline void env_to_regs(void)
+{
+#ifdef reg_EAX
+    EAX = env->regs[R_EAX];
+#endif
+#ifdef reg_ECX
+    ECX = env->regs[R_ECX];
+#endif
+#ifdef reg_EDX
+    EDX = env->regs[R_EDX];
+#endif
+#ifdef reg_EBX
+    EBX = env->regs[R_EBX];
+#endif
+#ifdef reg_ESP
+    ESP = env->regs[R_ESP];
+#endif
+#ifdef reg_EBP
+    EBP = env->regs[R_EBP];
+#endif
+#ifdef reg_ESI
+    ESI = env->regs[R_ESI];
+#endif
+#ifdef reg_EDI
+    EDI = env->regs[R_EDI];
+#endif
+}
+
+static inline void regs_to_env(void)
+{
+#ifdef reg_EAX
+    env->regs[R_EAX] = EAX;
+#endif
+#ifdef reg_ECX
+    env->regs[R_ECX] = ECX;
+#endif
+#ifdef reg_EDX
+    env->regs[R_EDX] = EDX;
+#endif
+#ifdef reg_EBX
+    env->regs[R_EBX] = EBX;
+#endif
+#ifdef reg_ESP
+    env->regs[R_ESP] = ESP;
+#endif
+#ifdef reg_EBP
+    env->regs[R_EBP] = EBP;
+#endif
+#ifdef reg_ESI
+    env->regs[R_ESI] = ESI;
+#endif
+#ifdef reg_EDI
+    env->regs[R_EDI] = EDI;
+#endif
+}
Index: /trunk/src/recompiler_new/target-i386/helper.c
===================================================================
--- /trunk/src/recompiler_new/target-i386/helper.c	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/helper.c	(revision 13168)
@@ -0,0 +1,4844 @@
+/*
+ *  i386 helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#ifdef VBOX
+# include <VBox/err.h>
+#endif
+#include "exec.h"
+
+//#define DEBUG_PCALL
+
+#if 0
+#define raise_exception_err(a, b)\
+do {\
+    if (logfile)\
+        fprintf(logfile, "raise_exception line=%d\n", __LINE__);\
+    (raise_exception_err)(a, b);\
+} while (0)
+#endif
+
+const uint8_t parity_table[256] = {
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+};
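+
+/* Illustrative sketch, not part of the original source: the lazy flag
+   helpers index this table with the low 8 bits of a result to obtain the
+   CC_P bit directly instead of recounting the set bits. */
+#if 0
+static int example_parity_flag(uint32_t result)
+{
+    return parity_table[result & 0xff]; /* CC_P if even parity, else 0 */
+}
+#endif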
+
+/* modulo 17 table */
+const uint8_t rclw_table[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8, 9,10,11,12,13,14,15,
+   16, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 9,10,11,12,13,14,
+};
+
+/* modulo 9 table */
+const uint8_t rclb_table[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 0, 1, 2, 3, 4, 5,
+    6, 7, 8, 0, 1, 2, 3, 4,
+};
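+
+/* Illustrative sketch, not part of the original source: RCL on a 16 bit
+   operand rotates through 17 bits (16 data bits plus CF), so the raw CL
+   count is reduced by table lookup rather than a modulo operation;
+   rclb_table does the same modulo 9 for 8 bit operands. */
+#if 0
+static int example_rclw_count(int cl)
+{
+    return rclw_table[cl & 0x1f]; /* e.g. 20 maps to 3 (20 mod 17) */
+}
+#endif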
+
+const CPU86_LDouble f15rk[7] =
+{
+    0.00000000000000000000L,
+    1.00000000000000000000L,
+    3.14159265358979323851L,  /*pi*/
+    0.30102999566398119523L,  /*lg2*/
+    0.69314718055994530943L,  /*ln2*/
+    1.44269504088896340739L,  /*l2e*/
+    3.32192809488736234781L,  /*l2t*/
+};
+
+/* thread support */
+
+spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+
+void cpu_lock(void)
+{
+    spin_lock(&global_cpu_lock);
+}
+
+void cpu_unlock(void)
+{
+    spin_unlock(&global_cpu_lock);
+}
+
+void cpu_loop_exit(void)
+{
+    /* NOTE: the registers at this point must be saved by hand because
+       longjmp restores them */
+    regs_to_env();
+    longjmp(env->jmp_env, 1);
+}
+
+/* return non-zero on error */
+static inline int load_segment(uint32_t *e1_ptr, uint32_t *e2_ptr,
+                               int selector)
+{
+    SegmentCache *dt;
+    int index;
+    target_ulong ptr;
+
+    if (selector & 0x4)
+        dt = &env->ldt;
+    else
+        dt = &env->gdt;
+    index = selector & ~7;
+    if ((index + 7) > dt->limit)
+        return -1;
+    ptr = dt->base + index;
+    *e1_ptr = ldl_kernel(ptr);
+    *e2_ptr = ldl_kernel(ptr + 4);
+    return 0;
+}
+
+static inline unsigned int get_seg_limit(uint32_t e1, uint32_t e2)
+{
+    unsigned int limit;
+    limit = (e1 & 0xffff) | (e2 & 0x000f0000);
+    if (e2 & DESC_G_MASK)
+        limit = (limit << 12) | 0xfff;
+    return limit;
+}
+
+static inline uint32_t get_seg_base(uint32_t e1, uint32_t e2)
+{
+    return ((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000));
+}
+
+static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1, uint32_t e2)
+{
+    sc->base = get_seg_base(e1, e2);
+    sc->limit = get_seg_limit(e1, e2);
+    sc->flags = e2;
+}
+
+/* init the segment cache in vm86 mode. */
+static inline void load_seg_vm(int seg, int selector)
+{
+    selector &= 0xffff;
+    cpu_x86_load_seg_cache(env, seg, selector,
+                           (selector << 4), 0xffff, 0);
+}
+
+static inline void get_ss_esp_from_tss(uint32_t *ss_ptr,
+                                       uint32_t *esp_ptr, int dpl)
+{
+    int type, index, shift;
+
+#if 0
+    {
+        int i;
+        printf("TR: base=%p limit=%x\n", env->tr.base, env->tr.limit);
+        for(i=0;i<env->tr.limit;i++) {
+            printf("%02x ", env->tr.base[i]);
+            if ((i & 7) == 7) printf("\n");
+        }
+        printf("\n");
+    }
+#endif
+
+    if (!(env->tr.flags & DESC_P_MASK))
+        cpu_abort(env, "invalid tss");
+    type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+    if ((type & 7) != 1)
+        cpu_abort(env, "invalid tss type %d", type);
+    shift = type >> 3;
+    index = (dpl * 4 + 2) << shift;
+    if (index + (4 << shift) - 1 > env->tr.limit)
+        raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+    if (shift == 0) {
+        *esp_ptr = lduw_kernel(env->tr.base + index);
+        *ss_ptr = lduw_kernel(env->tr.base + index + 2);
+    } else {
+        *esp_ptr = ldl_kernel(env->tr.base + index);
+        *ss_ptr = lduw_kernel(env->tr.base + index + 4);
+    }
+}
+
+/* XXX: merge with load_seg() */
+static void tss_load_seg(int seg_reg, int selector)
+{
+    uint32_t e1, e2;
+    int rpl, dpl, cpl;
+
+    if ((selector & 0xfffc) != 0) {
+        if (load_segment(&e1, &e2, selector) != 0)
+            raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+        if (!(e2 & DESC_S_MASK))
+            raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+        rpl = selector & 3;
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        cpl = env->hflags & HF_CPL_MASK;
+        if (seg_reg == R_CS) {
+            if (!(e2 & DESC_CS_MASK))
+                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+            /* XXX: is it correct ? */
+            if (dpl != rpl)
+                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+            if ((e2 & DESC_C_MASK) && dpl > rpl)
+                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+        } else if (seg_reg == R_SS) {
+            /* SS must be writable data */
+            if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
+                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+            if (dpl != cpl || dpl != rpl)
+                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+        } else {
+            /* not readable code */
+            if ((e2 & DESC_CS_MASK) && !(e2 & DESC_R_MASK))
+                raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+            /* if data or non-conforming code, check the rights */
+            if (((e2 >> DESC_TYPE_SHIFT) & 0xf) < 12) {
+                if (dpl < cpl || dpl < rpl)
+                    raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+            }
+        }
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+        cpu_x86_load_seg_cache(env, seg_reg, selector,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+    } else {
+        if (seg_reg == R_SS || seg_reg == R_CS)
+            raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+    }
+}
+
+#define SWITCH_TSS_JMP  0
+#define SWITCH_TSS_IRET 1
+#define SWITCH_TSS_CALL 2
+
+/* XXX: restore CPU state in registers (PowerPC case) */
+static void switch_tss(int tss_selector,
+                       uint32_t e1, uint32_t e2, int source,
+                       uint32_t next_eip)
+{
+    int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, v1, v2, i;
+    target_ulong tss_base;
+    uint32_t new_regs[8], new_segs[6];
+    uint32_t new_eflags, new_eip, new_cr3, new_ldt, new_trap;
+    uint32_t old_eflags, eflags_mask;
+    SegmentCache *dt;
+    int index;
+    target_ulong ptr;
+
+    type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+#ifdef DEBUG_PCALL
+    if (loglevel & CPU_LOG_PCALL)
+        fprintf(logfile, "switch_tss: sel=0x%04x type=%d src=%d\n", tss_selector, type, source);
+#endif
+
+#if defined(VBOX) && defined(DEBUG)
+    printf("switch_tss %x %x %x %d %08x\n", tss_selector, e1, e2, source, next_eip);
+#endif
+
+    /* if task gate, we read the TSS segment and we load it */
+    if (type == 5) {
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc);
+        tss_selector = e1 >> 16;
+        if (tss_selector & 4)
+            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+        if (load_segment(&e1, &e2, tss_selector) != 0)
+            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+        if (e2 & DESC_S_MASK)
+            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        if ((type & 7) != 1)
+            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+    }
+
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc);
+
+    if (type & 8)
+        tss_limit_max = 103;
+    else
+        tss_limit_max = 43;
+    tss_limit = get_seg_limit(e1, e2);
+    tss_base = get_seg_base(e1, e2);
+    if ((tss_selector & 4) != 0 ||
+        tss_limit < tss_limit_max)
+        raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+    old_type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+    if (old_type & 8)
+        old_tss_limit_max = 103;
+    else
+        old_tss_limit_max = 43;
+
+    /* read all the registers from the new TSS */
+    if (type & 8) {
+        /* 32 bit */
+        new_cr3 = ldl_kernel(tss_base + 0x1c);
+        new_eip = ldl_kernel(tss_base + 0x20);
+        new_eflags = ldl_kernel(tss_base + 0x24);
+        for(i = 0; i < 8; i++)
+            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+        for(i = 0; i < 6; i++)
+            new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
+        new_ldt = lduw_kernel(tss_base + 0x60);
+        new_trap = ldl_kernel(tss_base + 0x64);
+    } else {
+        /* 16 bit */
+        new_cr3 = 0;
+        new_eip = lduw_kernel(tss_base + 0x0e);
+        new_eflags = lduw_kernel(tss_base + 0x10);
+        for(i = 0; i < 8; i++)
+            new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000;
+        for(i = 0; i < 4; i++)
+            new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 4));
+        new_ldt = lduw_kernel(tss_base + 0x2a);
+        new_segs[R_FS] = 0;
+        new_segs[R_GS] = 0;
+        new_trap = 0;
+    }
+
+    /* NOTE: we must avoid memory exceptions during the task switch,
+       so we make dummy accesses before */
+    /* XXX: it can still fail in some cases, so a bigger hack is
+       necessary to validate the TLB after having done the accesses */
+
+    v1 = ldub_kernel(env->tr.base);
+    v2 = ldub_kernel(env->tr.base + old_tss_limit_max);
+    stb_kernel(env->tr.base, v1);
+    stb_kernel(env->tr.base + old_tss_limit_max, v2);
+
+    /* clear busy bit (it is restartable) */
+    if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) {
+        target_ulong ptr;
+        uint32_t e2;
+        ptr = env->gdt.base + (env->tr.selector & ~7);
+        e2 = ldl_kernel(ptr + 4);
+        e2 &= ~DESC_TSS_BUSY_MASK;
+        stl_kernel(ptr + 4, e2);
+    }
+    old_eflags = compute_eflags();
+    if (source == SWITCH_TSS_IRET)
+        old_eflags &= ~NT_MASK;
+
+    /* save the current state in the old TSS */
+    if (type & 8) {
+        /* 32 bit */
+        stl_kernel(env->tr.base + 0x20, next_eip);
+        stl_kernel(env->tr.base + 0x24, old_eflags);
+        stl_kernel(env->tr.base + (0x28 + 0 * 4), EAX);
+        stl_kernel(env->tr.base + (0x28 + 1 * 4), ECX);
+        stl_kernel(env->tr.base + (0x28 + 2 * 4), EDX);
+        stl_kernel(env->tr.base + (0x28 + 3 * 4), EBX);
+        stl_kernel(env->tr.base + (0x28 + 4 * 4), ESP);
+        stl_kernel(env->tr.base + (0x28 + 5 * 4), EBP);
+        stl_kernel(env->tr.base + (0x28 + 6 * 4), ESI);
+        stl_kernel(env->tr.base + (0x28 + 7 * 4), EDI);
+        for(i = 0; i < 6; i++)
+            stw_kernel(env->tr.base + (0x48 + i * 4), env->segs[i].selector);
+#if defined(VBOX) && defined(DEBUG)
+        printf("TSS 32 bits switch\n");
+        printf("Saving CS=%08X\n", env->segs[R_CS].selector);
+#endif
+    } else {
+        /* 16 bit */
+        stw_kernel(env->tr.base + 0x0e, next_eip);
+        stw_kernel(env->tr.base + 0x10, old_eflags);
+        stw_kernel(env->tr.base + (0x12 + 0 * 2), EAX);
+        stw_kernel(env->tr.base + (0x12 + 1 * 2), ECX);
+        stw_kernel(env->tr.base + (0x12 + 2 * 2), EDX);
+        stw_kernel(env->tr.base + (0x12 + 3 * 2), EBX);
+        stw_kernel(env->tr.base + (0x12 + 4 * 2), ESP);
+        stw_kernel(env->tr.base + (0x12 + 5 * 2), EBP);
+        stw_kernel(env->tr.base + (0x12 + 6 * 2), ESI);
+        stw_kernel(env->tr.base + (0x12 + 7 * 2), EDI);
+        for(i = 0; i < 4; i++)
+            stw_kernel(env->tr.base + (0x22 + i * 4), env->segs[i].selector);
+    }
+
+    /* from now on, if an exception occurs, it will occur in the next task
+       context */
+
+    if (source == SWITCH_TSS_CALL) {
+        stw_kernel(tss_base, env->tr.selector);
+        new_eflags |= NT_MASK;
+    }
+
+    /* set busy bit */
+    if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_CALL) {
+        target_ulong ptr;
+        uint32_t e2;
+        ptr = env->gdt.base + (tss_selector & ~7);
+        e2 = ldl_kernel(ptr + 4);
+        e2 |= DESC_TSS_BUSY_MASK;
+        stl_kernel(ptr + 4, e2);
+    }
+
+    /* set the new CPU state */
+    /* from this point, any exception which occurs can give problems */
+    env->cr[0] |= CR0_TS_MASK;
+    env->hflags |= HF_TS_MASK;
+    env->tr.selector = tss_selector;
+    env->tr.base = tss_base;
+    env->tr.limit = tss_limit;
+    env->tr.flags = e2 & ~DESC_TSS_BUSY_MASK;
+
+    if ((type & 8) && (env->cr[0] & CR0_PG_MASK)) {
+        cpu_x86_update_cr3(env, new_cr3);
+    }
+
+    /* load all registers that cannot fault first, then load the ones
+       that may trigger exceptions */
+    env->eip = new_eip;
+    eflags_mask = TF_MASK | AC_MASK | ID_MASK |
+        IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK;
+    if (!(type & 8))
+        eflags_mask &= 0xffff;
+    load_eflags(new_eflags, eflags_mask);
+    /* XXX: what to do in 16 bit case ? */
+    EAX = new_regs[0];
+    ECX = new_regs[1];
+    EDX = new_regs[2];
+    EBX = new_regs[3];
+    ESP = new_regs[4];
+    EBP = new_regs[5];
+    ESI = new_regs[6];
+    EDI = new_regs[7];
+    if (new_eflags & VM_MASK) {
+        for(i = 0; i < 6; i++)
+            load_seg_vm(i, new_segs[i]);
+        /* in vm86, CPL is always 3 */
+        cpu_x86_set_cpl(env, 3);
+    } else {
+        /* CPL is set to the RPL of CS */
+        cpu_x86_set_cpl(env, new_segs[R_CS] & 3);
+        /* load just the selectors first, as loading the rest may trigger exceptions */
+        for(i = 0; i < 6; i++)
+            cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0);
+    }
+
+    env->ldt.selector = new_ldt & ~4;
+    env->ldt.base = 0;
+    env->ldt.limit = 0;
+    env->ldt.flags = 0;
+
+    /* load the LDT */
+    if (new_ldt & 4)
+        raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+
+    if ((new_ldt & 0xfffc) != 0) {
+        dt = &env->gdt;
+        index = new_ldt & ~7;
+        if ((index + 7) > dt->limit)
+            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+        ptr = dt->base + index;
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
+        if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
+            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+        load_seg_cache_raw_dt(&env->ldt, e1, e2);
+    }
+
+    /* load the segments */
+    if (!(new_eflags & VM_MASK)) {
+        tss_load_seg(R_CS, new_segs[R_CS]);
+        tss_load_seg(R_SS, new_segs[R_SS]);
+        tss_load_seg(R_ES, new_segs[R_ES]);
+        tss_load_seg(R_DS, new_segs[R_DS]);
+        tss_load_seg(R_FS, new_segs[R_FS]);
+        tss_load_seg(R_GS, new_segs[R_GS]);
+    }
+
+    /* check that EIP is in the CS segment limits */
+    if (new_eip > env->segs[R_CS].limit) {
+        /* XXX: different exception if CALL ? */
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+}
+
+/* check if Port I/O is allowed in TSS */
+static inline void check_io(int addr, int size)
+{
+    int io_offset, val, mask;
+
+    /* TSS must be a valid 32 bit one */
+    if (!(env->tr.flags & DESC_P_MASK) ||
+        ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 ||
+        env->tr.limit < 103)
+        goto fail;
+    io_offset = lduw_kernel(env->tr.base + 0x66);
+    io_offset += (addr >> 3);
+    /* Note: the check needs two bytes */
+    if ((io_offset + 1) > env->tr.limit)
+        goto fail;
+    val = lduw_kernel(env->tr.base + io_offset);
+    val >>= (addr & 7);
+    mask = (1 << size) - 1;
+    /* all bits must be zero to allow the I/O */
+    if ((val & mask) != 0) {
+    fail:
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+}
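+
+/* Illustrative sketch, not part of the original source: the bits tested
+   above correspond to one bit per port in the TSS I/O permission bitmap,
+   so a 2 byte access to an example port 0x1f1 requires the bits for ports
+   0x1f1 and 0x1f2 to be clear. */
+#if 0
+static void example_check_outw_0x1f1(void)
+{
+    check_io(0x1f1, 2); /* raises #GP(0) if either bit is set */
+}
+#endif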
+
+void check_iob_T0(void)
+{
+    check_io(T0, 1);
+}
+
+void check_iow_T0(void)
+{
+    check_io(T0, 2);
+}
+
+void check_iol_T0(void)
+{
+    check_io(T0, 4);
+}
+
+void check_iob_DX(void)
+{
+    check_io(EDX & 0xffff, 1);
+}
+
+void check_iow_DX(void)
+{
+    check_io(EDX & 0xffff, 2);
+}
+
+void check_iol_DX(void)
+{
+    check_io(EDX & 0xffff, 4);
+}
+
+static inline unsigned int get_sp_mask(unsigned int e2)
+{
+    if (e2 & DESC_B_MASK)
+        return 0xffffffff;
+    else
+        return 0xffff;
+}
+
+#ifdef TARGET_X86_64
+#define SET_ESP(val, sp_mask)\
+do {\
+    if ((sp_mask) == 0xffff)\
+        ESP = (ESP & ~0xffff) | ((val) & 0xffff);\
+    else if ((sp_mask) == 0xffffffffLL)\
+        ESP = (uint32_t)(val);\
+    else\
+        ESP = (val);\
+} while (0)
+#else
+#define SET_ESP(val, sp_mask) ESP = (ESP & ~(sp_mask)) | ((val) & (sp_mask))
+#endif
+
+/* XXX: add an is_user flag to have proper security support */
+#define PUSHW(ssp, sp, sp_mask, val)\
+{\
+    sp -= 2;\
+    stw_kernel((ssp) + (sp & (sp_mask)), (val));\
+}
+
+#define PUSHL(ssp, sp, sp_mask, val)\
+{\
+    sp -= 4;\
+    stl_kernel((ssp) + (sp & (sp_mask)), (val));\
+}
+
+#define POPW(ssp, sp, sp_mask, val)\
+{\
+    val = lduw_kernel((ssp) + (sp & (sp_mask)));\
+    sp += 2;\
+}
+
+#define POPL(ssp, sp, sp_mask, val)\
+{\
+    val = (uint32_t)ldl_kernel((ssp) + (sp & (sp_mask)));\
+    sp += 4;\
+}
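+
+/* Illustrative sketch, not part of the original source: the push/pop
+   macros work on a local stack pointer copy masked with sp_mask, and the
+   callers below commit the final value with SET_ESP() once every access
+   has succeeded. */
+#if 0
+static void example_push_return_frame(void)
+{
+    target_ulong ssp = env->segs[R_SS].base;
+    target_ulong sp = ESP;
+    uint32_t sp_mask = get_sp_mask(env->segs[R_SS].flags);
+    PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+    PUSHL(ssp, sp, sp_mask, env->eip);
+    SET_ESP(sp, sp_mask);
+}
+#endif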
+
+/* protected mode interrupt */
+static void do_interrupt_protected(int intno, int is_int, int error_code,
+                                   unsigned int next_eip, int is_hw)
+{
+    SegmentCache *dt;
+    target_ulong ptr, ssp;
+    int type, dpl, selector, ss_dpl, cpl;
+    int has_error_code, new_stack, shift;
+    uint32_t e1, e2, offset, ss, esp, ss_e1, ss_e2;
+    uint32_t old_eip, sp_mask;
+
+#ifdef VBOX
+    if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS)
+        cpu_loop_exit();
+#endif
+
+    has_error_code = 0;
+    if (!is_int && !is_hw) {
+        switch(intno) {
+        case 8:
+        case 10:
+        case 11:
+        case 12:
+        case 13:
+        case 14:
+        case 17:
+            has_error_code = 1;
+            break;
+        }
+    }
+    if (is_int)
+        old_eip = next_eip;
+    else
+        old_eip = env->eip;
+
+    dt = &env->idt;
+    if (intno * 8 + 7 > dt->limit)
+        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+    ptr = dt->base + intno * 8;
+    e1 = ldl_kernel(ptr);
+    e2 = ldl_kernel(ptr + 4);
+    /* check gate type */
+    type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+    switch(type) {
+    case 5: /* task gate */
+        /* must do that check here to return the correct error code */
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2);
+        switch_tss(intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip);
+        if (has_error_code) {
+            int type;
+            uint32_t mask;
+            /* push the error code */
+            type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+            shift = type >> 3;
+            if (env->segs[R_SS].flags & DESC_B_MASK)
+                mask = 0xffffffff;
+            else
+                mask = 0xffff;
+            esp = (ESP - (2 << shift)) & mask;
+            ssp = env->segs[R_SS].base + esp;
+            if (shift)
+                stl_kernel(ssp, error_code);
+            else
+                stw_kernel(ssp, error_code);
+            SET_ESP(esp, mask);
+        }
+        return;
+    case 6: /* 286 interrupt gate */
+    case 7: /* 286 trap gate */
+    case 14: /* 386 interrupt gate */
+    case 15: /* 386 trap gate */
+        break;
+    default:
+        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+        break;
+    }
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    /* check privilege if software int */
+    if (is_int && dpl < cpl)
+        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+    /* check valid bit */
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2);
+    selector = e1 >> 16;
+    offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+    if ((selector & 0xfffc) == 0)
+        raise_exception_err(EXCP0D_GPF, 0);
+
+    if (load_segment(&e1, &e2, selector) != 0)
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (dpl > cpl)
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+    if (!(e2 & DESC_C_MASK) && dpl < cpl) {
+        /* to inner privilege level */
+        get_ss_esp_from_tss(&ss, &esp, dpl);
+        if ((ss & 0xfffc) == 0)
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        if ((ss & 3) != dpl)
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        if (load_segment(&ss_e1, &ss_e2, ss) != 0)
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+        if (ss_dpl != dpl)
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        if (!(ss_e2 & DESC_S_MASK) ||
+            (ss_e2 & DESC_CS_MASK) ||
+            !(ss_e2 & DESC_W_MASK))
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+        if (!(ss_e2 & DESC_P_MASK))
+#ifdef VBOX /* See page 3-477 of 253666.pdf */
+            raise_exception_err(EXCP0C_STACK, ss & 0xfffc);
+#else
+            raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+#endif
+        new_stack = 1;
+        sp_mask = get_sp_mask(ss_e2);
+        ssp = get_seg_base(ss_e1, ss_e2);
+#if defined(VBOX) && defined(DEBUG)
+        printf("new stack %04X:%08X gate dpl=%d\n", ss, esp, dpl);
+#endif
+    } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
+        /* to same privilege level */
+        if (env->eflags & VM_MASK)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0;
+        sp_mask = get_sp_mask(env->segs[R_SS].flags);
+        ssp = env->segs[R_SS].base;
+        esp = ESP;
+        dpl = cpl;
+    } else {
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0; /* avoid warning */
+        sp_mask = 0; /* avoid warning */
+        ssp = 0; /* avoid warning */
+        esp = 0; /* avoid warning */
+    }
+
+    shift = type >> 3;
+
+#if 0
+    /* XXX: check that enough room is available */
+    push_size = 6 + (new_stack << 2) + (has_error_code << 1);
+    if (env->eflags & VM_MASK)
+        push_size += 8;
+    push_size <<= shift;
+#endif
+    if (shift == 1) {
+        if (new_stack) {
+            if (env->eflags & VM_MASK) {
+                PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector);
+                PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector);
+                PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector);
+                PUSHL(ssp, esp, sp_mask, env->segs[R_ES].selector);
+            }
+            PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector);
+            PUSHL(ssp, esp, sp_mask, ESP);
+        }
+        PUSHL(ssp, esp, sp_mask, compute_eflags());
+        PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector);
+        PUSHL(ssp, esp, sp_mask, old_eip);
+        if (has_error_code) {
+            PUSHL(ssp, esp, sp_mask, error_code);
+        }
+    } else {
+        if (new_stack) {
+            if (env->eflags & VM_MASK) {
+                PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector);
+                PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector);
+                PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector);
+                PUSHW(ssp, esp, sp_mask, env->segs[R_ES].selector);
+            }
+            PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector);
+            PUSHW(ssp, esp, sp_mask, ESP);
+        }
+        PUSHW(ssp, esp, sp_mask, compute_eflags());
+        PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector);
+        PUSHW(ssp, esp, sp_mask, old_eip);
+        if (has_error_code) {
+            PUSHW(ssp, esp, sp_mask, error_code);
+        }
+    }
+
+    if (new_stack) {
+        if (env->eflags & VM_MASK) {
+            cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
+            cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0);
+            cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0);
+            cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0, 0);
+        }
+        ss = (ss & ~3) | dpl;
+        cpu_x86_load_seg_cache(env, R_SS, ss,
+                               ssp, get_seg_limit(ss_e1, ss_e2), ss_e2);
+    }
+    SET_ESP(esp, sp_mask);
+
+    selector = (selector & ~3) | dpl;
+    cpu_x86_load_seg_cache(env, R_CS, selector,
+                   get_seg_base(e1, e2),
+                   get_seg_limit(e1, e2),
+                   e2);
+    cpu_x86_set_cpl(env, dpl);
+    env->eip = offset;
+
+    /* interrupt gates clear the IF mask */
+    if ((type & 1) == 0) {
+        env->eflags &= ~IF_MASK;
+    }
+    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+}
+
+#ifdef VBOX
+
+/* check if VME interrupt redirection is enabled in TSS */
+static inline bool is_vme_irq_redirected(int intno)
+{
+    int io_offset, intredir_offset;
+    unsigned char val, mask;
+
+    /* TSS must be a valid 32 bit one */
+    if (!(env->tr.flags & DESC_P_MASK) ||
+        ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 ||
+        env->tr.limit < 103)
+        goto fail;
+    io_offset = lduw_kernel(env->tr.base + 0x66);
+    /* the virtual interrupt redirection bitmap is located below the io bitmap */
+    intredir_offset = io_offset - 0x20;
+
+    intredir_offset += (intno >> 3);
+    if ((intredir_offset) > env->tr.limit)
+        goto fail;
+
+    val = ldub_kernel(env->tr.base + intredir_offset);
+    mask = 1 << (unsigned char)(intno & 7);
+
+    /* bit set means no redirection. */
+    if ((val & mask) != 0) {
+        return false;
+    }
+    return true;
+
+fail:
+    raise_exception_err(EXCP0D_GPF, 0);
+    return true;
+}
+
+/* V86 mode software interrupt with CR4.VME=1 */
+static void do_soft_interrupt_vme(int intno, int error_code, unsigned int next_eip)
+{
+    target_ulong ptr, ssp;
+    int selector;
+    uint32_t offset, esp;
+    uint32_t old_cs, old_eflags;
+    uint32_t iopl;
+
+    iopl = ((env->eflags >> IOPL_SHIFT) & 3);
+
+    if (!is_vme_irq_redirected(intno))
+    {
+        if (iopl == 3)
+            /* normal protected mode handler call */
+            return do_interrupt_protected(intno, 1, error_code, next_eip, 0);
+        else
+            raise_exception_err(EXCP0D_GPF, 0);
+    }
+
+    /* virtual mode idt is at linear address 0 */
+    ptr = 0 + intno * 4;
+    offset = lduw_kernel(ptr);
+    selector = lduw_kernel(ptr + 2);
+    esp = ESP;
+    ssp = env->segs[R_SS].base;
+    old_cs = env->segs[R_CS].selector;
+
+    old_eflags = compute_eflags();
+    if (iopl < 3)
+    {
+        /* copy VIF into IF and set IOPL to 3 */
+        if (env->eflags & VIF_MASK)
+            old_eflags |= IF_MASK;
+        else
+            old_eflags &= ~IF_MASK;
+
+        old_eflags |= (3 << IOPL_SHIFT);
+    }
+
+    /* XXX: use SS segment size ? */
+    PUSHW(ssp, esp, 0xffff, old_eflags);
+    PUSHW(ssp, esp, 0xffff, old_cs);
+    PUSHW(ssp, esp, 0xffff, next_eip);
+
+    /* update processor state */
+    ESP = (ESP & ~0xffff) | (esp & 0xffff);
+    env->eip = offset;
+    env->segs[R_CS].selector = selector;
+    env->segs[R_CS].base = (selector << 4);
+    env->eflags &= ~(TF_MASK | RF_MASK);
+
+    if (iopl < 3)
+        env->eflags &= ~VIF_MASK;
+    else
+        env->eflags &= ~IF_MASK;
+}
+#endif /* VBOX */
+
+#ifdef TARGET_X86_64
+
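+/* 64-bit stack push/pop helpers: in long mode the stack is addressed through
+   a flat 64-bit RSP, so unlike PUSHW/PUSHL there is no segment base or
+   stack-size mask to apply. */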
+#define PUSHQ(sp, val)\
+{\
+    sp -= 8;\
+    stq_kernel(sp, (val));\
+}
+
+#define POPQ(sp, val)\
+{\
+    val = ldq_kernel(sp);\
+    sp += 8;\
+}
+
+static inline target_ulong get_rsp_from_tss(int level)
+{
+    int index;
+
+#if 0
+    printf("TR: base=" TARGET_FMT_lx " limit=%x\n",
+           env->tr.base, env->tr.limit);
+#endif
+
+    if (!(env->tr.flags & DESC_P_MASK))
+        cpu_abort(env, "invalid tss");
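+    /* 64-bit TSS layout: RSP0..RSP2 live at offsets 4, 12 and 20, and
+       IST1..IST7 follow from offset 36, each entry being 8 bytes; both are
+       reached below via 8 * level + 4 (callers pass dpl or ist + 3). */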
+    index = 8 * level + 4;
+    if ((index + 7) > env->tr.limit)
+        raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+    return ldq_kernel(env->tr.base + index);
+}
+
+/* 64 bit interrupt */
+static void do_interrupt64(int intno, int is_int, int error_code,
+                           target_ulong next_eip, int is_hw)
+{
+    SegmentCache *dt;
+    target_ulong ptr;
+    int type, dpl, selector, cpl, ist;
+    int has_error_code, new_stack;
+    uint32_t e1, e2, e3, ss;
+    target_ulong old_eip, esp, offset;
+
+#ifdef VBOX
+    if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS)
+        cpu_loop_exit();
+#endif
+
+    has_error_code = 0;
+    if (!is_int && !is_hw) {
+        switch(intno) {
+        case 8:
+        case 10:
+        case 11:
+        case 12:
+        case 13:
+        case 14:
+        case 17:
+            has_error_code = 1;
+            break;
+        }
+    }
+    if (is_int)
+        old_eip = next_eip;
+    else
+        old_eip = env->eip;
+
+    dt = &env->idt;
+    if (intno * 16 + 15 > dt->limit)
+        raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+    ptr = dt->base + intno * 16;
+    e1 = ldl_kernel(ptr);
+    e2 = ldl_kernel(ptr + 4);
+    e3 = ldl_kernel(ptr + 8);
+    /* check gate type */
+    type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+    switch(type) {
+    case 14: /* 386 interrupt gate */
+    case 15: /* 386 trap gate */
+        break;
+    default:
+        raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+        break;
+    }
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    /* check privilege if software int */
+    if (is_int && dpl < cpl)
+        raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+    /* check valid bit */
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, intno * 16 + 2);
+    selector = e1 >> 16;
+    offset = ((target_ulong)e3 << 32) | (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+    ist = e2 & 7;
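+    /* a non-zero IST index (1-7, gate bits 2:0) forces a stack switch via the
+       64-bit TSS even when no privilege change occurs */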
+    if ((selector & 0xfffc) == 0)
+        raise_exception_err(EXCP0D_GPF, 0);
+
+    if (load_segment(&e1, &e2, selector) != 0)
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (dpl > cpl)
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+    if (!(e2 & DESC_L_MASK) || (e2 & DESC_B_MASK))
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+    if ((!(e2 & DESC_C_MASK) && dpl < cpl) || ist != 0) {
+        /* to inner privilege */
+        if (ist != 0)
+            esp = get_rsp_from_tss(ist + 3);
+        else
+            esp = get_rsp_from_tss(dpl);
+        esp &= ~0xfLL; /* align stack */
+        ss = 0;
+        new_stack = 1;
+    } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
+        /* to same privilege */
+        if (env->eflags & VM_MASK)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0;
+        if (ist != 0)
+            esp = get_rsp_from_tss(ist + 3);
+        else
+            esp = ESP;
+        esp &= ~0xfLL; /* align stack */
+        dpl = cpl;
+    } else {
+        raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        new_stack = 0; /* avoid warning */
+        esp = 0; /* avoid warning */
+    }
+
+    PUSHQ(esp, env->segs[R_SS].selector);
+    PUSHQ(esp, ESP);
+    PUSHQ(esp, compute_eflags());
+    PUSHQ(esp, env->segs[R_CS].selector);
+    PUSHQ(esp, old_eip);
+    if (has_error_code) {
+        PUSHQ(esp, error_code);
+    }
+
+    if (new_stack) {
+        ss = 0 | dpl;
+        cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0);
+    }
+    ESP = esp;
+
+    selector = (selector & ~3) | dpl;
+    cpu_x86_load_seg_cache(env, R_CS, selector,
+                   get_seg_base(e1, e2),
+                   get_seg_limit(e1, e2),
+                   e2);
+    cpu_x86_set_cpl(env, dpl);
+    env->eip = offset;
+
+    /* interrupt gates clear the IF mask */
+    if ((type & 1) == 0) {
+        env->eflags &= ~IF_MASK;
+    }
+    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+}
+#endif
+
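+/* SYSCALL loads CS from STAR[47:32] and SS from STAR[47:32] + 8; in long mode
+   the target RIP comes from LSTAR (64-bit code) or CSTAR (compatibility mode)
+   and RFLAGS is masked with SFMASK, while legacy mode jumps to STAR[31:0]. */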
+void helper_syscall(int next_eip_addend)
+{
+    int selector;
+
+    if (!(env->efer & MSR_EFER_SCE)) {
+        raise_exception_err(EXCP06_ILLOP, 0);
+    }
+    selector = (env->star >> 32) & 0xffff;
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_LMA_MASK) {
+        int code64;
+
+        ECX = env->eip + next_eip_addend;
+        env->regs[11] = compute_eflags();
+
+        code64 = env->hflags & HF_CS64_MASK;
+
+        cpu_x86_set_cpl(env, 0);
+        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
+                           0, 0xffffffff,
+                               DESC_G_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_W_MASK | DESC_A_MASK);
+        env->eflags &= ~env->fmask;
+        load_eflags(env->eflags, 0);
+        if (code64)
+            env->eip = env->lstar;
+        else
+            env->eip = env->cstar;
+    } else
+#endif
+    {
+        ECX = (uint32_t)(env->eip + next_eip_addend);
+
+        cpu_x86_set_cpl(env, 0);
+        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
+                           0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_W_MASK | DESC_A_MASK);
+        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
+        env->eip = (uint32_t)env->star;
+    }
+}
+
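+/* SYSRET returns to user mode using STAR[63:48]: in 64-bit mode CS is that
+   value + 16 (or + 0 for a 32-bit return) with RPL forced to 3, SS is the
+   value + 8, and RFLAGS is restored from R11; legacy mode only sets IF. */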
+void helper_sysret(int dflag)
+{
+    int cpl, selector;
+
+    if (!(env->efer & MSR_EFER_SCE)) {
+        raise_exception_err(EXCP06_ILLOP, 0);
+    }
+    cpl = env->hflags & HF_CPL_MASK;
+    if (!(env->cr[0] & CR0_PE_MASK) || cpl != 0) {
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+    selector = (env->star >> 48) & 0xffff;
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_LMA_MASK) {
+        if (dflag == 2) {
+            cpu_x86_load_seg_cache(env, R_CS, (selector + 16) | 3,
+                                   0, 0xffffffff,
+                                   DESC_G_MASK | DESC_P_MASK |
+                                   DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                                   DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
+                                   DESC_L_MASK);
+            env->eip = ECX;
+        } else {
+            cpu_x86_load_seg_cache(env, R_CS, selector | 3,
+                                   0, 0xffffffff,
+                                   DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                                   DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                                   DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+            env->eip = (uint32_t)ECX;
+        }
+        cpu_x86_load_seg_cache(env, R_SS, selector + 8,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_W_MASK | DESC_A_MASK);
+        load_eflags((uint32_t)(env->regs[11]), TF_MASK | AC_MASK | ID_MASK |
+                    IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK);
+        cpu_x86_set_cpl(env, 3);
+    } else
+#endif
+    {
+        cpu_x86_load_seg_cache(env, R_CS, selector | 3,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+        env->eip = (uint32_t)ECX;
+        cpu_x86_load_seg_cache(env, R_SS, selector + 8,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                               DESC_W_MASK | DESC_A_MASK);
+        env->eflags |= IF_MASK;
+        cpu_x86_set_cpl(env, 3);
+    }
+#ifdef USE_KQEMU
+    if (kqemu_is_ok(env)) {
+        if (env->hflags & HF_LMA_MASK)
+            CC_OP = CC_OP_EFLAGS;
+        env->exception_index = -1;
+        cpu_loop_exit();
+    }
+#endif
+}
+
+#ifdef VBOX
+/**
+ * Checks and processes external VMM events.
+ * Called by op_check_external_event() when any of the flags is set and can be serviced.
+ */
+void helper_external_event(void)
+{
+#if defined(RT_OS_DARWIN) && defined(VBOX_STRICT)
+    uintptr_t uESP;
+    __asm__ __volatile__("movl %%esp, %0" : "=r" (uESP));
+    AssertMsg(!(uESP & 15), ("esp=%#p\n", uESP));
+#endif
+    if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD)
+    {
+        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_EXTERNAL_HARD);
+        cpu_interrupt(env, CPU_INTERRUPT_HARD);
+    }
+    if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_EXIT)
+    {
+        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_EXTERNAL_EXIT);
+        cpu_interrupt(env, CPU_INTERRUPT_EXIT);
+    }
+    if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_DMA)
+    {
+        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_EXTERNAL_DMA);
+        remR3DmaRun(env);
+    }
+    if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_TIMER)
+    {
+        ASMAtomicAndS32(&env->interrupt_request, ~CPU_INTERRUPT_EXTERNAL_TIMER);
+        remR3TimersRun(env);
+    }
+}
+/* helper for recording call instruction addresses for later scanning */
+void helper_record_call()
+{
+    if (    !(env->state & CPU_RAW_RING0)
+        &&  (env->cr[0] & CR0_PG_MASK)
+        &&  !(env->eflags & X86_EFL_IF))
+        remR3RecordCall(env);
+}
+#endif /* VBOX */
+
+/* real mode interrupt */
+static void do_interrupt_real(int intno, int is_int, int error_code,
+                              unsigned int next_eip)
+{
+    SegmentCache *dt;
+    target_ulong ptr, ssp;
+    int selector;
+    uint32_t offset, esp;
+    uint32_t old_cs, old_eip;
+
+    /* real mode (simpler!) */
+    dt = &env->idt;
+    if (intno * 4 + 3 > dt->limit)
+        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+    ptr = dt->base + intno * 4;
+    offset = lduw_kernel(ptr);
+    selector = lduw_kernel(ptr + 2);
+    esp = ESP;
+    ssp = env->segs[R_SS].base;
+    if (is_int)
+        old_eip = next_eip;
+    else
+        old_eip = env->eip;
+    old_cs = env->segs[R_CS].selector;
+    /* XXX: use SS segment size ? */
+    PUSHW(ssp, esp, 0xffff, compute_eflags());
+    PUSHW(ssp, esp, 0xffff, old_cs);
+    PUSHW(ssp, esp, 0xffff, old_eip);
+
+    /* update processor state */
+    ESP = (ESP & ~0xffff) | (esp & 0xffff);
+    env->eip = offset;
+    env->segs[R_CS].selector = selector;
+    env->segs[R_CS].base = (selector << 4);
+    env->eflags &= ~(IF_MASK | TF_MASK | AC_MASK | RF_MASK);
+}
+
+/* fake user mode interrupt */
+void do_interrupt_user(int intno, int is_int, int error_code,
+                       target_ulong next_eip)
+{
+    SegmentCache *dt;
+    target_ulong ptr;
+    int dpl, cpl;
+    uint32_t e2;
+
+    dt = &env->idt;
+    ptr = dt->base + (intno * 8);
+    e2 = ldl_kernel(ptr + 4);
+
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    /* check privilege if software int */
+    if (is_int && dpl < cpl)
+        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+
+    /* Since we emulate only user space, we cannot do more than exit the
+       emulation with the appropriate exception and error code */
+    if (is_int)
+        EIP = next_eip;
+}
+
+/*
+ * Begin execution of an interrupt. is_int is TRUE if it comes from the
+ * int instruction. next_eip is the EIP value AFTER the interrupt
+ * instruction; it is only relevant if is_int is TRUE.
+ */
+void do_interrupt(int intno, int is_int, int error_code,
+                  target_ulong next_eip, int is_hw)
+{
+    if (loglevel & CPU_LOG_INT) {
+        if ((env->cr[0] & CR0_PE_MASK)) {
+            static int count;
+            fprintf(logfile, "%6d: v=%02x e=%04x i=%d cpl=%d IP=%04x:" TARGET_FMT_lx " pc=" TARGET_FMT_lx " SP=%04x:" TARGET_FMT_lx,
+                    count, intno, error_code, is_int,
+                    env->hflags & HF_CPL_MASK,
+                    env->segs[R_CS].selector, EIP,
+                    (int)env->segs[R_CS].base + EIP,
+                    env->segs[R_SS].selector, ESP);
+            if (intno == 0x0e) {
+                fprintf(logfile, " CR2=" TARGET_FMT_lx, env->cr[2]);
+            } else {
+                fprintf(logfile, " EAX=" TARGET_FMT_lx, EAX);
+            }
+            fprintf(logfile, "\n");
+            cpu_dump_state(env, logfile, fprintf, X86_DUMP_CCOP);
+#if 0
+            {
+                int i;
+                uint8_t *ptr;
+                fprintf(logfile, "       code=");
+                ptr = env->segs[R_CS].base + env->eip;
+                for(i = 0; i < 16; i++) {
+                    fprintf(logfile, " %02x", ldub(ptr + i));
+                }
+                fprintf(logfile, "\n");
+            }
+#endif
+            count++;
+        }
+    }
+    if (env->cr[0] & CR0_PE_MASK) {
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            do_interrupt64(intno, is_int, error_code, next_eip, is_hw);
+        } else
+#endif
+        {
+#ifdef VBOX
+            /* int xx in v86 code with VME enabled? */
+            if (    (env->eflags & VM_MASK)
+                &&  (env->cr[4] & CR4_VME_MASK)
+                &&  is_int
+                &&  !is_hw
+                &&  env->eip + 1 != next_eip /* single byte int 3 goes straight to the protected mode handler */
+               )
+                do_soft_interrupt_vme(intno, error_code, next_eip);
+            else
+#endif /* VBOX */
+                do_interrupt_protected(intno, is_int, error_code, next_eip, is_hw);
+        }
+    } else {
+        do_interrupt_real(intno, is_int, error_code, next_eip);
+    }
+}
+
+/*
+ * Signal an interrupt. It is handled in the main CPU loop.
+ * is_int is TRUE if it comes from the int instruction. next_eip is the
+ * EIP value AFTER the interrupt instruction; it is only relevant if
+ * is_int is TRUE.
+ */
+void raise_interrupt(int intno, int is_int, int error_code,
+                     int next_eip_addend)
+{
+#if defined(VBOX) && defined(DEBUG)
+    NOT_DMIK(Log2(("raise_interrupt: %x %x %x %VGv\n", intno, is_int, error_code, env->eip + next_eip_addend)));
+#endif
+    env->exception_index = intno;
+    env->error_code = error_code;
+    env->exception_is_int = is_int;
+    env->exception_next_eip = env->eip + next_eip_addend;
+    cpu_loop_exit();
+}
+
+/* same as raise_exception_err, but do not restore global registers */
+static void raise_exception_err_norestore(int exception_index, int error_code)
+{
+    env->exception_index = exception_index;
+    env->error_code = error_code;
+    env->exception_is_int = 0;
+    env->exception_next_eip = 0;
+    longjmp(env->jmp_env, 1);
+}
+
+/* shortcuts to generate exceptions */
+
+void (raise_exception_err)(int exception_index, int error_code)
+{
+    raise_interrupt(exception_index, 0, error_code, 0);
+}
+
+void raise_exception(int exception_index)
+{
+    raise_interrupt(exception_index, 0, 0, 0);
+}
+
+/* SMM support */
+
+#if defined(CONFIG_USER_ONLY)
+
+void do_smm_enter(void)
+{
+}
+
+void helper_rsm(void)
+{
+}
+
+#else
+
+#ifdef TARGET_X86_64
+#define SMM_REVISION_ID 0x00020064
+#else
+#define SMM_REVISION_ID 0x00020000
+#endif
+
+void do_smm_enter(void)
+{
+#ifdef VBOX
+    cpu_abort(env, "do_ssm_enter");
+#else /* !VBOX */
+    target_ulong sm_state;
+    SegmentCache *dt;
+    int i, offset;
+
+    if (loglevel & CPU_LOG_INT) {
+        fprintf(logfile, "SMM: enter\n");
+        cpu_dump_state(env, logfile, fprintf, X86_DUMP_CCOP);
+    }
+
+    env->hflags |= HF_SMM_MASK;
+    cpu_smm_update(env);
+
+    sm_state = env->smbase + 0x8000;
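+    /* state save offsets below are relative to smbase + 0x8000, so the save
+       map (e.g. 0x7ffc -> smbase + 0xfffc) occupies the top of the 64 KiB
+       SMRAM segment and grows downwards */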
+
+#ifdef TARGET_X86_64
+    for(i = 0; i < 6; i++) {
+        dt = &env->segs[i];
+        offset = 0x7e00 + i * 16;
+        stw_phys(sm_state + offset, dt->selector);
+        stw_phys(sm_state + offset + 2, (dt->flags >> 8) & 0xf0ff);
+        stl_phys(sm_state + offset + 4, dt->limit);
+        stq_phys(sm_state + offset + 8, dt->base);
+    }
+
+    stq_phys(sm_state + 0x7e68, env->gdt.base);
+    stl_phys(sm_state + 0x7e64, env->gdt.limit);
+
+    stw_phys(sm_state + 0x7e70, env->ldt.selector);
+    stq_phys(sm_state + 0x7e78, env->ldt.base);
+    stl_phys(sm_state + 0x7e74, env->ldt.limit);
+    stw_phys(sm_state + 0x7e72, (env->ldt.flags >> 8) & 0xf0ff);
+
+    stq_phys(sm_state + 0x7e88, env->idt.base);
+    stl_phys(sm_state + 0x7e84, env->idt.limit);
+
+    stw_phys(sm_state + 0x7e90, env->tr.selector);
+    stq_phys(sm_state + 0x7e98, env->tr.base);
+    stl_phys(sm_state + 0x7e94, env->tr.limit);
+    stw_phys(sm_state + 0x7e92, (env->tr.flags >> 8) & 0xf0ff);
+
+    stq_phys(sm_state + 0x7ed0, env->efer);
+
+    stq_phys(sm_state + 0x7ff8, EAX);
+    stq_phys(sm_state + 0x7ff0, ECX);
+    stq_phys(sm_state + 0x7fe8, EDX);
+    stq_phys(sm_state + 0x7fe0, EBX);
+    stq_phys(sm_state + 0x7fd8, ESP);
+    stq_phys(sm_state + 0x7fd0, EBP);
+    stq_phys(sm_state + 0x7fc8, ESI);
+    stq_phys(sm_state + 0x7fc0, EDI);
+    for(i = 8; i < 16; i++)
+        stq_phys(sm_state + 0x7ff8 - i * 8, env->regs[i]);
+    stq_phys(sm_state + 0x7f78, env->eip);
+    stl_phys(sm_state + 0x7f70, compute_eflags());
+    stl_phys(sm_state + 0x7f68, env->dr[6]);
+    stl_phys(sm_state + 0x7f60, env->dr[7]);
+
+    stl_phys(sm_state + 0x7f48, env->cr[4]);
+    stl_phys(sm_state + 0x7f50, env->cr[3]);
+    stl_phys(sm_state + 0x7f58, env->cr[0]);
+
+    stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
+    stl_phys(sm_state + 0x7f00, env->smbase);
+#else
+    stl_phys(sm_state + 0x7ffc, env->cr[0]);
+    stl_phys(sm_state + 0x7ff8, env->cr[3]);
+    stl_phys(sm_state + 0x7ff4, compute_eflags());
+    stl_phys(sm_state + 0x7ff0, env->eip);
+    stl_phys(sm_state + 0x7fec, EDI);
+    stl_phys(sm_state + 0x7fe8, ESI);
+    stl_phys(sm_state + 0x7fe4, EBP);
+    stl_phys(sm_state + 0x7fe0, ESP);
+    stl_phys(sm_state + 0x7fdc, EBX);
+    stl_phys(sm_state + 0x7fd8, EDX);
+    stl_phys(sm_state + 0x7fd4, ECX);
+    stl_phys(sm_state + 0x7fd0, EAX);
+    stl_phys(sm_state + 0x7fcc, env->dr[6]);
+    stl_phys(sm_state + 0x7fc8, env->dr[7]);
+
+    stl_phys(sm_state + 0x7fc4, env->tr.selector);
+    stl_phys(sm_state + 0x7f64, env->tr.base);
+    stl_phys(sm_state + 0x7f60, env->tr.limit);
+    stl_phys(sm_state + 0x7f5c, (env->tr.flags >> 8) & 0xf0ff);
+
+    stl_phys(sm_state + 0x7fc0, env->ldt.selector);
+    stl_phys(sm_state + 0x7f80, env->ldt.base);
+    stl_phys(sm_state + 0x7f7c, env->ldt.limit);
+    stl_phys(sm_state + 0x7f78, (env->ldt.flags >> 8) & 0xf0ff);
+
+    stl_phys(sm_state + 0x7f74, env->gdt.base);
+    stl_phys(sm_state + 0x7f70, env->gdt.limit);
+
+    stl_phys(sm_state + 0x7f58, env->idt.base);
+    stl_phys(sm_state + 0x7f54, env->idt.limit);
+
+    for(i = 0; i < 6; i++) {
+        dt = &env->segs[i];
+        if (i < 3)
+            offset = 0x7f84 + i * 12;
+        else
+            offset = 0x7f2c + (i - 3) * 12;
+        stl_phys(sm_state + 0x7fa8 + i * 4, dt->selector);
+        stl_phys(sm_state + offset + 8, dt->base);
+        stl_phys(sm_state + offset + 4, dt->limit);
+        stl_phys(sm_state + offset, (dt->flags >> 8) & 0xf0ff);
+    }
+    stl_phys(sm_state + 0x7f14, env->cr[4]);
+
+    stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
+    stl_phys(sm_state + 0x7ef8, env->smbase);
+#endif
+    /* init SMM cpu state */
+
+#ifdef TARGET_X86_64
+    env->efer = 0;
+    env->hflags &= ~HF_LMA_MASK;
+#endif
+    load_eflags(0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    env->eip = 0x00008000;
+    cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
+                           0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
+    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
+
+    cpu_x86_update_cr0(env,
+                       env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | CR0_PG_MASK));
+    cpu_x86_update_cr4(env, 0);
+    env->dr[7] = 0x00000400;
+    CC_OP = CC_OP_EFLAGS;
+#endif /* VBOX */
+}
+
+void helper_rsm(void)
+{
+#ifdef VBOX
+    cpu_abort(env, "helper_rsm");
+#else /* !VBOX */
+    target_ulong sm_state;
+    int i, offset;
+    uint32_t val;
+
+    sm_state = env->smbase + 0x8000;
+#ifdef TARGET_X86_64
+    env->efer = ldq_phys(sm_state + 0x7ed0);
+    if (env->efer & MSR_EFER_LMA)
+        env->hflags |= HF_LMA_MASK;
+    else
+        env->hflags &= ~HF_LMA_MASK;
+
+    for(i = 0; i < 6; i++) {
+        offset = 0x7e00 + i * 16;
+        cpu_x86_load_seg_cache(env, i,
+                               lduw_phys(sm_state + offset),
+                               ldq_phys(sm_state + offset + 8),
+                               ldl_phys(sm_state + offset + 4),
+                               (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
+    }
+
+    env->gdt.base = ldq_phys(sm_state + 0x7e68);
+    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+
+    env->ldt.selector = lduw_phys(sm_state + 0x7e70);
+    env->ldt.base = ldq_phys(sm_state + 0x7e78);
+    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+    env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
+
+    env->idt.base = ldq_phys(sm_state + 0x7e88);
+    env->idt.limit = ldl_phys(sm_state + 0x7e84);
+
+    env->tr.selector = lduw_phys(sm_state + 0x7e90);
+    env->tr.base = ldq_phys(sm_state + 0x7e98);
+    env->tr.limit = ldl_phys(sm_state + 0x7e94);
+    env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
+
+    EAX = ldq_phys(sm_state + 0x7ff8);
+    ECX = ldq_phys(sm_state + 0x7ff0);
+    EDX = ldq_phys(sm_state + 0x7fe8);
+    EBX = ldq_phys(sm_state + 0x7fe0);
+    ESP = ldq_phys(sm_state + 0x7fd8);
+    EBP = ldq_phys(sm_state + 0x7fd0);
+    ESI = ldq_phys(sm_state + 0x7fc8);
+    EDI = ldq_phys(sm_state + 0x7fc0);
+    for(i = 8; i < 16; i++)
+        env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
+    env->eip = ldq_phys(sm_state + 0x7f78);
+    load_eflags(ldl_phys(sm_state + 0x7f70),
+                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    env->dr[6] = ldl_phys(sm_state + 0x7f68);
+    env->dr[7] = ldl_phys(sm_state + 0x7f60);
+
+    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
+    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
+    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+
+    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    if (val & 0x20000) {
+        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+    }
+#else
+    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
+    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
+    load_eflags(ldl_phys(sm_state + 0x7ff4),
+                ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+    env->eip = ldl_phys(sm_state + 0x7ff0);
+    EDI = ldl_phys(sm_state + 0x7fec);
+    ESI = ldl_phys(sm_state + 0x7fe8);
+    EBP = ldl_phys(sm_state + 0x7fe4);
+    ESP = ldl_phys(sm_state + 0x7fe0);
+    EBX = ldl_phys(sm_state + 0x7fdc);
+    EDX = ldl_phys(sm_state + 0x7fd8);
+    ECX = ldl_phys(sm_state + 0x7fd4);
+    EAX = ldl_phys(sm_state + 0x7fd0);
+    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
+    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
+
+    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
+    env->tr.base = ldl_phys(sm_state + 0x7f64);
+    env->tr.limit = ldl_phys(sm_state + 0x7f60);
+    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+
+    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
+    env->ldt.base = ldl_phys(sm_state + 0x7f80);
+    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
+    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+
+    env->gdt.base = ldl_phys(sm_state + 0x7f74);
+    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+
+    env->idt.base = ldl_phys(sm_state + 0x7f58);
+    env->idt.limit = ldl_phys(sm_state + 0x7f54);
+
+    for(i = 0; i < 6; i++) {
+        if (i < 3)
+            offset = 0x7f84 + i * 12;
+        else
+            offset = 0x7f2c + (i - 3) * 12;
+        cpu_x86_load_seg_cache(env, i,
+                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+                               ldl_phys(sm_state + offset + 8),
+                               ldl_phys(sm_state + offset + 4),
+                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+    }
+    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+
+    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    if (val & 0x20000) {
+        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+    }
+#endif
+    CC_OP = CC_OP_EFLAGS;
+    env->hflags &= ~HF_SMM_MASK;
+    cpu_smm_update(env);
+
+    if (loglevel & CPU_LOG_INT) {
+        fprintf(logfile, "SMM: after RSM\n");
+        cpu_dump_state(env, logfile, fprintf, X86_DUMP_CCOP);
+    }
+#endif /* !VBOX */
+}
+
+#endif /* !CONFIG_USER_ONLY */
+
+
+#ifdef BUGGY_GCC_DIV64
+/* gcc 2.95.4 on PowerPC does not seem to like using __udivdi3, so we
+   call it from another function */
+uint32_t div32(uint64_t *q_ptr, uint64_t num, uint32_t den)
+{
+    *q_ptr = num / den;
+    return num % den;
+}
+
+int32_t idiv32(int64_t *q_ptr, int64_t num, int32_t den)
+{
+    *q_ptr = num / den;
+    return num % den;
+}
+#endif
+
+void helper_divl_EAX_T0(void)
+{
+    unsigned int den, r;
+    uint64_t num, q;
+
+    num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+    den = T0;
+    if (den == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+#ifdef BUGGY_GCC_DIV64
+    r = div32(&q, num, den);
+#else
+    q = (num / den);
+    r = (num % den);
+#endif
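+    /* a quotient that does not fit in 32 bits raises the same #DE (vector 0)
+       exception as division by zero, hence the reuse of EXCP00_DIVZ */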
+    if (q > 0xffffffff)
+        raise_exception(EXCP00_DIVZ);
+    EAX = (uint32_t)q;
+    EDX = (uint32_t)r;
+}
+
+void helper_idivl_EAX_T0(void)
+{
+    int den, r;
+    int64_t num, q;
+
+    num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+    den = T0;
+    if (den == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+#ifdef BUGGY_GCC_DIV64
+    r = idiv32(&q, num, den);
+#else
+    q = (num / den);
+    r = (num % den);
+#endif
+    if (q != (int32_t)q)
+        raise_exception(EXCP00_DIVZ);
+    EAX = (uint32_t)q;
+    EDX = (uint32_t)r;
+}
+
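+/* CMPXCHG8B: compare EDX:EAX with the 64-bit operand at A0; on a match store
+   ECX:EBX there and set ZF, otherwise load the operand into EDX:EAX and clear
+   ZF. The destination is written even when the comparison fails, matching the
+   always-write behaviour of the real instruction. */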
+void helper_cmpxchg8b(void)
+{
+    uint64_t d;
+    int eflags;
+
+    eflags = cc_table[CC_OP].compute_all();
+    d = ldq(A0);
+    if (d == (((uint64_t)EDX << 32) | EAX)) {
+        stq(A0, ((uint64_t)ECX << 32) | EBX);
+        eflags |= CC_Z;
+    } else {
+        /* always do the store */
+        stq(A0, d);
+        EDX = (uint32_t)(d >> 32);
+        EAX = (uint32_t)d;
+        eflags &= ~CC_Z;
+    }
+    CC_SRC = eflags;
+}
+
+void helper_single_step()
+{
+    env->dr[6] |= 0x4000;
+    raise_exception(EXCP01_SSTP);
+}
+
+void helper_cpuid(void)
+{
+#ifndef VBOX
+    uint32_t index;
+    index = (uint32_t)EAX;
+
+    /* test if maximum index reached */
+    if (index & 0x80000000) {
+        if (index > env->cpuid_xlevel)
+            index = env->cpuid_level;
+    } else {
+        if (index > env->cpuid_level)
+            index = env->cpuid_level;
+    }
+
+    switch(index) {
+    case 0:
+        EAX = env->cpuid_level;
+        EBX = env->cpuid_vendor1;
+        EDX = env->cpuid_vendor2;
+        ECX = env->cpuid_vendor3;
+        break;
+    case 1:
+        EAX = env->cpuid_version;
+        EBX = 8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
+        ECX = env->cpuid_ext_features;
+        EDX = env->cpuid_features;
+        break;
+    case 2:
+        /* cache info: needed for Pentium Pro compatibility */
+        EAX = 0x410601;
+        EBX = 0;
+        ECX = 0;
+        EDX = 0;
+        break;
+    case 0x80000000:
+        EAX = env->cpuid_xlevel;
+        EBX = env->cpuid_vendor1;
+        EDX = env->cpuid_vendor2;
+        ECX = env->cpuid_vendor3;
+        break;
+    case 0x80000001:
+        EAX = env->cpuid_features;
+        EBX = 0;
+        ECX = 0;
+        EDX = env->cpuid_ext2_features;
+        break;
+    case 0x80000002:
+    case 0x80000003:
+    case 0x80000004:
+        EAX = env->cpuid_model[(index - 0x80000002) * 4 + 0];
+        EBX = env->cpuid_model[(index - 0x80000002) * 4 + 1];
+        ECX = env->cpuid_model[(index - 0x80000002) * 4 + 2];
+        EDX = env->cpuid_model[(index - 0x80000002) * 4 + 3];
+        break;
+    case 0x80000005:
+        /* cache info (L1 cache) */
+        EAX = 0x01ff01ff;
+        EBX = 0x01ff01ff;
+        ECX = 0x40020140;
+        EDX = 0x40020140;
+        break;
+    case 0x80000006:
+        /* cache info (L2 cache) */
+        EAX = 0;
+        EBX = 0x42004200;
+        ECX = 0x02008140;
+        EDX = 0;
+        break;
+    case 0x80000008:
+        /* virtual & phys address size in low 2 bytes. */
+        EAX = 0x00003028;
+        EBX = 0;
+        ECX = 0;
+        EDX = 0;
+        break;
+    default:
+        /* reserved values: zero */
+        EAX = 0;
+        EBX = 0;
+        ECX = 0;
+        EDX = 0;
+        break;
+    }
+#else /* VBOX */
+    remR3CpuId(env, EAX, &EAX, &EBX, &ECX, &EDX);
+#endif /* VBOX */
+}
+
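+/* Helper for ENTER with a non-zero nesting level: the translated code is
+   expected to have pushed the old frame pointer already; this routine copies
+   the level-1 outer frame pointers (the "display") from the old frame and
+   finally pushes the new frame pointer value passed in T1. */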
+void helper_enter_level(int level, int data32)
+{
+    target_ulong ssp;
+    uint32_t esp_mask, esp, ebp;
+
+    esp_mask = get_sp_mask(env->segs[R_SS].flags);
+    ssp = env->segs[R_SS].base;
+    ebp = EBP;
+    esp = ESP;
+    if (data32) {
+        /* 32 bit */
+        esp -= 4;
+        while (--level) {
+            esp -= 4;
+            ebp -= 4;
+            stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask)));
+        }
+        esp -= 4;
+        stl(ssp + (esp & esp_mask), T1);
+    } else {
+        /* 16 bit */
+        esp -= 2;
+        while (--level) {
+            esp -= 2;
+            ebp -= 2;
+            stw(ssp + (esp & esp_mask), lduw(ssp + (ebp & esp_mask)));
+        }
+        esp -= 2;
+        stw(ssp + (esp & esp_mask), T1);
+    }
+}
+
+#ifdef TARGET_X86_64
+void helper_enter64_level(int level, int data64)
+{
+    target_ulong esp, ebp;
+    ebp = EBP;
+    esp = ESP;
+
+    if (data64) {
+        /* 64 bit */
+        esp -= 8;
+        while (--level) {
+            esp -= 8;
+            ebp -= 8;
+            stq(esp, ldq(ebp));
+        }
+        esp -= 8;
+        stq(esp, T1);
+    } else {
+        /* 16 bit */
+        esp -= 2;
+        while (--level) {
+            esp -= 2;
+            ebp -= 2;
+            stw(esp, lduw(ebp));
+        }
+        esp -= 2;
+        stw(esp, T1);
+    }
+}
+#endif
+
+void helper_lldt_T0(void)
+{
+    int selector;
+    SegmentCache *dt;
+    uint32_t e1, e2;
+    int index, entry_limit;
+    target_ulong ptr;
+#ifdef VBOX
+    Log(("helper_lldt_T0: old ldtr=%RTsel {.base=%VGv, .limit=%VGv} new=%RTsel\n",
+         (RTSEL)env->ldt.selector, (RTGCPTR)env->ldt.base, (RTGCPTR)env->ldt.limit, (RTSEL)(T0 & 0xffff)));
+#endif
+
+    selector = T0 & 0xffff;
+    if ((selector & 0xfffc) == 0) {
+        /* XXX: NULL selector case: invalid LDT */
+        env->ldt.base = 0;
+        env->ldt.limit = 0;
+    } else {
+        if (selector & 0x4)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        dt = &env->gdt;
+        index = selector & ~7;
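+        /* in long mode LDT (and TSS) descriptors are 16 bytes instead of 8,
+           hence entry_limit 15 and the extra dword read below that supplies
+           base bits 63:32 */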
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK)
+            entry_limit = 15;
+        else
+#endif
+            entry_limit = 7;
+        if ((index + entry_limit) > dt->limit)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        ptr = dt->base + index;
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
+        if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint32_t e3;
+            e3 = ldl_kernel(ptr + 8);
+            load_seg_cache_raw_dt(&env->ldt, e1, e2);
+            env->ldt.base |= (target_ulong)e3 << 32;
+        } else
+#endif
+        {
+            load_seg_cache_raw_dt(&env->ldt, e1, e2);
+        }
+    }
+    env->ldt.selector = selector;
+#ifdef VBOX
+    Log(("helper_lldt_T0: new ldtr=%RTsel {.base=%VGv, .limit=%VGv}\n",
+         (RTSEL)env->ldt.selector, (RTGCPTR)env->ldt.base, (RTGCPTR)env->ldt.limit));
+#endif
+}
+
+void helper_ltr_T0(void)
+{
+    int selector;
+    SegmentCache *dt;
+    uint32_t e1, e2;
+    int index, type, entry_limit;
+    target_ulong ptr;
+
+#ifdef VBOX
+    Log(("helper_ltr_T0: old tr=%RTsel {.base=%VGv, .limit=%VGv, .flags=%RX32} new=%RTsel\n",
+         (RTSEL)env->tr.selector, (RTGCPTR)env->tr.base, (RTGCPTR)env->tr.limit,
+         env->tr.flags, (RTSEL)(T0 & 0xffff)));
+#endif
+
+    selector = T0 & 0xffff;
+    if ((selector & 0xfffc) == 0) {
+        /* NULL selector case: invalid TR */
+        env->tr.base = 0;
+        env->tr.limit = 0;
+        env->tr.flags = 0;
+    } else {
+        if (selector & 0x4)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        dt = &env->gdt;
+        index = selector & ~7;
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK)
+            entry_limit = 15;
+        else
+#endif
+            entry_limit = 7;
+        if ((index + entry_limit) > dt->limit)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        ptr = dt->base + index;
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        if ((e2 & DESC_S_MASK) ||
+            (type != 1 && type != 9))
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint32_t e3;
+            e3 = ldl_kernel(ptr + 8);
+            load_seg_cache_raw_dt(&env->tr, e1, e2);
+            env->tr.base |= (target_ulong)e3 << 32;
+        } else
+#endif
+        {
+            load_seg_cache_raw_dt(&env->tr, e1, e2);
+        }
+        e2 |= DESC_TSS_BUSY_MASK;
+        stl_kernel(ptr + 4, e2);
+    }
+    env->tr.selector = selector;
+#ifdef VBOX
+    Log(("helper_ltr_T0: new tr=%RTsel {.base=%VGv, .limit=%VGv, .flags=%RX32} new=%RTsel\n",
+         (RTSEL)env->tr.selector, (RTGCPTR)env->tr.base, (RTGCPTR)env->tr.limit,
+         env->tr.flags, (RTSEL)(T0 & 0xffff)));
+#endif
+}
+
+/* only works in protected mode and outside VM86. seg_reg must be != R_CS */
+void load_seg(int seg_reg, int selector)
+{
+    uint32_t e1, e2;
+    int cpl, dpl, rpl;
+    SegmentCache *dt;
+    int index;
+    target_ulong ptr;
+
+    selector &= 0xffff;
+    cpl = env->hflags & HF_CPL_MASK;
+
+#ifdef VBOX
+    /* Trying to load a selector with CPL=1? */
+    if (cpl == 0 && (selector & 3) == 1 && (env->state & CPU_RAW_RING0))
+    {
+        Log(("RPL 1 -> sel %04X -> %04X\n", selector, selector & 0xfffc));
+        selector = selector & 0xfffc;
+    }
+#endif
+
+    if ((selector & 0xfffc) == 0) {
+        /* null selector case */
+        if (seg_reg == R_SS
+#ifdef TARGET_X86_64
+            && (!(env->hflags & HF_CS64_MASK) || cpl == 3)
+#endif
+            )
+            raise_exception_err(EXCP0D_GPF, 0);
+        cpu_x86_load_seg_cache(env, seg_reg, selector, 0, 0, 0);
+    } else {
+
+        if (selector & 0x4)
+            dt = &env->ldt;
+        else
+            dt = &env->gdt;
+        index = selector & ~7;
+        if ((index + 7) > dt->limit)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        ptr = dt->base + index;
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
+
+        if (!(e2 & DESC_S_MASK))
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        rpl = selector & 3;
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (seg_reg == R_SS) {
+            /* must be writable segment */
+            if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
+                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+            if (rpl != cpl || dpl != cpl)
+                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        } else {
+            /* must be readable segment */
+            if ((e2 & (DESC_CS_MASK | DESC_R_MASK)) == DESC_CS_MASK)
+                raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+
+            if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
+                /* if not conforming code, test rights */
+                if (dpl < cpl || dpl < rpl)
+                    raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+            }
+        }
+
+        if (!(e2 & DESC_P_MASK)) {
+            if (seg_reg == R_SS)
+                raise_exception_err(EXCP0C_STACK, selector & 0xfffc);
+            else
+                raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+        }
+
+        /* set the access bit if not already set */
+        if (!(e2 & DESC_A_MASK)) {
+            e2 |= DESC_A_MASK;
+            stl_kernel(ptr + 4, e2);
+        }
+
+        cpu_x86_load_seg_cache(env, seg_reg, selector,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+#if 0
+        fprintf(logfile, "load_seg: sel=0x%04x base=0x%08lx limit=0x%08lx flags=%08x\n",
+                selector, (unsigned long)sc->base, sc->limit, sc->flags);
+#endif
+    }
+}
+
+/* protected mode jump */
+void helper_ljmp_protected_T0_T1(int next_eip_addend)
+{
+    int new_cs, gate_cs, type;
+    uint32_t e1, e2, cpl, dpl, rpl, limit;
+    target_ulong new_eip, next_eip;
+
+    new_cs = T0;
+    new_eip = T1;
+    if ((new_cs & 0xfffc) == 0)
+        raise_exception_err(EXCP0D_GPF, 0);
+    if (load_segment(&e1, &e2, new_cs) != 0)
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_S_MASK) {
+        if (!(e2 & DESC_CS_MASK))
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (e2 & DESC_C_MASK) {
+            /* conforming code segment */
+            if (dpl > cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        } else {
+            /* non conforming code segment */
+            rpl = new_cs & 3;
+            if (rpl > cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            if (dpl != cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        }
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+        limit = get_seg_limit(e1, e2);
+        if (new_eip > limit &&
+            !(env->hflags & HF_LMA_MASK) && !(e2 & DESC_L_MASK))
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+                       get_seg_base(e1, e2), limit, e2);
+        EIP = new_eip;
+    } else {
+        /* jump to call or task gate */
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        rpl = new_cs & 3;
+        cpl = env->hflags & HF_CPL_MASK;
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        switch(type) {
+        case 1: /* 286 TSS */
+        case 9: /* 386 TSS */
+        case 5: /* task gate */
+            if (dpl < cpl || dpl < rpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            next_eip = env->eip + next_eip_addend;
+            switch_tss(new_cs, e1, e2, SWITCH_TSS_JMP, next_eip);
+            CC_OP = CC_OP_EFLAGS;
+            break;
+        case 4: /* 286 call gate */
+        case 12: /* 386 call gate */
+            if ((dpl < cpl) || (dpl < rpl))
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            if (!(e2 & DESC_P_MASK))
+                raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+            gate_cs = e1 >> 16;
+            new_eip = (e1 & 0xffff);
+            if (type == 12)
+                new_eip |= (e2 & 0xffff0000);
+            if (load_segment(&e1, &e2, gate_cs) != 0)
+                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+            dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+            /* must be code segment */
+            if (((e2 & (DESC_S_MASK | DESC_CS_MASK)) !=
+                 (DESC_S_MASK | DESC_CS_MASK)))
+                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+            if (((e2 & DESC_C_MASK) && (dpl > cpl)) ||
+                (!(e2 & DESC_C_MASK) && (dpl != cpl)))
+                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+            if (!(e2 & DESC_P_MASK))
+#ifdef VBOX /* See page 3-514 of 253666.pdf */
+                raise_exception_err(EXCP0B_NOSEG, gate_cs & 0xfffc);
+#else
+                raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+#endif
+            limit = get_seg_limit(e1, e2);
+            if (new_eip > limit)
+                raise_exception_err(EXCP0D_GPF, 0);
+            cpu_x86_load_seg_cache(env, R_CS, (gate_cs & 0xfffc) | cpl,
+                                   get_seg_base(e1, e2), limit, e2);
+            EIP = new_eip;
+            break;
+        default:
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            break;
+        }
+    }
+}
+
+/* real mode call */
+void helper_lcall_real_T0_T1(int shift, int next_eip)
+{
+    int new_cs, new_eip;
+    uint32_t esp, esp_mask;
+    target_ulong ssp;
+
+    new_cs = T0;
+    new_eip = T1;
+    esp = ESP;
+    esp_mask = get_sp_mask(env->segs[R_SS].flags);
+    ssp = env->segs[R_SS].base;
+    if (shift) {
+        PUSHL(ssp, esp, esp_mask, env->segs[R_CS].selector);
+        PUSHL(ssp, esp, esp_mask, next_eip);
+    } else {
+        PUSHW(ssp, esp, esp_mask, env->segs[R_CS].selector);
+        PUSHW(ssp, esp, esp_mask, next_eip);
+    }
+
+    SET_ESP(esp, esp_mask);
+    env->eip = new_eip;
+    env->segs[R_CS].selector = new_cs;
+    env->segs[R_CS].base = (new_cs << 4);
+}
+
+/* protected mode call */
+void helper_lcall_protected_T0_T1(int shift, int next_eip_addend)
+{
+    int new_cs, new_stack, i;
+    uint32_t e1, e2, cpl, dpl, rpl, selector, offset, param_count;
+    uint32_t ss, ss_e1, ss_e2, sp, type, ss_dpl, sp_mask;
+    uint32_t val, limit, old_sp_mask;
+    target_ulong ssp, old_ssp, next_eip, new_eip;
+
+    new_cs = T0;
+    new_eip = T1;
+    next_eip = env->eip + next_eip_addend;
+#ifdef DEBUG_PCALL
+    if (loglevel & CPU_LOG_PCALL) {
+        fprintf(logfile, "lcall %04x:%08x s=%d\n",
+                new_cs, (uint32_t)new_eip, shift);
+        cpu_dump_state(env, logfile, fprintf, X86_DUMP_CCOP);
+    }
+#endif
+    if ((new_cs & 0xfffc) == 0)
+        raise_exception_err(EXCP0D_GPF, 0);
+    if (load_segment(&e1, &e2, new_cs) != 0)
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    cpl = env->hflags & HF_CPL_MASK;
+#ifdef DEBUG_PCALL
+    if (loglevel & CPU_LOG_PCALL) {
+        fprintf(logfile, "desc=%08x:%08x\n", e1, e2);
+    }
+#endif
+    if (e2 & DESC_S_MASK) {
+        if (!(e2 & DESC_CS_MASK))
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (e2 & DESC_C_MASK) {
+            /* conforming code segment */
+            if (dpl > cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        } else {
+            /* non conforming code segment */
+            rpl = new_cs & 3;
+            if (rpl > cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            if (dpl != cpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        }
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+
+#ifdef TARGET_X86_64
+        /* XXX: check 16/32 bit cases in long mode */
+        if (shift == 2) {
+            target_ulong rsp;
+            /* 64 bit case */
+            rsp = ESP;
+            PUSHQ(rsp, env->segs[R_CS].selector);
+            PUSHQ(rsp, next_eip);
+            /* from this point, not restartable */
+            ESP = rsp;
+            cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+                                   get_seg_base(e1, e2),
+                                   get_seg_limit(e1, e2), e2);
+            EIP = new_eip;
+        } else
+#endif
+        {
+            sp = ESP;
+            sp_mask = get_sp_mask(env->segs[R_SS].flags);
+            ssp = env->segs[R_SS].base;
+            if (shift) {
+                PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+                PUSHL(ssp, sp, sp_mask, next_eip);
+            } else {
+                PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
+                PUSHW(ssp, sp, sp_mask, next_eip);
+            }
+
+            limit = get_seg_limit(e1, e2);
+            if (new_eip > limit)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            /* from this point, not restartable */
+            SET_ESP(sp, sp_mask);
+            cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+                                   get_seg_base(e1, e2), limit, e2);
+            EIP = new_eip;
+        }
+    } else {
+        /* check gate type */
+        type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        rpl = new_cs & 3;
+        switch(type) {
+        case 1: /* available 286 TSS */
+        case 9: /* available 386 TSS */
+        case 5: /* task gate */
+            if (dpl < cpl || dpl < rpl)
+                raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            switch_tss(new_cs, e1, e2, SWITCH_TSS_CALL, next_eip);
+            CC_OP = CC_OP_EFLAGS;
+            return;
+        case 4: /* 286 call gate */
+        case 12: /* 386 call gate */
+            break;
+        default:
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+            break;
+        }
+        shift = type >> 3;
+
+        if (dpl < cpl || dpl < rpl)
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        /* check valid bit */
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG,  new_cs & 0xfffc);
+        selector = e1 >> 16;
+        offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+        param_count = e2 & 0x1f;
+        if ((selector & 0xfffc) == 0)
+            raise_exception_err(EXCP0D_GPF, 0);
+
+        if (load_segment(&e1, &e2, selector) != 0)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+        if (dpl > cpl)
+            raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+
+        if (!(e2 & DESC_C_MASK) && dpl < cpl) {
+            /* to inner privilege */
+            get_ss_esp_from_tss(&ss, &sp, dpl);
+#ifdef DEBUG_PCALL
+            if (loglevel & CPU_LOG_PCALL)
+                fprintf(logfile, "new ss:esp=%04x:%08x param_count=%d ESP=" TARGET_FMT_lx "\n",
+                        ss, sp, param_count, ESP);
+#endif
+            if ((ss & 0xfffc) == 0)
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+            if ((ss & 3) != dpl)
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+            if (load_segment(&ss_e1, &ss_e2, ss) != 0)
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+            ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+            if (ss_dpl != dpl)
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+            if (!(ss_e2 & DESC_S_MASK) ||
+                (ss_e2 & DESC_CS_MASK) ||
+                !(ss_e2 & DESC_W_MASK))
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+            if (!(ss_e2 & DESC_P_MASK))
+#ifdef VBOX /* See page 3-99 of 253666.pdf */
+                raise_exception_err(EXCP0C_STACK, ss & 0xfffc);
+#else
+                raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+#endif
+
+            //            push_size = ((param_count * 2) + 8) << shift;
+
+            old_sp_mask = get_sp_mask(env->segs[R_SS].flags);
+            old_ssp = env->segs[R_SS].base;
+
+            sp_mask = get_sp_mask(ss_e2);
+            ssp = get_seg_base(ss_e1, ss_e2);
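+            /* a call gate's parameter count (gate bits 4:0) gives the number
+               of entries copied from the caller's stack to the new stack,
+               each 4 bytes for a 386 gate and 2 bytes for a 286 gate */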
+            if (shift) {
+                PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
+                PUSHL(ssp, sp, sp_mask, ESP);
+                for(i = param_count - 1; i >= 0; i--) {
+                    val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
+                    PUSHL(ssp, sp, sp_mask, val);
+                }
+            } else {
+                PUSHW(ssp, sp, sp_mask, env->segs[R_SS].selector);
+                PUSHW(ssp, sp, sp_mask, ESP);
+                for(i = param_count - 1; i >= 0; i--) {
+                    val = lduw_kernel(old_ssp + ((ESP + i * 2) & old_sp_mask));
+                    PUSHW(ssp, sp, sp_mask, val);
+                }
+            }
+            new_stack = 1;
+        } else {
+            /* to same privilege */
+            sp = ESP;
+            sp_mask = get_sp_mask(env->segs[R_SS].flags);
+            ssp = env->segs[R_SS].base;
+            //            push_size = (4 << shift);
+            new_stack = 0;
+        }
+
+        if (shift) {
+            PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+            PUSHL(ssp, sp, sp_mask, next_eip);
+        } else {
+            PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
+            PUSHW(ssp, sp, sp_mask, next_eip);
+        }
+
+        /* from this point, not restartable */
+
+        if (new_stack) {
+            ss = (ss & ~3) | dpl;
+            cpu_x86_load_seg_cache(env, R_SS, ss,
+                                   ssp,
+                                   get_seg_limit(ss_e1, ss_e2),
+                                   ss_e2);
+        }
+
+        selector = (selector & ~3) | dpl;
+        cpu_x86_load_seg_cache(env, R_CS, selector,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+        cpu_x86_set_cpl(env, dpl);
+        SET_ESP(sp, sp_mask);
+        EIP = offset;
+    }
+#ifdef USE_KQEMU
+    if (kqemu_is_ok(env)) {
+        env->exception_index = -1;
+        cpu_loop_exit();
+    }
+#endif
+}
+
+/* real and vm86 mode iret */
+void helper_iret_real(int shift)
+{
+    uint32_t sp, new_cs, new_eip, new_eflags, sp_mask;
+    target_ulong ssp;
+    int eflags_mask;
+#ifdef VBOX
+    bool fVME = false;
+
+    remR3TrapClear(env->pVM);
+#endif /* VBOX */
+
+    sp_mask = 0xffff; /* XXXX: use SS segment size ? */
+    sp = ESP;
+    ssp = env->segs[R_SS].base;
+    if (shift == 1) {
+        /* 32 bits */
+        POPL(ssp, sp, sp_mask, new_eip);
+        POPL(ssp, sp, sp_mask, new_cs);
+        new_cs &= 0xffff;
+        POPL(ssp, sp, sp_mask, new_eflags);
+    } else {
+        /* 16 bits */
+        POPW(ssp, sp, sp_mask, new_eip);
+        POPW(ssp, sp, sp_mask, new_cs);
+        POPW(ssp, sp, sp_mask, new_eflags);
+    }
+#ifdef VBOX
+    if (    (env->eflags & VM_MASK)
+        &&  ((env->eflags >> IOPL_SHIFT) & 3) != 3
+        &&  (env->cr[4] & CR4_VME_MASK)) /* implied or else we would fault earlier */
+    {
+        fVME = true;
+        /* if virtual interrupt pending and (virtual) interrupts will be enabled -> #GP */
+        /* if TF will be set -> #GP */
+        if (    ((new_eflags & IF_MASK) && (env->eflags & VIP_MASK))
+            ||  (new_eflags & TF_MASK))
+            raise_exception(EXCP0D_GPF);
+    }
+#endif /* VBOX */
+
+    ESP = (ESP & ~sp_mask) | (sp & sp_mask);
+    load_seg_vm(R_CS, new_cs);
+    env->eip = new_eip;
+#ifdef VBOX
+    if (fVME)
+        eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK;
+    else
+#endif
+    if (env->eflags & VM_MASK)
+        eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | RF_MASK | NT_MASK;
+    else
+        eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | RF_MASK | NT_MASK;
+    if (shift == 0)
+        eflags_mask &= 0xffff;
+    load_eflags(new_eflags, eflags_mask);
+
+#ifdef VBOX
+    if (fVME)
+    {
+        if (new_eflags & IF_MASK)
+            env->eflags |= VIF_MASK;
+        else
+            env->eflags &= ~VIF_MASK;
+    }
+#endif /* VBOX */
+}
+
+static inline void validate_seg(int seg_reg, int cpl)
+{
+    int dpl;
+    uint32_t e2;
+
+    /* XXX: on x86_64, we do not want to nullify FS and GS because
+       they may still contain a valid base. I would be interested to
+       know how a real x86_64 CPU behaves */
+    if ((seg_reg == R_FS || seg_reg == R_GS) &&
+        (env->segs[seg_reg].selector & 0xfffc) == 0)
+        return;
+
+    e2 = env->segs[seg_reg].flags;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
+        /* data or non conforming code segment */
+        if (dpl < cpl) {
+            cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0);
+        }
+    }
+}
+
+/* protected mode iret */
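+/* Common implementation of IRET and far RET in protected mode: pops CS:EIP
+   (plus EFLAGS when is_iret), validates the new code segment descriptor, and,
+   when returning to an outer privilege level, also pops and validates SS:ESP
+   and re-validates the data segment registers. */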
+static inline void helper_ret_protected(int shift, int is_iret, int addend)
+{
+    uint32_t new_cs, new_eflags, new_ss;
+    uint32_t new_es, new_ds, new_fs, new_gs;
+    uint32_t e1, e2, ss_e1, ss_e2;
+    int cpl, dpl, rpl, eflags_mask, iopl;
+    target_ulong ssp, sp, new_eip, new_esp, sp_mask;
+
+#ifdef TARGET_X86_64
+    if (shift == 2)
+        sp_mask = -1;
+    else
+#endif
+        sp_mask = get_sp_mask(env->segs[R_SS].flags);
+    sp = ESP;
+    ssp = env->segs[R_SS].base;
+    new_eflags = 0; /* avoid warning */
+#ifdef TARGET_X86_64
+    if (shift == 2) {
+        POPQ(sp, new_eip);
+        POPQ(sp, new_cs);
+        new_cs &= 0xffff;
+        if (is_iret) {
+            POPQ(sp, new_eflags);
+        }
+    } else
+#endif
+    if (shift == 1) {
+        /* 32 bits */
+        POPL(ssp, sp, sp_mask, new_eip);
+        POPL(ssp, sp, sp_mask, new_cs);
+        new_cs &= 0xffff;
+        if (is_iret) {
+            POPL(ssp, sp, sp_mask, new_eflags);
+#if defined(VBOX) && defined(DEBUG)
+            printf("iret: new CS     %04X\n", new_cs);
+            printf("iret: new EIP    %08X\n", (uint32_t)new_eip);
+            printf("iret: new EFLAGS %08X\n", new_eflags);
+            printf("iret: EAX=%08x\n", (uint32_t)EAX);
+#endif
+
+            if (new_eflags & VM_MASK)
+                goto return_to_vm86;
+        }
+#ifdef VBOX
+        if ((new_cs & 0x3) == 1 && (env->state & CPU_RAW_RING0))
+        {
+#ifdef DEBUG
+            printf("RPL 1 -> new_cs %04X -> %04X\n", new_cs, new_cs & 0xfffc);
+#endif
+            new_cs = new_cs & 0xfffc;
+        }
+#endif
+    } else {
+        /* 16 bits */
+        POPW(ssp, sp, sp_mask, new_eip);
+        POPW(ssp, sp, sp_mask, new_cs);
+        if (is_iret)
+            POPW(ssp, sp, sp_mask, new_eflags);
+    }
+#ifdef DEBUG_PCALL
+    if (loglevel & CPU_LOG_PCALL) {
+        fprintf(logfile, "lret new %04x:" TARGET_FMT_lx " s=%d addend=0x%x\n",
+                new_cs, new_eip, shift, addend);
+        cpu_dump_state(env, logfile, fprintf, X86_DUMP_CCOP);
+    }
+#endif
+    if ((new_cs & 0xfffc) == 0)
+    {
+#if defined(VBOX) && defined(DEBUG)
+        printf("(new_cs & 0xfffc) == 0\n");
+#endif
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    }
+    if (load_segment(&e1, &e2, new_cs) != 0)
+    {
+#if defined(VBOX) && defined(DEBUG)
+        printf("load_segment failed\n");
+#endif
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    }
+    if (!(e2 & DESC_S_MASK) ||
+        !(e2 & DESC_CS_MASK))
+    {
+#if defined(VBOX) && defined(DEBUG)
+        printf("e2 mask %08x\n", e2);
+#endif
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    }
+    cpl = env->hflags & HF_CPL_MASK;
+    rpl = new_cs & 3;
+    if (rpl < cpl)
+    {
+#if defined(VBOX) && defined(DEBUG)
+        printf("rpl < cpl (%d vs %d)\n", rpl, cpl);
+#endif
+        raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+    }
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    if (e2 & DESC_C_MASK) {
+        if (dpl > rpl)
+        {
+#if defined(VBOX) && defined(DEBUG)
+            printf("dpl > rpl (%d vs %d)\n", dpl, rpl);
+#endif
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        }
+    } else {
+        if (dpl != rpl)
+        {
+#if defined(VBOX) && defined(DEBUG)
+            printf("dpl != rpl (%d vs %d) e1=%x e2=%x\n", dpl, rpl, e1, e2);
+#endif
+            raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+        }
+    }
+    if (!(e2 & DESC_P_MASK))
+    {
+#if defined(VBOX) && defined(DEBUG)
+        printf("DESC_P_MASK e2=%08x\n", e2);
+#endif
+        raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+    }
+    sp += addend;
+    if (rpl == cpl && (!(env->hflags & HF_CS64_MASK) ||
+                       ((env->hflags & HF_CS64_MASK) && !is_iret))) {
+        /* return to same privilege level */
+        cpu_x86_load_seg_cache(env, R_CS, new_cs,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+    } else {
+        /* return to different privilege level */
+#ifdef TARGET_X86_64
+        if (shift == 2) {
+            POPQ(sp, new_esp);
+            POPQ(sp, new_ss);
+            new_ss &= 0xffff;
+        } else
+#endif
+        if (shift == 1) {
+            /* 32 bits */
+            POPL(ssp, sp, sp_mask, new_esp);
+            POPL(ssp, sp, sp_mask, new_ss);
+            new_ss &= 0xffff;
+        } else {
+            /* 16 bits */
+            POPW(ssp, sp, sp_mask, new_esp);
+            POPW(ssp, sp, sp_mask, new_ss);
+        }
+#ifdef DEBUG_PCALL
+        if (loglevel & CPU_LOG_PCALL) {
+            fprintf(logfile, "new ss:esp=%04x:" TARGET_FMT_lx "\n",
+                    new_ss, new_esp);
+        }
+#endif
+        if ((new_ss & 0xfffc) == 0) {
+#ifdef TARGET_X86_64
+            /* NULL ss is allowed in long mode if cpl != 3 */
+            /* XXX: test CS64 ? */
+            if ((env->hflags & HF_LMA_MASK) && rpl != 3) {
+                cpu_x86_load_seg_cache(env, R_SS, new_ss,
+                                       0, 0xffffffff,
+                                       DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                                       DESC_S_MASK | (rpl << DESC_DPL_SHIFT) |
+                                       DESC_W_MASK | DESC_A_MASK);
+                ss_e2 = DESC_B_MASK; /* XXX: should not be needed ? */
+            } else
+#endif
+            {
+                raise_exception_err(EXCP0D_GPF, 0);
+            }
+        } else {
+            if ((new_ss & 3) != rpl)
+                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+            if (load_segment(&ss_e1, &ss_e2, new_ss) != 0)
+                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+            if (!(ss_e2 & DESC_S_MASK) ||
+                (ss_e2 & DESC_CS_MASK) ||
+                !(ss_e2 & DESC_W_MASK))
+                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+            dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+            if (dpl != rpl)
+                raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+            if (!(ss_e2 & DESC_P_MASK))
+                raise_exception_err(EXCP0B_NOSEG, new_ss & 0xfffc);
+            cpu_x86_load_seg_cache(env, R_SS, new_ss,
+                                   get_seg_base(ss_e1, ss_e2),
+                                   get_seg_limit(ss_e1, ss_e2),
+                                   ss_e2);
+        }
+
+        cpu_x86_load_seg_cache(env, R_CS, new_cs,
+                       get_seg_base(e1, e2),
+                       get_seg_limit(e1, e2),
+                       e2);
+        cpu_x86_set_cpl(env, rpl);
+        sp = new_esp;
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_CS64_MASK)
+            sp_mask = -1;
+        else
+#endif
+            sp_mask = get_sp_mask(ss_e2);
+
+        /* validate data segments */
+        validate_seg(R_ES, rpl);
+        validate_seg(R_DS, rpl);
+        validate_seg(R_FS, rpl);
+        validate_seg(R_GS, rpl);
+
+        sp += addend;
+    }
+    SET_ESP(sp, sp_mask);
+    env->eip = new_eip;
+    if (is_iret) {
+        /* NOTE: 'cpl' is the _old_ CPL */
+        eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK;
+        if (cpl == 0)
+#ifdef VBOX
+            eflags_mask |= IOPL_MASK | VIF_MASK | VIP_MASK;
+#else
+            eflags_mask |= IOPL_MASK;
+#endif
+        iopl = (env->eflags >> IOPL_SHIFT) & 3;
+        if (cpl <= iopl)
+            eflags_mask |= IF_MASK;
+        if (shift == 0)
+            eflags_mask &= 0xffff;
+        load_eflags(new_eflags, eflags_mask);
+    }
+    return;
+
+ return_to_vm86:
+
+#if 0 // defined(VBOX) && defined(DEBUG)
+    printf("V86: new CS     %04X\n", new_cs);
+    printf("V86: Descriptor %08X:%08X\n", e2, e1);
+    printf("V86: new EIP    %08X\n", new_eip);
+    printf("V86: new EFLAGS %08X\n", new_eflags);
+#endif
+
+    POPL(ssp, sp, sp_mask, new_esp);
+    POPL(ssp, sp, sp_mask, new_ss);
+    POPL(ssp, sp, sp_mask, new_es);
+    POPL(ssp, sp, sp_mask, new_ds);
+    POPL(ssp, sp, sp_mask, new_fs);
+    POPL(ssp, sp, sp_mask, new_gs);
+
+    /* modify processor state */
+    load_eflags(new_eflags, TF_MASK | AC_MASK | ID_MASK |
+                IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK | VIP_MASK);
+    load_seg_vm(R_CS, new_cs & 0xffff);
+    cpu_x86_set_cpl(env, 3);
+    load_seg_vm(R_SS, new_ss & 0xffff);
+    load_seg_vm(R_ES, new_es & 0xffff);
+    load_seg_vm(R_DS, new_ds & 0xffff);
+    load_seg_vm(R_FS, new_fs & 0xffff);
+    load_seg_vm(R_GS, new_gs & 0xffff);
+
+    env->eip = new_eip & 0xffff;
+    ESP = new_esp;
+}
+
+void helper_iret_protected(int shift, int next_eip)
+{
+    int tss_selector, type;
+    uint32_t e1, e2;
+
+#ifdef VBOX
+    remR3TrapClear(env->pVM);
+#endif
+
+    /* specific case for TSS */
+    if (env->eflags & NT_MASK) {
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK)
+            raise_exception_err(EXCP0D_GPF, 0);
+#endif
+        tss_selector = lduw_kernel(env->tr.base + 0);
+        if (tss_selector & 4)
+            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+        if (load_segment(&e1, &e2, tss_selector) != 0)
+            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0x17;
+        /* NOTE: we check both segment and busy TSS */
+        if (type != 3)
+            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+        switch_tss(tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip);
+    } else {
+        helper_ret_protected(shift, 1, 0);
+    }
+#ifdef USE_KQEMU
+    if (kqemu_is_ok(env)) {
+        CC_OP = CC_OP_EFLAGS;
+        env->exception_index = -1;
+        cpu_loop_exit();
+    }
+#endif
+}
+
+void helper_lret_protected(int shift, int addend)
+{
+    helper_ret_protected(shift, 0, addend);
+#ifdef USE_KQEMU
+    if (kqemu_is_ok(env)) {
+        env->exception_index = -1;
+        cpu_loop_exit();
+    }
+#endif
+}
+
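+/* SYSENTER loads flat ring-0 CS/SS from IA32_SYSENTER_CS (SS = CS + 8), clears
+   VM, IF and RF, and jumps to IA32_SYSENTER_EIP with IA32_SYSENTER_ESP as the
+   new stack pointer. */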
+void helper_sysenter(void)
+{
+    if (env->sysenter_cs == 0) {
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+    env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
+    cpu_x86_set_cpl(env, 0);
+    cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+                           0, 0xffffffff,
+                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                           DESC_S_MASK |
+                           DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
+                           0, 0xffffffff,
+                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                           DESC_S_MASK |
+                           DESC_W_MASK | DESC_A_MASK);
+    ESP = env->sysenter_esp;
+    EIP = env->sysenter_eip;
+}
+
+void helper_sysexit(void)
+{
+    int cpl;
+
+    cpl = env->hflags & HF_CPL_MASK;
+    if (env->sysenter_cs == 0 || cpl != 0) {
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+    cpu_x86_set_cpl(env, 3);
+    cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 16) & 0xfffc) | 3,
+                           0, 0xffffffff,
+                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                           DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                           DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 24) & 0xfffc) | 3,
+                           0, 0xffffffff,
+                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                           DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+                           DESC_W_MASK | DESC_A_MASK);
+    ESP = ECX;
+    EIP = EDX;
+#ifdef USE_KQEMU
+    if (kqemu_is_ok(env)) {
+        env->exception_index = -1;
+        cpu_loop_exit();
+    }
+#endif
+}
+
+void helper_movl_crN_T0(int reg)
+{
+#if !defined(CONFIG_USER_ONLY)
+    switch(reg) {
+    case 0:
+        cpu_x86_update_cr0(env, T0);
+        break;
+    case 3:
+        cpu_x86_update_cr3(env, T0);
+        break;
+    case 4:
+        cpu_x86_update_cr4(env, T0);
+        break;
+    case 8:
+        cpu_set_apic_tpr(env, T0);
+        break;
+    default:
+        env->cr[reg] = T0;
+        break;
+    }
+#endif
+}
+
+/* XXX: do more */
+void helper_movl_drN_T0(int reg)
+{
+    env->dr[reg] = T0;
+}
+
+void helper_invlpg(target_ulong addr)
+{
+    cpu_x86_flush_tlb(env, addr);
+}
+
+void helper_rdtsc(void)
+{
+    uint64_t val;
+
+    if ((env->cr[4] & CR4_TSD_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
+        raise_exception(EXCP0D_GPF);
+    }
+    val = cpu_get_tsc(env);
+    EAX = (uint32_t)(val);
+    EDX = (uint32_t)(val >> 32);
+}
+
+#if defined(CONFIG_USER_ONLY)
+void helper_wrmsr(void)
+{
+}
+
+void helper_rdmsr(void)
+{
+}
+#else
+void helper_wrmsr(void)
+{
+    uint64_t val;
+
+    val = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+
+    switch((uint32_t)ECX) {
+    case MSR_IA32_SYSENTER_CS:
+        env->sysenter_cs = val & 0xffff;
+        break;
+    case MSR_IA32_SYSENTER_ESP:
+        env->sysenter_esp = val;
+        break;
+    case MSR_IA32_SYSENTER_EIP:
+        env->sysenter_eip = val;
+        break;
+    case MSR_IA32_APICBASE:
+        cpu_set_apic_base(env, val);
+        break;
+    case MSR_EFER:
+        {
+            uint64_t update_mask;
+            update_mask = 0;
+            if (env->cpuid_ext2_features & CPUID_EXT2_SYSCALL)
+                update_mask |= MSR_EFER_SCE;
+            if (env->cpuid_ext2_features & CPUID_EXT2_LM)
+                update_mask |= MSR_EFER_LME;
+            if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR)
+                update_mask |= MSR_EFER_FFXSR;
+            if (env->cpuid_ext2_features & CPUID_EXT2_NX)
+                update_mask |= MSR_EFER_NXE;
+            env->efer = (env->efer & ~update_mask) |
+            (val & update_mask);
+        }
+        break;
+    case MSR_STAR:
+        env->star = val;
+        break;
+    case MSR_PAT:
+        env->pat = val;
+        break;
+#ifdef TARGET_X86_64
+    case MSR_LSTAR:
+        env->lstar = val;
+        break;
+    case MSR_CSTAR:
+        env->cstar = val;
+        break;
+    case MSR_FMASK:
+        env->fmask = val;
+        break;
+    case MSR_FSBASE:
+        env->segs[R_FS].base = val;
+        break;
+    case MSR_GSBASE:
+        env->segs[R_GS].base = val;
+        break;
+    case MSR_KERNELGSBASE:
+        env->kernelgsbase = val;
+        break;
+#endif
+    default:
+#ifndef VBOX
+        /* XXX: exception ? */
+        break;
+#else  /* VBOX */
+    {
+        uint32_t ecx = (uint32_t)ECX;
+        /* In X2APIC specification this range is reserved for APIC control. */
+        if (ecx >= MSR_APIC_RANGE_START && ecx < MSR_APIC_RANGE_END)
+            cpu_apic_wrmsr(env, ecx, val);
+        /** @todo else exception? */
+        break;
+    }
+#endif /* VBOX */
+    }
+}
+
+void helper_rdmsr(void)
+{
+    uint64_t val;
+    switch((uint32_t)ECX) {
+    case MSR_IA32_SYSENTER_CS:
+        val = env->sysenter_cs;
+        break;
+    case MSR_IA32_SYSENTER_ESP:
+        val = env->sysenter_esp;
+        break;
+    case MSR_IA32_SYSENTER_EIP:
+        val = env->sysenter_eip;
+        break;
+    case MSR_IA32_APICBASE:
+        val = cpu_get_apic_base(env);
+        break;
+    case MSR_EFER:
+        val = env->efer;
+        break;
+    case MSR_STAR:
+        val = env->star;
+        break;
+    case MSR_PAT:
+        val = env->pat;
+        break;
+#ifdef TARGET_X86_64
+    case MSR_LSTAR:
+        val = env->lstar;
+        break;
+    case MSR_CSTAR:
+        val = env->cstar;
+        break;
+    case MSR_FMASK:
+        val = env->fmask;
+        break;
+    case MSR_FSBASE:
+        val = env->segs[R_FS].base;
+        break;
+    case MSR_GSBASE:
+        val = env->segs[R_GS].base;
+        break;
+    case MSR_KERNELGSBASE:
+        val = env->kernelgsbase;
+        break;
+#endif
+    default:
+#ifndef VBOX
+        /* XXX: exception ? */
+        val = 0;
+        break;
+#else  /* VBOX */
+    {
+        uint32_t ecx = (uint32_t)ECX;
+        /* In X2APIC specification this range is reserved for APIC control. */
+        if (ecx >= MSR_APIC_RANGE_START && ecx < MSR_APIC_RANGE_END)
+            val = cpu_apic_rdmsr(env, ecx);
+        else
+            val = 0; /** @todo else exception? */
+        break;
+    }
+#endif /* VBOX */
+    }
+    EAX = (uint32_t)(val);
+    EDX = (uint32_t)(val >> 32);
+}
+#endif
+
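+/* LSL: load the segment limit of the selector in T0 into T1 and set ZF on
+   success; only descriptors that actually have a limit (data/code segments,
+   LDT and TSS types) are accepted, and a failed privilege check clears ZF. */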
+void helper_lsl(void)
+{
+    unsigned int selector, limit;
+    uint32_t e1, e2, eflags;
+    int rpl, dpl, cpl, type;
+
+    eflags = cc_table[CC_OP].compute_all();
+    selector = T0 & 0xffff;
+    if (load_segment(&e1, &e2, selector) != 0)
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_S_MASK) {
+        if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) {
+            /* conforming */
+        } else {
+            if (dpl < cpl || dpl < rpl)
+                goto fail;
+        }
+    } else {
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        switch(type) {
+        case 1:
+        case 2:
+        case 3:
+        case 9:
+        case 11:
+            break;
+        default:
+            goto fail;
+        }
+        if (dpl < cpl || dpl < rpl) {
+        fail:
+            CC_SRC = eflags & ~CC_Z;
+            return;
+        }
+    }
+    limit = get_seg_limit(e1, e2);
+    T1 = limit;
+    CC_SRC = eflags | CC_Z;
+}
+
+void helper_lar(void)
+{
+    unsigned int selector;
+    uint32_t e1, e2, eflags;
+    int rpl, dpl, cpl, type;
+
+    eflags = cc_table[CC_OP].compute_all();
+    selector = T0 & 0xffff;
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0)
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_S_MASK) {
+        if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) {
+            /* conforming */
+        } else {
+            if (dpl < cpl || dpl < rpl)
+                goto fail;
+        }
+    } else {
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        switch(type) {
+        case 1:
+        case 2:
+        case 3:
+        case 4:
+        case 5:
+        case 9:
+        case 11:
+        case 12:
+            break;
+        default:
+            goto fail;
+        }
+        if (dpl < cpl || dpl < rpl) {
+        fail:
+            CC_SRC = eflags & ~CC_Z;
+            return;
+        }
+    }
+    T1 = e2 & 0x00f0ff00;
+    CC_SRC = eflags | CC_Z;
+}
+
+void helper_verr(void)
+{
+    unsigned int selector;
+    uint32_t e1, e2, eflags;
+    int rpl, dpl, cpl;
+
+    eflags = cc_table[CC_OP].compute_all();
+    selector = T0 & 0xffff;
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0)
+        goto fail;
+    if (!(e2 & DESC_S_MASK))
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_CS_MASK) {
+        if (!(e2 & DESC_R_MASK))
+            goto fail;
+        if (!(e2 & DESC_C_MASK)) {
+            if (dpl < cpl || dpl < rpl)
+                goto fail;
+        }
+    } else {
+        if (dpl < cpl || dpl < rpl) {
+        fail:
+            CC_SRC = eflags & ~CC_Z;
+            return;
+        }
+    }
+    CC_SRC = eflags | CC_Z;
+}
+
+void helper_verw(void)
+{
+    unsigned int selector;
+    uint32_t e1, e2, eflags;
+    int rpl, dpl, cpl;
+
+    eflags = cc_table[CC_OP].compute_all();
+    selector = T0 & 0xffff;
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0)
+        goto fail;
+    if (!(e2 & DESC_S_MASK))
+        goto fail;
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    if (e2 & DESC_CS_MASK) {
+        goto fail;
+    } else {
+        if (dpl < cpl || dpl < rpl)
+            goto fail;
+        if (!(e2 & DESC_W_MASK)) {
+        fail:
+            CC_SRC = eflags & ~CC_Z;
+            return;
+        }
+    }
+    CC_SRC = eflags | CC_Z;
+}
+
+/* FPU helpers */
+
+void helper_fldt_ST0_A0(void)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+    env->fpregs[new_fpstt].d = helper_fldt(A0);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fstt_ST0_A0(void)
+{
+    helper_fstt(ST0, A0);
+}
+
+void fpu_set_exception(int mask)
+{
+    env->fpus |= mask;
+    if (env->fpus & (~env->fpuc & FPUC_EM))
+        env->fpus |= FPUS_SE | FPUS_B;
+}
+
+CPU86_LDouble helper_fdiv(CPU86_LDouble a, CPU86_LDouble b)
+{
+    if (b == 0.0)
+        fpu_set_exception(FPUS_ZE);
+    return a / b;
+}
+
+void fpu_raise_exception(void)
+{
+    if (env->cr[0] & CR0_NE_MASK) {
+        raise_exception(EXCP10_COPR);
+    }
+#if !defined(CONFIG_USER_ONLY)
+    else {
+        cpu_set_ferr(env);
+    }
+#endif
+}
+
+/* BCD ops */
+
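+/* FBLD: load an 18-digit packed BCD integer; bytes 0..8 hold two decimal
+   digits each (low digit in the low nibble), bit 7 of byte 9 is the sign. */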
+void helper_fbld_ST0_A0(void)
+{
+    CPU86_LDouble tmp;
+    uint64_t val;
+    unsigned int v;
+    int i;
+
+    val = 0;
+    for(i = 8; i >= 0; i--) {
+        v = ldub(A0 + i);
+        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
+    }
+    tmp = val;
+    if (ldub(A0 + 9) & 0x80)
+        tmp = -tmp;
+    fpush();
+    ST0 = tmp;
+}
+
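+/* FBSTP: store ST0 as packed BCD, writing two decimal digits per byte, the
+   sign into bit 7 of the last (10th) byte, and zero-padding unused bytes. */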
+void helper_fbst_ST0_A0(void)
+{
+    int v;
+    target_ulong mem_ref, mem_end;
+    int64_t val;
+
+    val = floatx_to_int64(ST0, &env->fp_status);
+    mem_ref = A0;
+    mem_end = mem_ref + 9;
+    if (val < 0) {
+        stb(mem_end, 0x80);
+        val = -val;
+    } else {
+        stb(mem_end, 0x00);
+    }
+    while (mem_ref < mem_end) {
+        if (val == 0)
+            break;
+        v = val % 100;
+        val = val / 100;
+        v = ((v / 10) << 4) | (v % 10);
+        stb(mem_ref++, v);
+    }
+    while (mem_ref < mem_end) {
+        stb(mem_ref++, 0);
+    }
+}
+
+void helper_f2xm1(void)
+{
+    ST0 = pow(2.0,ST0) - 1.0;
+}
+
+void helper_fyl2x(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if (fptemp>0.0){
+        fptemp = log(fptemp)/log(2.0);	 /* log2(ST) */
+        ST1 *= fptemp;
+        fpop();
+    } else {
+        env->fpus &= (~0x4700);
+        env->fpus |= 0x400;
+    }
+}
+
+void helper_fptan(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = tan(fptemp);
+        fpush();
+        ST0 = 1.0;
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**52 only */
+    }
+}
+
+void helper_fpatan(void)
+{
+    CPU86_LDouble fptemp, fpsrcop;
+
+    fpsrcop = ST1;
+    fptemp = ST0;
+    ST1 = atan2(fpsrcop,fptemp);
+    fpop();
+}
+
+void helper_fxtract(void)
+{
+    CPU86_LDoubleU temp;
+    unsigned int expdif;
+
+    temp.d = ST0;
+    expdif = EXPD(temp) - EXPBIAS;
+    /*DP exponent bias*/
+    ST0 = expdif;
+    fpush();
+    BIASEXPONENT(temp);
+    ST0 = temp.d;
+}
+
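+/* FPREM1: partial remainder of ST0 modulo ST1; the low three quotient bits are
+   reported in C0/C1/C3, and C2 is set when the exponent difference is 53 or
+   more and the reduction is therefore only partial. */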
+void helper_fprem1(void)
+{
+    CPU86_LDouble dblq, fpsrcop, fptemp;
+    CPU86_LDoubleU fpsrcop1, fptemp1;
+    int expdif;
+    int q;
+
+    fpsrcop = ST0;
+    fptemp = ST1;
+    fpsrcop1.d = fpsrcop;
+    fptemp1.d = fptemp;
+    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+    if (expdif < 53) {
+        dblq = fpsrcop / fptemp;
+        dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
+        ST0 = fpsrcop - fptemp*dblq;
+        q = (int)dblq; /* cutting off top bits is assumed here */
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+				/* (C0,C1,C3) <-- (q2,q1,q0) */
+        env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */
+        env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */
+        env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */
+    } else {
+        env->fpus |= 0x400;  /* C2 <-- 1 */
+        fptemp = pow(2.0, expdif-50);
+        fpsrcop = (ST0 / ST1) / fptemp;
+        /* fpsrcop = integer obtained by rounding to the nearest */
+        fpsrcop = (fpsrcop-floor(fpsrcop) < ceil(fpsrcop)-fpsrcop)?
+            floor(fpsrcop): ceil(fpsrcop);
+        ST0 -= (ST1 * fpsrcop * fptemp);
+    }
+}
+
+void helper_fprem(void)
+{
+    CPU86_LDouble dblq, fpsrcop, fptemp;
+    CPU86_LDoubleU fpsrcop1, fptemp1;
+    int expdif;
+    int q;
+
+    fpsrcop = ST0;
+    fptemp = ST1;
+    fpsrcop1.d = fpsrcop;
+    fptemp1.d = fptemp;
+    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+    if ( expdif < 53 ) {
+        dblq = fpsrcop / fptemp;
+        dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
+        ST0 = fpsrcop - fptemp*dblq;
+        q = (int)dblq; /* cutting off top bits is assumed here */
+        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+				/* (C0,C1,C3) <-- (q2,q1,q0) */
+        env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */
+        env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */
+        env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */
+    } else {
+        env->fpus |= 0x400;  /* C2 <-- 1 */
+        fptemp = pow(2.0, expdif-50);
+        fpsrcop = (ST0 / ST1) / fptemp;
+        /* fpsrcop = integer obtained by chopping */
+        fpsrcop = (fpsrcop < 0.0)?
+            -(floor(fabs(fpsrcop))): floor(fpsrcop);
+        ST0 -= (ST1 * fpsrcop * fptemp);
+    }
+}
+
+void helper_fyl2xp1(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if ((fptemp+1.0)>0.0) {
+        fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
+        ST1 *= fptemp;
+        fpop();
+    } else {
+        env->fpus &= (~0x4700);
+        env->fpus |= 0x400;
+    }
+}
+
+void helper_fsqrt(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if (fptemp<0.0) {
+        env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
+        env->fpus |= 0x400;
+    }
+    ST0 = sqrt(fptemp);
+}
+
+void helper_fsincos(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = sin(fptemp);
+        fpush();
+        ST0 = cos(fptemp);
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**63 only */
+    }
+}
+
+void helper_frndint(void)
+{
+    ST0 = floatx_round_to_int(ST0, &env->fp_status);
+}
+
+void helper_fscale(void)
+{
+    ST0 = ldexp (ST0, (int)(ST1));
+}
+
+void helper_fsin(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = sin(fptemp);
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**53 only */
+    }
+}
+
+void helper_fcos(void)
+{
+    CPU86_LDouble fptemp;
+
+    fptemp = ST0;
+    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = cos(fptemp);
+        env->fpus &= (~0x400);  /* C2 <-- 0 */
+        /* the above code is for  |arg| < 2**63 only */
+    }
+}
+
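+/* FXAM: classify ST0 into the C3..C0 condition bits (NaN, infinity, zero,
+   denormal or normal) and report its sign in C1. */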
+void helper_fxam_ST0(void)
+{
+    CPU86_LDoubleU temp;
+    int expdif;
+
+    temp.d = ST0;
+
+    env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
+    if (SIGND(temp))
+        env->fpus |= 0x200; /* C1 <-- 1 */
+
+    /* XXX: test fptags too */
+    expdif = EXPD(temp);
+    if (expdif == MAXEXPD) {
+#ifdef USE_X86LDOUBLE
+        if (MANTD(temp) == 0x8000000000000000ULL)
+#else
+        if (MANTD(temp) == 0)
+#endif
+            env->fpus |=  0x500 /*Infinity*/;
+        else
+            env->fpus |=  0x100 /*NaN*/;
+    } else if (expdif == 0) {
+        if (MANTD(temp) == 0)
+            env->fpus |=  0x4000 /*Zero*/;
+        else
+            env->fpus |= 0x4400 /*Denormal*/;
+    } else {
+        env->fpus |= 0x400;
+    }
+}
+
+void helper_fstenv(target_ulong ptr, int data32)
+{
+    int fpus, fptag, exp, i;
+    uint64_t mant;
+    CPU86_LDoubleU tmp;
+
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+    fptag = 0;
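+    /* Build the x87 tag word: two bits per register,
+       00 = valid, 01 = zero, 10 = special (NaN/infinity/denormal), 11 = empty. */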
+    for (i=7; i>=0; i--) {
+        fptag <<= 2;
+        if (env->fptags[i]) {
+            fptag |= 3;
+        } else {
+            tmp.d = env->fpregs[i].d;
+            exp = EXPD(tmp);
+            mant = MANTD(tmp);
+            if (exp == 0 && mant == 0) {
+                /* zero */
+                fptag |= 1;
+            } else if (exp == 0 || exp == MAXEXPD
+#ifdef USE_X86LDOUBLE
+                       || (mant & (1LL << 63)) == 0
+#endif
+                       ) {
+                /* NaNs, infinity, denormal */
+                fptag |= 2;
+            }
+        }
+    }
+    if (data32) {
+        /* 32 bit */
+        stl(ptr, env->fpuc);
+        stl(ptr + 4, fpus);
+        stl(ptr + 8, fptag);
+        stl(ptr + 12, 0); /* fpip */
+        stl(ptr + 16, 0); /* fpcs */
+        stl(ptr + 20, 0); /* fpoo */
+        stl(ptr + 24, 0); /* fpos */
+    } else {
+        /* 16 bit */
+        stw(ptr, env->fpuc);
+        stw(ptr + 2, fpus);
+        stw(ptr + 4, fptag);
+        stw(ptr + 6, 0);
+        stw(ptr + 8, 0);
+        stw(ptr + 10, 0);
+        stw(ptr + 12, 0);
+    }
+}
+
+void helper_fldenv(target_ulong ptr, int data32)
+{
+    int i, fpus, fptag;
+
+    if (data32) {
+        env->fpuc = lduw(ptr);
+        fpus = lduw(ptr + 4);
+        fptag = lduw(ptr + 8);
+    }
+    else {
+        env->fpuc = lduw(ptr);
+        fpus = lduw(ptr + 2);
+        fptag = lduw(ptr + 4);
+    }
+    env->fpstt = (fpus >> 11) & 7;
+    env->fpus = fpus & ~0x3800;
+    for(i = 0;i < 8; i++) {
+        env->fptags[i] = ((fptag & 3) == 3);
+        fptag >>= 2;
+    }
+}
+
+void helper_fsave(target_ulong ptr, int data32)
+{
+    CPU86_LDouble tmp;
+    int i;
+
+    helper_fstenv(ptr, data32);
+
+    ptr += (14 << data32);
+    for(i = 0;i < 8; i++) {
+        tmp = ST(i);
+        helper_fstt(tmp, ptr);
+        ptr += 10;
+    }
+
+    /* fninit */
+    env->fpus = 0;
+    env->fpstt = 0;
+    env->fpuc = 0x37f;
+    env->fptags[0] = 1;
+    env->fptags[1] = 1;
+    env->fptags[2] = 1;
+    env->fptags[3] = 1;
+    env->fptags[4] = 1;
+    env->fptags[5] = 1;
+    env->fptags[6] = 1;
+    env->fptags[7] = 1;
+}
+
+void helper_frstor(target_ulong ptr, int data32)
+{
+    CPU86_LDouble tmp;
+    int i;
+
+    helper_fldenv(ptr, data32);
+    ptr += (14 << data32);
+
+    for(i = 0;i < 8; i++) {
+        tmp = helper_fldt(ptr);
+        ST(i) = tmp;
+        ptr += 10;
+    }
+}
+
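+/* FXSAVE: store FCW/FSW at offsets 0/2, the abridged tag (one bit per
+   register, set for non-empty) at offset 4, MXCSR at 0x18, the x87 registers
+   in 16-byte slots from 0x20 and the XMM registers from 0xa0. */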
+void helper_fxsave(target_ulong ptr, int data64)
+{
+    int fpus, fptag, i, nb_xmm_regs;
+    CPU86_LDouble tmp;
+    target_ulong addr;
+
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+    fptag = 0;
+    for(i = 0; i < 8; i++) {
+        fptag |= (env->fptags[i] << i);
+    }
+    stw(ptr, env->fpuc);
+    stw(ptr + 2, fpus);
+    stw(ptr + 4, fptag ^ 0xff);
+
+    addr = ptr + 0x20;
+    for(i = 0;i < 8; i++) {
+        tmp = ST(i);
+        helper_fstt(tmp, addr);
+        addr += 16;
+    }
+
+    if (env->cr[4] & CR4_OSFXSR_MASK) {
+        /* XXX: finish it */
+        stl(ptr + 0x18, env->mxcsr); /* mxcsr */
+        stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
+        nb_xmm_regs = 8 << data64;
+        addr = ptr + 0xa0;
+        for(i = 0; i < nb_xmm_regs; i++) {
+            stq(addr, env->xmm_regs[i].XMM_Q(0));
+            stq(addr + 8, env->xmm_regs[i].XMM_Q(1));
+            addr += 16;
+        }
+    }
+}
+
+void helper_fxrstor(target_ulong ptr, int data64)
+{
+    int i, fpus, fptag, nb_xmm_regs;
+    CPU86_LDouble tmp;
+    target_ulong addr;
+
+    env->fpuc = lduw(ptr);
+    fpus = lduw(ptr + 2);
+    fptag = lduw(ptr + 4);
+    env->fpstt = (fpus >> 11) & 7;
+    env->fpus = fpus & ~0x3800;
+    fptag ^= 0xff;
+    for(i = 0;i < 8; i++) {
+        env->fptags[i] = ((fptag >> i) & 1);
+    }
+
+    addr = ptr + 0x20;
+    for(i = 0;i < 8; i++) {
+        tmp = helper_fldt(addr);
+        ST(i) = tmp;
+        addr += 16;
+    }
+
+    if (env->cr[4] & CR4_OSFXSR_MASK) {
+        /* XXX: finish it */
+        env->mxcsr = ldl(ptr + 0x18);
+        //ldl(ptr + 0x1c);
+        nb_xmm_regs = 8 << data64;
+        addr = ptr + 0xa0;
+        for(i = 0; i < nb_xmm_regs; i++) {
+#if !defined(VBOX) || __GNUC__ < 4
+            env->xmm_regs[i].XMM_Q(0) = ldq(addr);
+            env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+#else /* VBOX + __GNUC__ >= 4: gcc 4.x compiler bug - it runs out of registers for the 64-bit value. */
+# if 1
+            env->xmm_regs[i].XMM_L(0) = ldl(addr);
+            env->xmm_regs[i].XMM_L(1) = ldl(addr + 4);
+            env->xmm_regs[i].XMM_L(2) = ldl(addr + 8);
+            env->xmm_regs[i].XMM_L(3) = ldl(addr + 12);
+# else
+            /* this works fine on Mac OS X, gcc 4.0.1 */
+            uint64_t u64 = ldq(addr);
+            env->xmm_regs[i].XMM_Q(0) = u64;
+            u64 = ldq(addr + 8);
+            env->xmm_regs[i].XMM_Q(1) = u64;
+# endif
+#endif
+            addr += 16;
+        }
+    }
+}
+
+#ifndef USE_X86LDOUBLE
+
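+/* Without host long double support, convert between the 64-bit double used
+   internally (52-bit mantissa, bias 1023) and the 80-bit x87 format (explicit
+   integer bit, 15-bit exponent, bias 16383). */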
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f)
+{
+    CPU86_LDoubleU temp;
+    int e;
+
+    temp.d = f;
+    /* mantissa */
+    *pmant = (MANTD(temp) << 11) | (1LL << 63);
+    /* exponent + sign */
+    e = EXPD(temp) - EXPBIAS + 16383;
+    e |= SIGND(temp) >> 16;
+    *pexp = e;
+}
+
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper)
+{
+    CPU86_LDoubleU temp;
+    int e;
+    uint64_t ll;
+
+    /* XXX: handle overflow ? */
+    e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */
+    e |= (upper >> 4) & 0x800; /* sign */
+    ll = (mant >> 11) & ((1LL << 52) - 1);
+#ifdef __arm__
+    temp.l.upper = (e << 20) | (ll >> 32);
+    temp.l.lower = ll;
+#else
+    temp.ll = ll | ((uint64_t)e << 52);
+#endif
+    return temp.d;
+}
+
+#else
+
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f)
+{
+    CPU86_LDoubleU temp;
+
+    temp.d = f;
+    *pmant = temp.l.lower;
+    *pexp = temp.l.upper;
+}
+
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper)
+{
+    CPU86_LDoubleU temp;
+
+    temp.l.upper = upper;
+    temp.l.lower = mant;
+    return temp.d;
+}
+#endif
+
+#ifdef TARGET_X86_64
+
+//#define DEBUG_MULDIV
+
+static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+    *plow += a;
+    /* carry test */
+    if (*plow < a)
+        (*phigh)++;
+    *phigh += b;
+}
+
+static void neg128(uint64_t *plow, uint64_t *phigh)
+{
+    *plow = ~ *plow;
+    *phigh = ~ *phigh;
+    add128(plow, phigh, 1, 0);
+}
+
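+/* 64x64 -> 128 bit unsigned multiply built from four 32x32 -> 64 bit partial
+   products accumulated into *phigh:*plow with add128(). */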
+static void mul64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+    uint32_t a0, a1, b0, b1;
+    uint64_t v;
+
+    a0 = a;
+    a1 = a >> 32;
+
+    b0 = b;
+    b1 = b >> 32;
+
+    v = (uint64_t)a0 * (uint64_t)b0;
+    *plow = v;
+    *phigh = 0;
+
+    v = (uint64_t)a0 * (uint64_t)b1;
+    add128(plow, phigh, v << 32, v >> 32);
+
+    v = (uint64_t)a1 * (uint64_t)b0;
+    add128(plow, phigh, v << 32, v >> 32);
+
+    v = (uint64_t)a1 * (uint64_t)b1;
+    *phigh += v;
+#ifdef DEBUG_MULDIV
+    printf("mul: 0x%016" PRIx64 " * 0x%016" PRIx64 " = 0x%016" PRIx64 "%016" PRIx64 "\n",
+           a, b, *phigh, *plow);
+#endif
+}
+
+static void imul64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
+{
+    int sa, sb;
+    sa = (a < 0);
+    if (sa)
+        a = -a;
+    sb = (b < 0);
+    if (sb)
+        b = -b;
+    mul64(plow, phigh, a, b);
+    if (sa ^ sb) {
+        neg128(plow, phigh);
+    }
+}
+
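+/* 128/64 -> 64 bit unsigned division: when the high half is non-zero, a simple
+   bit-by-bit shift-and-subtract loop is used; the quotient ends up in *plow and
+   the remainder in *phigh. */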
+/* return TRUE if overflow */
+static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b)
+{
+    uint64_t q, r, a1, a0;
+    int i, qb, ab;
+
+    a0 = *plow;
+    a1 = *phigh;
+    if (a1 == 0) {
+        q = a0 / b;
+        r = a0 % b;
+        *plow = q;
+        *phigh = r;
+    } else {
+        if (a1 >= b)
+            return 1;
+        /* XXX: use a better algorithm */
+        for(i = 0; i < 64; i++) {
+            ab = a1 >> 63;
+            a1 = (a1 << 1) | (a0 >> 63);
+            if (ab || a1 >= b) {
+                a1 -= b;
+                qb = 1;
+            } else {
+                qb = 0;
+            }
+            a0 = (a0 << 1) | qb;
+        }
+#if defined(DEBUG_MULDIV)
+        printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64 ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n",
+               *phigh, *plow, b, a0, a1);
+#endif
+        *plow = a0;
+        *phigh = a1;
+    }
+    return 0;
+}
+
+/* return TRUE if overflow */
+static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b)
+{
+    int sa, sb;
+    sa = ((int64_t)*phigh < 0);
+    if (sa)
+        neg128(plow, phigh);
+    sb = (b < 0);
+    if (sb)
+        b = -b;
+    if (div64(plow, phigh, b) != 0)
+        return 1;
+    if (sa ^ sb) {
+        if (*plow > (1ULL << 63))
+            return 1;
+        *plow = - *plow;
+    } else {
+        if (*plow >= (1ULL << 63))
+            return 1;
+    }
+    if (sa)
+        *phigh = - *phigh;
+    return 0;
+}
+
+void helper_mulq_EAX_T0(void)
+{
+    uint64_t r0, r1;
+
+    mul64(&r0, &r1, EAX, T0);
+    EAX = r0;
+    EDX = r1;
+    CC_DST = r0;
+    CC_SRC = r1;
+}
+
+void helper_imulq_EAX_T0(void)
+{
+    uint64_t r0, r1;
+
+    imul64(&r0, &r1, EAX, T0);
+    EAX = r0;
+    EDX = r1;
+    CC_DST = r0;
+    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
+}
+
+void helper_imulq_T0_T1(void)
+{
+    uint64_t r0, r1;
+
+    imul64(&r0, &r1, T0, T1);
+    T0 = r0;
+    CC_DST = r0;
+    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
+}
+
+void helper_divq_EAX_T0(void)
+{
+    uint64_t r0, r1;
+    if (T0 == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    r0 = EAX;
+    r1 = EDX;
+    if (div64(&r0, &r1, T0))
+        raise_exception(EXCP00_DIVZ);
+    EAX = r0;
+    EDX = r1;
+}
+
+void helper_idivq_EAX_T0(void)
+{
+    uint64_t r0, r1;
+    if (T0 == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    r0 = EAX;
+    r1 = EDX;
+    if (idiv64(&r0, &r1, T0))
+        raise_exception(EXCP00_DIVZ);
+    EAX = r0;
+    EDX = r1;
+}
+
+void helper_bswapq_T0(void)
+{
+    T0 = bswap64(T0);
+}
+#endif
+
+void helper_hlt(void)
+{
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
+    env->hflags |= HF_HALTED_MASK;
+    env->exception_index = EXCP_HLT;
+    cpu_loop_exit();
+}
+
+void helper_monitor(void)
+{
+    if ((uint32_t)ECX != 0)
+        raise_exception(EXCP0D_GPF);
+    /* XXX: store address ? */
+}
+
+void helper_mwait(void)
+{
+    if ((uint32_t)ECX != 0)
+        raise_exception(EXCP0D_GPF);
+#ifdef VBOX
+    helper_hlt();
+#else
+    /* XXX: not complete but not completely erroneous */
+    if (env->cpu_index != 0 || env->next_cpu != NULL) {
+        /* more than one CPU: do not sleep because another CPU may
+           wake this one */
+    } else {
+        helper_hlt();
+    }
+#endif
+}
+
+float approx_rsqrt(float a)
+{
+    return 1.0 / sqrt(a);
+}
+
+float approx_rcp(float a)
+{
+    return 1.0 / a;
+}
+
+void update_fp_status(void)
+{
+    int rnd_type;
+
+    /* set rounding mode */
+    switch(env->fpuc & RC_MASK) {
+    default:
+    case RC_NEAR:
+        rnd_type = float_round_nearest_even;
+        break;
+    case RC_DOWN:
+        rnd_type = float_round_down;
+        break;
+    case RC_UP:
+        rnd_type = float_round_up;
+        break;
+    case RC_CHOP:
+        rnd_type = float_round_to_zero;
+        break;
+    }
+    set_float_rounding_mode(rnd_type, &env->fp_status);
+#ifdef FLOATX80
+    switch((env->fpuc >> 8) & 3) {
+    case 0:
+        rnd_type = 32;
+        break;
+    case 2:
+        rnd_type = 64;
+        break;
+    case 3:
+    default:
+        rnd_type = 80;
+        break;
+    }
+    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
+#endif
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+#define MMUSUFFIX _mmu
+#define GETPC() (__builtin_return_address(0))
+
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+#endif
+
+/* try to fill the TLB and return an exception if error. If retaddr is
+   NULL, it means that the function was called in C code (i.e. not
+   from generated code or from helper.c) */
+/* XXX: fix it to restore all registers */
+void tlb_fill(target_ulong addr, int is_write, int is_user, void *retaddr)
+{
+    TranslationBlock *tb;
+    int ret;
+    unsigned long pc;
+    CPUX86State *saved_env;
+
+    /* XXX: hack to restore env in all cases, even if not called from
+       generated code */
+    saved_env = env;
+    env = cpu_single_env;
+
+    ret = cpu_x86_handle_mmu_fault(env, addr, is_write, is_user, 1);
+    if (ret) {
+        if (retaddr) {
+            /* now we have a real cpu fault */
+            pc = (unsigned long)retaddr;
+            tb = tb_find_pc(pc);
+            if (tb) {
+                /* the PC is inside the translated code. It means that we have
+                   a virtual CPU fault */
+                cpu_restore_state(tb, env, pc, NULL);
+            }
+        }
+        if (retaddr)
+            raise_exception_err(env->exception_index, env->error_code);
+        else
+            raise_exception_err_norestore(env->exception_index, env->error_code);
+    }
+    env = saved_env;
+}
+
+#ifdef VBOX
+
+/**
+ * Correctly computes the eflags.
+ * @returns eflags.
+ * @param   env1    CPU environment.
+ */
+uint32_t raw_compute_eflags(CPUX86State *env1)
+{
+    CPUX86State *savedenv = env;
+    env = env1;
+    uint32_t efl = compute_eflags();
+    env = savedenv;
+    return efl;
+}
+
+/**
+ * Reads a byte from a virtual address in the guest memory area.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @returns data byte read.
+ * @param   env1    CPU environment.
+ * @param   addr    GC Virtual address.
+ */
+uint8_t read_byte(CPUX86State *env1, target_ulong addr)
+{
+    CPUX86State *savedenv = env;
+    env = env1;
+    uint8_t u8 = ldub_kernel(addr);
+    env = savedenv;
+    return u8;
+}
+
+/**
+ * Reads a 16-bit word from a virtual address in the guest memory area.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @returns data word read.
+ * @param   env1    CPU environment.
+ * @param   addr    GC Virtual address.
+ */
+uint16_t read_word(CPUX86State *env1, target_ulong addr)
+{
+    CPUX86State *savedenv = env;
+    env = env1;
+    uint16_t u16 = lduw_kernel(addr);
+    env = savedenv;
+    return u16;
+}
+
+/**
+ * Reads a 32-bit dword from a virtual address in the guest memory area.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @returns data dword read.
+ * @param   env1    CPU environment.
+ * @param   addr    GC Virtual address.
+ */
+uint32_t read_dword(CPUX86State *env1, target_ulong addr)
+{
+    CPUX86State *savedenv = env;
+    env = env1;
+    uint32_t u32 = ldl_kernel(addr);
+    env = savedenv;
+    return u32;
+}
+
+/**
+ * Writes a byte to a virtual address in the guest memory area.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @param   env1    CPU environment.
+ * @param   addr    GC Virtual address.
+ * @param   val     byte value
+ */
+void write_byte(CPUX86State *env1, target_ulong addr, uint8_t val)
+{
+    CPUX86State *savedenv = env;
+    env = env1;
+    stb(addr, val);
+    env = savedenv;
+}
+
+void write_word(CPUX86State *env1, target_ulong addr, uint16_t val)
+{
+    CPUX86State *savedenv = env;
+    env = env1;
+    stw(addr, val);
+    env = savedenv;
+}
+
+void write_dword(CPUX86State *env1, target_ulong addr, uint32_t val)
+{
+    CPUX86State *savedenv = env;
+    env = env1;
+    stl(addr, val);
+    env = savedenv;
+}
+
+/**
+ * Correctly loads selector into segment register with updating internal
+ * qemu data/caches.
+ * @param   env1        CPU environment.
+ * @param   seg_reg     Segment register.
+ * @param   selector    Selector to load.
+ */
+void sync_seg(CPUX86State *env1, int seg_reg, int selector)
+{
+    CPUX86State *savedenv = env;
+    env = env1;
+
+    if (    env->eflags & X86_EFL_VM
+        ||  !(env->cr[0] & X86_CR0_PE))
+    {
+        load_seg_vm(seg_reg, selector);
+
+        env = savedenv;
+
+        /* Successful sync. */
+        env1->segs[seg_reg].newselector = 0;
+    }
+    else
+    {
+        if (setjmp(env1->jmp_env) == 0)
+        {
+            if (seg_reg == R_CS)
+            {
+                uint32_t e1, e2;
+                load_segment(&e1, &e2, selector);
+                cpu_x86_load_seg_cache(env, R_CS, selector,
+                               get_seg_base(e1, e2),
+                               get_seg_limit(e1, e2),
+                               e2);
+            }
+            else
+                load_seg(seg_reg, selector);
+            env = savedenv;
+
+            /* Successful sync. */
+            env1->segs[seg_reg].newselector = 0;
+        }
+        else
+        {
+            env = savedenv;
+
+            /* Postpone sync until the guest uses the selector. */
+            env1->segs[seg_reg].selector    = selector;     /* hidden values are now incorrect, but will be resynced when this register is accessed. */
+            env1->segs[seg_reg].newselector = selector;
+            Log(("sync_seg: out of sync seg_reg=%d selector=%#x\n", seg_reg, selector));
+        }
+    }
+
+}
+
+
+/**
+ * Correctly loads a new ldtr selector.
+ *
+ * @param   env1        CPU environment.
+ * @param   selector    Selector to load.
+ */
+void sync_ldtr(CPUX86State *env1, int selector)
+{
+    CPUX86State *saved_env = env;
+    target_ulong saved_T0 = T0;
+    if (setjmp(env1->jmp_env) == 0)
+    {
+        env = env1;
+        T0 = selector;
+        helper_lldt_T0();
+        T0 = saved_T0;
+        env = saved_env;
+    }
+    else
+    {
+        T0 = saved_T0;
+        env = saved_env;
+#ifdef VBOX_STRICT
+        cpu_abort(env1, "sync_ldtr: selector=%#x\n", selector);
+#endif
+    }
+}
+
+/**
+ * Correctly loads a new tr selector.
+ *
+ * @param   env1        CPU environment.
+ * @param   selector    Selector to load.
+ */
+int sync_tr(CPUX86State *env1, int selector)
+{
+    /* ARG! this was going to call helper_ltr_T0 but that won't work because of busy flag. */
+    SegmentCache *dt;
+    uint32_t e1, e2;
+    int index, type, entry_limit;
+    target_ulong ptr;
+    CPUX86State *saved_env = env;
+    env = env1;
+
+    selector &= 0xffff;
+    if ((selector & 0xfffc) == 0) {
+        /* NULL selector case: invalid TR */
+        env->tr.base = 0;
+        env->tr.limit = 0;
+        env->tr.flags = 0;
+    } else {
+        if (selector & 0x4)
+            goto l_failure;
+        dt = &env->gdt;
+        index = selector & ~7;
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK)
+            entry_limit = 15;
+        else
+#endif
+            entry_limit = 7;
+        if ((index + entry_limit) > dt->limit)
+            goto l_failure;
+        ptr = dt->base + index;
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        if ((e2 & DESC_S_MASK) /*||
+            (type != 1 && type != 9)*/)
+            goto l_failure;
+        if (!(e2 & DESC_P_MASK))
+            goto l_failure;
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint32_t e3;
+            e3 = ldl_kernel(ptr + 8);
+            load_seg_cache_raw_dt(&env->tr, e1, e2);
+            env->tr.base |= (target_ulong)e3 << 32;
+        } else
+#endif
+        {
+            load_seg_cache_raw_dt(&env->tr, e1, e2);
+        }
+        e2 |= DESC_TSS_BUSY_MASK;
+        stl_kernel(ptr + 4, e2);
+    }
+    env->tr.selector = selector;
+
+    env = saved_env;
+    return 0;
+l_failure:
+    AssertMsgFailed(("selector=%d\n", selector));
+    env = saved_env; /* restore the global env before failing */
+    return -1;
+}
+
+int emulate_single_instr(CPUX86State *env1)
+{
+#if 1 /* single stepping is broken when using a static tb... feel free to figure out why. :-) */
+    /* This has to be static because it needs to be addressable
+       using 32-bit immediate addresses on 64-bit machines. This
+       is dictated by the gcc code model used when building this
+       module / op.o. Using a static here pushes the problem
+       onto the module loader. */
+    static TranslationBlock tb_temp;
+#endif
+    TranslationBlock *tb;
+    TranslationBlock *current;
+    int csize;
+    void (*gen_func)(void);
+    uint8_t *tc_ptr;
+    target_ulong old_eip;
+
+    /* ensures env is loaded in ebp! */
+    CPUX86State *savedenv = env;
+    env = env1;
+
+    RAWEx_ProfileStart(env, STATS_EMULATE_SINGLE_INSTR);
+
+#if 1 /* see above */
+    tc_ptr = env->pvCodeBuffer;
+#else
+    tc_ptr = code_gen_ptr;
+#endif
+
+    /*
+     * Setup temporary translation block.
+     */
+    /* tb_alloc: */
+#if 1 /* see above */
+    tb = &tb_temp;
+    tb->pc = env->segs[R_CS].base + env->eip;
+    tb->cflags = 0;
+#else
+    tb = tb_alloc(env->segs[R_CS].base + env->eip);
+    if (!tb)
+    {
+        tb_flush(env);
+        tb = tb_alloc(env->segs[R_CS].base + env->eip);
+    }
+#endif
+
+    /* tb_find_slow: */
+    tb->tc_ptr = tc_ptr;
+    tb->cs_base = env->segs[R_CS].base;
+    tb->flags = env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
+
+    /* Initialize the rest with sensible values. */
+    tb->size = 0;
+    tb->phys_hash_next = NULL;
+    tb->page_next[0] = NULL;
+    tb->page_next[1] = NULL;
+    tb->page_addr[0] = 0;
+    tb->page_addr[1] = 0;
+    tb->tb_next_offset[0] = 0xffff;
+    tb->tb_next_offset[1] = 0xffff;
+    tb->tb_next[0] = 0xffff;
+    tb->tb_next[1] = 0xffff;
+    tb->jmp_next[0] = NULL;
+    tb->jmp_next[1] = NULL;
+    tb->jmp_first = NULL;
+
+    current = env->current_tb;
+    env->current_tb = NULL;
+
+    /*
+     * Translate only one instruction.
+     */
+    ASMAtomicOrU32(&env->state, CPU_EMULATE_SINGLE_INSTR);
+    if (cpu_gen_code(env, tb, env->cbCodeBuffer, &csize) < 0)
+    {
+        AssertFailed();
+        RAWEx_ProfileStop(env, STATS_EMULATE_SINGLE_INSTR);
+        ASMAtomicAndU32(&env->state, ~CPU_EMULATE_SINGLE_INSTR);
+        env = savedenv;
+        return -1;
+    }
+#ifdef DEBUG
+    if(csize > env->cbCodeBuffer)
+    {
+        RAWEx_ProfileStop(env, STATS_EMULATE_SINGLE_INSTR);
+        AssertFailed();
+        ASMAtomicAndU32(&env->state, ~CPU_EMULATE_SINGLE_INSTR);
+        env = savedenv;
+        return -1;
+    }
+    if (tb->tc_ptr != tc_ptr)
+    {
+        RAWEx_ProfileStop(env, STATS_EMULATE_SINGLE_INSTR);
+        AssertFailed();
+        ASMAtomicAndU32(&env->state, ~CPU_EMULATE_SINGLE_INSTR);
+        env = savedenv;
+        return -1;
+    }
+#endif
+    ASMAtomicAndU32(&env->state, ~CPU_EMULATE_SINGLE_INSTR);
+
+    /* tb_link_phys: */
+    tb->jmp_first = (TranslationBlock *)((intptr_t)tb | 2);
+    Assert(tb->jmp_next[0] == NULL); Assert(tb->jmp_next[1] == NULL);
+    if (tb->tb_next_offset[0] != 0xffff)
+        tb_set_jmp_target(tb, 0, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[0]));
+    if (tb->tb_next_offset[1] != 0xffff)
+        tb_set_jmp_target(tb, 1, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[1]));
+
+    /*
+     * Execute it using emulation
+     */
+    old_eip = env->eip;
+    gen_func = (void *)tb->tc_ptr;
+    env->current_tb = tb;
+
+    // eip remains the same for repeated instructions; it is not clear why qemu does not
+    // generate a jump inside the translated code, so keep executing until eip changes.
+    // Perhaps not a very safe hack.
+    while(old_eip == env->eip)
+    {
+        gen_func();
+        /*
+         * Exit once we detect an external interrupt and interrupts are enabled
+         */
+        if( (env->interrupt_request & (CPU_INTERRUPT_EXTERNAL_EXIT|CPU_INTERRUPT_EXTERNAL_TIMER)) ||
+            ( (env->eflags & IF_MASK) &&
+             !(env->hflags & HF_INHIBIT_IRQ_MASK) &&
+              (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD) ) )
+        {
+            break;
+        }
+    }
+    env->current_tb = current;
+
+    Assert(tb->phys_hash_next == NULL);
+    Assert(tb->page_next[0] == NULL);
+    Assert(tb->page_next[1] == NULL);
+    Assert(tb->page_addr[0] == 0);
+    Assert(tb->page_addr[1] == 0);
+/*
+    Assert(tb->tb_next_offset[0] == 0xffff);
+    Assert(tb->tb_next_offset[1] == 0xffff);
+    Assert(tb->tb_next[0] == 0xffff);
+    Assert(tb->tb_next[1] == 0xffff);
+    Assert(tb->jmp_next[0] == NULL);
+    Assert(tb->jmp_next[1] == NULL);
+    Assert(tb->jmp_first == NULL); */
+
+    RAWEx_ProfileStop(env, STATS_EMULATE_SINGLE_INSTR);
+
+    /*
+     * Execute the next instruction when we encounter instruction fusing.
+     */
+    if (env->hflags & HF_INHIBIT_IRQ_MASK)
+    {
+        Log(("REM: Emulating next instruction due to instruction fusing (HF_INHIBIT_IRQ_MASK) at %VGv\n", env->eip));
+        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+        emulate_single_instr(env);
+    }
+
+    env = savedenv;
+    return 0;
+}
+
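+/* Reads the SS:ESP pair for privilege level dpl from the current TSS without
+   raising guest exceptions; returns 1 on success, 0 if the entry lies outside
+   the TSS limit. */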
+int get_ss_esp_from_tss_raw(CPUX86State *env1, uint32_t *ss_ptr,
+                             uint32_t *esp_ptr, int dpl)
+{
+    int type, index, shift;
+
+    CPUX86State *savedenv = env;
+    env = env1;
+
+    if (!(env->tr.flags & DESC_P_MASK))
+        cpu_abort(env, "invalid tss");
+    type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+    if ((type & 7) != 1)
+        cpu_abort(env, "invalid tss type %d", type);
+    shift = type >> 3;
+    index = (dpl * 4 + 2) << shift;
+    if (index + (4 << shift) - 1 > env->tr.limit)
+    {
+        env = savedenv;
+        return 0;
+    }
+        //raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+
+    if (shift == 0) {
+        *esp_ptr = lduw_kernel(env->tr.base + index);
+        *ss_ptr = lduw_kernel(env->tr.base + index + 2);
+    } else {
+        *esp_ptr = ldl_kernel(env->tr.base + index);
+        *ss_ptr = lduw_kernel(env->tr.base + index + 4);
+    }
+
+    env = savedenv;
+    return 1;
+}
+
+//*****************************************************************************
+// Needs to be at the bottom of the file (overriding macros)
+
+static inline CPU86_LDouble helper_fldt_raw(uint8_t *ptr)
+{
+    return *(CPU86_LDouble *)ptr;
+}
+
+static inline void helper_fstt_raw(CPU86_LDouble f, uint8_t *ptr)
+{
+    *(CPU86_LDouble *)ptr = f;
+}
+
+#undef stw
+#undef stl
+#undef stq
+#define stw(a,b) *(uint16_t *)(a) = (uint16_t)(b)
+#define stl(a,b) *(uint32_t *)(a) = (uint32_t)(b)
+#define stq(a,b) *(uint64_t *)(a) = (uint64_t)(b)
+#define data64 0
+
+//*****************************************************************************
+void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr)
+{
+    int fpus, fptag, i, nb_xmm_regs;
+    CPU86_LDouble tmp;
+    uint8_t *addr;
+
+    if (env->cpuid_features & CPUID_FXSR)
+    {
+        fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+        fptag = 0;
+        for(i = 0; i < 8; i++) {
+            fptag |= (env->fptags[i] << i);
+        }
+        stw(ptr, env->fpuc);
+        stw(ptr + 2, fpus);
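+        /* FXSAVE uses the abridged tag word where a set bit means the register
+           is valid; env->fptags[] stores 1 for an empty register, hence ^ 0xff. */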
+        stw(ptr + 4, fptag ^ 0xff);
+
+        addr = ptr + 0x20;
+        for(i = 0;i < 8; i++) {
+            tmp = ST(i);
+            helper_fstt_raw(tmp, addr);
+            addr += 16;
+        }
+
+        if (env->cr[4] & CR4_OSFXSR_MASK) {
+            /* XXX: finish it */
+            stl(ptr + 0x18, env->mxcsr); /* mxcsr */
+            stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
+            nb_xmm_regs = 8 << data64;
+            addr = ptr + 0xa0;
+            for(i = 0; i < nb_xmm_regs; i++) {
+#if __GNUC__ < 4
+                stq(addr, env->xmm_regs[i].XMM_Q(0));
+                stq(addr + 8, env->xmm_regs[i].XMM_Q(1));
+#else /* VBOX + __GNUC__ >= 4: gcc 4.x compiler bug - it runs out of registers for the 64-bit value. */
+                stl(addr, env->xmm_regs[i].XMM_L(0));
+                stl(addr + 4, env->xmm_regs[i].XMM_L(1));
+                stl(addr + 8, env->xmm_regs[i].XMM_L(2));
+                stl(addr + 12, env->xmm_regs[i].XMM_L(3));
+#endif
+                addr += 16;
+            }
+        }
+    }
+    else
+    {
+        PX86FPUSTATE fp = (PX86FPUSTATE)ptr;
+        int fptag;
+
+        fp->FCW = env->fpuc;
+        fp->FSW = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+        fptag = 0;
+        for (i=7; i>=0; i--) {
+	        fptag <<= 2;
+	        if (env->fptags[i]) {
+                fptag |= 3;
+            } else {
+                /* the FPU automatically computes it */
+            }
+        }
+        fp->FTW = fptag;
+
+        for(i = 0;i < 8; i++) {
+            tmp = ST(i);
+            helper_fstt_raw(tmp, &fp->regs[i].reg[0]);
+        }
+    }
+}
+
+//*****************************************************************************
+#undef lduw
+#undef ldl
+#undef ldq
+#define lduw(a) *(uint16_t *)(a)
+#define ldl(a)  *(uint32_t *)(a)
+#define ldq(a)  *(uint64_t *)(a)
+//*****************************************************************************
+void save_raw_fp_state(CPUX86State *env, uint8_t *ptr)
+{
+    int i, fpus, fptag, nb_xmm_regs;
+    CPU86_LDouble tmp;
+    uint8_t *addr;
+
+    if (env->cpuid_features & CPUID_FXSR)
+    {
+        env->fpuc = lduw(ptr);
+        fpus = lduw(ptr + 2);
+        fptag = lduw(ptr + 4);
+        env->fpstt = (fpus >> 11) & 7;
+        env->fpus = fpus & ~0x3800;
+        fptag ^= 0xff;
+        for(i = 0;i < 8; i++) {
+            env->fptags[i] = ((fptag >> i) & 1);
+        }
+
+        addr = ptr + 0x20;
+        for(i = 0;i < 8; i++) {
+            tmp = helper_fldt_raw(addr);
+            ST(i) = tmp;
+            addr += 16;
+        }
+
+        if (env->cr[4] & CR4_OSFXSR_MASK) {
+            /* XXX: finish it, endianness */
+            env->mxcsr = ldl(ptr + 0x18);
+            //ldl(ptr + 0x1c);
+            nb_xmm_regs = 8 << data64;
+            addr = ptr + 0xa0;
+            for(i = 0; i < nb_xmm_regs; i++) {
+#if HC_ARCH_BITS == 32
+                /* this is a workaround for http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35135 */
+                env->xmm_regs[i].XMM_L(0) = ldl(addr);
+                env->xmm_regs[i].XMM_L(1) = ldl(addr + 4);
+                env->xmm_regs[i].XMM_L(2) = ldl(addr + 8);
+                env->xmm_regs[i].XMM_L(3) = ldl(addr + 12);
+#else
+                env->xmm_regs[i].XMM_Q(0) = ldq(addr);
+                env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+#endif
+                addr += 16;
+            }
+        }
+    }
+    else
+    {
+        PX86FPUSTATE fp = (PX86FPUSTATE)ptr;
+        int fptag, j;
+
+        env->fpuc = fp->FCW;
+        env->fpstt = (fp->FSW >> 11) & 7;
+        env->fpus = fp->FSW & ~0x3800;
+        fptag = fp->FTW;
+        for(i = 0;i < 8; i++) {
+            env->fptags[i] = ((fptag & 3) == 3);
+            fptag >>= 2;
+        }
+        j = env->fpstt;
+        for(i = 0;i < 8; i++) {
+            tmp = helper_fldt_raw(&fp->regs[i].reg[0]);
+            ST(i) = tmp;
+        }
+    }
+}
+//*****************************************************************************
+//*****************************************************************************
+
+#endif /* VBOX */
+
Index: /trunk/src/recompiler_new/target-i386/helper2.c
===================================================================
--- /trunk/src/recompiler_new/target-i386/helper2.c	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/helper2.c	(revision 13168)
@@ -0,0 +1,1077 @@
+/*
+ *  i386 helpers (without register variable usage)
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#ifndef VBOX
+#include <signal.h>
+#include <assert.h>
+#else
+# include <VBox/pgm.h> /* PGM_DYNAMIC_RAM_ALLOC */
+#endif
+
+#include "cpu.h"
+#include "exec-all.h"
+
+//#define DEBUG_MMU
+
+#ifdef USE_CODE_COPY
+#include <asm/ldt.h>
+#include <linux/unistd.h>
+#include <linux/version.h>
+
+int modify_ldt(int func, void *ptr, unsigned long bytecount)
+{
+	return syscall(__NR_modify_ldt, func, ptr, bytecount);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 66)
+#define modify_ldt_ldt_s user_desc
+#endif
+#endif /* USE_CODE_COPY */
+
+#ifdef VBOX
+CPUX86State *cpu_x86_init(CPUX86State *env)
+{
+#else  /* !VBOX */
+CPUX86State *cpu_x86_init(void)
+{
+    CPUX86State *env;
+#endif /* !VBOX */
+    static int inited;
+
+#ifndef VBOX
+    env = qemu_mallocz(sizeof(CPUX86State));
+    if (!env)
+        return NULL;
+#endif /* !VBOX */
+    cpu_exec_init(env);
+
+    /* init various static tables */
+    if (!inited) {
+        inited = 1;
+        optimize_flags_init();
+    }
+#ifdef USE_CODE_COPY
+    /* testing code for code copy case */
+    {
+        struct modify_ldt_ldt_s ldt;
+
+        ldt.entry_number = 1;
+        ldt.base_addr = (unsigned long)env;
+        ldt.limit = (sizeof(CPUState) + 0xfff) >> 12;
+        ldt.seg_32bit = 1;
+        ldt.contents = MODIFY_LDT_CONTENTS_DATA;
+        ldt.read_exec_only = 0;
+        ldt.limit_in_pages = 1;
+        ldt.seg_not_present = 0;
+        ldt.useable = 1;
+        modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
+        
+        asm volatile ("movl %0, %%fs" : : "r" ((1 << 3) | 7));
+    }
+#endif
+#ifndef VBOX /* cpuid_features is initialized by caller */
+    {
+        int family, model, stepping;
+#ifdef TARGET_X86_64
+        env->cpuid_vendor1 = 0x68747541; /* "Auth" */
+        env->cpuid_vendor2 = 0x69746e65; /* "enti" */
+        env->cpuid_vendor3 = 0x444d4163; /* "cAMD" */
+        family = 6;
+        model = 2;
+        stepping = 3;
+#else
+        env->cpuid_vendor1 = 0x756e6547; /* "Genu" */
+        env->cpuid_vendor2 = 0x49656e69; /* "ineI" */
+        env->cpuid_vendor3 = 0x6c65746e; /* "ntel" */
+#if 0
+        /* pentium 75-200 */
+        family = 5;
+        model = 2;
+        stepping = 11;
+#else
+        /* pentium pro */
+        family = 6;
+        model = 3;
+        stepping = 3;
+#endif
+#endif
+        env->cpuid_level = 2;
+        env->cpuid_version = (family << 8) | (model << 4) | stepping;
+        env->cpuid_features = (CPUID_FP87 | CPUID_DE | CPUID_PSE |
+                               CPUID_TSC | CPUID_MSR | CPUID_MCE |
+                               CPUID_CX8 | CPUID_PGE | CPUID_CMOV |
+                               CPUID_PAT);
+        env->pat = 0x0007040600070406ULL;
+        env->cpuid_ext_features = CPUID_EXT_SSE3;
+        env->cpuid_features |= CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_PAE | CPUID_SEP;
+        env->cpuid_features |= CPUID_APIC;
+        env->cpuid_xlevel = 0;
+        {
+            const char *model_id = "QEMU Virtual CPU version " QEMU_VERSION;
+            int c, len, i;
+            len = strlen(model_id);
+            for(i = 0; i < 48; i++) {
+                if (i >= len)
+                    c = '\0';
+                else
+                    c = model_id[i];
+                env->cpuid_model[i >> 2] |= c << (8 * (i & 3));
+            }
+        }
+#ifdef TARGET_X86_64
+        /* currently not enabled for std i386 because not fully tested */
+        env->cpuid_ext2_features = (env->cpuid_features & 0x0183F3FF);
+        env->cpuid_ext2_features |= CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX;
+        env->cpuid_xlevel = 0x80000008;
+
+        /* these features are needed for Win64 and aren't fully implemented */
+        env->cpuid_features |= CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA;
+        /* this feature is needed for Solaris and isn't fully implemented */
+        env->cpuid_features |= CPUID_PSE36;
+#endif
+    }
+#endif /* VBOX */
+    cpu_reset(env);
+#ifdef USE_KQEMU
+    kqemu_init(env);
+#endif
+    return env;
+}
+
+/* NOTE: must be called outside the CPU execute loop */
+void cpu_reset(CPUX86State *env)
+{
+    int i;
+
+    memset(env, 0, offsetof(CPUX86State, breakpoints));
+
+    tlb_flush(env, 1);
+
+    /* init to reset state */
+
+#ifdef CONFIG_SOFTMMU
+    env->hflags |= HF_SOFTMMU_MASK;
+#endif
+
+    cpu_x86_update_cr0(env, 0x60000010);
+    env->a20_mask = 0xffffffff;
+    env->smbase = 0x30000;
+
+    env->idt.limit = 0xffff;
+    env->gdt.limit = 0xffff;
+    env->ldt.limit = 0xffff;
+    env->ldt.flags = DESC_P_MASK;
+    env->tr.limit = 0xffff;
+    env->tr.flags = DESC_P_MASK;
+    
+    cpu_x86_load_seg_cache(env, R_CS, 0xf000, 0xffff0000, 0xffff, 0); 
+    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffff, 0);
+    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffff, 0);
+    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffff, 0);
+    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffff, 0);
+    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffff, 0);
+    
+    env->eip = 0xfff0;
+    env->regs[R_EDX] = 0x600; /* indicate P6 processor */
+    
+    env->eflags = 0x2;
+    
+    /* FPU init */
+    for(i = 0;i < 8; i++)
+        env->fptags[i] = 1;
+    env->fpuc = 0x37f;
+
+    env->mxcsr = 0x1f80;
+}
+
+#ifndef VBOX
+void cpu_x86_close(CPUX86State *env)
+{
+    free(env);
+}
+#endif
+
+/***********************************************************/
+/* x86 debug */
+
+static const char *cc_op_str[] = {
+    "DYNAMIC",
+    "EFLAGS",
+
+    "MULB",
+    "MULW",
+    "MULL",
+    "MULQ",
+
+    "ADDB",
+    "ADDW",
+    "ADDL",
+    "ADDQ",
+
+    "ADCB",
+    "ADCW",
+    "ADCL",
+    "ADCQ",
+
+    "SUBB",
+    "SUBW",
+    "SUBL",
+    "SUBQ",
+
+    "SBBB",
+    "SBBW",
+    "SBBL",
+    "SBBQ",
+
+    "LOGICB",
+    "LOGICW",
+    "LOGICL",
+    "LOGICQ",
+
+    "INCB",
+    "INCW",
+    "INCL",
+    "INCQ",
+
+    "DECB",
+    "DECW",
+    "DECL",
+    "DECQ",
+
+    "SHLB",
+    "SHLW",
+    "SHLL",
+    "SHLQ",
+
+    "SARB",
+    "SARW",
+    "SARL",
+    "SARQ",
+};
+
+void cpu_dump_state(CPUState *env, FILE *f, 
+                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+                    int flags)
+{
+    int eflags, i, nb;
+    char cc_op_name[32];
+    static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" };
+
+    eflags = env->eflags;
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_CS64_MASK) {
+        cpu_fprintf(f, 
+                    "RAX=%016" PRIx64 " RBX=%016" PRIx64 " RCX=%016" PRIx64 " RDX=%016" PRIx64 "\n"
+                    "RSI=%016" PRIx64 " RDI=%016" PRIx64 " RBP=%016" PRIx64 " RSP=%016" PRIx64 "\n"
+                    "R8 =%016" PRIx64 " R9 =%016" PRIx64 " R10=%016" PRIx64 " R11=%016" PRIx64 "\n"
+                    "R12=%016" PRIx64 " R13=%016" PRIx64 " R14=%016" PRIx64 " R15=%016" PRIx64 "\n"
+                    "RIP=%016" PRIx64 " RFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n",
+                    env->regs[R_EAX], 
+                    env->regs[R_EBX], 
+                    env->regs[R_ECX], 
+                    env->regs[R_EDX], 
+                    env->regs[R_ESI], 
+                    env->regs[R_EDI], 
+                    env->regs[R_EBP], 
+                    env->regs[R_ESP], 
+                    env->regs[8], 
+                    env->regs[9], 
+                    env->regs[10], 
+                    env->regs[11], 
+                    env->regs[12], 
+                    env->regs[13], 
+                    env->regs[14], 
+                    env->regs[15], 
+                    env->eip, eflags,
+                    eflags & DF_MASK ? 'D' : '-',
+                    eflags & CC_O ? 'O' : '-',
+                    eflags & CC_S ? 'S' : '-',
+                    eflags & CC_Z ? 'Z' : '-',
+                    eflags & CC_A ? 'A' : '-',
+                    eflags & CC_P ? 'P' : '-',
+                    eflags & CC_C ? 'C' : '-',
+                    env->hflags & HF_CPL_MASK, 
+                    (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
+                    (env->a20_mask >> 20) & 1,
+                    (env->hflags >> HF_SMM_SHIFT) & 1,
+                    (env->hflags >> HF_HALTED_SHIFT) & 1);
+    } else 
+#endif
+    {
+        cpu_fprintf(f, "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n"
+                    "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n"
+                    "EIP=%08x EFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n",
+                    (uint32_t)env->regs[R_EAX], 
+                    (uint32_t)env->regs[R_EBX], 
+                    (uint32_t)env->regs[R_ECX], 
+                    (uint32_t)env->regs[R_EDX], 
+                    (uint32_t)env->regs[R_ESI], 
+                    (uint32_t)env->regs[R_EDI], 
+                    (uint32_t)env->regs[R_EBP], 
+                    (uint32_t)env->regs[R_ESP], 
+                    (uint32_t)env->eip, eflags,
+                    eflags & DF_MASK ? 'D' : '-',
+                    eflags & CC_O ? 'O' : '-',
+                    eflags & CC_S ? 'S' : '-',
+                    eflags & CC_Z ? 'Z' : '-',
+                    eflags & CC_A ? 'A' : '-',
+                    eflags & CC_P ? 'P' : '-',
+                    eflags & CC_C ? 'C' : '-',
+                    env->hflags & HF_CPL_MASK, 
+                    (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
+                    (env->a20_mask >> 20) & 1,
+                    (env->hflags >> HF_SMM_SHIFT) & 1,
+                    (env->hflags >> HF_HALTED_SHIFT) & 1);
+    }
+
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_LMA_MASK) {
+        for(i = 0; i < 6; i++) {
+            SegmentCache *sc = &env->segs[i];
+            cpu_fprintf(f, "%s =%04x %016" PRIx64 " %08x %08x\n",
+                        seg_name[i],
+                        sc->selector,
+                        sc->base,
+                        sc->limit,
+                        sc->flags);
+        }
+        cpu_fprintf(f, "LDT=%04x %016" PRIx64 " %08x %08x\n",
+                    env->ldt.selector,
+                    env->ldt.base,
+                    env->ldt.limit,
+                    env->ldt.flags);
+        cpu_fprintf(f, "TR =%04x %016" PRIx64 " %08x %08x\n",
+                    env->tr.selector,
+                    env->tr.base,
+                    env->tr.limit,
+                    env->tr.flags);
+        cpu_fprintf(f, "GDT=     %016" PRIx64 " %08x\n",
+                    env->gdt.base, env->gdt.limit);
+        cpu_fprintf(f, "IDT=     %016" PRIx64 " %08x\n",
+                    env->idt.base, env->idt.limit);
+        cpu_fprintf(f, "CR0=%08x CR2=%016" PRIx64 " CR3=%016" PRIx64 " CR4=%08x\n",
+                    (uint32_t)env->cr[0], 
+                    env->cr[2], 
+                    env->cr[3], 
+                    (uint32_t)env->cr[4]);
+    } else
+#endif
+    {
+        for(i = 0; i < 6; i++) {
+            SegmentCache *sc = &env->segs[i];
+            cpu_fprintf(f, "%s =%04x %08x %08x %08x\n",
+                        seg_name[i],
+                        sc->selector,
+                        (uint32_t)sc->base,
+                        sc->limit,
+                        sc->flags);
+        }
+        cpu_fprintf(f, "LDT=%04x %08x %08x %08x\n",
+                    env->ldt.selector,
+                    (uint32_t)env->ldt.base,
+                    env->ldt.limit,
+                    env->ldt.flags);
+        cpu_fprintf(f, "TR =%04x %08x %08x %08x\n",
+                    env->tr.selector,
+                    (uint32_t)env->tr.base,
+                    env->tr.limit,
+                    env->tr.flags);
+        cpu_fprintf(f, "GDT=     %08x %08x\n",
+                    (uint32_t)env->gdt.base, env->gdt.limit);
+        cpu_fprintf(f, "IDT=     %08x %08x\n",
+                    (uint32_t)env->idt.base, env->idt.limit);
+        cpu_fprintf(f, "CR0=%08x CR2=%08x CR3=%08x CR4=%08x\n",
+                    (uint32_t)env->cr[0], 
+                    (uint32_t)env->cr[2], 
+                    (uint32_t)env->cr[3], 
+                    (uint32_t)env->cr[4]);
+    }
+    if (flags & X86_DUMP_CCOP) {
+        if ((unsigned)env->cc_op < CC_OP_NB)
+            qemu_snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]);
+        else
+            qemu_snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op);
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_CS64_MASK) {
+            cpu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%-8s\n",
+                        env->cc_src, env->cc_dst, 
+                        cc_op_name);
+        } else 
+#endif
+        {
+            cpu_fprintf(f, "CCS=%08x CCD=%08x CCO=%-8s\n",
+                        (uint32_t)env->cc_src, (uint32_t)env->cc_dst, 
+                        cc_op_name);
+        }
+    }
+    if (flags & X86_DUMP_FPU) {
+        int fptag;
+        fptag = 0;
+        for(i = 0; i < 8; i++) {
+            fptag |= ((!env->fptags[i]) << i);
+        }
+        cpu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n",
+                    env->fpuc,
+                    (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11,
+                    env->fpstt,
+                    fptag,
+                    env->mxcsr);
+        for(i=0;i<8;i++) {
+#if defined(USE_X86LDOUBLE)
+            union {
+                long double d;
+                struct {
+                    uint64_t lower;
+                    uint16_t upper;
+                } l;
+            } tmp;
+            tmp.d = env->fpregs[i].d;
+            cpu_fprintf(f, "FPR%d=%016" PRIx64 " %04x",
+                        i, tmp.l.lower, tmp.l.upper);
+#else
+            cpu_fprintf(f, "FPR%d=%016" PRIx64,
+                        i, env->fpregs[i].mmx.q);
+#endif
+            if ((i & 1) == 1)
+                cpu_fprintf(f, "\n");
+            else
+                cpu_fprintf(f, " ");
+        }
+        if (env->hflags & HF_CS64_MASK) 
+            nb = 16;
+        else
+            nb = 8;
+        for(i=0;i<nb;i++) {
+            cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x",
+                        i, 
+                        env->xmm_regs[i].XMM_L(3),
+                        env->xmm_regs[i].XMM_L(2),
+                        env->xmm_regs[i].XMM_L(1),
+                        env->xmm_regs[i].XMM_L(0));
+            if ((i & 1) == 1)
+                cpu_fprintf(f, "\n");
+            else
+                cpu_fprintf(f, " ");
+        }
+    }
+}
+
+/***********************************************************/
+/* x86 mmu */
+/* XXX: add PGE support */
+
+void cpu_x86_set_a20(CPUX86State *env, int a20_state)
+{
+    a20_state = (a20_state != 0);
+    if (a20_state != ((env->a20_mask >> 20) & 1)) {
+#if defined(DEBUG_MMU)
+        printf("A20 update: a20=%d\n", a20_state);
+#endif
+        /* if the cpu is currently executing code, we must unlink it and
+           all the potentially executing TBs */
+        cpu_interrupt(env, CPU_INTERRUPT_EXITTB);
+
+        /* when a20 is changed, all the MMU mappings are invalid, so
+           we must flush everything */
+        tlb_flush(env, 1);
+        env->a20_mask = 0xffefffff | (a20_state << 20);
+    }
+}
+
+void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0)
+{
+    int pe_state;
+
+#if defined(DEBUG_MMU)
+    printf("CR0 update: CR0 %x->0x%08x efer=%x\n", (uint32_t)env->cr[0], new_cr0, env->efer);
+#endif
+    if ((new_cr0 & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK)) !=
+        (env->cr[0] & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK))) {
+        tlb_flush(env, 1);
+    }
+
+#ifdef TARGET_X86_64
+    if (!(env->cr[0] & CR0_PG_MASK) && (new_cr0 & CR0_PG_MASK) &&
+        (env->efer & MSR_EFER_LME)) {
+        /* enter long mode */
+        /* XXX: generate an exception */
+        if (!(env->cr[4] & CR4_PAE_MASK))
+            return;
+        env->efer |= MSR_EFER_LMA;
+        env->hflags |= HF_LMA_MASK;
+    } else if ((env->cr[0] & CR0_PG_MASK) && !(new_cr0 & CR0_PG_MASK) &&
+               (env->efer & MSR_EFER_LMA)) {
+        /* exit long mode */
+        env->efer &= ~MSR_EFER_LMA;
+        env->hflags &= ~(HF_LMA_MASK | HF_CS64_MASK);
+        env->eip &= 0xffffffff;
+    }
+#endif
+    env->cr[0] = new_cr0 | CR0_ET_MASK;
+    
+    /* update PE flag in hidden flags */
+    pe_state = (env->cr[0] & CR0_PE_MASK);
+    env->hflags = (env->hflags & ~HF_PE_MASK) | (pe_state << HF_PE_SHIFT);
+    /* ensure that ADDSEG is always set in real mode */
+    env->hflags |= ((pe_state ^ 1) << HF_ADDSEG_SHIFT);
+    /* update FPU flags */
+    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
+        ((new_cr0 << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
+#ifdef VBOX
+    remR3ChangeCpuMode(env);
+#endif
+}
+
+/* XXX: in legacy PAE mode, generate a GPF if reserved bits are set in
+   the PDPT */
+void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
+{
+    env->cr[3] = new_cr3;
+    if (env->cr[0] & CR0_PG_MASK) {
+#if defined(DEBUG_MMU)
+        printf("CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3);
+#endif
+        tlb_flush(env, 0);
+    }
+}
+
+void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
+{
+#if defined(DEBUG_MMU)
+    printf("CR4 update: CR4=%08x -> %08x\n", (uint32_t)env->cr[4], (uint32_t)new_cr4);
+#endif
+    if ((new_cr4 & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK)) !=
+        (env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) {
+        tlb_flush(env, 1);
+    }
+    /* SSE handling */
+    if (!(env->cpuid_features & CPUID_SSE))
+        new_cr4 &= ~CR4_OSFXSR_MASK;
+    if (new_cr4 & CR4_OSFXSR_MASK)
+        env->hflags |= HF_OSFXSR_MASK;
+    else
+        env->hflags &= ~HF_OSFXSR_MASK;
+
+    env->cr[4] = new_cr4;
+#ifdef VBOX
+    remR3ChangeCpuMode(env);
+#endif
+}
+
+/* XXX: also flush 4MB pages */
+void cpu_x86_flush_tlb(CPUX86State *env, target_ulong addr)
+{
+#if defined(DEBUG) && defined(VBOX)
+    uint32_t pde;
+
+    /* page directory entry */
+    pde = remR3PhysReadU32(((env->cr[3] & ~0xfff) + ((addr >> 20) & ~3)) & env->a20_mask);
+
+    /* if PSE bit is set, then we use a 4MB page */
+    if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
+        printf("cpu_x86_flush_tlb: 4 MB page!!!!!\n");
+    }
+#endif
+    tlb_flush_page(env, addr);
+}
+
+#if defined(CONFIG_USER_ONLY) 
+
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, 
+                             int is_write, int is_user, int is_softmmu)
+{
+    /* user mode only emulation */
+    is_write &= 1;
+    env->cr[2] = addr;
+    env->error_code = (is_write << PG_ERROR_W_BIT);
+    env->error_code |= PG_ERROR_U_MASK;
+    env->exception_index = EXCP0E_PAGE;
+    return 1;
+}
+
+target_ulong cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
+{
+    return addr;
+}
+
+#else
+
+#define PHYS_ADDR_MASK 0xfffff000
+
+/* return value:
+   -1 = cannot handle fault 
+   0  = nothing more to do 
+   1  = generate PF fault
+   2  = soft MMU activation required for this block
+*/
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, 
+                             int is_write1, int is_user, int is_softmmu)
+{
+    uint64_t ptep, pte;
+    uint32_t pdpe_addr, pde_addr, pte_addr;
+    int error_code, is_dirty, prot, page_size, ret, is_write;
+    unsigned long paddr, page_offset;
+    target_ulong vaddr, virt_addr;
+    
+#if defined(DEBUG_MMU)
+    printf("MMU fault: addr=" TARGET_FMT_lx " w=%d u=%d eip=" TARGET_FMT_lx "\n", 
+           addr, is_write1, is_user, env->eip);
+#endif
+    is_write = is_write1 & 1;
+    
+    if (!(env->cr[0] & CR0_PG_MASK)) {
+        pte = addr;
+        virt_addr = addr & TARGET_PAGE_MASK;
+        prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+        page_size = 4096;
+        goto do_mapping;
+    }
+
+    if (env->cr[4] & CR4_PAE_MASK) {
+        uint64_t pde, pdpe;
+
+        /* XXX: we only use 32 bit physical addresses */
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint32_t pml4e_addr;
+            uint64_t pml4e;
+            int32_t sext;
+
+            /* test virtual address sign extension */
+            sext = (int64_t)addr >> 47;
+            if (sext != 0 && sext != -1) {
+                env->error_code = 0;
+                env->exception_index = EXCP0D_GPF;
+                return 1;
+            }
+            
+            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & 
+                env->a20_mask;
+            pml4e = ldq_phys(pml4e_addr);
+            if (!(pml4e & PG_PRESENT_MASK)) {
+                error_code = 0;
+                goto do_fault;
+            }
+            if (!(env->efer & MSR_EFER_NXE) && (pml4e & PG_NX_MASK)) {
+                error_code = PG_ERROR_RSVD_MASK;
+                goto do_fault;
+            }
+            if (!(pml4e & PG_ACCESSED_MASK)) {
+                pml4e |= PG_ACCESSED_MASK;
+                stl_phys_notdirty(pml4e_addr, pml4e);
+            }
+            ptep = pml4e ^ PG_NX_MASK;
+            pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) & 
+                env->a20_mask;
+            pdpe = ldq_phys(pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK)) {
+                error_code = 0;
+                goto do_fault;
+            }
+            if (!(env->efer & MSR_EFER_NXE) && (pdpe & PG_NX_MASK)) {
+                error_code = PG_ERROR_RSVD_MASK;
+                goto do_fault;
+            }
+            ptep &= pdpe ^ PG_NX_MASK;
+            if (!(pdpe & PG_ACCESSED_MASK)) {
+                pdpe |= PG_ACCESSED_MASK;
+                stl_phys_notdirty(pdpe_addr, pdpe);
+            }
+        } else
+#endif
+        {
+            /* XXX: load them when cr3 is loaded ? */
+            pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & 
+                env->a20_mask;
+            pdpe = ldq_phys(pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK)) {
+                error_code = 0;
+                goto do_fault;
+            }
+            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
+        }
+
+        pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
+            env->a20_mask;
+        pde = ldq_phys(pde_addr);
+        if (!(pde & PG_PRESENT_MASK)) {
+            error_code = 0;
+            goto do_fault;
+        }
+        if (!(env->efer & MSR_EFER_NXE) && (pde & PG_NX_MASK)) {
+            error_code = PG_ERROR_RSVD_MASK;
+            goto do_fault;
+        }
+        ptep &= pde ^ PG_NX_MASK;
+        if (pde & PG_PSE_MASK) {
+            /* 2 MB page */
+            page_size = 2048 * 1024;
+            ptep ^= PG_NX_MASK;
+            if ((ptep & PG_NX_MASK) && is_write1 == 2)
+                goto do_fault_protect;
+            if (is_user) {
+                if (!(ptep & PG_USER_MASK))
+                    goto do_fault_protect;
+                if (is_write && !(ptep & PG_RW_MASK))
+                    goto do_fault_protect;
+            } else {
+                if ((env->cr[0] & CR0_WP_MASK) && 
+                    is_write && !(ptep & PG_RW_MASK)) 
+                    goto do_fault_protect;
+            }
+            is_dirty = is_write && !(pde & PG_DIRTY_MASK);
+            if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
+                pde |= PG_ACCESSED_MASK;
+                if (is_dirty)
+                    pde |= PG_DIRTY_MASK;
+                stl_phys_notdirty(pde_addr, pde);
+            }
+            /* align to page_size */
+            pte = pde & ((PHYS_ADDR_MASK & ~(page_size - 1)) | 0xfff); 
+            virt_addr = addr & ~(page_size - 1);
+        } else {
+            /* 4 KB page */
+            if (!(pde & PG_ACCESSED_MASK)) {
+                pde |= PG_ACCESSED_MASK;
+                stl_phys_notdirty(pde_addr, pde);
+            }
+            pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
+                env->a20_mask;
+            pte = ldq_phys(pte_addr);
+            if (!(pte & PG_PRESENT_MASK)) {
+                error_code = 0;
+                goto do_fault;
+            }
+            if (!(env->efer & MSR_EFER_NXE) && (pte & PG_NX_MASK)) {
+                error_code = PG_ERROR_RSVD_MASK;
+                goto do_fault;
+            }
+            /* combine pde and pte nx, user and rw protections */
+            ptep &= pte ^ PG_NX_MASK;
+            ptep ^= PG_NX_MASK;
+            if ((ptep & PG_NX_MASK) && is_write1 == 2)
+                goto do_fault_protect; 
+            if (is_user) {
+                if (!(ptep & PG_USER_MASK))
+                    goto do_fault_protect;
+                if (is_write && !(ptep & PG_RW_MASK))
+                    goto do_fault_protect;
+            } else {
+                if ((env->cr[0] & CR0_WP_MASK) &&
+                    is_write && !(ptep & PG_RW_MASK)) 
+                    goto do_fault_protect;
+            }
+            is_dirty = is_write && !(pte & PG_DIRTY_MASK);
+            if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
+                pte |= PG_ACCESSED_MASK;
+                if (is_dirty)
+                    pte |= PG_DIRTY_MASK;
+                stl_phys_notdirty(pte_addr, pte);
+            }
+            page_size = 4096;
+            virt_addr = addr & ~0xfff;
+            pte = pte & (PHYS_ADDR_MASK | 0xfff);
+        }
+    } else {
+        uint32_t pde;
+
+        /* page directory entry */
+        pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & 
+            env->a20_mask;
+        pde = ldl_phys(pde_addr);
+        if (!(pde & PG_PRESENT_MASK)) {
+            error_code = 0;
+            goto do_fault;
+        }
+        /* if PSE bit is set, then we use a 4MB page */
+        if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
+            page_size = 4096 * 1024;
+            if (is_user) {
+                if (!(pde & PG_USER_MASK))
+                    goto do_fault_protect;
+                if (is_write && !(pde & PG_RW_MASK))
+                    goto do_fault_protect;
+            } else {
+                if ((env->cr[0] & CR0_WP_MASK) && 
+                    is_write && !(pde & PG_RW_MASK)) 
+                    goto do_fault_protect;
+            }
+            is_dirty = is_write && !(pde & PG_DIRTY_MASK);
+            if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
+                pde |= PG_ACCESSED_MASK;
+                if (is_dirty)
+                    pde |= PG_DIRTY_MASK;
+                stl_phys_notdirty(pde_addr, pde);
+            }
+        
+            pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
+            ptep = pte;
+            virt_addr = addr & ~(page_size - 1);
+        } else {
+            if (!(pde & PG_ACCESSED_MASK)) {
+                pde |= PG_ACCESSED_MASK;
+                stl_phys_notdirty(pde_addr, pde);
+            }
+
+            /* page table entry */
+            pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & 
+                env->a20_mask;
+            pte = ldl_phys(pte_addr);
+            if (!(pte & PG_PRESENT_MASK)) {
+                error_code = 0;
+                goto do_fault;
+            }
+            /* combine pde and pte user and rw protections */
+            ptep = pte & pde;
+            if (is_user) {
+                if (!(ptep & PG_USER_MASK))
+                    goto do_fault_protect;
+                if (is_write && !(ptep & PG_RW_MASK))
+                    goto do_fault_protect;
+            } else {
+                if ((env->cr[0] & CR0_WP_MASK) &&
+                    is_write && !(ptep & PG_RW_MASK)) 
+                    goto do_fault_protect;
+            }
+            is_dirty = is_write && !(pte & PG_DIRTY_MASK);
+            if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
+                pte |= PG_ACCESSED_MASK;
+                if (is_dirty)
+                    pte |= PG_DIRTY_MASK;
+                stl_phys_notdirty(pte_addr, pte);
+            }
+            page_size = 4096;
+            virt_addr = addr & ~0xfff;
+        }
+    }
+    /* the page can be put in the TLB */
+    prot = PAGE_READ;
+    if (!(ptep & PG_NX_MASK))
+        prot |= PAGE_EXEC;
+    if (pte & PG_DIRTY_MASK) {
+        /* only set write access if already dirty... otherwise wait
+           for dirty access */
+        if (is_user) {
+            if (ptep & PG_RW_MASK)
+                prot |= PAGE_WRITE;
+        } else {
+            if (!(env->cr[0] & CR0_WP_MASK) ||
+                (ptep & PG_RW_MASK))
+                prot |= PAGE_WRITE;
+        }
+    }
+ do_mapping:
+    pte = pte & env->a20_mask;
+
+    /* Even with 4MB pages, we map only one 4KB page at a time into the TLB
+       to avoid filling it too quickly */
+    page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
+    paddr = (pte & TARGET_PAGE_MASK) + page_offset;
+    vaddr = virt_addr + page_offset;
+    
+    ret = tlb_set_page_exec(env, vaddr, paddr, prot, is_user, is_softmmu);
+    return ret;
+ do_fault_protect:
+    error_code = PG_ERROR_P_MASK;
+ do_fault:
+    env->cr[2] = addr;
+    error_code |= (is_write << PG_ERROR_W_BIT);
+    if (is_user)
+        error_code |= PG_ERROR_U_MASK;
+    if (is_write1 == 2 && 
+        (env->efer & MSR_EFER_NXE) && 
+        (env->cr[4] & CR4_PAE_MASK))
+        error_code |= PG_ERROR_I_D_MASK;
+    env->error_code = error_code;
+    env->exception_index = EXCP0E_PAGE;
+    return 1;
+}
+
+target_ulong cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
+{
+    uint32_t pde_addr, pte_addr;
+    uint32_t pde, pte, paddr, page_offset, page_size;
+
+    if (env->cr[4] & CR4_PAE_MASK) {
+        uint32_t pdpe_addr, pde_addr, pte_addr;
+        uint32_t pdpe;
+
+        /* XXX: we only use 32 bit physical addresses */
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint32_t pml4e_addr, pml4e;
+            int32_t sext;
+
+            /* test virtual address sign extension */
+            sext = (int64_t)addr >> 47;
+            if (sext != 0 && sext != -1)
+                return -1;
+            
+            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & 
+                env->a20_mask;
+            pml4e = ldl_phys(pml4e_addr);
+            if (!(pml4e & PG_PRESENT_MASK))
+                return -1;
+            
+            pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) & 
+                env->a20_mask;
+            pdpe = ldl_phys(pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK))
+                return -1;
+        } else 
+#endif
+        {
+            pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & 
+                env->a20_mask;
+            pdpe = ldl_phys(pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK))
+                return -1;
+        }
+
+        pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
+            env->a20_mask;
+        pde = ldl_phys(pde_addr);
+        if (!(pde & PG_PRESENT_MASK)) {
+            return -1;
+        }
+        if (pde & PG_PSE_MASK) {
+            /* 2 MB page */
+            page_size = 2048 * 1024;
+            pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
+        } else {
+            /* 4 KB page */
+            pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
+                env->a20_mask;
+            page_size = 4096;
+            pte = ldl_phys(pte_addr);
+        }
+    } else {
+        if (!(env->cr[0] & CR0_PG_MASK)) {
+            pte = addr;
+            page_size = 4096;
+        } else {
+            /* page directory entry */
+            pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
+            pde = ldl_phys(pde_addr);
+            if (!(pde & PG_PRESENT_MASK)) 
+                return -1;
+            if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
+                pte = pde & ~0x003ff000; /* align to 4MB */
+                page_size = 4096 * 1024;
+            } else {
+                /* page table entry */
+                pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
+                pte = ldl_phys(pte_addr);
+                if (!(pte & PG_PRESENT_MASK))
+                    return -1;
+                page_size = 4096;
+            }
+        }
+        pte = pte & env->a20_mask;
+    }
+
+    page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
+    paddr = (pte & TARGET_PAGE_MASK) + page_offset;
+    return paddr;
+}
+#endif /* !CONFIG_USER_ONLY */
+
+#if defined(USE_CODE_COPY)
+struct fpstate {
+    uint16_t fpuc;
+    uint16_t dummy1;
+    uint16_t fpus;
+    uint16_t dummy2;
+    uint16_t fptag;
+    uint16_t dummy3;
+
+    uint32_t fpip;
+    uint32_t fpcs;
+    uint32_t fpoo;
+    uint32_t fpos;
+    uint8_t fpregs1[8 * 10];
+};
+
+void restore_native_fp_state(CPUState *env)
+{
+    int fptag, i, j;
+    struct fpstate fp1, *fp = &fp1;
+    
+    fp->fpuc = env->fpuc;
+    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+    fptag = 0;
+    for (i=7; i>=0; i--) {
+	fptag <<= 2;
+	if (env->fptags[i]) {
+            fptag |= 3;
+        } else {
+            /* the FPU automatically computes it */
+        }
+    }
+    fp->fptag = fptag;
+    j = env->fpstt;
+    for(i = 0;i < 8; i++) {
+        memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
+        j = (j + 1) & 7;
+    }
+    asm volatile ("frstor %0" : "=m" (*fp));
+    env->native_fp_regs = 1;
+}
+ 
+void save_native_fp_state(CPUState *env)
+{
+    int fptag, i, j;
+    uint16_t fpuc;
+    struct fpstate fp1, *fp = &fp1;
+
+    asm volatile ("fsave %0" : : "m" (*fp));
+    env->fpuc = fp->fpuc;
+    env->fpstt = (fp->fpus >> 11) & 7;
+    env->fpus = fp->fpus & ~0x3800;
+    fptag = fp->fptag;
+    for(i = 0;i < 8; i++) {
+        env->fptags[i] = ((fptag & 3) == 3);
+        fptag >>= 2;
+    }
+    j = env->fpstt;
+    for(i = 0;i < 8; i++) {
+        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
+        j = (j + 1) & 7;
+    }
+    /* we must restore the default rounding state */
+    /* XXX: we do not restore the exception state */
+    fpuc = 0x037f | (env->fpuc & (3 << 10));
+    asm volatile("fldcw %0" : : "m" (fpuc));
+    env->native_fp_regs = 0;
+}
+#endif
Index: /trunk/src/recompiler_new/target-i386/op.c
===================================================================
--- /trunk/src/recompiler_new/target-i386/op.c	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/op.c	(revision 13168)
@@ -0,0 +1,2595 @@
+/*
+ *  i386 micro operations
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#define ASM_SOFTMMU
+#include "exec.h"
+
+/* n must be a constant to be efficient */
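+/* lshift(x, 3) == x << 3 while lshift(x, -3) == x >> 3; the right shift is
+   arithmetic since target_long is signed */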
+static inline target_long lshift(target_long x, int n)
+{
+    if (n >= 0)
+        return x << n;
+    else
+        return x >> (-n);
+}
+
+/* we define the various pieces of code used by the JIT */
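+/* opreg_template.h is expanded once per general-purpose register: REG names
+   the storage location and REGNAME the suffix appended to the generated
+   register move micro-ops (e.g. op_movl_T0_EAX). */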
+
+#define REG EAX
+#define REGNAME _EAX
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG ECX
+#define REGNAME _ECX
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG EDX
+#define REGNAME _EDX
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG EBX
+#define REGNAME _EBX
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG ESP
+#define REGNAME _ESP
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG EBP
+#define REGNAME _EBP
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG ESI
+#define REGNAME _ESI
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG EDI
+#define REGNAME _EDI
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#ifdef TARGET_X86_64
+
+#define REG (env->regs[8])
+#define REGNAME _R8
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG (env->regs[9])
+#define REGNAME _R9
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG (env->regs[10])
+#define REGNAME _R10
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG (env->regs[11])
+#define REGNAME _R11
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG (env->regs[12])
+#define REGNAME _R12
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG (env->regs[13])
+#define REGNAME _R13
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG (env->regs[14])
+#define REGNAME _R14
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#define REG (env->regs[15])
+#define REGNAME _R15
+#include "opreg_template.h"
+#undef REG
+#undef REGNAME
+
+#endif
+
+/* operations with flags */
+
+/* update flags with T0 and T1 (add/sub case) */
+void OPPROTO op_update2_cc(void)
+{
+    CC_SRC = T1;
+    CC_DST = T0;
+}
+
+/* update flags with T0 (logic operation case) */
+void OPPROTO op_update1_cc(void)
+{
+    CC_DST = T0;
+}
+
+void OPPROTO op_update_neg_cc(void)
+{
+    CC_SRC = -T0;
+    CC_DST = T0;
+}
+
+void OPPROTO op_cmpl_T0_T1_cc(void)
+{
+    CC_SRC = T1;
+    CC_DST = T0 - T1;
+}
+
+void OPPROTO op_update_inc_cc(void)
+{
+    CC_SRC = cc_table[CC_OP].compute_c();
+    CC_DST = T0;
+}
+
+void OPPROTO op_testl_T0_T1_cc(void)
+{
+    CC_DST = T0 & T1;
+}
+
+/* operations without flags */
+
+void OPPROTO op_addl_T0_T1(void)
+{
+    T0 += T1;
+}
+
+void OPPROTO op_orl_T0_T1(void)
+{
+    T0 |= T1;
+}
+
+void OPPROTO op_andl_T0_T1(void)
+{
+    T0 &= T1;
+}
+
+void OPPROTO op_subl_T0_T1(void)
+{
+    T0 -= T1;
+}
+
+void OPPROTO op_xorl_T0_T1(void)
+{
+    T0 ^= T1;
+}
+
+void OPPROTO op_negl_T0(void)
+{
+    T0 = -T0;
+}
+
+void OPPROTO op_incl_T0(void)
+{
+    T0++;
+}
+
+void OPPROTO op_decl_T0(void)
+{
+    T0--;
+}
+
+void OPPROTO op_notl_T0(void)
+{
+    T0 = ~T0;
+}
+
+void OPPROTO op_bswapl_T0(void)
+{
+    T0 = bswap32(T0);
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_bswapq_T0(void)
+{
+    helper_bswapq_T0();
+}
+#endif
+
+/* multiply/divide */
+
+/* XXX: add eflags optimizations */
+/* XXX: add non P4 style flags */
+
+void OPPROTO op_mulb_AL_T0(void)
+{
+    unsigned int res;
+    res = (uint8_t)EAX * (uint8_t)T0;
+    EAX = (EAX & ~0xffff) | res;
+    CC_DST = res;
+    CC_SRC = (res & 0xff00);
+}
+
+void OPPROTO op_imulb_AL_T0(void)
+{
+    int res;
+    res = (int8_t)EAX * (int8_t)T0;
+    EAX = (EAX & ~0xffff) | (res & 0xffff);
+    CC_DST = res;
+    CC_SRC = (res != (int8_t)res);
+}
+
+void OPPROTO op_mulw_AX_T0(void)
+{
+    unsigned int res;
+    res = (uint16_t)EAX * (uint16_t)T0;
+    EAX = (EAX & ~0xffff) | (res & 0xffff);
+    EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff);
+    CC_DST = res;
+    CC_SRC = res >> 16;
+}
+
+void OPPROTO op_imulw_AX_T0(void)
+{
+    int res;
+    res = (int16_t)EAX * (int16_t)T0;
+    EAX = (EAX & ~0xffff) | (res & 0xffff);
+    EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff);
+    CC_DST = res;
+    CC_SRC = (res != (int16_t)res);
+}
+
+void OPPROTO op_mull_EAX_T0(void)
+{
+    uint64_t res;
+    res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0);
+    EAX = (uint32_t)res;
+    EDX = (uint32_t)(res >> 32);
+    CC_DST = (uint32_t)res;
+    CC_SRC = (uint32_t)(res >> 32);
+}
+
+void OPPROTO op_imull_EAX_T0(void)
+{
+    int64_t res;
+    res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0);
+    EAX = (uint32_t)(res);
+    EDX = (uint32_t)(res >> 32);
+    CC_DST = res;
+    CC_SRC = (res != (int32_t)res);
+}
+
+void OPPROTO op_imulw_T0_T1(void)
+{
+    int res;
+    res = (int16_t)T0 * (int16_t)T1;
+    T0 = res;
+    CC_DST = res;
+    CC_SRC = (res != (int16_t)res);
+}
+
+void OPPROTO op_imull_T0_T1(void)
+{
+    int64_t res;
+    res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1);
+    T0 = res;
+    CC_DST = res;
+    CC_SRC = (res != (int32_t)res);
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_mulq_EAX_T0(void)
+{
+    helper_mulq_EAX_T0();
+}
+
+void OPPROTO op_imulq_EAX_T0(void)
+{
+    helper_imulq_EAX_T0();
+}
+
+void OPPROTO op_imulq_T0_T1(void)
+{
+    helper_imulq_T0_T1();
+}
+#endif
+
+/* division, flags are undefined */
+
+void OPPROTO op_divb_AL_T0(void)
+{
+    unsigned int num, den, q, r;
+
+    num = (EAX & 0xffff);
+    den = (T0 & 0xff);
+    if (den == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    q = (num / den);
+    if (q > 0xff)
+        raise_exception(EXCP00_DIVZ);
+    q &= 0xff;
+    r = (num % den) & 0xff;
+    EAX = (EAX & ~0xffff) | (r << 8) | q;
+}
+
+void OPPROTO op_idivb_AL_T0(void)
+{
+    int num, den, q, r;
+
+    num = (int16_t)EAX;
+    den = (int8_t)T0;
+    if (den == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    q = (num / den);
+    if (q != (int8_t)q)
+        raise_exception(EXCP00_DIVZ);
+    q &= 0xff;
+    r = (num % den) & 0xff;
+    EAX = (EAX & ~0xffff) | (r << 8) | q;
+}
+
+void OPPROTO op_divw_AX_T0(void)
+{
+    unsigned int num, den, q, r;
+
+    num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
+    den = (T0 & 0xffff);
+    if (den == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    q = (num / den);
+    if (q > 0xffff)
+        raise_exception(EXCP00_DIVZ);
+    q &= 0xffff;
+    r = (num % den) & 0xffff;
+    EAX = (EAX & ~0xffff) | q;
+    EDX = (EDX & ~0xffff) | r;
+}
+
+void OPPROTO op_idivw_AX_T0(void)
+{
+    int num, den, q, r;
+
+    num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
+    den = (int16_t)T0;
+    if (den == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    q = (num / den);
+    if (q != (int16_t)q)
+        raise_exception(EXCP00_DIVZ);
+    q &= 0xffff;
+    r = (num % den) & 0xffff;
+    EAX = (EAX & ~0xffff) | q;
+    EDX = (EDX & ~0xffff) | r;
+}
+
+void OPPROTO op_divl_EAX_T0(void)
+{
+    helper_divl_EAX_T0();
+}
+
+void OPPROTO op_idivl_EAX_T0(void)
+{
+    helper_idivl_EAX_T0();
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_divq_EAX_T0(void)
+{
+    helper_divq_EAX_T0();
+}
+
+void OPPROTO op_idivq_EAX_T0(void)
+{
+    helper_idivq_EAX_T0();
+}
+#endif
+
+/* constant load & misc op */
+
+/* XXX: consistent names */
+void OPPROTO op_movl_T0_imu(void)
+{
+    T0 = (uint32_t)PARAM1;
+}
+
+void OPPROTO op_movl_T0_im(void)
+{
+    T0 = (int32_t)PARAM1;
+}
+
+void OPPROTO op_addl_T0_im(void)
+{
+    T0 += PARAM1;
+}
+
+void OPPROTO op_andl_T0_ffff(void)
+{
+    T0 = T0 & 0xffff;
+}
+
+void OPPROTO op_andl_T0_im(void)
+{
+    T0 = T0 & PARAM1;
+}
+
+void OPPROTO op_movl_T0_T1(void)
+{
+    T0 = T1;
+}
+
+void OPPROTO op_movl_T1_imu(void)
+{
+    T1 = (uint32_t)PARAM1;
+}
+
+void OPPROTO op_movl_T1_im(void)
+{
+    T1 = (int32_t)PARAM1;
+}
+
+void OPPROTO op_addl_T1_im(void)
+{
+    T1 += PARAM1;
+}
+
+void OPPROTO op_movl_T1_A0(void)
+{
+    T1 = A0;
+}
+
+void OPPROTO op_movl_A0_im(void)
+{
+    A0 = (uint32_t)PARAM1;
+}
+
+void OPPROTO op_addl_A0_im(void)
+{
+    A0 = (uint32_t)(A0 + PARAM1);
+}
+
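+/*
+ * In the VBOX build the segment base is read from env->segs[] rather than via a
+ * direct structure offset, and sync_seg() is presumably used to lazily refresh
+ * the hidden segment cache when a selector was changed outside the recompiler
+ * (newselector != 0).  The same pattern is repeated in the other *_seg ops below.
+ */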
+void OPPROTO op_movl_A0_seg(void)
+{
+#ifdef VBOX
+    /** @todo Not very efficient, but the least invasive. */
+    /** @todo Pass segment register index as parameter in translate.c. */
+    uint32_t idx = (PARAM1 - offsetof(CPUX86State,segs[0].base)) / sizeof(SegmentCache);
+
+    if (env->segs[idx].newselector && !(env->eflags & VM_MASK)) {
+        sync_seg(env, idx, env->segs[idx].newselector);
+    }
+#if 0 /* breaks Solaris */
+    /* Loading a null selector into a segment register is valid, but using it is most definitely not! */
+    if (    (env->cr[0] & (CR0_PE_MASK|CR0_PG_MASK)) == (CR0_PE_MASK|CR0_PG_MASK)
+        &&  !(env->eflags & VM_MASK)
+        &&  env->segs[idx].selector == 0) {
+        raise_exception(EXCP0D_GPF);
+    }
+#endif
+    A0 = (uint32_t)env->segs[idx].base;
+    FORCE_RET();
+#else  /* !VBOX */
+    A0 = (uint32_t)*(target_ulong *)((char *)env + PARAM1);
+#endif /* !VBOX */
+}
+
+void OPPROTO op_addl_A0_seg(void)
+{
+#ifdef VBOX
+    uint32_t idx = (PARAM1 - offsetof(CPUX86State,segs[0].base)) / sizeof(SegmentCache);
+
+    if (env->segs[idx].newselector && !(env->eflags & VM_MASK)) {
+        sync_seg(env, idx, env->segs[idx].newselector);
+    }
+#if 0 /* breaks Solaris */
+    /* Loading a null selector into a segment register is valid, but using it is most definitely not! */
+    if (    (env->cr[0] & (CR0_PE_MASK|CR0_PG_MASK)) == (CR0_PE_MASK|CR0_PG_MASK)
+        &&  !(env->eflags & VM_MASK)
+        &&  env->segs[idx].selector == 0) {
+        raise_exception(EXCP0D_GPF);
+    }
+#endif
+    A0 = (uint32_t)(A0 + env->segs[idx].base);
+    FORCE_RET();
+#else  /* !VBOX */
+    A0 = (uint32_t)(A0 + *(target_ulong *)((char *)env + PARAM1));
+#endif /* !VBOX */
+}
+
+void OPPROTO op_addl_A0_AL(void)
+{
+    A0 = (uint32_t)(A0 + (EAX & 0xff));
+}
+
+#ifdef WORDS_BIGENDIAN
+typedef union UREG64 {
+    struct { uint16_t v3, v2, v1, v0; } w;
+    struct { uint32_t v1, v0; } l;
+    uint64_t q;
+} UREG64;
+#else
+typedef union UREG64 {
+    struct { uint16_t v0, v1, v2, v3; } w;
+    struct { uint32_t v0, v1; } l;
+    uint64_t q;
+} UREG64;
+#endif
+
+#ifdef TARGET_X86_64
+
+#define PARAMQ1 \
+({\
+    UREG64 __p;\
+    __p.l.v1 = PARAM1;\
+    __p.l.v0 = PARAM2;\
+    __p.q;\
+}) 
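+/* PARAMQ1 reassembles a 64-bit immediate from the two 32-bit translation-time
+   parameters: PARAM1 supplies the high half and PARAM2 the low half of the
+   UREG64 union, independent of host byte order. */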
+
+void OPPROTO op_movq_T0_im64(void)
+{
+    T0 = PARAMQ1;
+}
+
+void OPPROTO op_movq_T1_im64(void)
+{
+    T1 = PARAMQ1;
+}
+
+void OPPROTO op_movq_A0_im(void)
+{
+    A0 = (int32_t)PARAM1;
+}
+
+void OPPROTO op_movq_A0_im64(void)
+{
+    A0 = PARAMQ1;
+}
+
+void OPPROTO op_addq_A0_im(void)
+{
+    A0 = (A0 + (int32_t)PARAM1);
+}
+
+void OPPROTO op_addq_A0_im64(void)
+{
+    A0 = (A0 + PARAMQ1);
+}
+
+void OPPROTO op_movq_A0_seg(void)
+{
+#ifdef VBOX
+    uint32_t idx = (PARAM1 - offsetof(CPUX86State,segs[0].base)) / sizeof(SegmentCache);
+
+    if (env->segs[idx].newselector && !(env->eflags & VM_MASK))
+        sync_seg(env, idx, env->segs[idx].newselector);
+    A0 = (target_ulong)env->segs[idx].base;
+#else  /* !VBOX */
+    A0 = *(target_ulong *)((char *)env + PARAM1);
+#endif /* !VBOX */
+}
+
+void OPPROTO op_addq_A0_seg(void)
+{
+#ifdef VBOX
+    uint32_t idx = (PARAM1 - offsetof(CPUX86State,segs[0].base)) / sizeof(SegmentCache);
+
+    if (env->segs[idx].newselector && !(env->eflags & VM_MASK))
+        sync_seg(env, idx, env->segs[idx].newselector);
+    A0 += (target_ulong)env->segs[idx].base;
+#else  /* !VBOX */
+    A0 += *(target_ulong *)((char *)env + PARAM1);
+#endif /* !VBOX */
+}
+
+void OPPROTO op_addq_A0_AL(void)
+{
+    A0 = (A0 + (EAX & 0xff));
+}
+
+#endif
+
+void OPPROTO op_andl_A0_ffff(void)
+{
+    A0 = A0 & 0xffff;
+}
+
+/* memory access */
+
+#define MEMSUFFIX _raw
+#include "ops_mem.h"
+
+#if !defined(CONFIG_USER_ONLY)
+#define MEMSUFFIX _kernel
+#include "ops_mem.h"
+
+#define MEMSUFFIX _user
+#include "ops_mem.h"
+#endif
+
+/* indirect jump */
+
+void OPPROTO op_jmp_T0(void)
+{
+    EIP = T0;
+}
+
+void OPPROTO op_movl_eip_im(void)
+{
+    EIP = (uint32_t)PARAM1;
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_movq_eip_im(void)
+{
+    EIP = (int32_t)PARAM1;
+}
+
+void OPPROTO op_movq_eip_im64(void)
+{
+    EIP = PARAMQ1;
+}
+#endif
+
+void OPPROTO op_hlt(void)
+{
+    helper_hlt();
+}
+
+void OPPROTO op_monitor(void)
+{
+    helper_monitor();
+}
+
+void OPPROTO op_mwait(void)
+{
+    helper_mwait();
+}
+
+void OPPROTO op_debug(void)
+{
+    env->exception_index = EXCP_DEBUG;
+    cpu_loop_exit();
+}
+
+void OPPROTO op_raise_interrupt(void)
+{
+    int intno, next_eip_addend;
+    intno = PARAM1;
+    next_eip_addend = PARAM2;
+    raise_interrupt(intno, 1, 0, next_eip_addend);
+}
+
+void OPPROTO op_raise_exception(void)
+{
+    int exception_index;
+    exception_index = PARAM1;
+    raise_exception(exception_index);
+}
+
+void OPPROTO op_into(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    if (eflags & CC_O) {
+        raise_interrupt(EXCP04_INTO, 1, 0, PARAM1);
+    }
+    FORCE_RET();
+}
+
+void OPPROTO op_cli(void)
+{
+    env->eflags &= ~IF_MASK;
+}
+
+void OPPROTO op_sti(void)
+{
+    env->eflags |= IF_MASK;
+}
+
+void OPPROTO op_set_inhibit_irq(void)
+{
+    env->hflags |= HF_INHIBIT_IRQ_MASK;
+}
+
+void OPPROTO op_reset_inhibit_irq(void)
+{
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+}
+
+void OPPROTO op_rsm(void)
+{
+    helper_rsm();
+}
+
+#ifndef VBOX 
+#if 0
+/* vm86plus instructions */
+void OPPROTO op_cli_vm(void)
+{
+    env->eflags &= ~VIF_MASK;
+}
+
+void OPPROTO op_sti_vm(void)
+{
+    env->eflags |= VIF_MASK;
+    if (env->eflags & VIP_MASK) {
+        EIP = PARAM1;
+        raise_exception(EXCP0D_GPF);
+    }
+    FORCE_RET();
+}
+#endif
+
+#else /* VBOX */
+void OPPROTO op_cli_vme(void)
+{
+    env->eflags &= ~VIF_MASK;
+}
+
+void OPPROTO op_sti_vme(void)
+{
+    /* First check, then change eflags according to the AMD manual */
+    if (env->eflags & VIP_MASK) {
+        raise_exception(EXCP0D_GPF);
+    }
+    env->eflags |= VIF_MASK;
+    FORCE_RET();
+}
+#endif /* VBOX */
+
+void OPPROTO op_boundw(void)
+{
+    int low, high, v;
+    low = ldsw(A0);
+    high = ldsw(A0 + 2);
+    v = (int16_t)T0;
+    if (v < low || v > high) {
+        raise_exception(EXCP05_BOUND);
+    }
+    FORCE_RET();
+}
+
+void OPPROTO op_boundl(void)
+{
+    int low, high, v;
+    low = ldl(A0);
+    high = ldl(A0 + 4);
+    v = T0;
+    if (v < low || v > high) {
+        raise_exception(EXCP05_BOUND);
+    }
+    FORCE_RET();
+}
+
+void OPPROTO op_cmpxchg8b(void)
+{
+    helper_cmpxchg8b();
+}
+
+void OPPROTO op_single_step(void)
+{
+    helper_single_step();
+}
+
+void OPPROTO op_movl_T0_0(void)
+{
+    T0 = 0;
+}
+
+#ifdef VBOX
+void OPPROTO op_check_external_event(void)
+{
+    if (    (env->interrupt_request & (  CPU_INTERRUPT_EXTERNAL_EXIT
+                                       | CPU_INTERRUPT_EXTERNAL_TIMER
+                                       | CPU_INTERRUPT_EXTERNAL_DMA))
+        ||  (   (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD)
+             && (env->eflags & IF_MASK)
+             && !(env->hflags & HF_INHIBIT_IRQ_MASK) ) )
+    {
+        helper_external_event();
+    }
+}
+
+void OPPROTO op_record_call(void)
+{
+    helper_record_call();
+}
+
+#endif /* VBOX */
+
+void OPPROTO op_exit_tb(void)
+{
+    EXIT_TB();
+}
+
+/* multiple size ops */
+
+#define ldul ldl
+
+#define SHIFT 0
+#include "ops_template.h"
+#undef SHIFT
+
+#define SHIFT 1
+#include "ops_template.h"
+#undef SHIFT
+
+#define SHIFT 2
+#include "ops_template.h"
+#undef SHIFT
+
+#ifdef TARGET_X86_64
+
+#define SHIFT 3
+#include "ops_template.h"
+#undef SHIFT
+
+#endif
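+
+/* ops_template.h is expanded once per operand size: SHIFT values 0, 1, 2
+   and (on x86-64 targets) 3 generate the 8, 16, 32 and 64-bit variants of
+   the size-dependent micro-ops. */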
+
+/* sign extend */
+
+void OPPROTO op_movsbl_T0_T0(void)
+{
+    T0 = (int8_t)T0;
+}
+
+void OPPROTO op_movzbl_T0_T0(void)
+{
+    T0 = (uint8_t)T0;
+}
+
+void OPPROTO op_movswl_T0_T0(void)
+{
+    T0 = (int16_t)T0;
+}
+
+void OPPROTO op_movzwl_T0_T0(void)
+{
+    T0 = (uint16_t)T0;
+}
+
+void OPPROTO op_movswl_EAX_AX(void)
+{
+    EAX = (uint32_t)((int16_t)EAX);
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_movslq_T0_T0(void)
+{
+    T0 = (int32_t)T0;
+}
+
+void OPPROTO op_movslq_RAX_EAX(void)
+{
+    EAX = (int32_t)EAX;
+}
+#endif
+
+void OPPROTO op_movsbw_AX_AL(void)
+{
+    EAX = (EAX & ~0xffff) | ((int8_t)EAX & 0xffff);
+}
+
+void OPPROTO op_movslq_EDX_EAX(void)
+{
+    EDX = (uint32_t)((int32_t)EAX >> 31);
+}
+
+void OPPROTO op_movswl_DX_AX(void)
+{
+    EDX = (EDX & ~0xffff) | (((int16_t)EAX >> 15) & 0xffff);
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_movsqo_RDX_RAX(void)
+{
+    EDX = (int64_t)EAX >> 63;
+}
+#endif
+
+/* string ops helpers */
+
+void OPPROTO op_addl_ESI_T0(void)
+{
+    ESI = (uint32_t)(ESI + T0);
+}
+
+void OPPROTO op_addw_ESI_T0(void)
+{
+    ESI = (ESI & ~0xffff) | ((ESI + T0) & 0xffff);
+}
+
+void OPPROTO op_addl_EDI_T0(void)
+{
+    EDI = (uint32_t)(EDI + T0);
+}
+
+void OPPROTO op_addw_EDI_T0(void)
+{
+    EDI = (EDI & ~0xffff) | ((EDI + T0) & 0xffff);
+}
+
+void OPPROTO op_decl_ECX(void)
+{
+    ECX = (uint32_t)(ECX - 1);
+}
+
+void OPPROTO op_decw_ECX(void)
+{
+    ECX = (ECX & ~0xffff) | ((ECX - 1) & 0xffff);
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_addq_ESI_T0(void)
+{
+    ESI = (ESI + T0);
+}
+
+void OPPROTO op_addq_EDI_T0(void)
+{
+    EDI = (EDI + T0);
+}
+
+void OPPROTO op_decq_ECX(void)
+{
+    ECX--;
+}
+#endif
+
+/* push/pop utils */
+
+void op_addl_A0_SS(void)
+{
+    A0 = (uint32_t)(A0 + env->segs[R_SS].base);
+}
+
+void op_subl_A0_2(void)
+{
+    A0 = (uint32_t)(A0 - 2);
+}
+
+void op_subl_A0_4(void)
+{
+    A0 = (uint32_t)(A0 - 4);
+}
+
+void op_addl_ESP_4(void)
+{
+    ESP = (uint32_t)(ESP + 4);
+}
+
+void op_addl_ESP_2(void)
+{
+    ESP = (uint32_t)(ESP + 2);
+}
+
+void op_addw_ESP_4(void)
+{
+    ESP = (ESP & ~0xffff) | ((ESP + 4) & 0xffff);
+}
+
+void op_addw_ESP_2(void)
+{
+    ESP = (ESP & ~0xffff) | ((ESP + 2) & 0xffff);
+}
+
+void op_addl_ESP_im(void)
+{
+    ESP = (uint32_t)(ESP + PARAM1);
+}
+
+void op_addw_ESP_im(void)
+{
+    ESP = (ESP & ~0xffff) | ((ESP + PARAM1) & 0xffff);
+}
+
+#ifdef TARGET_X86_64
+void op_subq_A0_2(void)
+{
+    A0 -= 2;
+}
+
+void op_subq_A0_8(void)
+{
+    A0 -= 8;
+}
+
+void op_addq_ESP_8(void)
+{
+    ESP += 8;
+}
+
+void op_addq_ESP_im(void)
+{
+    ESP += PARAM1;
+}
+#endif
+
+void OPPROTO op_rdtsc(void)
+{
+    helper_rdtsc();
+}
+
+void OPPROTO op_cpuid(void)
+{
+    helper_cpuid();
+}
+
+void OPPROTO op_enter_level(void)
+{
+    helper_enter_level(PARAM1, PARAM2);
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_enter64_level(void)
+{
+    helper_enter64_level(PARAM1, PARAM2);
+}
+#endif
+
+void OPPROTO op_sysenter(void)
+{
+    helper_sysenter();
+}
+
+void OPPROTO op_sysexit(void)
+{
+    helper_sysexit();
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_syscall(void)
+{
+    helper_syscall(PARAM1);
+}
+
+void OPPROTO op_sysret(void)
+{
+    helper_sysret(PARAM1);
+}
+#endif
+
+void OPPROTO op_rdmsr(void)
+{
+    helper_rdmsr();
+}
+
+void OPPROTO op_wrmsr(void)
+{
+    helper_wrmsr();
+}
+
+/* bcd */
+
+/* XXX: exception */
+void OPPROTO op_aam(void)
+{
+    int base = PARAM1;
+    int al, ah;
+    al = EAX & 0xff;
+    ah = al / base;
+    al = al % base;
+    EAX = (EAX & ~0xffff) | al | (ah << 8);
+    CC_DST = al;
+}
+
+void OPPROTO op_aad(void)
+{
+    int base = PARAM1;
+    int al, ah;
+    al = EAX & 0xff;
+    ah = (EAX >> 8) & 0xff;
+    al = ((ah * base) + al) & 0xff;
+    EAX = (EAX & ~0xffff) | al;
+    CC_DST = al;
+}
+
+void OPPROTO op_aaa(void)
+{
+    int icarry;
+    int al, ah, af;
+    int eflags;
+
+    eflags = cc_table[CC_OP].compute_all();
+    af = eflags & CC_A;
+    al = EAX & 0xff;
+    ah = (EAX >> 8) & 0xff;
+
+    icarry = (al > 0xf9);
+    if (((al & 0x0f) > 9 ) || af) {
+        al = (al + 6) & 0x0f;
+        ah = (ah + 1 + icarry) & 0xff;
+        eflags |= CC_C | CC_A;
+    } else {
+        eflags &= ~(CC_C | CC_A);
+        al &= 0x0f;
+    }
+    EAX = (EAX & ~0xffff) | al | (ah << 8);
+    CC_SRC = eflags;
+    FORCE_RET();
+}
+
+void OPPROTO op_aas(void)
+{
+    int icarry;
+    int al, ah, af;
+    int eflags;
+
+    eflags = cc_table[CC_OP].compute_all();
+    af = eflags & CC_A;
+    al = EAX & 0xff;
+    ah = (EAX >> 8) & 0xff;
+
+    icarry = (al < 6);
+    if (((al & 0x0f) > 9 ) || af) {
+        al = (al - 6) & 0x0f;
+        ah = (ah - 1 - icarry) & 0xff;
+        eflags |= CC_C | CC_A;
+    } else {
+        eflags &= ~(CC_C | CC_A);
+        al &= 0x0f;
+    }
+    EAX = (EAX & ~0xffff) | al | (ah << 8);
+    CC_SRC = eflags;
+    FORCE_RET();
+}
+
+void OPPROTO op_daa(void)
+{
+    int al, af, cf;
+    int eflags;
+
+    eflags = cc_table[CC_OP].compute_all();
+    cf = eflags & CC_C;
+    af = eflags & CC_A;
+    al = EAX & 0xff;
+
+    eflags = 0;
+    if (((al & 0x0f) > 9 ) || af) {
+        al = (al + 6) & 0xff;
+        eflags |= CC_A;
+    }
+    if ((al > 0x9f) || cf) {
+        al = (al + 0x60) & 0xff;
+        eflags |= CC_C;
+    }
+    EAX = (EAX & ~0xff) | al;
+    /* well, speed is not an issue here, so we compute the flags by hand */
+    eflags |= (al == 0) << 6; /* zf */
+    eflags |= parity_table[al]; /* pf */
+    eflags |= (al & 0x80); /* sf */
+    CC_SRC = eflags;
+    FORCE_RET();
+}
+
+void OPPROTO op_das(void)
+{
+    int al, al1, af, cf;
+    int eflags;
+
+    eflags = cc_table[CC_OP].compute_all();
+    cf = eflags & CC_C;
+    af = eflags & CC_A;
+    al = EAX & 0xff;
+
+    eflags = 0;
+    al1 = al;
+    if (((al & 0x0f) > 9 ) || af) {
+        eflags |= CC_A;
+        if (al < 6 || cf)
+            eflags |= CC_C;
+        al = (al - 6) & 0xff;
+    }
+    if ((al1 > 0x99) || cf) {
+        al = (al - 0x60) & 0xff;
+        eflags |= CC_C;
+    }
+    EAX = (EAX & ~0xff) | al;
+    /* well, speed is not an issue here, so we compute the flags by hand */
+    eflags |= (al == 0) << 6; /* zf */
+    eflags |= parity_table[al]; /* pf */
+    eflags |= (al & 0x80); /* sf */
+    CC_SRC = eflags;
+    FORCE_RET();
+}
+
+/* segment handling */
+
+/* never use it with R_CS */
+void OPPROTO op_movl_seg_T0(void)
+{
+    load_seg(PARAM1, T0);
+}
+
+/* faster VM86 version */
+void OPPROTO op_movl_seg_T0_vm(void)
+{
+    int selector;
+    SegmentCache *sc;
+    
+    selector = T0 & 0xffff;
+    /* env->segs[] access */
+    sc = (SegmentCache *)((char *)env + PARAM1);
+    sc->selector = selector;
+    sc->base = (selector << 4);
+#ifdef VBOX
+    sc->flags = 0; /* clear attributes */
+#endif
+}
+
+void OPPROTO op_movl_T0_seg(void)
+{
+    T0 = env->segs[PARAM1].selector;
+}
+
+void OPPROTO op_lsl(void)
+{
+    helper_lsl();
+}
+
+void OPPROTO op_lar(void)
+{
+    helper_lar();
+}
+
+void OPPROTO op_verr(void)
+{
+    helper_verr();
+}
+
+void OPPROTO op_verw(void)
+{
+    helper_verw();
+}
+
+void OPPROTO op_arpl(void)
+{
+    if ((T0 & 3) < (T1 & 3)) {
+        /* XXX: emulate the bug, or OR in 0xff3f0000 as Bochs does? */
+        T0 = (T0 & ~3) | (T1 & 3);
+        T1 = CC_Z;
+    } else {
+        T1 = 0;
+    }
+    FORCE_RET();
+}
+            
+void OPPROTO op_arpl_update(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    CC_SRC = (eflags & ~CC_Z) | T1;
+}
+    
+/* T0: segment, T1:eip */
+void OPPROTO op_ljmp_protected_T0_T1(void)
+{
+    helper_ljmp_protected_T0_T1(PARAM1);
+}
+
+void OPPROTO op_lcall_real_T0_T1(void)
+{
+    helper_lcall_real_T0_T1(PARAM1, PARAM2);
+}
+
+void OPPROTO op_lcall_protected_T0_T1(void)
+{
+    helper_lcall_protected_T0_T1(PARAM1, PARAM2);
+}
+
+void OPPROTO op_iret_real(void)
+{
+    helper_iret_real(PARAM1);
+}
+
+void OPPROTO op_iret_protected(void)
+{
+    helper_iret_protected(PARAM1, PARAM2);
+}
+
+void OPPROTO op_lret_protected(void)
+{
+    helper_lret_protected(PARAM1, PARAM2);
+}
+
+void OPPROTO op_lldt_T0(void)
+{
+    helper_lldt_T0();
+}
+
+void OPPROTO op_ltr_T0(void)
+{
+    helper_ltr_T0();
+}
+
+/* CR registers access */
+void OPPROTO op_movl_crN_T0(void)
+{
+    helper_movl_crN_T0(PARAM1);
+}
+
+#if !defined(CONFIG_USER_ONLY) 
+void OPPROTO op_movtl_T0_cr8(void)
+{
+    T0 = cpu_get_apic_tpr(env);
+}
+#endif
+
+/* DR registers access */
+void OPPROTO op_movl_drN_T0(void)
+{
+    helper_movl_drN_T0(PARAM1);
+}
+
+void OPPROTO op_lmsw_T0(void)
+{
+    /* Only the 4 lower bits of CR0 are modified; PE cannot be cleared
+       once it has been set. */
+    T0 = (env->cr[0] & ~0xe) | (T0 & 0xf);
+    helper_movl_crN_T0(0);
+}
+
+void OPPROTO op_invlpg_A0(void)
+{
+    helper_invlpg(A0);
+}
+
+void OPPROTO op_movl_T0_env(void)
+{
+    T0 = *(uint32_t *)((char *)env + PARAM1);
+}
+
+void OPPROTO op_movl_env_T0(void)
+{
+    *(uint32_t *)((char *)env + PARAM1) = T0;
+}
+
+void OPPROTO op_movl_env_T1(void)
+{
+    *(uint32_t *)((char *)env + PARAM1) = T1;
+}
+
+void OPPROTO op_movtl_T0_env(void)
+{
+    T0 = *(target_ulong *)((char *)env + PARAM1);
+}
+
+void OPPROTO op_movtl_env_T0(void)
+{
+    *(target_ulong *)((char *)env + PARAM1) = T0;
+}
+
+void OPPROTO op_movtl_T1_env(void)
+{
+    T1 = *(target_ulong *)((char *)env + PARAM1);
+}
+
+void OPPROTO op_movtl_env_T1(void)
+{
+    *(target_ulong *)((char *)env + PARAM1) = T1;
+}
+
+void OPPROTO op_clts(void)
+{
+    env->cr[0] &= ~CR0_TS_MASK;
+    env->hflags &= ~HF_TS_MASK;
+}
+
+/* flags handling */
+
+void OPPROTO op_goto_tb0(void)
+{
+    GOTO_TB(op_goto_tb0, PARAM1, 0);
+}
+
+void OPPROTO op_goto_tb1(void)
+{
+    GOTO_TB(op_goto_tb1, PARAM1, 1);
+}
+
+void OPPROTO op_jmp_label(void)
+{
+    GOTO_LABEL_PARAM(1);
+}
+
+void OPPROTO op_jnz_T0_label(void)
+{
+    if (T0)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+void OPPROTO op_jz_T0_label(void)
+{
+    if (!T0)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+/* slow set cases (compute x86 flags) */
+void OPPROTO op_seto_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (eflags >> 11) & 1;
+}
+
+void OPPROTO op_setb_T0_cc(void)
+{
+    T0 = cc_table[CC_OP].compute_c();
+}
+
+void OPPROTO op_setz_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (eflags >> 6) & 1;
+}
+
+void OPPROTO op_setbe_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (eflags & (CC_Z | CC_C)) != 0;
+}
+
+void OPPROTO op_sets_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (eflags >> 7) & 1;
+}
+
+void OPPROTO op_setp_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (eflags >> 2) & 1;
+}
+
+void OPPROTO op_setl_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = ((eflags ^ (eflags >> 4)) >> 7) & 1;
+}
+
+void OPPROTO op_setle_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z)) != 0;
+}
+
+void OPPROTO op_xor_T0_1(void)
+{
+    T0 ^= 1;
+}
+
+void OPPROTO op_set_cc_op(void)
+{
+    CC_OP = PARAM1;
+}
+
+void OPPROTO op_mov_T0_cc(void)
+{
+    T0 = cc_table[CC_OP].compute_all();
+}
+
+/* XXX: clear VIF/VIP in all ops ? */
+#ifdef VBOX
+/* XXX: AMD docs say they remain unchanged. */
+#endif
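+
+/* The plain, _io and _cpl0 variants below differ only in which system flags
+   the guest may update: the base mask covers TF/AC/ID/NT, the _io variants
+   additionally allow IF, and the _cpl0 variants allow IF and IOPL; the
+   translator presumably selects one based on the current CPL and IOPL. */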
+
+void OPPROTO op_movl_eflags_T0(void)
+{
+    load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK));
+}
+
+void OPPROTO op_movw_eflags_T0(void)
+{
+    load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff);
+}
+
+void OPPROTO op_movl_eflags_T0_io(void)
+{
+    load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK));
+}
+
+void OPPROTO op_movw_eflags_T0_io(void)
+{
+    load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK) & 0xffff);
+}
+
+void OPPROTO op_movl_eflags_T0_cpl0(void)
+{
+    load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK));
+}
+
+void OPPROTO op_movw_eflags_T0_cpl0(void)
+{
+    load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK) & 0xffff);
+}
+
+#ifndef VBOX
+#if 0
+/* vm86plus version */
+void OPPROTO op_movw_eflags_T0_vm(void)
+{
+    int eflags;
+    eflags = T0;
+    CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    DF = 1 - (2 * ((eflags >> 10) & 1));
+    /* we also update some system flags as in user mode */
+    env->eflags = (env->eflags & ~(FL_UPDATE_MASK16 | VIF_MASK)) |
+        (eflags & FL_UPDATE_MASK16);
+    if (eflags & IF_MASK) {
+        env->eflags |= VIF_MASK;
+        if (env->eflags & VIP_MASK) {
+            EIP = PARAM1;
+            raise_exception(EXCP0D_GPF);
+        }
+    }
+    FORCE_RET();
+}
+
+void OPPROTO op_movl_eflags_T0_vm(void)
+{
+    int eflags;
+    eflags = T0;
+    CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    DF = 1 - (2 * ((eflags >> 10) & 1));
+    /* we also update some system flags as in user mode */
+    env->eflags = (env->eflags & ~(FL_UPDATE_MASK32 | VIF_MASK)) |
+        (eflags & FL_UPDATE_MASK32);
+    if (eflags & IF_MASK) {
+        env->eflags |= VIF_MASK;
+        if (env->eflags & VIP_MASK) {
+            EIP = PARAM1;
+            raise_exception(EXCP0D_GPF);
+        }
+    }
+    FORCE_RET();
+}
+#endif
+
+#else /* VBOX */
+/* IOPL != 3, CR4.VME=1 */
+void OPPROTO op_movw_eflags_T0_vme(void)
+{
+    unsigned int new_eflags = T0;
+
+    /* if virtual interrupt pending and (virtual) interrupts will be enabled -> #GP */
+    /* if TF will be set -> #GP */
+    if (    ((new_eflags & IF_MASK) && (env->eflags & VIP_MASK)) 
+        ||  (new_eflags & TF_MASK)) {
+        raise_exception(EXCP0D_GPF);
+    } else {
+        load_eflags(new_eflags, (TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff);
+
+        if (new_eflags & IF_MASK) {
+            env->eflags |= VIF_MASK;
+        } else {
+            env->eflags &= ~VIF_MASK;
+        }
+    }
+
+    FORCE_RET();
+}
+#endif /* VBOX */
+
+/* XXX: compute only O flag */
+void OPPROTO op_movb_eflags_T0(void)
+{
+    int of;
+    of = cc_table[CC_OP].compute_all() & CC_O;
+    CC_SRC = (T0 & (CC_S | CC_Z | CC_A | CC_P | CC_C)) | of;
+}
+
+void OPPROTO op_movl_T0_eflags(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    eflags |= (DF & DF_MASK);
+    eflags |= env->eflags & ~(VM_MASK | RF_MASK);
+    T0 = eflags;
+}
+
+/* vm86plus version */
+#ifdef VBOX /* #if 0 */
+void OPPROTO op_movl_T0_eflags_vme(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    eflags |= (DF & DF_MASK);
+    eflags |= env->eflags & ~(VM_MASK | RF_MASK | IF_MASK);
+    if (env->eflags & VIF_MASK)
+        eflags |= IF_MASK;
+    T0 = eflags;
+}
+#endif /* VBOX / 0 */
+
+void OPPROTO op_cld(void)
+{
+    DF = 1;
+}
+
+void OPPROTO op_std(void)
+{
+    DF = -1;
+}
+
+void OPPROTO op_clc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    eflags &= ~CC_C;
+    CC_SRC = eflags;
+}
+
+void OPPROTO op_stc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    eflags |= CC_C;
+    CC_SRC = eflags;
+}
+
+void OPPROTO op_cmc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    eflags ^= CC_C;
+    CC_SRC = eflags;
+}
+
+void OPPROTO op_salc(void)
+{
+    int cf;
+    cf = cc_table[CC_OP].compute_c();
+    EAX = (EAX & ~0xff) | ((-cf) & 0xff);
+}
+
+static int compute_all_eflags(void)
+{
+    return CC_SRC;
+}
+
+static int compute_c_eflags(void)
+{
+    return CC_SRC & CC_C;
+}
+
+CCTable cc_table[CC_OP_NB] = {
+    [CC_OP_DYNAMIC] = { /* should never happen */ },
+
+    [CC_OP_EFLAGS] = { compute_all_eflags, compute_c_eflags },
+
+    [CC_OP_MULB] = { compute_all_mulb, compute_c_mull },
+    [CC_OP_MULW] = { compute_all_mulw, compute_c_mull },
+    [CC_OP_MULL] = { compute_all_mull, compute_c_mull },
+
+    [CC_OP_ADDB] = { compute_all_addb, compute_c_addb },
+    [CC_OP_ADDW] = { compute_all_addw, compute_c_addw  },
+    [CC_OP_ADDL] = { compute_all_addl, compute_c_addl  },
+
+    [CC_OP_ADCB] = { compute_all_adcb, compute_c_adcb },
+    [CC_OP_ADCW] = { compute_all_adcw, compute_c_adcw  },
+    [CC_OP_ADCL] = { compute_all_adcl, compute_c_adcl  },
+
+    [CC_OP_SUBB] = { compute_all_subb, compute_c_subb  },
+    [CC_OP_SUBW] = { compute_all_subw, compute_c_subw  },
+    [CC_OP_SUBL] = { compute_all_subl, compute_c_subl  },
+    
+    [CC_OP_SBBB] = { compute_all_sbbb, compute_c_sbbb  },
+    [CC_OP_SBBW] = { compute_all_sbbw, compute_c_sbbw  },
+    [CC_OP_SBBL] = { compute_all_sbbl, compute_c_sbbl  },
+    
+    [CC_OP_LOGICB] = { compute_all_logicb, compute_c_logicb },
+    [CC_OP_LOGICW] = { compute_all_logicw, compute_c_logicw },
+    [CC_OP_LOGICL] = { compute_all_logicl, compute_c_logicl },
+    
+    [CC_OP_INCB] = { compute_all_incb, compute_c_incl },
+    [CC_OP_INCW] = { compute_all_incw, compute_c_incl },
+    [CC_OP_INCL] = { compute_all_incl, compute_c_incl },
+    
+    [CC_OP_DECB] = { compute_all_decb, compute_c_incl },
+    [CC_OP_DECW] = { compute_all_decw, compute_c_incl },
+    [CC_OP_DECL] = { compute_all_decl, compute_c_incl },
+    
+    [CC_OP_SHLB] = { compute_all_shlb, compute_c_shlb },
+    [CC_OP_SHLW] = { compute_all_shlw, compute_c_shlw },
+    [CC_OP_SHLL] = { compute_all_shll, compute_c_shll },
+
+    [CC_OP_SARB] = { compute_all_sarb, compute_c_sarl },
+    [CC_OP_SARW] = { compute_all_sarw, compute_c_sarl },
+    [CC_OP_SARL] = { compute_all_sarl, compute_c_sarl },
+
+#ifdef TARGET_X86_64
+    [CC_OP_MULQ] = { compute_all_mulq, compute_c_mull },
+
+    [CC_OP_ADDQ] = { compute_all_addq, compute_c_addq  },
+
+    [CC_OP_ADCQ] = { compute_all_adcq, compute_c_adcq  },
+
+    [CC_OP_SUBQ] = { compute_all_subq, compute_c_subq  },
+    
+    [CC_OP_SBBQ] = { compute_all_sbbq, compute_c_sbbq  },
+    
+    [CC_OP_LOGICQ] = { compute_all_logicq, compute_c_logicq },
+    
+    [CC_OP_INCQ] = { compute_all_incq, compute_c_incl },
+
+    [CC_OP_DECQ] = { compute_all_decq, compute_c_incl },
+
+    [CC_OP_SHLQ] = { compute_all_shlq, compute_c_shlq },
+
+    [CC_OP_SARQ] = { compute_all_sarq, compute_c_sarl },
+#endif
+};
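+
+/* Condition codes are evaluated lazily: flag-setting micro-ops only record
+   their operands in CC_SRC/CC_DST and their kind in CC_OP, and the table
+   above is used to recompute the full EFLAGS (compute_all) or just the
+   carry flag (compute_c) when a later operation actually needs them. */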
+
+/* floating point support. Some of the code for complicated x87
+   functions comes from the LGPL'ed x86 emulator found in the Willows
+   TWIN windows emulator. */
+
+/* fp load FT0 */
+
+void OPPROTO op_flds_FT0_A0(void)
+{
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.i32 = ldl(A0);
+    FT0 = FP_CONVERT.f;
+#else
+    FT0 = ldfl(A0);
+#endif
+}
+
+void OPPROTO op_fldl_FT0_A0(void)
+{
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.i64 = ldq(A0);
+    FT0 = FP_CONVERT.d;
+#else
+    FT0 = ldfq(A0);
+#endif
+}
+
+/* helpers are needed to avoid referencing static constants. XXX: find a better way */
+#ifdef USE_INT_TO_FLOAT_HELPERS
+
+void helper_fild_FT0_A0(void)
+{
+    FT0 = (CPU86_LDouble)ldsw(A0);
+}
+
+void helper_fildl_FT0_A0(void)
+{
+    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+}
+
+void helper_fildll_FT0_A0(void)
+{
+    FT0 = (CPU86_LDouble)((int64_t)ldq(A0));
+}
+
+void OPPROTO op_fild_FT0_A0(void)
+{
+    helper_fild_FT0_A0();
+}
+
+void OPPROTO op_fildl_FT0_A0(void)
+{
+    helper_fildl_FT0_A0();
+}
+
+void OPPROTO op_fildll_FT0_A0(void)
+{
+    helper_fildll_FT0_A0();
+}
+
+#else
+
+void OPPROTO op_fild_FT0_A0(void)
+{
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.i32 = ldsw(A0);
+    FT0 = (CPU86_LDouble)FP_CONVERT.i32;
+#else
+    FT0 = (CPU86_LDouble)ldsw(A0);
+#endif
+}
+
+void OPPROTO op_fildl_FT0_A0(void)
+{
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FT0 = (CPU86_LDouble)FP_CONVERT.i32;
+#else
+    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+#endif
+}
+
+void OPPROTO op_fildll_FT0_A0(void)
+{
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.i64 = (int64_t) ldq(A0);
+    FT0 = (CPU86_LDouble)FP_CONVERT.i64;
+#else
+    FT0 = (CPU86_LDouble)((int64_t)ldq(A0));
+#endif
+}
+#endif
+
+/* fp load ST0 */
+
+void OPPROTO op_flds_ST0_A0(void)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.i32 = ldl(A0);
+    env->fpregs[new_fpstt].d = FP_CONVERT.f;
+#else
+    env->fpregs[new_fpstt].d = ldfl(A0);
+#endif
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void OPPROTO op_fldl_ST0_A0(void)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.i64 = ldq(A0);
+    env->fpregs[new_fpstt].d = FP_CONVERT.d;
+#else
+    env->fpregs[new_fpstt].d = ldfq(A0);
+#endif
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void OPPROTO op_fldt_ST0_A0(void)
+{
+    helper_fldt_ST0_A0();
+}
+
+/* helpers are needed to avoid referencing static constants. XXX: find a better way */
+#ifdef USE_INT_TO_FLOAT_HELPERS
+
+void helper_fild_ST0_A0(void)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)ldsw(A0);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fildl_ST0_A0(void)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fildll_ST0_A0(void)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)ldq(A0));
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void OPPROTO op_fild_ST0_A0(void)
+{
+    helper_fild_ST0_A0();
+}
+
+void OPPROTO op_fildl_ST0_A0(void)
+{
+    helper_fildl_ST0_A0();
+}
+
+void OPPROTO op_fildll_ST0_A0(void)
+{
+    helper_fildll_ST0_A0();
+}
+
+#else
+
+void OPPROTO op_fild_ST0_A0(void)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.i32 = ldsw(A0);
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
+#else
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)ldsw(A0);
+#endif
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void OPPROTO op_fildl_ST0_A0(void)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
+#else
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+#endif
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void OPPROTO op_fildll_ST0_A0(void)
+{
+    int new_fpstt;
+    new_fpstt = (env->fpstt - 1) & 7;
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.i64 = (int64_t) ldq(A0);
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i64;
+#else
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)ldq(A0));
+#endif
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+#endif
+
+/* fp store */
+
+void OPPROTO op_fsts_ST0_A0(void)
+{
+#ifdef USE_FP_CONVERT
+    FP_CONVERT.f = (float)ST0;
+    stfl(A0, FP_CONVERT.f);
+#else
+    stfl(A0, (float)ST0);
+#endif
+    FORCE_RET();
+}
+
+void OPPROTO op_fstl_ST0_A0(void)
+{
+    stfq(A0, (double)ST0);
+    FORCE_RET();
+}
+
+void OPPROTO op_fstt_ST0_A0(void)
+{
+    helper_fstt_ST0_A0();
+}
+
+void OPPROTO op_fist_ST0_A0(void)
+{
+#if defined(__sparc__) && !defined(__sparc_v9__)
+    register CPU86_LDouble d asm("o0");
+#else
+    CPU86_LDouble d;
+#endif
+    int val;
+
+    d = ST0;
+    val = floatx_to_int32(d, &env->fp_status);
+    if (val != (int16_t)val)
+        val = -32768;
+    stw(A0, val);
+    FORCE_RET();
+}
+
+void OPPROTO op_fistl_ST0_A0(void)
+{
+#if defined(__sparc__) && !defined(__sparc_v9__)
+    register CPU86_LDouble d asm("o0");
+#else
+    CPU86_LDouble d;
+#endif
+    int val;
+
+    d = ST0;
+    val = floatx_to_int32(d, &env->fp_status);
+    stl(A0, val);
+    FORCE_RET();
+}
+
+void OPPROTO op_fistll_ST0_A0(void)
+{
+#if defined(__sparc__) && !defined(__sparc_v9__)
+    register CPU86_LDouble d asm("o0");
+#else
+    CPU86_LDouble d;
+#endif
+    int64_t val;
+
+    d = ST0;
+    val = floatx_to_int64(d, &env->fp_status);
+    stq(A0, val);
+    FORCE_RET();
+}
+
+void OPPROTO op_fistt_ST0_A0(void)
+{
+#if defined(__sparc__) && !defined(__sparc_v9__)
+    register CPU86_LDouble d asm("o0");
+#else
+    CPU86_LDouble d;
+#endif
+    int val;
+
+    d = ST0;
+    val = floatx_to_int32_round_to_zero(d, &env->fp_status);
+    if (val != (int16_t)val)
+        val = -32768;
+    stw(A0, val);
+    FORCE_RET();
+}
+
+void OPPROTO op_fisttl_ST0_A0(void)
+{
+#if defined(__sparc__) && !defined(__sparc_v9__)
+    register CPU86_LDouble d asm("o0");
+#else
+    CPU86_LDouble d;
+#endif
+    int val;
+
+    d = ST0;
+    val = floatx_to_int32_round_to_zero(d, &env->fp_status);
+    stl(A0, val);
+    FORCE_RET();
+}
+
+void OPPROTO op_fisttll_ST0_A0(void)
+{
+#if defined(__sparc__) && !defined(__sparc_v9__)
+    register CPU86_LDouble d asm("o0");
+#else
+    CPU86_LDouble d;
+#endif
+    int64_t val;
+
+    d = ST0;
+    val = floatx_to_int64_round_to_zero(d, &env->fp_status);
+    stq(A0, val);
+    FORCE_RET();
+}
+
+void OPPROTO op_fbld_ST0_A0(void)
+{
+    helper_fbld_ST0_A0();
+}
+
+void OPPROTO op_fbst_ST0_A0(void)
+{
+    helper_fbst_ST0_A0();
+}
+
+/* FPU move */
+
+void OPPROTO op_fpush(void)
+{
+    fpush();
+}
+
+void OPPROTO op_fpop(void)
+{
+    fpop();
+}
+
+void OPPROTO op_fdecstp(void)
+{
+    env->fpstt = (env->fpstt - 1) & 7;
+    env->fpus &= (~0x4700);
+}
+
+void OPPROTO op_fincstp(void)
+{
+    env->fpstt = (env->fpstt + 1) & 7;
+    env->fpus &= (~0x4700);
+}
+
+void OPPROTO op_ffree_STN(void)
+{
+    env->fptags[(env->fpstt + PARAM1) & 7] = 1;
+}
+
+void OPPROTO op_fmov_ST0_FT0(void)
+{
+    ST0 = FT0;
+}
+
+void OPPROTO op_fmov_FT0_STN(void)
+{
+    FT0 = ST(PARAM1);
+}
+
+void OPPROTO op_fmov_ST0_STN(void)
+{
+    ST0 = ST(PARAM1);
+}
+
+void OPPROTO op_fmov_STN_ST0(void)
+{
+    ST(PARAM1) = ST0;
+}
+
+void OPPROTO op_fxchg_ST0_STN(void)
+{
+    CPU86_LDouble tmp;
+    tmp = ST(PARAM1);
+    ST(PARAM1) = ST0;
+    ST0 = tmp;
+}
+
+/* FPU operations */
+
+const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
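+/* The comparison result follows the softfloat convention (-1 less, 0 equal,
+   1 greater, 2 unordered), so indexing with ret + 1 selects the C0/C2/C3
+   bits of the FPU status word; 0x4500 masks exactly those bits.  The
+   fcomi_ccval table below plays the same role for the CF/ZF/PF encoding
+   used by fcomi/fucomi. */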
+
+void OPPROTO op_fcom_ST0_FT0(void)
+{
+    int ret;
+
+    ret = floatx_compare(ST0, FT0, &env->fp_status);
+    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
+    FORCE_RET();
+}
+
+void OPPROTO op_fucom_ST0_FT0(void)
+{
+    int ret;
+
+    ret = floatx_compare_quiet(ST0, FT0, &env->fp_status);
+    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
+    FORCE_RET();
+}
+
+const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
+
+void OPPROTO op_fcomi_ST0_FT0(void)
+{
+    int eflags;
+    int ret;
+
+    ret = floatx_compare(ST0, FT0, &env->fp_status);
+    eflags = cc_table[CC_OP].compute_all();
+    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
+    CC_SRC = eflags;
+    FORCE_RET();
+}
+
+void OPPROTO op_fucomi_ST0_FT0(void)
+{
+    int eflags;
+    int ret;
+
+    ret = floatx_compare_quiet(ST0, FT0, &env->fp_status);
+    eflags = cc_table[CC_OP].compute_all();
+    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
+    CC_SRC = eflags;
+    FORCE_RET();
+}
+
+void OPPROTO op_fcmov_ST0_STN_T0(void)
+{
+    if (T0) {
+        ST0 = ST(PARAM1);
+    }
+    FORCE_RET();
+}
+
+void OPPROTO op_fadd_ST0_FT0(void)
+{
+    ST0 += FT0;
+}
+
+void OPPROTO op_fmul_ST0_FT0(void)
+{
+    ST0 *= FT0;
+}
+
+void OPPROTO op_fsub_ST0_FT0(void)
+{
+    ST0 -= FT0;
+}
+
+void OPPROTO op_fsubr_ST0_FT0(void)
+{
+    ST0 = FT0 - ST0;
+}
+
+void OPPROTO op_fdiv_ST0_FT0(void)
+{
+    ST0 = helper_fdiv(ST0, FT0);
+}
+
+void OPPROTO op_fdivr_ST0_FT0(void)
+{
+    ST0 = helper_fdiv(FT0, ST0);
+}
+
+/* fp operations between STN and ST0 */
+
+void OPPROTO op_fadd_STN_ST0(void)
+{
+    ST(PARAM1) += ST0;
+}
+
+void OPPROTO op_fmul_STN_ST0(void)
+{
+    ST(PARAM1) *= ST0;
+}
+
+void OPPROTO op_fsub_STN_ST0(void)
+{
+    ST(PARAM1) -= ST0;
+}
+
+void OPPROTO op_fsubr_STN_ST0(void)
+{
+    CPU86_LDouble *p;
+    p = &ST(PARAM1);
+    *p = ST0 - *p;
+}
+
+void OPPROTO op_fdiv_STN_ST0(void)
+{
+    CPU86_LDouble *p;
+    p = &ST(PARAM1);
+    *p = helper_fdiv(*p, ST0);
+}
+
+void OPPROTO op_fdivr_STN_ST0(void)
+{
+    CPU86_LDouble *p;
+    p = &ST(PARAM1);
+    *p = helper_fdiv(ST0, *p);
+}
+
+/* misc FPU operations */
+void OPPROTO op_fchs_ST0(void)
+{
+    ST0 = floatx_chs(ST0);
+}
+
+void OPPROTO op_fabs_ST0(void)
+{
+    ST0 = floatx_abs(ST0);
+}
+
+void OPPROTO op_fxam_ST0(void)
+{
+    helper_fxam_ST0();
+}
+
+void OPPROTO op_fld1_ST0(void)
+{
+    ST0 = f15rk[1];
+}
+
+void OPPROTO op_fldl2t_ST0(void)
+{
+    ST0 = f15rk[6];
+}
+
+void OPPROTO op_fldl2e_ST0(void)
+{
+    ST0 = f15rk[5];
+}
+
+void OPPROTO op_fldpi_ST0(void)
+{
+    ST0 = f15rk[2];
+}
+
+void OPPROTO op_fldlg2_ST0(void)
+{
+    ST0 = f15rk[3];
+}
+
+void OPPROTO op_fldln2_ST0(void)
+{
+    ST0 = f15rk[4];
+}
+
+void OPPROTO op_fldz_ST0(void)
+{
+    ST0 = f15rk[0];
+}
+
+void OPPROTO op_fldz_FT0(void)
+{
+    FT0 = f15rk[0];
+}
+
+/* associated helpers to reduce generated code length and to simplify
+   relocation (FP constants are usually stored in the .rodata section) */
+
+void OPPROTO op_f2xm1(void)
+{
+    helper_f2xm1();
+}
+
+void OPPROTO op_fyl2x(void)
+{
+    helper_fyl2x();
+}
+
+void OPPROTO op_fptan(void)
+{
+    helper_fptan();
+}
+
+void OPPROTO op_fpatan(void)
+{
+    helper_fpatan();
+}
+
+void OPPROTO op_fxtract(void)
+{
+    helper_fxtract();
+}
+
+void OPPROTO op_fprem1(void)
+{
+    helper_fprem1();
+}
+
+
+void OPPROTO op_fprem(void)
+{
+    helper_fprem();
+}
+
+void OPPROTO op_fyl2xp1(void)
+{
+    helper_fyl2xp1();
+}
+
+void OPPROTO op_fsqrt(void)
+{
+    helper_fsqrt();
+}
+
+void OPPROTO op_fsincos(void)
+{
+    helper_fsincos();
+}
+
+void OPPROTO op_frndint(void)
+{
+    helper_frndint();
+}
+
+void OPPROTO op_fscale(void)
+{
+    helper_fscale();
+}
+
+void OPPROTO op_fsin(void)
+{
+    helper_fsin();
+}
+
+void OPPROTO op_fcos(void)
+{
+    helper_fcos();
+}
+
+void OPPROTO op_fnstsw_A0(void)
+{
+    int fpus;
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+    stw(A0, fpus);
+    FORCE_RET();
+}
+
+void OPPROTO op_fnstsw_EAX(void)
+{
+    int fpus;
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+    EAX = (EAX & ~0xffff) | fpus;
+}
+
+void OPPROTO op_fnstcw_A0(void)
+{
+    stw(A0, env->fpuc);
+    FORCE_RET();
+}
+
+void OPPROTO op_fldcw_A0(void)
+{
+    env->fpuc = lduw(A0);
+    update_fp_status();
+}
+
+void OPPROTO op_fclex(void)
+{
+    env->fpus &= 0x7f00;
+}
+
+void OPPROTO op_fwait(void)
+{
+    if (env->fpus & FPUS_SE)
+        fpu_raise_exception();
+    FORCE_RET();
+}
+
+void OPPROTO op_fninit(void)
+{
+    env->fpus = 0;
+    env->fpstt = 0;
+    env->fpuc = 0x37f;
+    env->fptags[0] = 1;
+    env->fptags[1] = 1;
+    env->fptags[2] = 1;
+    env->fptags[3] = 1;
+    env->fptags[4] = 1;
+    env->fptags[5] = 1;
+    env->fptags[6] = 1;
+    env->fptags[7] = 1;
+}
+
+void OPPROTO op_fnstenv_A0(void)
+{
+    helper_fstenv(A0, PARAM1);
+}
+
+void OPPROTO op_fldenv_A0(void)
+{
+    helper_fldenv(A0, PARAM1);
+}
+
+void OPPROTO op_fnsave_A0(void)
+{
+    helper_fsave(A0, PARAM1);
+}
+
+void OPPROTO op_frstor_A0(void)
+{
+    helper_frstor(A0, PARAM1);
+}
+
+/* threading support */
+void OPPROTO op_lock(void)
+{
+    cpu_lock();
+}
+
+void OPPROTO op_unlock(void)
+{
+    cpu_unlock();
+}
+
+/* SSE support */
+static inline void memcpy16(void *d, void *s)
+{
+    ((uint32_t *)d)[0] = ((uint32_t *)s)[0];
+    ((uint32_t *)d)[1] = ((uint32_t *)s)[1];
+    ((uint32_t *)d)[2] = ((uint32_t *)s)[2];
+    ((uint32_t *)d)[3] = ((uint32_t *)s)[3];
+}
+
+void OPPROTO op_movo(void)
+{
+    /* XXX: badly generated code */
+    XMMReg *d, *s;
+    d = (XMMReg *)((char *)env + PARAM1);
+    s = (XMMReg *)((char *)env + PARAM2);
+    memcpy16(d, s);
+}
+
+void OPPROTO op_movq(void)
+{
+    uint64_t *d, *s;
+    d = (uint64_t *)((char *)env + PARAM1);
+    s = (uint64_t *)((char *)env + PARAM2);
+    *d = *s;
+}
+
+void OPPROTO op_movl(void)
+{
+    uint32_t *d, *s;
+    d = (uint32_t *)((char *)env + PARAM1);
+    s = (uint32_t *)((char *)env + PARAM2);
+    *d = *s;
+}
+
+void OPPROTO op_movq_env_0(void)
+{
+    uint64_t *d;
+    d = (uint64_t *)((char *)env + PARAM1);
+    *d = 0;
+}
+
+void OPPROTO op_fxsave_A0(void)
+{
+    helper_fxsave(A0, PARAM1);
+}
+
+void OPPROTO op_fxrstor_A0(void)
+{
+    helper_fxrstor(A0, PARAM1);
+}
+
+/* XXX: optimize by storing fpstt and fptags in the static cpu state */
+void OPPROTO op_enter_mmx(void)
+{
+    env->fpstt = 0;
+    *(uint32_t *)(env->fptags) = 0;
+    *(uint32_t *)(env->fptags + 4) = 0;
+}
+
+void OPPROTO op_emms(void)
+{
+    /* set to empty state */
+    *(uint32_t *)(env->fptags) = 0x01010101;
+    *(uint32_t *)(env->fptags + 4) = 0x01010101;
+}
+
+#define SHIFT 0
+#include "ops_sse.h"
+
+#define SHIFT 1
+#include "ops_sse.h"
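+
+/* ops_sse.h is included twice: the SHIFT 0 pass generates the 64-bit MMX
+   (_mmx) operations and the SHIFT 1 pass the 128-bit SSE (_xmm) ones, as
+   arranged by the Reg/SUFFIX macros at the top of that header. */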
+
+#ifdef VBOX
+/* Instantiate the structure signatures. */
+# define REM_STRUCT_OP 1
+# include "../Sun/structs.h"
+#endif
+
Index: /trunk/src/recompiler_new/target-i386/opreg_template.h
===================================================================
--- /trunk/src/recompiler_new/target-i386/opreg_template.h	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/opreg_template.h	(revision 13168)
@@ -0,0 +1,199 @@
+/*
+ *  i386 micro operations (templates for various register related
+ *  operations)
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
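+
+/* This template is presumably included once per general purpose register,
+   with REG and REGNAME defined by the includer (e.g. REG as EAX and
+   REGNAME as _EAX), so that glue() turns each definition below into a
+   per-register micro-op such as op_movl_A0_EAX. */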
+void OPPROTO glue(op_movl_A0,REGNAME)(void)
+{
+    A0 = (uint32_t)REG;
+}
+
+void OPPROTO glue(op_addl_A0,REGNAME)(void)
+{
+    A0 = (uint32_t)(A0 + REG);
+}
+
+void OPPROTO glue(glue(op_addl_A0,REGNAME),_s1)(void)
+{
+    A0 = (uint32_t)(A0 + (REG << 1));
+}
+
+void OPPROTO glue(glue(op_addl_A0,REGNAME),_s2)(void)
+{
+    A0 = (uint32_t)(A0 + (REG << 2));
+}
+
+void OPPROTO glue(glue(op_addl_A0,REGNAME),_s3)(void)
+{
+    A0 = (uint32_t)(A0 + (REG << 3));
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO glue(op_movq_A0,REGNAME)(void)
+{
+    A0 = REG;
+}
+
+void OPPROTO glue(op_addq_A0,REGNAME)(void)
+{
+    A0 = (A0 + REG);
+}
+
+void OPPROTO glue(glue(op_addq_A0,REGNAME),_s1)(void)
+{
+    A0 = (A0 + (REG << 1));
+}
+
+void OPPROTO glue(glue(op_addq_A0,REGNAME),_s2)(void)
+{
+    A0 = (A0 + (REG << 2));
+}
+
+void OPPROTO glue(glue(op_addq_A0,REGNAME),_s3)(void)
+{
+    A0 = (A0 + (REG << 3));
+}
+#endif
+
+void OPPROTO glue(op_movl_T0,REGNAME)(void)
+{
+    T0 = REG;
+}
+
+void OPPROTO glue(op_movl_T1,REGNAME)(void)
+{
+    T1 = REG;
+}
+
+void OPPROTO glue(op_movh_T0,REGNAME)(void)
+{
+    T0 = REG >> 8;
+}
+
+void OPPROTO glue(op_movh_T1,REGNAME)(void)
+{
+    T1 = REG >> 8;
+}
+
+void OPPROTO glue(glue(op_movl,REGNAME),_T0)(void)
+{
+    REG = (uint32_t)T0;
+}
+
+void OPPROTO glue(glue(op_movl,REGNAME),_T1)(void)
+{
+    REG = (uint32_t)T1;
+}
+
+void OPPROTO glue(glue(op_movl,REGNAME),_A0)(void)
+{
+    REG = (uint32_t)A0;
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO glue(glue(op_movq,REGNAME),_T0)(void)
+{
+    REG = T0;
+}
+
+void OPPROTO glue(glue(op_movq,REGNAME),_T1)(void)
+{
+    REG = T1;
+}
+
+void OPPROTO glue(glue(op_movq,REGNAME),_A0)(void)
+{
+    REG = A0;
+}
+#endif
+
+/* mov T1 to REG if T0 is true */
+void OPPROTO glue(glue(op_cmovw,REGNAME),_T1_T0)(void)
+{
+    if (T0)
+        REG = (REG & ~0xffff) | (T1 & 0xffff);
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_cmovl,REGNAME),_T1_T0)(void)
+{
+    if (T0)
+        REG = (uint32_t)T1;
+    FORCE_RET();
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO glue(glue(op_cmovq,REGNAME),_T1_T0)(void)
+{
+    if (T0)
+        REG = T1;
+    FORCE_RET();
+}
+#endif
+
+/* NOTE: T0 high order bits are ignored */
+void OPPROTO glue(glue(op_movw,REGNAME),_T0)(void)
+{
+    REG = (REG & ~0xffff) | (T0 & 0xffff);
+}
+
+/* NOTE: T0 high order bits are ignored */
+void OPPROTO glue(glue(op_movw,REGNAME),_T1)(void)
+{
+    REG = (REG & ~0xffff) | (T1 & 0xffff);
+}
+
+/* NOTE: A0 high order bits are ignored */
+void OPPROTO glue(glue(op_movw,REGNAME),_A0)(void)
+{
+    REG = (REG & ~0xffff) | (A0 & 0xffff);
+}
+
+/* NOTE: T0 high order bits are ignored */
+void OPPROTO glue(glue(op_movb,REGNAME),_T0)(void)
+{
+    REG = (REG & ~0xff) | (T0 & 0xff);
+}
+
+/* NOTE: T0 high order bits are ignored */
+void OPPROTO glue(glue(op_movh,REGNAME),_T0)(void)
+{
+    REG = (REG & ~0xff00) | ((T0 & 0xff) << 8);
+}
+
+/* NOTE: T1 high order bits are ignored */
+void OPPROTO glue(glue(op_movb,REGNAME),_T1)(void)
+{
+    REG = (REG & ~0xff) | (T1 & 0xff);
+}
+
+/* NOTE: T1 high order bits are ignored */
+void OPPROTO glue(glue(op_movh,REGNAME),_T1)(void)
+{
+    REG = (REG & ~0xff00) | ((T1 & 0xff) << 8);
+}
+
Index: /trunk/src/recompiler_new/target-i386/ops_mem.h
===================================================================
--- /trunk/src/recompiler_new/target-i386/ops_mem.h	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/ops_mem.h	(revision 13168)
@@ -0,0 +1,156 @@
+void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T0_A0)(void)
+{
+    T0 = glue(ldub, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_ldsb, MEMSUFFIX), _T0_A0)(void)
+{
+    T0 = glue(ldsb, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_lduw, MEMSUFFIX), _T0_A0)(void)
+{
+    T0 = glue(lduw, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_ldsw, MEMSUFFIX), _T0_A0)(void)
+{
+    T0 = glue(ldsw, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T0_A0)(void)
+{
+    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T1_A0)(void)
+{
+    T1 = glue(ldub, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_ldsb, MEMSUFFIX), _T1_A0)(void)
+{
+    T1 = glue(ldsb, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_lduw, MEMSUFFIX), _T1_A0)(void)
+{
+    T1 = glue(lduw, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_ldsw, MEMSUFFIX), _T1_A0)(void)
+{
+    T1 = glue(ldsw, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T1_A0)(void)
+{
+    T1 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T0_A0)(void)
+{
+    glue(stb, MEMSUFFIX)(A0, T0);
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_stw, MEMSUFFIX), _T0_A0)(void)
+{
+    glue(stw, MEMSUFFIX)(A0, T0);
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_stl, MEMSUFFIX), _T0_A0)(void)
+{
+    glue(stl, MEMSUFFIX)(A0, T0);
+    FORCE_RET();
+}
+
+#if 0
+void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T1_A0)(void)
+{
+    glue(stb, MEMSUFFIX)(A0, T1);
+    FORCE_RET();
+}
+#endif
+
+void OPPROTO glue(glue(op_stw, MEMSUFFIX), _T1_A0)(void)
+{
+    glue(stw, MEMSUFFIX)(A0, T1);
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_stl, MEMSUFFIX), _T1_A0)(void)
+{
+    glue(stl, MEMSUFFIX)(A0, T1);
+    FORCE_RET();
+}
+
+/* SSE/MMX support */
+void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _env_A0)(void)
+{
+    uint64_t *p;
+    p = (uint64_t *)((char *)env + PARAM1);
+    *p = glue(ldq, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_stq, MEMSUFFIX), _env_A0)(void)
+{
+    uint64_t *p;
+    p = (uint64_t *)((char *)env + PARAM1);
+    glue(stq, MEMSUFFIX)(A0, *p);
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_ldo, MEMSUFFIX), _env_A0)(void)
+{
+    XMMReg *p;
+    p = (XMMReg *)((char *)env + PARAM1);
+    p->XMM_Q(0) = glue(ldq, MEMSUFFIX)(A0);
+    p->XMM_Q(1) = glue(ldq, MEMSUFFIX)(A0 + 8);
+}
+
+void OPPROTO glue(glue(op_sto, MEMSUFFIX), _env_A0)(void)
+{
+    XMMReg *p;
+    p = (XMMReg *)((char *)env + PARAM1);
+    glue(stq, MEMSUFFIX)(A0, p->XMM_Q(0));
+    glue(stq, MEMSUFFIX)(A0 + 8, p->XMM_Q(1));
+    FORCE_RET();
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T0_A0)(void)
+{
+    T0 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T1_A0)(void)
+{
+    T1 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T0_A0)(void)
+{
+    T0 = glue(ldq, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T1_A0)(void)
+{
+    T1 = glue(ldq, MEMSUFFIX)(A0);
+}
+
+void OPPROTO glue(glue(op_stq, MEMSUFFIX), _T0_A0)(void)
+{
+    glue(stq, MEMSUFFIX)(A0, T0);
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_stq, MEMSUFFIX), _T1_A0)(void)
+{
+    glue(stq, MEMSUFFIX)(A0, T1);
+    FORCE_RET();
+}
+#endif
+
+#undef MEMSUFFIX
Index: /trunk/src/recompiler_new/target-i386/ops_sse.h
===================================================================
--- /trunk/src/recompiler_new/target-i386/ops_sse.h	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/ops_sse.h	(revision 13168)
@@ -0,0 +1,1418 @@
+/*
+ *  MMX/SSE/SSE2/PNI support
+ * 
+ *  Copyright (c) 2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#if SHIFT == 0
+#define Reg MMXReg
+#define XMM_ONLY(x...)
+#define B(n) MMX_B(n)
+#define W(n) MMX_W(n)
+#define L(n) MMX_L(n)
+#define Q(n) q
+#define SUFFIX _mmx
+#else
+#define Reg XMMReg
+#define XMM_ONLY(x...) x
+#define B(n) XMM_B(n)
+#define W(n) XMM_W(n)
+#define L(n) XMM_L(n)
+#define Q(n) XMM_Q(n)
+#define SUFFIX _xmm
+#endif
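+
+/* With SHIFT == 0 the template operates on 64-bit MMXReg values through the
+   MMX_* accessors and uses the _mmx name suffix; with SHIFT == 1 it works
+   on 128-bit XMMReg values with the _xmm suffix, and XMM_ONLY() keeps the
+   extra upper-half lanes out of the MMX instantiation. */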
+
+void OPPROTO glue(op_psrlw, SUFFIX)(void)
+{
+    Reg *d, *s;
+    int shift;
+
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    if (s->Q(0) > 15) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    } else {
+        shift = s->B(0);
+        d->W(0) >>= shift;
+        d->W(1) >>= shift;
+        d->W(2) >>= shift;
+        d->W(3) >>= shift;
+#if SHIFT == 1
+        d->W(4) >>= shift;
+        d->W(5) >>= shift;
+        d->W(6) >>= shift;
+        d->W(7) >>= shift;
+#endif
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_psraw, SUFFIX)(void)
+{
+    Reg *d, *s;
+    int shift;
+
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    if (s->Q(0) > 15) {
+        shift = 15;
+    } else {
+        shift = s->B(0);
+    }
+    d->W(0) = (int16_t)d->W(0) >> shift;
+    d->W(1) = (int16_t)d->W(1) >> shift;
+    d->W(2) = (int16_t)d->W(2) >> shift;
+    d->W(3) = (int16_t)d->W(3) >> shift;
+#if SHIFT == 1
+    d->W(4) = (int16_t)d->W(4) >> shift;
+    d->W(5) = (int16_t)d->W(5) >> shift;
+    d->W(6) = (int16_t)d->W(6) >> shift;
+    d->W(7) = (int16_t)d->W(7) >> shift;
+#endif
+}
+
+void OPPROTO glue(op_psllw, SUFFIX)(void)
+{
+    Reg *d, *s;
+    int shift;
+
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    if (s->Q(0) > 15) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    } else {
+        shift = s->B(0);
+        d->W(0) <<= shift;
+        d->W(1) <<= shift;
+        d->W(2) <<= shift;
+        d->W(3) <<= shift;
+#if SHIFT == 1
+        d->W(4) <<= shift;
+        d->W(5) <<= shift;
+        d->W(6) <<= shift;
+        d->W(7) <<= shift;
+#endif
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_psrld, SUFFIX)(void)
+{
+    Reg *d, *s;
+    int shift;
+
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    if (s->Q(0) > 31) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    } else {
+        shift = s->B(0);
+        d->L(0) >>= shift;
+        d->L(1) >>= shift;
+#if SHIFT == 1
+        d->L(2) >>= shift;
+        d->L(3) >>= shift;
+#endif
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_psrad, SUFFIX)(void)
+{
+    Reg *d, *s;
+    int shift;
+
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    if (s->Q(0) > 31) {
+        shift = 31;
+    } else {
+        shift = s->B(0);
+    }
+    d->L(0) = (int32_t)d->L(0) >> shift;
+    d->L(1) = (int32_t)d->L(1) >> shift;
+#if SHIFT == 1
+    d->L(2) = (int32_t)d->L(2) >> shift;
+    d->L(3) = (int32_t)d->L(3) >> shift;
+#endif
+}
+
+void OPPROTO glue(op_pslld, SUFFIX)(void)
+{
+    Reg *d, *s;
+    int shift;
+
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    if (s->Q(0) > 31) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    } else {
+        shift = s->B(0);
+        d->L(0) <<= shift;
+        d->L(1) <<= shift;
+#if SHIFT == 1
+        d->L(2) <<= shift;
+        d->L(3) <<= shift;
+#endif
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_psrlq, SUFFIX)(void)
+{
+    Reg *d, *s;
+    int shift;
+
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    if (s->Q(0) > 63) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    } else {
+        shift = s->B(0);
+        d->Q(0) >>= shift;
+#if SHIFT == 1
+        d->Q(1) >>= shift;
+#endif
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_psllq, SUFFIX)(void)
+{
+    Reg *d, *s;
+    int shift;
+
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    if (s->Q(0) > 63) {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    } else {
+        shift = s->B(0);
+        d->Q(0) <<= shift;
+#if SHIFT == 1
+        d->Q(1) <<= shift;
+#endif
+    }
+    FORCE_RET();
+}
+
+#if SHIFT == 1
+void OPPROTO glue(op_psrldq, SUFFIX)(void)
+{
+    Reg *d, *s;
+    int shift, i;
+
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    shift = s->L(0);
+    if (shift > 16)
+        shift = 16;
+    for(i = 0; i < 16 - shift; i++)
+        d->B(i) = d->B(i + shift);
+    for(i = 16 - shift; i < 16; i++)
+        d->B(i) = 0;
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_pslldq, SUFFIX)(void)
+{
+    Reg *d, *s;
+    int shift, i;
+
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    shift = s->L(0);
+    if (shift > 16)
+        shift = 16;
+    for(i = 15; i >= shift; i--)
+        d->B(i) = d->B(i - shift);
+    for(i = 0; i < shift; i++)
+        d->B(i) = 0;
+    FORCE_RET();
+}
+#endif
+
+#define SSE_OP_B(name, F)\
+void OPPROTO glue(name, SUFFIX) (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->B(0) = F(d->B(0), s->B(0));\
+    d->B(1) = F(d->B(1), s->B(1));\
+    d->B(2) = F(d->B(2), s->B(2));\
+    d->B(3) = F(d->B(3), s->B(3));\
+    d->B(4) = F(d->B(4), s->B(4));\
+    d->B(5) = F(d->B(5), s->B(5));\
+    d->B(6) = F(d->B(6), s->B(6));\
+    d->B(7) = F(d->B(7), s->B(7));\
+    XMM_ONLY(\
+    d->B(8) = F(d->B(8), s->B(8));\
+    d->B(9) = F(d->B(9), s->B(9));\
+    d->B(10) = F(d->B(10), s->B(10));\
+    d->B(11) = F(d->B(11), s->B(11));\
+    d->B(12) = F(d->B(12), s->B(12));\
+    d->B(13) = F(d->B(13), s->B(13));\
+    d->B(14) = F(d->B(14), s->B(14));\
+    d->B(15) = F(d->B(15), s->B(15));\
+    )\
+}
+
+#define SSE_OP_W(name, F)\
+void OPPROTO glue(name, SUFFIX) (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->W(0) = F(d->W(0), s->W(0));\
+    d->W(1) = F(d->W(1), s->W(1));\
+    d->W(2) = F(d->W(2), s->W(2));\
+    d->W(3) = F(d->W(3), s->W(3));\
+    XMM_ONLY(\
+    d->W(4) = F(d->W(4), s->W(4));\
+    d->W(5) = F(d->W(5), s->W(5));\
+    d->W(6) = F(d->W(6), s->W(6));\
+    d->W(7) = F(d->W(7), s->W(7));\
+    )\
+}
+
+#define SSE_OP_L(name, F)\
+void OPPROTO glue(name, SUFFIX) (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->L(0) = F(d->L(0), s->L(0));\
+    d->L(1) = F(d->L(1), s->L(1));\
+    XMM_ONLY(\
+    d->L(2) = F(d->L(2), s->L(2));\
+    d->L(3) = F(d->L(3), s->L(3));\
+    )\
+}
+
+#define SSE_OP_Q(name, F)\
+void OPPROTO glue(name, SUFFIX) (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->Q(0) = F(d->Q(0), s->Q(0));\
+    XMM_ONLY(\
+    d->Q(1) = F(d->Q(1), s->Q(1));\
+    )\
+}
+
+#if SHIFT == 0
+static inline int satub(int x)
+{
+    if (x < 0)
+        return 0;
+    else if (x > 255)
+        return 255;
+    else
+        return x;
+}
+
+static inline int satuw(int x)
+{
+    if (x < 0)
+        return 0;
+    else if (x > 65535)
+        return 65535;
+    else
+        return x;
+}
+
+static inline int satsb(int x)
+{
+    if (x < -128)
+        return -128;
+    else if (x > 127)
+        return 127;
+    else
+        return x;
+}
+
+static inline int satsw(int x)
+{
+    if (x < -32768)
+        return -32768;
+    else if (x > 32767)
+        return 32767;
+    else
+        return x;
+}
+
+#define FADD(a, b) ((a) + (b))
+#define FADDUB(a, b) satub((a) + (b))
+#define FADDUW(a, b) satuw((a) + (b))
+#define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b))
+#define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b))
+
+#define FSUB(a, b) ((a) - (b))
+#define FSUBUB(a, b) satub((a) - (b))
+#define FSUBUW(a, b) satuw((a) - (b))
+#define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b))
+#define FSUBSW(a, b) satsw((int16_t)(a) - (int16_t)(b))
+#define FMINUB(a, b) ((a) < (b)) ? (a) : (b)
+#define FMINSW(a, b) ((int16_t)(a) < (int16_t)(b)) ? (a) : (b)
+#define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
+#define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b)
+
+#define FAND(a, b) (a) & (b)
+#define FANDN(a, b) ((~(a)) & (b))
+#define FOR(a, b) (a) | (b)
+#define FXOR(a, b) (a) ^ (b)
+
+#define FCMPGTB(a, b) (int8_t)(a) > (int8_t)(b) ? -1 : 0
+#define FCMPGTW(a, b) (int16_t)(a) > (int16_t)(b) ? -1 : 0
+#define FCMPGTL(a, b) (int32_t)(a) > (int32_t)(b) ? -1 : 0
+#define FCMPEQ(a, b) (a) == (b) ? -1 : 0
+
+#define FMULLW(a, b) (a) * (b)
+#define FMULHUW(a, b) (a) * (b) >> 16
+#define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16
+
+#define FAVG(a, b) ((a) + (b) + 1) >> 1
+#endif
+
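+/* Instantiate the packed integer ALU ops from the element macros above. */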
+SSE_OP_B(op_paddb, FADD)
+SSE_OP_W(op_paddw, FADD)
+SSE_OP_L(op_paddl, FADD)
+SSE_OP_Q(op_paddq, FADD)
+
+SSE_OP_B(op_psubb, FSUB)
+SSE_OP_W(op_psubw, FSUB)
+SSE_OP_L(op_psubl, FSUB)
+SSE_OP_Q(op_psubq, FSUB)
+
+SSE_OP_B(op_paddusb, FADDUB)
+SSE_OP_B(op_paddsb, FADDSB)
+SSE_OP_B(op_psubusb, FSUBUB)
+SSE_OP_B(op_psubsb, FSUBSB)
+
+SSE_OP_W(op_paddusw, FADDUW)
+SSE_OP_W(op_paddsw, FADDSW)
+SSE_OP_W(op_psubusw, FSUBUW)
+SSE_OP_W(op_psubsw, FSUBSW)
+
+SSE_OP_B(op_pminub, FMINUB)
+SSE_OP_B(op_pmaxub, FMAXUB)
+
+SSE_OP_W(op_pminsw, FMINSW)
+SSE_OP_W(op_pmaxsw, FMAXSW)
+
+SSE_OP_Q(op_pand, FAND)
+SSE_OP_Q(op_pandn, FANDN)
+SSE_OP_Q(op_por, FOR)
+SSE_OP_Q(op_pxor, FXOR)
+
+SSE_OP_B(op_pcmpgtb, FCMPGTB)
+SSE_OP_W(op_pcmpgtw, FCMPGTW)
+SSE_OP_L(op_pcmpgtl, FCMPGTL)
+
+SSE_OP_B(op_pcmpeqb, FCMPEQ)
+SSE_OP_W(op_pcmpeqw, FCMPEQ)
+SSE_OP_L(op_pcmpeql, FCMPEQ)
+
+SSE_OP_W(op_pmullw, FMULLW)
+SSE_OP_W(op_pmulhuw, FMULHUW)
+SSE_OP_W(op_pmulhw, FMULHW)
+
+SSE_OP_B(op_pavgb, FAVG)
+SSE_OP_W(op_pavgw, FAVG)
+
+void OPPROTO glue(op_pmuludq, SUFFIX) (void)
+{
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0);
+#if SHIFT == 1
+    d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2);
+#endif
+}
+
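+/* PMADDWD: multiply signed 16-bit elements and add each adjacent pair of
+   32-bit products into one dword lane. */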
+void OPPROTO glue(op_pmaddwd, SUFFIX) (void)
+{
+    int i;
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    for(i = 0; i < (2 << SHIFT); i++) {
+        d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) +
+            (int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1);
+    }
+    FORCE_RET();
+}
+
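+/* abs1() and op_psadbw implement PSADBW: the sum of absolute byte
+   differences is accumulated separately for each 64-bit half. */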
+#if SHIFT == 0
+static inline int abs1(int a)
+{
+    if (a < 0)
+        return -a;
+    else
+        return a;
+}
+#endif
+void OPPROTO glue(op_psadbw, SUFFIX) (void)
+{
+    unsigned int val;
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    val = 0;
+    val += abs1(d->B(0) - s->B(0));
+    val += abs1(d->B(1) - s->B(1));
+    val += abs1(d->B(2) - s->B(2));
+    val += abs1(d->B(3) - s->B(3));
+    val += abs1(d->B(4) - s->B(4));
+    val += abs1(d->B(5) - s->B(5));
+    val += abs1(d->B(6) - s->B(6));
+    val += abs1(d->B(7) - s->B(7));
+    d->Q(0) = val;
+#if SHIFT == 1
+    val = 0;
+    val += abs1(d->B(8) - s->B(8));
+    val += abs1(d->B(9) - s->B(9));
+    val += abs1(d->B(10) - s->B(10));
+    val += abs1(d->B(11) - s->B(11));
+    val += abs1(d->B(12) - s->B(12));
+    val += abs1(d->B(13) - s->B(13));
+    val += abs1(d->B(14) - s->B(14));
+    val += abs1(d->B(15) - s->B(15));
+    d->Q(1) = val;
+#endif
+}
+
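+/* MASKMOVQ/MASKMOVDQU: store to the address in A0 only those destination
+   bytes whose corresponding mask byte has its top bit set. */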
+void OPPROTO glue(op_maskmov, SUFFIX) (void)
+{
+    int i;
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    for(i = 0; i < (8 << SHIFT); i++) {
+        if (s->B(i) & 0x80)
+            stb(A0 + i, d->B(i));
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_movl_mm_T0, SUFFIX) (void)
+{
+    Reg *d;
+    d = (Reg *)((char *)env + PARAM1);
+    d->L(0) = T0;
+    d->L(1) = 0;
+#if SHIFT == 1
+    d->Q(1) = 0;
+#endif
+}
+
+void OPPROTO glue(op_movl_T0_mm, SUFFIX) (void)
+{
+    Reg *s;
+    s = (Reg *)((char *)env + PARAM1);
+    T0 = s->L(0);
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO glue(op_movq_mm_T0, SUFFIX) (void)
+{
+    Reg *d;
+    d = (Reg *)((char *)env + PARAM1);
+    d->Q(0) = T0;
+#if SHIFT == 1
+    d->Q(1) = 0;
+#endif
+}
+
+void OPPROTO glue(op_movq_T0_mm, SUFFIX) (void)
+{
+    Reg *s;
+    s = (Reg *)((char *)env + PARAM1);
+    T0 = s->Q(0);
+}
+#endif
+
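+/* Shuffle ops: each 2-bit field of the immediate (PARAM3) selects which
+   source element is copied into the corresponding destination position. */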
+#if SHIFT == 0
+void OPPROTO glue(op_pshufw, SUFFIX) (void)
+{
+#if __GCC__ == 3 || defined(RT_ARCH_AMD64) /* VBOX hack in #else */
+    Reg r, *d, *s;
+    int order;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    order = PARAM3;
+    r.W(0) = s->W(order & 3);
+    r.W(1) = s->W((order >> 2) & 3);
+    r.W(2) = s->W((order >> 4) & 3);
+    r.W(3) = s->W((order >> 6) & 3);
+    *d = r;
+#else
+    Reg *s;
+    int order;
+    uint32_t l0, l1;
+    s = (Reg *)((char *)env + PARAM2);
+    order = PARAM3;
+    l0 = s->W(order & 3);
+    l0 |= (uint32_t)s->W((order >> 2) & 3) << 16;
+    l1 = s->W((order >> 4) & 3);
+    l1 |= (uint32_t)s->W((order >> 6) & 3) << 16;
+
+    s = (Reg *)((char *)env + PARAM1);
+    s->_l[0] = l0;
+    s->_l[1] = l1;
+#endif 
+}
+#else
+void OPPROTO op_shufps(void)
+{
+    Reg r, *d, *s;
+    int order;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    order = PARAM3;
+    r.L(0) = d->L(order & 3);
+    r.L(1) = d->L((order >> 2) & 3);
+    r.L(2) = s->L((order >> 4) & 3);
+    r.L(3) = s->L((order >> 6) & 3);
+    *d = r;
+}
+
+void OPPROTO op_shufpd(void)
+{
+    Reg r, *d, *s;
+    int order;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    order = PARAM3;
+    r.Q(0) = d->Q(order & 1);
+    r.Q(1) = s->Q((order >> 1) & 1);
+    *d = r;
+}
+
+void OPPROTO glue(op_pshufd, SUFFIX) (void)
+{
+    Reg r, *d, *s;
+    int order;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    order = PARAM3;
+    r.L(0) = s->L(order & 3);
+    r.L(1) = s->L((order >> 2) & 3);
+    r.L(2) = s->L((order >> 4) & 3);
+    r.L(3) = s->L((order >> 6) & 3);
+    *d = r;
+}
+
+void OPPROTO glue(op_pshuflw, SUFFIX) (void)
+{
+    Reg r, *d, *s;
+    int order;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    order = PARAM3;
+    r.W(0) = s->W(order & 3);
+    r.W(1) = s->W((order >> 2) & 3);
+    r.W(2) = s->W((order >> 4) & 3);
+    r.W(3) = s->W((order >> 6) & 3);
+    r.Q(1) = s->Q(1);
+    *d = r;
+}
+
+void OPPROTO glue(op_pshufhw, SUFFIX) (void)
+{
+    Reg r, *d, *s;
+    int order;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    order = PARAM3;
+    r.Q(0) = s->Q(0);
+    r.W(4) = s->W(4 + (order & 3));
+    r.W(5) = s->W(4 + ((order >> 2) & 3));
+    r.W(6) = s->W(4 + ((order >> 4) & 3));
+    r.W(7) = s->W(4 + ((order >> 6) & 3));
+    *d = r;
+}
+#endif
+
+#if SHIFT == 1
+/* FPU ops */
+/* XXX: not accurate */
+
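+/* SSE_OP_S expands to the ps/ss/pd/sd variants of one arithmetic op; all
+   arithmetic goes through softfloat so rounding and exception state are
+   kept in env->sse_status. */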
+#define SSE_OP_S(name, F)\
+void OPPROTO op_ ## name ## ps (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+    d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
+    d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
+    d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
+}\
+\
+void OPPROTO op_ ## name ## ss (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+}\
+void OPPROTO op_ ## name ## pd (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+    d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
+}\
+\
+void OPPROTO op_ ## name ## sd (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+}
+
+#define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status)
+#define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status)
+#define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status)
+#define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status)
+#define FPU_MIN(size, a, b) (a) < (b) ? (a) : (b)
+#define FPU_MAX(size, a, b) (a) > (b) ? (a) : (b)
+#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)
+
+SSE_OP_S(add, FPU_ADD)
+SSE_OP_S(sub, FPU_SUB)
+SSE_OP_S(mul, FPU_MUL)
+SSE_OP_S(div, FPU_DIV)
+SSE_OP_S(min, FPU_MIN)
+SSE_OP_S(max, FPU_MAX)
+SSE_OP_S(sqrt, FPU_SQRT)
+
+
+/* float to float conversions */
+void OPPROTO op_cvtps2pd(void)
+{
+    float32 s0, s1;
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    s0 = s->XMM_S(0);
+    s1 = s->XMM_S(1);
+    d->XMM_D(0) = float32_to_float64(s0, &env->sse_status);
+    d->XMM_D(1) = float32_to_float64(s1, &env->sse_status);
+}
+
+void OPPROTO op_cvtpd2ps(void)
+{
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
+    d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status);
+    d->Q(1) = 0;
+}
+
+void OPPROTO op_cvtss2sd(void)
+{
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status);
+}
+
+void OPPROTO op_cvtsd2ss(void)
+{
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+    d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
+}
+
+/* integer to float */
+void OPPROTO op_cvtdq2ps(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status);
+    d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status);
+    d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status);
+    d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status);
+}
+
+void OPPROTO op_cvtdq2pd(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    int32_t l0, l1;
+    l0 = (int32_t)s->XMM_L(0);
+    l1 = (int32_t)s->XMM_L(1);
+    d->XMM_D(0) = int32_to_float64(l0, &env->sse_status);
+    d->XMM_D(1) = int32_to_float64(l1, &env->sse_status);
+}
+
+void OPPROTO op_cvtpi2ps(void)
+{
+    XMMReg *d = (Reg *)((char *)env + PARAM1);
+    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
+    d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status);
+    d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);
+}
+
+void OPPROTO op_cvtpi2pd(void)
+{
+    XMMReg *d = (Reg *)((char *)env + PARAM1);
+    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
+    d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status);
+    d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);
+}
+
+void OPPROTO op_cvtsi2ss(void)
+{
+    XMMReg *d = (Reg *)((char *)env + PARAM1);
+    d->XMM_S(0) = int32_to_float32(T0, &env->sse_status);
+}
+
+void OPPROTO op_cvtsi2sd(void)
+{
+    XMMReg *d = (Reg *)((char *)env + PARAM1);
+    d->XMM_D(0) = int32_to_float64(T0, &env->sse_status);
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_cvtsq2ss(void)
+{
+    XMMReg *d = (Reg *)((char *)env + PARAM1);
+    d->XMM_S(0) = int64_to_float32(T0, &env->sse_status);
+}
+
+void OPPROTO op_cvtsq2sd(void)
+{
+    XMMReg *d = (Reg *)((char *)env + PARAM1);
+    d->XMM_D(0) = int64_to_float64(T0, &env->sse_status);
+}
+#endif
+
+/* float to integer */
+void OPPROTO op_cvtps2dq(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
+    d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
+    d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status);
+    d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status);
+}
+
+void OPPROTO op_cvtpd2dq(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
+    d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
+    d->XMM_Q(1) = 0;
+}
+
+void OPPROTO op_cvtps2pi(void)
+{
+    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
+    d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
+}
+
+void OPPROTO op_cvtpd2pi(void)
+{
+    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
+    d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
+}
+
+void OPPROTO op_cvtss2si(void)
+{
+    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
+    T0 = float32_to_int32(s->XMM_S(0), &env->sse_status);
+}
+
+void OPPROTO op_cvtsd2si(void)
+{
+    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
+    T0 = float64_to_int32(s->XMM_D(0), &env->sse_status);
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_cvtss2sq(void)
+{
+    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
+    T0 = float32_to_int64(s->XMM_S(0), &env->sse_status);
+}
+
+void OPPROTO op_cvtsd2sq(void)
+{
+    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
+    T0 = float64_to_int64(s->XMM_D(0), &env->sse_status);
+}
+#endif
+
+/* float to integer truncated */
+void OPPROTO op_cvttps2dq(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
+    d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
+    d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status);
+    d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status);
+}
+
+void OPPROTO op_cvttpd2dq(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+    d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
+    d->XMM_Q(1) = 0;
+}
+
+void OPPROTO op_cvttps2pi(void)
+{
+    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
+    d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
+}
+
+void OPPROTO op_cvttpd2pi(void)
+{
+    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+    d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
+}
+
+void OPPROTO op_cvttss2si(void)
+{
+    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
+    T0 = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
+}
+
+void OPPROTO op_cvttsd2si(void)
+{
+    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
+    T0 = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+}
+
+#ifdef TARGET_X86_64
+void OPPROTO op_cvttss2sq(void)
+{
+    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
+    T0 = float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
+}
+
+void OPPROTO op_cvttsd2sq(void)
+{
+    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
+    T0 = float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
+}
+#endif
+
+void OPPROTO op_rsqrtps(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
+    d->XMM_S(1) = approx_rsqrt(s->XMM_S(1));
+    d->XMM_S(2) = approx_rsqrt(s->XMM_S(2));
+    d->XMM_S(3) = approx_rsqrt(s->XMM_S(3));
+}
+
+void OPPROTO op_rsqrtss(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
+}
+
+void OPPROTO op_rcpps(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_S(0) = approx_rcp(s->XMM_S(0));
+    d->XMM_S(1) = approx_rcp(s->XMM_S(1));
+    d->XMM_S(2) = approx_rcp(s->XMM_S(2));
+    d->XMM_S(3) = approx_rcp(s->XMM_S(3));
+}
+
+void OPPROTO op_rcpss(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_S(0) = approx_rcp(s->XMM_S(0));
+}
+
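+/* SSE3 horizontal add/subtract, plus ADDSUBPS/ADDSUBPD which subtract in
+   the even lanes and add in the odd ones. */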
+void OPPROTO op_haddps(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    XMMReg r;
+    r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1);
+    r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3);
+    r.XMM_S(2) = s->XMM_S(0) + s->XMM_S(1);
+    r.XMM_S(3) = s->XMM_S(2) + s->XMM_S(3);
+    *d = r;
+}
+
+void OPPROTO op_haddpd(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    XMMReg r;
+    r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1);
+    r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1);
+    *d = r;
+}
+
+void OPPROTO op_hsubps(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    XMMReg r;
+    r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1);
+    r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3);
+    r.XMM_S(2) = s->XMM_S(0) - s->XMM_S(1);
+    r.XMM_S(3) = s->XMM_S(2) - s->XMM_S(3);
+    *d = r;
+}
+
+void OPPROTO op_hsubpd(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    XMMReg r;
+    r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1);
+    r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1);
+    *d = r;
+}
+
+void OPPROTO op_addsubps(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0);
+    d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1);
+    d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2);
+    d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3);
+}
+
+void OPPROTO op_addsubpd(void)
+{
+    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
+    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
+    d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0);
+    d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1);
+}
+
+/* XXX: unordered */
+#define SSE_OP_CMP(name, F)\
+void OPPROTO op_ ## name ## ps (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+    d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
+    d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
+    d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
+}\
+\
+void OPPROTO op_ ## name ## ss (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+}\
+void OPPROTO op_ ## name ## pd (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+    d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
+}\
+\
+void OPPROTO op_ ## name ## sd (void)\
+{\
+    Reg *d, *s;\
+    d = (Reg *)((char *)env + PARAM1);\
+    s = (Reg *)((char *)env + PARAM2);\
+    d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+}
+
+#define FPU_CMPEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? -1 : 0
+#define FPU_CMPLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0
+#define FPU_CMPLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? -1 : 0
+#define FPU_CMPUNORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? -1 : 0
+#define FPU_CMPNEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? 0 : -1
+#define FPU_CMPNLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1
+#define FPU_CMPNLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? 0 : -1
+#define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1
+
+SSE_OP_CMP(cmpeq, FPU_CMPEQ)
+SSE_OP_CMP(cmplt, FPU_CMPLT)
+SSE_OP_CMP(cmple, FPU_CMPLE)
+SSE_OP_CMP(cmpunord, FPU_CMPUNORD)
+SSE_OP_CMP(cmpneq, FPU_CMPNEQ)
+SSE_OP_CMP(cmpnlt, FPU_CMPNLT)
+SSE_OP_CMP(cmpnle, FPU_CMPNLE)
+SSE_OP_CMP(cmpord, FPU_CMPORD)
+
+const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
+
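+/* float*_compare returns less (-1), equal (0), greater (1) or unordered (2);
+   indexing comis_eflags with ret + 1 yields CF, ZF, no flags or ZF|PF|CF,
+   matching the COMIS/UCOMIS flag definition. */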
+void OPPROTO op_ucomiss(void)
+{
+    int ret;
+    float32 s0, s1;
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    s0 = d->XMM_S(0);
+    s1 = s->XMM_S(0);
+    ret = float32_compare_quiet(s0, s1, &env->sse_status);
+    CC_SRC = comis_eflags[ret + 1];
+    FORCE_RET();
+}
+
+void OPPROTO op_comiss(void)
+{
+    int ret;
+    float32 s0, s1;
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    s0 = d->XMM_S(0);
+    s1 = s->XMM_S(0);
+    ret = float32_compare(s0, s1, &env->sse_status);
+    CC_SRC = comis_eflags[ret + 1];
+    FORCE_RET();
+}
+
+void OPPROTO op_ucomisd(void)
+{
+    int ret;
+    float64 d0, d1;
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    d0 = d->XMM_D(0);
+    d1 = s->XMM_D(0);
+    ret = float64_compare_quiet(d0, d1, &env->sse_status);
+    CC_SRC = comis_eflags[ret + 1];
+    FORCE_RET();
+}
+
+void OPPROTO op_comisd(void)
+{
+    int ret;
+    float64 d0, d1;
+    Reg *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    d0 = d->XMM_D(0);
+    d1 = s->XMM_D(0);
+    ret = float64_compare(d0, d1, &env->sse_status);
+    CC_SRC = comis_eflags[ret + 1];
+    FORCE_RET();
+}
+
+void OPPROTO op_movmskps(void)
+{
+    int b0, b1, b2, b3;
+    Reg *s;
+    s = (Reg *)((char *)env + PARAM1);
+    b0 = s->XMM_L(0) >> 31;
+    b1 = s->XMM_L(1) >> 31;
+    b2 = s->XMM_L(2) >> 31;
+    b3 = s->XMM_L(3) >> 31;
+    T0 = b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
+}
+
+void OPPROTO op_movmskpd(void)
+{
+    int b0, b1;
+    Reg *s;
+    s = (Reg *)((char *)env + PARAM1);
+    b0 = s->XMM_L(1) >> 31;
+    b1 = s->XMM_L(3) >> 31;
+    T0 = b0 | (b1 << 1);
+}
+
+#endif
+
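+/* PMOVMSKB: gather the most significant bit of every byte into the low
+   8 (MMX) or 16 (SSE) bits of T0. */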
+void OPPROTO glue(op_pmovmskb, SUFFIX)(void)
+{
+    Reg *s;
+    s = (Reg *)((char *)env + PARAM1);
+    T0 = 0;
+    T0 |= (s->XMM_B(0) >> 7);
+    T0 |= (s->XMM_B(1) >> 6) & 0x02;
+    T0 |= (s->XMM_B(2) >> 5) & 0x04;
+    T0 |= (s->XMM_B(3) >> 4) & 0x08;
+    T0 |= (s->XMM_B(4) >> 3) & 0x10;
+    T0 |= (s->XMM_B(5) >> 2) & 0x20;
+    T0 |= (s->XMM_B(6) >> 1) & 0x40;
+    T0 |= (s->XMM_B(7)) & 0x80;
+#if SHIFT == 1
+    T0 |= (s->XMM_B(8) << 1) & 0x0100;
+    T0 |= (s->XMM_B(9) << 2) & 0x0200;
+    T0 |= (s->XMM_B(10) << 3) & 0x0400;
+    T0 |= (s->XMM_B(11) << 4) & 0x0800;
+    T0 |= (s->XMM_B(12) << 5) & 0x1000;
+    T0 |= (s->XMM_B(13) << 6) & 0x2000;
+    T0 |= (s->XMM_B(14) << 7) & 0x4000;
+    T0 |= (s->XMM_B(15) << 8) & 0x8000;
+#endif
+}
+
+void OPPROTO glue(op_pinsrw, SUFFIX) (void)
+{
+    Reg *d = (Reg *)((char *)env + PARAM1);
+    int pos = PARAM2;
+    
+    d->W(pos) = T0;
+}
+
+void OPPROTO glue(op_pextrw, SUFFIX) (void)
+{
+    Reg *s = (Reg *)((char *)env + PARAM1);
+    int pos = PARAM2;
+    
+    T0 = s->W(pos);
+}
+
+void OPPROTO glue(op_packsswb, SUFFIX) (void)
+{
+    Reg r, *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    r.B(0) = satsb((int16_t)d->W(0));
+    r.B(1) = satsb((int16_t)d->W(1));
+    r.B(2) = satsb((int16_t)d->W(2));
+    r.B(3) = satsb((int16_t)d->W(3));
+#if SHIFT == 1
+    r.B(4) = satsb((int16_t)d->W(4));
+    r.B(5) = satsb((int16_t)d->W(5));
+    r.B(6) = satsb((int16_t)d->W(6));
+    r.B(7) = satsb((int16_t)d->W(7));
+#endif
+    r.B((4 << SHIFT) + 0) = satsb((int16_t)s->W(0));
+    r.B((4 << SHIFT) + 1) = satsb((int16_t)s->W(1));
+    r.B((4 << SHIFT) + 2) = satsb((int16_t)s->W(2));
+    r.B((4 << SHIFT) + 3) = satsb((int16_t)s->W(3));
+#if SHIFT == 1
+    r.B(12) = satsb((int16_t)s->W(4));
+    r.B(13) = satsb((int16_t)s->W(5));
+    r.B(14) = satsb((int16_t)s->W(6));
+    r.B(15) = satsb((int16_t)s->W(7));
+#endif
+    *d = r;
+}
+
+void OPPROTO glue(op_packuswb, SUFFIX) (void)
+{
+    Reg r, *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    r.B(0) = satub((int16_t)d->W(0));
+    r.B(1) = satub((int16_t)d->W(1));
+    r.B(2) = satub((int16_t)d->W(2));
+    r.B(3) = satub((int16_t)d->W(3));
+#if SHIFT == 1
+    r.B(4) = satub((int16_t)d->W(4));
+    r.B(5) = satub((int16_t)d->W(5));
+    r.B(6) = satub((int16_t)d->W(6));
+    r.B(7) = satub((int16_t)d->W(7));
+#endif
+    r.B((4 << SHIFT) + 0) = satub((int16_t)s->W(0));
+    r.B((4 << SHIFT) + 1) = satub((int16_t)s->W(1));
+    r.B((4 << SHIFT) + 2) = satub((int16_t)s->W(2));
+    r.B((4 << SHIFT) + 3) = satub((int16_t)s->W(3));
+#if SHIFT == 1
+    r.B(12) = satub((int16_t)s->W(4));
+    r.B(13) = satub((int16_t)s->W(5));
+    r.B(14) = satub((int16_t)s->W(6));
+    r.B(15) = satub((int16_t)s->W(7));
+#endif
+    *d = r;
+}
+
+void OPPROTO glue(op_packssdw, SUFFIX) (void)
+{
+    Reg r, *d, *s;
+    d = (Reg *)((char *)env + PARAM1);
+    s = (Reg *)((char *)env + PARAM2);
+
+    r.W(0) = satsw(d->L(0));
+    r.W(1) = satsw(d->L(1));
+#if SHIFT == 1
+    r.W(2) = satsw(d->L(2));
+    r.W(3) = satsw(d->L(3));
+#endif
+    r.W((2 << SHIFT) + 0) = satsw(s->L(0));
+    r.W((2 << SHIFT) + 1) = satsw(s->L(1));
+#if SHIFT == 1
+    r.W(6) = satsw(s->L(2));
+    r.W(7) = satsw(s->L(3));
+#endif
+    *d = r;
+}
+
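+/* Interleave (unpack) ops: base selects the low (0) or high (1) half of the
+   operands, giving the punpckl* and punpckh* families below. */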
+#define UNPCK_OP(base_name, base)                               \
+                                                                \
+void OPPROTO glue(op_punpck ## base_name ## bw, SUFFIX) (void)   \
+{                                                               \
+    Reg r, *d, *s;                                              \
+    d = (Reg *)((char *)env + PARAM1);                          \
+    s = (Reg *)((char *)env + PARAM2);                          \
+                                                                \
+    r.B(0) = d->B((base << (SHIFT + 2)) + 0);                   \
+    r.B(1) = s->B((base << (SHIFT + 2)) + 0);                   \
+    r.B(2) = d->B((base << (SHIFT + 2)) + 1);                   \
+    r.B(3) = s->B((base << (SHIFT + 2)) + 1);                   \
+    r.B(4) = d->B((base << (SHIFT + 2)) + 2);                   \
+    r.B(5) = s->B((base << (SHIFT + 2)) + 2);                   \
+    r.B(6) = d->B((base << (SHIFT + 2)) + 3);                   \
+    r.B(7) = s->B((base << (SHIFT + 2)) + 3);                   \
+XMM_ONLY(                                                       \
+    r.B(8) = d->B((base << (SHIFT + 2)) + 4);                   \
+    r.B(9) = s->B((base << (SHIFT + 2)) + 4);                   \
+    r.B(10) = d->B((base << (SHIFT + 2)) + 5);                  \
+    r.B(11) = s->B((base << (SHIFT + 2)) + 5);                  \
+    r.B(12) = d->B((base << (SHIFT + 2)) + 6);                  \
+    r.B(13) = s->B((base << (SHIFT + 2)) + 6);                  \
+    r.B(14) = d->B((base << (SHIFT + 2)) + 7);                  \
+    r.B(15) = s->B((base << (SHIFT + 2)) + 7);                  \
+)                                                               \
+    *d = r;                                                     \
+}                                                               \
+                                                                \
+void OPPROTO glue(op_punpck ## base_name ## wd, SUFFIX) (void)   \
+{                                                               \
+    Reg r, *d, *s;                                              \
+    d = (Reg *)((char *)env + PARAM1);                          \
+    s = (Reg *)((char *)env + PARAM2);                          \
+                                                                \
+    r.W(0) = d->W((base << (SHIFT + 1)) + 0);                   \
+    r.W(1) = s->W((base << (SHIFT + 1)) + 0);                   \
+    r.W(2) = d->W((base << (SHIFT + 1)) + 1);                   \
+    r.W(3) = s->W((base << (SHIFT + 1)) + 1);                   \
+XMM_ONLY(                                                       \
+    r.W(4) = d->W((base << (SHIFT + 1)) + 2);                   \
+    r.W(5) = s->W((base << (SHIFT + 1)) + 2);                   \
+    r.W(6) = d->W((base << (SHIFT + 1)) + 3);                   \
+    r.W(7) = s->W((base << (SHIFT + 1)) + 3);                   \
+)                                                               \
+    *d = r;                                                     \
+}                                                               \
+                                                                \
+void OPPROTO glue(op_punpck ## base_name ## dq, SUFFIX) (void)   \
+{                                                               \
+    Reg r, *d, *s;                                              \
+    d = (Reg *)((char *)env + PARAM1);                          \
+    s = (Reg *)((char *)env + PARAM2);                          \
+                                                                \
+    r.L(0) = d->L((base << SHIFT) + 0);                         \
+    r.L(1) = s->L((base << SHIFT) + 0);                         \
+XMM_ONLY(                                                       \
+    r.L(2) = d->L((base << SHIFT) + 1);                         \
+    r.L(3) = s->L((base << SHIFT) + 1);                         \
+)                                                               \
+    *d = r;                                                     \
+}                                                               \
+                                                                \
+XMM_ONLY(                                                       \
+void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void)  \
+{                                                               \
+    Reg r, *d, *s;                                              \
+    d = (Reg *)((char *)env + PARAM1);                          \
+    s = (Reg *)((char *)env + PARAM2);                          \
+                                                                \
+    r.Q(0) = d->Q(base);                                        \
+    r.Q(1) = s->Q(base);                                        \
+    *d = r;                                                     \
+}                                                               \
+)
+
+UNPCK_OP(l, 0)
+UNPCK_OP(h, 1)
+
+#undef SHIFT
+#undef XMM_ONLY
+#undef Reg
+#undef B
+#undef W
+#undef L
+#undef Q
+#undef SUFFIX
Index: /trunk/src/recompiler_new/target-i386/ops_template.h
===================================================================
--- /trunk/src/recompiler_new/target-i386/ops_template.h	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/ops_template.h	(revision 13168)
@@ -0,0 +1,606 @@
+/*
+ *  i386 micro operations (included several times to generate
+ *  different operand sizes)
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#define DATA_BITS (1 << (3 + SHIFT))
+#define SHIFT_MASK (DATA_BITS - 1)
+#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1))
+#if DATA_BITS <= 32
+#define SHIFT1_MASK 0x1f
+#else
+#define SHIFT1_MASK 0x3f
+#endif
+
+#if DATA_BITS == 8
+#define SUFFIX b
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#define DATA_MASK 0xff
+#elif DATA_BITS == 16
+#define SUFFIX w
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#define DATA_MASK 0xffff
+#elif DATA_BITS == 32
+#define SUFFIX l
+#define DATA_TYPE uint32_t
+#define DATA_STYPE int32_t
+#define DATA_MASK 0xffffffff
+#elif DATA_BITS == 64
+#define SUFFIX q
+#define DATA_TYPE uint64_t
+#define DATA_STYPE int64_t
+#define DATA_MASK 0xffffffffffffffffULL
+#else
+#error unhandled operand size
+#endif
+
+/* dynamic flags computation */
+
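+/* EFLAGS are evaluated lazily: each arithmetic op only records its operands
+   and result in CC_SRC/CC_DST together with a CC_OP code, and the
+   compute_all_*/compute_c_* helpers below reconstruct the flags on demand. */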
+static int glue(compute_all_add, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    target_long src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_DST - CC_SRC;
+    cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = (CC_DST ^ src1 ^ src2) & 0x10;
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_add, SUFFIX)(void)
+{
+    int cf;
+    target_long src1;
+    src1 = CC_SRC;
+    cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
+    return cf;
+}
+
+static int glue(compute_all_adc, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    target_long src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_DST - CC_SRC - 1;
+    cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = (CC_DST ^ src1 ^ src2) & 0x10;
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_adc, SUFFIX)(void)
+{
+    int cf;
+    target_long src1;
+    src1 = CC_SRC;
+    cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
+    return cf;
+}
+
+static int glue(compute_all_sub, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC;
+    src2 = CC_SRC;
+    cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = (CC_DST ^ src1 ^ src2) & 0x10;
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_sub, SUFFIX)(void)
+{
+    int cf;
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC;
+    src2 = CC_SRC;
+    cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
+    return cf;
+}
+
+static int glue(compute_all_sbb, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC + 1;
+    src2 = CC_SRC;
+    cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = (CC_DST ^ src1 ^ src2) & 0x10;
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_sbb, SUFFIX)(void)
+{
+    int cf;
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC + 1;
+    src2 = CC_SRC;
+    cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
+    return cf;
+}
+
+static int glue(compute_all_logic, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    cf = 0;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = 0;
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    of = 0;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_logic, SUFFIX)(void)
+{
+    return 0;
+}
+
+static int glue(compute_all_inc, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    target_long src1, src2;
+    src1 = CC_DST - 1;
+    src2 = 1;
+    cf = CC_SRC;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = (CC_DST ^ src1 ^ src2) & 0x10;
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11;
+    return cf | pf | af | zf | sf | of;
+}
+
+#if DATA_BITS == 32
+static int glue(compute_c_inc, SUFFIX)(void)
+{
+    return CC_SRC;
+}
+#endif
+
+static int glue(compute_all_dec, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    target_long src1, src2;
+    src1 = CC_DST + 1;
+    src2 = 1;
+    cf = CC_SRC;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = (CC_DST ^ src1 ^ src2) & 0x10;
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    of = ((CC_DST & DATA_MASK) == ((target_ulong)SIGN_MASK - 1)) << 11;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_all_shl, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = 0; /* undefined */
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    /* OF is only defined when the shift count is 1 */
+    of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_shl, SUFFIX)(void)
+{
+    return (CC_SRC >> (DATA_BITS - 1)) & CC_C;
+}
+
+#if DATA_BITS == 32
+static int glue(compute_c_sar, SUFFIX)(void)
+{
+    return CC_SRC & 1;
+}
+#endif
+
+static int glue(compute_all_sar, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    cf = CC_SRC & 1;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = 0; /* undefined */
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    /* OF is only defined when the shift count is 1 */
+    of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+    return cf | pf | af | zf | sf | of;
+}
+
+#if DATA_BITS == 32
+static int glue(compute_c_mul, SUFFIX)(void)
+{
+    int cf;
+    cf = (CC_SRC != 0);
+    return cf;
+}
+#endif
+
+/* NOTE: we compute the flags like the P4 does.  On older CPUs, only OF and
+   CF are modified, and emulating that is slower. */
+static int glue(compute_all_mul, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    cf = (CC_SRC != 0);
+    pf = parity_table[(uint8_t)CC_DST];
+    af = 0; /* undefined */
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    of = cf << 11;
+    return cf | pf | af | zf | sf | of;
+}
+
+/* various optimized jump cases */
+
+void OPPROTO glue(op_jb_sub, SUFFIX)(void)
+{
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC;
+    src2 = CC_SRC;
+
+    if ((DATA_TYPE)src1 < (DATA_TYPE)src2)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_jz_sub, SUFFIX)(void)
+{
+    if ((DATA_TYPE)CC_DST == 0)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_jnz_sub, SUFFIX)(void)
+{
+    if ((DATA_TYPE)CC_DST != 0)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_jbe_sub, SUFFIX)(void)
+{
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC;
+    src2 = CC_SRC;
+
+    if ((DATA_TYPE)src1 <= (DATA_TYPE)src2)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_js_sub, SUFFIX)(void)
+{
+    if (CC_DST & SIGN_MASK)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_jl_sub, SUFFIX)(void)
+{
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC;
+    src2 = CC_SRC;
+
+    if ((DATA_STYPE)src1 < (DATA_STYPE)src2)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_jle_sub, SUFFIX)(void)
+{
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC;
+    src2 = CC_SRC;
+
+    if ((DATA_STYPE)src1 <= (DATA_STYPE)src2)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+/* legacy LOOP / JCXZ style ops */
+
+#if DATA_BITS >= 16
+
+void OPPROTO glue(op_loopnz, SUFFIX)(void)
+{
+    if ((DATA_TYPE)ECX != 0 && !(T0 & CC_Z))
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_loopz, SUFFIX)(void)
+{
+    if ((DATA_TYPE)ECX != 0 && (T0 & CC_Z))
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_jz_ecx, SUFFIX)(void)
+{
+    if ((DATA_TYPE)ECX == 0)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+void OPPROTO glue(op_jnz_ecx, SUFFIX)(void)
+{
+    if ((DATA_TYPE)ECX != 0)
+        GOTO_LABEL_PARAM(1);
+    FORCE_RET();
+}
+
+#endif
+
+/* various optimized set cases */
+
+void OPPROTO glue(op_setb_T0_sub, SUFFIX)(void)
+{
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC;
+    src2 = CC_SRC;
+
+    T0 = ((DATA_TYPE)src1 < (DATA_TYPE)src2);
+}
+
+void OPPROTO glue(op_setz_T0_sub, SUFFIX)(void)
+{
+    T0 = ((DATA_TYPE)CC_DST == 0);
+}
+
+void OPPROTO glue(op_setbe_T0_sub, SUFFIX)(void)
+{
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC;
+    src2 = CC_SRC;
+
+    T0 = ((DATA_TYPE)src1 <= (DATA_TYPE)src2);
+}
+
+void OPPROTO glue(op_sets_T0_sub, SUFFIX)(void)
+{
+    T0 = lshift(CC_DST, -(DATA_BITS - 1)) & 1;
+}
+
+void OPPROTO glue(op_setl_T0_sub, SUFFIX)(void)
+{
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC;
+    src2 = CC_SRC;
+
+    T0 = ((DATA_STYPE)src1 < (DATA_STYPE)src2);
+}
+
+void OPPROTO glue(op_setle_T0_sub, SUFFIX)(void)
+{
+    target_long src1, src2;
+    src1 = CC_DST + CC_SRC;
+    src2 = CC_SRC;
+
+    T0 = ((DATA_STYPE)src1 <= (DATA_STYPE)src2);
+}
+
+/* shifts */
+
+void OPPROTO glue(glue(op_shl, SUFFIX), _T0_T1)(void)
+{
+    int count;
+    count = T1 & SHIFT1_MASK;
+    T0 = T0 << count;
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_shr, SUFFIX), _T0_T1)(void)
+{
+    int count;
+    count = T1 & SHIFT1_MASK;
+    T0 &= DATA_MASK;
+    T0 = T0 >> count;
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_sar, SUFFIX), _T0_T1)(void)
+{
+    int count;
+    target_long src;
+
+    count = T1 & SHIFT1_MASK;
+    src = (DATA_STYPE)T0;
+    T0 = src >> count;
+    FORCE_RET();
+}
+
+#undef MEM_WRITE
+#include "ops_template_mem.h"
+
+#define MEM_WRITE 0
+#include "ops_template_mem.h"
+
+#if !defined(CONFIG_USER_ONLY)
+#define MEM_WRITE 1
+#include "ops_template_mem.h"
+
+#define MEM_WRITE 2
+#include "ops_template_mem.h"
+#endif
+
+/* bit operations */
+#if DATA_BITS >= 16
+
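+/* BT stores the shifted value directly in CC_SRC; BTS/BTR/BTC leave it in
+   T1 so that op_update_bt_cc can copy it into CC_SRC afterwards. */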
+void OPPROTO glue(glue(op_bt, SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    count = T1 & SHIFT_MASK;
+    CC_SRC = T0 >> count;
+}
+
+void OPPROTO glue(glue(op_bts, SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    count = T1 & SHIFT_MASK;
+    T1 = T0 >> count;
+    T0 |= (((target_long)1) << count);
+}
+
+void OPPROTO glue(glue(op_btr, SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    count = T1 & SHIFT_MASK;
+    T1 = T0 >> count;
+    T0 &= ~(((target_long)1) << count);
+}
+
+void OPPROTO glue(glue(op_btc, SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    count = T1 & SHIFT_MASK;
+    T1 = T0 >> count;
+    T0 ^= (((target_long)1) << count);
+}
+
+void OPPROTO glue(glue(op_add_bit, SUFFIX), _A0_T1)(void)
+{
+    A0 += ((DATA_STYPE)T1 >> (3 + SHIFT)) << SHIFT;
+}
+
+void OPPROTO glue(glue(op_bsf, SUFFIX), _T0_cc)(void)
+{
+    int count;
+    target_long res;
+    
+    res = T0 & DATA_MASK;
+    if (res != 0) {
+        count = 0;
+        while ((res & 1) == 0) {
+            count++;
+            res >>= 1;
+        }
+        T1 = count;
+        CC_DST = 1; /* ZF = 0 */
+    } else {
+        CC_DST = 0; /* ZF = 1 */
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_bsr, SUFFIX), _T0_cc)(void)
+{
+    int count;
+    target_long res;
+
+    res = T0 & DATA_MASK;
+    if (res != 0) {
+        count = DATA_BITS - 1;
+        while ((res & SIGN_MASK) == 0) {
+            count--;
+            res <<= 1;
+        }
+        T1 = count;
+        CC_DST = 1; /* ZF = 0 */
+    } else {
+        CC_DST = 0; /* ZF = 1 */
+    }
+    FORCE_RET();
+}
+
+#endif
+
+#if DATA_BITS == 32
+void OPPROTO op_update_bt_cc(void)
+{
+    CC_SRC = T1;
+}
+#endif
+
+/* string operations */
+
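+/* DF is kept as +1 or -1, so shifting it by the size exponent yields the
+   signed per-element step used by the string ops. */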
+void OPPROTO glue(op_movl_T0_Dshift, SUFFIX)(void)
+{
+    T0 = DF << SHIFT;
+}
+
+/* port I/O */
+#if DATA_BITS <= 32
+void OPPROTO glue(glue(op_out, SUFFIX), _T0_T1)(void)
+{
+    glue(cpu_out, SUFFIX)(env, T0, T1 & DATA_MASK);
+}
+
+void OPPROTO glue(glue(op_in, SUFFIX), _T0_T1)(void)
+{
+    T1 = glue(cpu_in, SUFFIX)(env, T0);
+}
+
+void OPPROTO glue(glue(op_in, SUFFIX), _DX_T0)(void)
+{
+    T0 = glue(cpu_in, SUFFIX)(env, EDX & 0xffff);
+}
+
+void OPPROTO glue(glue(op_out, SUFFIX), _DX_T0)(void)
+{
+    glue(cpu_out, SUFFIX)(env, EDX & 0xffff, T0);
+}
+
+void OPPROTO glue(glue(op_check_io, SUFFIX), _T0)(void)
+{
+    glue(glue(check_io, SUFFIX), _T0)();
+}
+
+void OPPROTO glue(glue(op_check_io, SUFFIX), _DX)(void)
+{
+    glue(glue(check_io, SUFFIX), _DX)();
+}
+#endif
+
+#undef DATA_BITS
+#undef SHIFT_MASK
+#undef SHIFT1_MASK
+#undef SIGN_MASK
+#undef DATA_TYPE
+#undef DATA_STYPE
+#undef DATA_MASK
+#undef SUFFIX
Index: /trunk/src/recompiler_new/target-i386/ops_template_mem.h
===================================================================
--- /trunk/src/recompiler_new/target-i386/ops_template_mem.h	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/ops_template_mem.h	(revision 13168)
@@ -0,0 +1,492 @@
+/*
+ *  i386 micro operations (included several times to generate
+ *  different operand sizes)
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#ifdef MEM_WRITE
+
+#if MEM_WRITE == 0
+
+#if DATA_BITS == 8
+#define MEM_SUFFIX b_raw
+#elif DATA_BITS == 16
+#define MEM_SUFFIX w_raw
+#elif DATA_BITS == 32
+#define MEM_SUFFIX l_raw
+#elif DATA_BITS == 64
+#define MEM_SUFFIX q_raw
+#endif
+
+#elif MEM_WRITE == 1
+
+#if DATA_BITS == 8
+#define MEM_SUFFIX b_kernel
+#elif DATA_BITS == 16
+#define MEM_SUFFIX w_kernel
+#elif DATA_BITS == 32
+#define MEM_SUFFIX l_kernel
+#elif DATA_BITS == 64
+#define MEM_SUFFIX q_kernel
+#endif
+
+#elif MEM_WRITE == 2
+
+#if DATA_BITS == 8
+#define MEM_SUFFIX b_user
+#elif DATA_BITS == 16
+#define MEM_SUFFIX w_user
+#elif DATA_BITS == 32
+#define MEM_SUFFIX l_user
+#elif DATA_BITS == 64
+#define MEM_SUFFIX q_user
+#endif
+
+#else
+
+#error invalid MEM_WRITE
+
+#endif
+
+#else
+
+#define MEM_SUFFIX SUFFIX
+
+#endif
+
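+/* Rotate/shift ops are generated once per operand size and, through
+   MEM_WRITE, once per access flavour (register-only, raw, kernel, user);
+   the memory variants write the result back via st*(A0, T0) before the
+   flags are updated. */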
+void OPPROTO glue(glue(op_rol, MEM_SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    target_long src;
+
+    if (T1 & SHIFT1_MASK) {
+        count = T1 & SHIFT_MASK;
+        src = T0;
+        T0 &= DATA_MASK;
+        T0 = (T0 << count) | (T0 >> (DATA_BITS - count));
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#else
+        /* gcc 3.2 workaround. This is really a bug in gcc. */
+        asm volatile("" : : "r" (T0));
+#endif
+        CC_SRC = (cc_table[CC_OP].compute_all() & ~(CC_O | CC_C)) | 
+            (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) | 
+            (T0 & CC_C);
+        CC_OP = CC_OP_EFLAGS;
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_ror, MEM_SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    target_long src;
+
+    if (T1 & SHIFT1_MASK) {
+        count = T1 & SHIFT_MASK;
+        src = T0;
+        T0 &= DATA_MASK;
+        T0 = (T0 >> count) | (T0 << (DATA_BITS - count));
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#else
+        /* gcc 3.2 workaround. This is really a bug in gcc. */
+        asm volatile("" : : "r" (T0));
+#endif
+        CC_SRC = (cc_table[CC_OP].compute_all() & ~(CC_O | CC_C)) |
+            (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) | 
+            ((T0 >> (DATA_BITS - 1)) & CC_C);
+        CC_OP = CC_OP_EFLAGS;
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_rol, MEM_SUFFIX), _T0_T1)(void)
+{
+    int count;
+    count = T1 & SHIFT_MASK;
+    if (count) {
+        T0 &= DATA_MASK;
+        T0 = (T0 << count) | (T0 >> (DATA_BITS - count));
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_ror, MEM_SUFFIX), _T0_T1)(void)
+{
+    int count;
+    count = T1 & SHIFT_MASK;
+    if (count) {
+        T0 &= DATA_MASK;
+        T0 = (T0 >> count) | (T0 << (DATA_BITS - count));
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_rcl, MEM_SUFFIX), _T0_T1_cc)(void)
+{
+    int count, eflags;
+    target_ulong src;
+    target_long res;
+
+    count = T1 & SHIFT1_MASK;
+#if DATA_BITS == 16
+    count = rclw_table[count];
+#elif DATA_BITS == 8
+    count = rclb_table[count];
+#endif
+    if (count) {
+        eflags = cc_table[CC_OP].compute_all();
+        T0 &= DATA_MASK;
+        src = T0;
+        res = (T0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1));
+        if (count > 1)
+            res |= T0 >> (DATA_BITS + 1 - count);
+        T0 = res;
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+        CC_SRC = (eflags & ~(CC_C | CC_O)) |
+            (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) | 
+            ((src >> (DATA_BITS - count)) & CC_C);
+        CC_OP = CC_OP_EFLAGS;
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_rcr, MEM_SUFFIX), _T0_T1_cc)(void)
+{
+    int count, eflags;
+    target_ulong src;
+    target_long res;
+
+    count = T1 & SHIFT1_MASK;
+#if DATA_BITS == 16
+    count = rclw_table[count];
+#elif DATA_BITS == 8
+    count = rclb_table[count];
+#endif
+    if (count) {
+        eflags = cc_table[CC_OP].compute_all();
+        T0 &= DATA_MASK;
+        src = T0;
+        res = (T0 >> count) | ((target_ulong)(eflags & CC_C) << (DATA_BITS - count));
+        if (count > 1)
+            res |= T0 << (DATA_BITS + 1 - count);
+        T0 = res;
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+        CC_SRC = (eflags & ~(CC_C | CC_O)) |
+            (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) | 
+            ((src >> (count - 1)) & CC_C);
+        CC_OP = CC_OP_EFLAGS;
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_shl, MEM_SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    target_long src;
+
+    count = T1 & SHIFT1_MASK;
+    if (count) {
+        src = (DATA_TYPE)T0 << (count - 1);
+        T0 = T0 << count;
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+        CC_SRC = src;
+        CC_DST = T0;
+        CC_OP = CC_OP_SHLB + SHIFT;
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_shr, MEM_SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    target_long src;
+
+    count = T1 & SHIFT1_MASK;
+    if (count) {
+        T0 &= DATA_MASK;
+        src = T0 >> (count - 1);
+        T0 = T0 >> count;
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+        CC_SRC = src;
+        CC_DST = T0;
+        CC_OP = CC_OP_SARB + SHIFT;
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_sar, MEM_SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    target_long src;
+
+    count = T1 & SHIFT1_MASK;
+    if (count) {
+        src = (DATA_STYPE)T0;
+        T0 = src >> count;
+        src = src >> (count - 1);
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+        CC_SRC = src;
+        CC_DST = T0;
+        CC_OP = CC_OP_SARB + SHIFT;
+    }
+    FORCE_RET();
+}
+
+#if DATA_BITS == 16
+/* XXX: overflow flag might be incorrect in some cases in shldw */
+void OPPROTO glue(glue(op_shld, MEM_SUFFIX), _T0_T1_im_cc)(void)
+{
+    int count;
+    unsigned int res, tmp;
+    count = PARAM1;
+    T1 &= 0xffff;
+    res = T1 | (T0 << 16);
+    tmp = res >> (32 - count);
+    res <<= count;
+    if (count > 16)
+        res |= T1 << (count - 16);
+    T0 = res >> 16;
+#ifdef MEM_WRITE
+    glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+    CC_SRC = tmp;
+    CC_DST = T0;
+}
+
+void OPPROTO glue(glue(op_shld, MEM_SUFFIX), _T0_T1_ECX_cc)(void)
+{
+    int count;
+    unsigned int res, tmp;
+    count = ECX & 0x1f;
+    if (count) {
+        T1 &= 0xffff;
+        res = T1 | (T0 << 16);
+        tmp = res >> (32 - count);
+        res <<= count;
+        if (count > 16)
+          res |= T1 << (count - 16);
+        T0 = res >> 16;
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+        CC_SRC = tmp;
+        CC_DST = T0;
+        CC_OP = CC_OP_SARB + SHIFT;
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_shrd, MEM_SUFFIX), _T0_T1_im_cc)(void)
+{
+    int count;
+    unsigned int res, tmp;
+
+    count = PARAM1;
+    res = (T0 & 0xffff) | (T1 << 16);
+    tmp = res >> (count - 1);
+    res >>= count;
+    if (count > 16)
+        res |= T1 << (32 - count);
+    T0 = res;
+#ifdef MEM_WRITE
+    glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+    CC_SRC = tmp;
+    CC_DST = T0;
+}
+
+
+void OPPROTO glue(glue(op_shrd, MEM_SUFFIX), _T0_T1_ECX_cc)(void)
+{
+    int count;
+    unsigned int res, tmp;
+
+    count = ECX & 0x1f;
+    if (count) {
+        res = (T0 & 0xffff) | (T1 << 16);
+        tmp = res >> (count - 1);
+        res >>= count;
+        if (count > 16)
+            res |= T1 << (32 - count);
+        T0 = res;
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+        CC_SRC = tmp;
+        CC_DST = T0;
+        CC_OP = CC_OP_SARB + SHIFT;
+    }
+    FORCE_RET();
+}
+#endif
+
+#if DATA_BITS >= 32
+void OPPROTO glue(glue(op_shld, MEM_SUFFIX), _T0_T1_im_cc)(void)
+{
+    int count;
+    target_long tmp;
+
+    count = PARAM1;
+    T0 &= DATA_MASK;
+    T1 &= DATA_MASK;
+    tmp = T0 << (count - 1);
+    T0 = (T0 << count) | (T1 >> (DATA_BITS - count));
+#ifdef MEM_WRITE
+    glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+    CC_SRC = tmp;
+    CC_DST = T0;
+}
+
+void OPPROTO glue(glue(op_shld, MEM_SUFFIX), _T0_T1_ECX_cc)(void)
+{
+    int count;
+    target_long tmp;
+
+    count = ECX & SHIFT1_MASK;
+    if (count) {
+        T0 &= DATA_MASK;
+        T1 &= DATA_MASK;
+        tmp = T0 << (count - 1);
+        T0 = (T0 << count) | (T1 >> (DATA_BITS - count));
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+        CC_SRC = tmp;
+        CC_DST = T0;
+        CC_OP = CC_OP_SHLB + SHIFT;
+    }
+    FORCE_RET();
+}
+
+void OPPROTO glue(glue(op_shrd, MEM_SUFFIX), _T0_T1_im_cc)(void)
+{
+    int count;
+    target_long tmp;
+
+    count = PARAM1;
+    T0 &= DATA_MASK;
+    T1 &= DATA_MASK;
+    tmp = T0 >> (count - 1);
+    T0 = (T0 >> count) | (T1 << (DATA_BITS - count));
+#ifdef MEM_WRITE
+    glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+    CC_SRC = tmp;
+    CC_DST = T0;
+}
+
+
+void OPPROTO glue(glue(op_shrd, MEM_SUFFIX), _T0_T1_ECX_cc)(void)
+{
+    int count;
+    target_long tmp;
+
+    count = ECX & SHIFT1_MASK;
+    if (count) {
+        T0 &= DATA_MASK;
+        T1 &= DATA_MASK;
+        tmp = T0 >> (count - 1);
+        T0 = (T0 >> count) | (T1 << (DATA_BITS - count));
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+        CC_SRC = tmp;
+        CC_DST = T0;
+        CC_OP = CC_OP_SARB + SHIFT;
+    }
+    FORCE_RET();
+}
+#endif
+
+/* carry add/sub (we only need to set CC_OP differently) */
+
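+/* adc/sbb reuse the plain add/sub flag machinery; the "+ cf * 4" presumably
+   steps from the ADDx/SUBx CC_OP group to the matching ADCx/SBBx group so
+   that the carry-in is remembered when the flags are evaluated lazily. */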
+void OPPROTO glue(glue(op_adc, MEM_SUFFIX), _T0_T1_cc)(void)
+{
+    int cf;
+    cf = cc_table[CC_OP].compute_c();
+    T0 = T0 + T1 + cf;
+#ifdef MEM_WRITE
+    glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+    CC_SRC = T1;
+    CC_DST = T0;
+    CC_OP = CC_OP_ADDB + SHIFT + cf * 4;
+}
+
+void OPPROTO glue(glue(op_sbb, MEM_SUFFIX), _T0_T1_cc)(void)
+{
+    int cf;
+    cf = cc_table[CC_OP].compute_c();
+    T0 = T0 - T1 - cf;
+#ifdef MEM_WRITE
+    glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+    CC_SRC = T1;
+    CC_DST = T0;
+    CC_OP = CC_OP_SUBB + SHIFT + cf * 4;
+}
+
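+/* cmpxchg: compare EAX with the destination (T0); if equal the source (T1)
+   is stored, otherwise EAX is loaded with the destination.  Flags come from
+   the EAX - dest subtraction; CC_OP is not set here. */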
+void OPPROTO glue(glue(op_cmpxchg, MEM_SUFFIX), _T0_T1_EAX_cc)(void)
+{
+    target_ulong src, dst;
+
+    src = T0;
+    dst = EAX - T0;
+    if ((DATA_TYPE)dst == 0) {
+        T0 = T1;
+#ifdef MEM_WRITE
+        glue(st, MEM_SUFFIX)(A0, T0);
+#endif
+    } else {
+        EAX = (EAX & ~DATA_MASK) | (T0 & DATA_MASK);
+    }
+    CC_SRC = src;
+    CC_DST = dst;
+    FORCE_RET();
+}
+
+#undef MEM_SUFFIX
+#undef MEM_WRITE
Index: /trunk/src/recompiler_new/target-i386/translate-copy.c
===================================================================
--- /trunk/src/recompiler_new/target-i386/translate-copy.c	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/translate-copy.c	(revision 13168)
@@ -0,0 +1,1332 @@
+/*
+ *  i386 on i386 translation
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#include "config.h"
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "disas.h"
+
+#ifdef USE_CODE_COPY
+
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/ucontext.h>
+
+extern char exec_loop;
+
+/* operand size */
+enum {
+    OT_BYTE = 0,
+    OT_WORD,
+    OT_LONG, 
+    OT_QUAD,
+};
+
+#define PREFIX_REPZ   0x01
+#define PREFIX_REPNZ  0x02
+#define PREFIX_LOCK   0x04
+#define PREFIX_DATA   0x08
+#define PREFIX_ADR    0x10
+
+typedef struct DisasContext {
+    /* current insn context */
+    int override; /* -1 if no override */
+    int prefix;
+    int aflag, dflag;
+    target_ulong pc; /* pc = eip + cs_base */
+    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
+                   static state change (stop translation) */
+    /* code output */
+    uint8_t *gen_code_ptr;
+    uint8_t *gen_code_start;
+    
+    /* current block context */
+    target_ulong cs_base; /* base of CS segment */
+    int pe;     /* protected mode */
+    int code32; /* 32 bit code segment */
+    int f_st;   /* currently unused */
+    int vm86;   /* vm86 mode */
+    int cpl;
+    int iopl;
+    int flags;
+    struct TranslationBlock *tb;
+} DisasContext;
+
+#define CPU_FIELD_OFFSET(field) offsetof(CPUState, field)
+
+#define CPU_SEG 0x64 /* fs override */
+
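+/* Raw code emitters: gb/gw/gl append an 8/16/32-bit little-endian value to
+   the generated code buffer. */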
+static inline void gb(DisasContext *s, uint32_t val)
+{
+    *s->gen_code_ptr++ = val;
+}
+
+static inline void gw(DisasContext *s, uint32_t val)
+{
+    *s->gen_code_ptr++ = val;
+    *s->gen_code_ptr++ = val >> 8;
+}
+
+static inline void gl(DisasContext *s, uint32_t val)
+{
+    *s->gen_code_ptr++ = val;
+    *s->gen_code_ptr++ = val >> 8;
+    *s->gen_code_ptr++ = val >> 16;
+    *s->gen_code_ptr++ = val >> 24;
+}
+
+static inline void gjmp(DisasContext *s, long val)
+{
+    gb(s, 0xe9); /* jmp */
+    gl(s, val - (long)(s->gen_code_ptr + 4));
+}
+
+static inline void gen_movl_addr_im(DisasContext *s, 
+                                    uint32_t addr, uint32_t val)
+{
+    gb(s, CPU_SEG); /* seg movl im, addr */
+    gb(s, 0xc7); 
+    gb(s, 0x05);
+    gl(s, addr);
+    gl(s, val);
+}
+
+static inline void gen_movw_addr_im(DisasContext *s, 
+                                    uint32_t addr, uint32_t val)
+{
+    gb(s, CPU_SEG); /* seg movw im, addr */
+    gb(s, 0x66); 
+    gb(s, 0xc7); 
+    gb(s, 0x05);
+    gl(s, addr);
+    gw(s, val);
+}
+
+
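+/* Emit a direct jump: the 32-bit displacement is left as 0 and its offset is
+   recorded in tb_jmp_offset so it can be patched when this TB is chained to
+   its successor.  Until then execution continues at the stub that stores the
+   target EIP and the TB pointer in the CPU state and returns to exec_loop. */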
+static void gen_jmp(DisasContext *s, uint32_t target_eip)
+{
+    TranslationBlock *tb = s->tb;
+
+    gb(s, 0xe9); /* jmp */
+    tb->tb_jmp_offset[0] = s->gen_code_ptr - s->gen_code_start;
+    gl(s, 0);
+
+    tb->tb_next_offset[0] = s->gen_code_ptr - s->gen_code_start;
+    gen_movl_addr_im(s, CPU_FIELD_OFFSET(eip), target_eip);
+    gen_movl_addr_im(s, CPU_FIELD_OFFSET(tmp0), (uint32_t)tb);
+    gjmp(s, (long)&exec_loop);
+
+    s->is_jmp = 1;
+}
+
+static void gen_jcc(DisasContext *s, int op,
+                    uint32_t target_eip, uint32_t next_eip)
+{
+    TranslationBlock *tb = s->tb;
+
+    gb(s, 0x0f); /* jcc */
+    gb(s, 0x80 + op);
+    tb->tb_jmp_offset[0] = s->gen_code_ptr - s->gen_code_start;
+    gl(s, 0);
+    gb(s, 0xe9); /* jmp */
+    tb->tb_jmp_offset[1] = s->gen_code_ptr - s->gen_code_start;
+    gl(s, 0);
+    
+    tb->tb_next_offset[0] = s->gen_code_ptr - s->gen_code_start;
+    gen_movl_addr_im(s, CPU_FIELD_OFFSET(eip), target_eip);
+    gen_movl_addr_im(s, CPU_FIELD_OFFSET(tmp0), (uint32_t)tb);
+    gjmp(s, (long)&exec_loop);
+
+    tb->tb_next_offset[1] = s->gen_code_ptr - s->gen_code_start;
+    gen_movl_addr_im(s, CPU_FIELD_OFFSET(eip), next_eip);
+    gen_movl_addr_im(s, CPU_FIELD_OFFSET(tmp0), (uint32_t)tb | 1);
+    gjmp(s, (long)&exec_loop);
+
+    s->is_jmp = 1;
+}
+
+static void gen_eob(DisasContext *s)
+{
+    gen_movl_addr_im(s, CPU_FIELD_OFFSET(tmp0), 0);
+    gjmp(s, (long)&exec_loop);
+
+    s->is_jmp = 1;
+}
+
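+/* Decode-only: advance s->pc past the ModRM/SIB/displacement bytes without
+   generating any code, since the original instruction bytes are copied
+   verbatim afterwards. */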
+static inline void gen_lea_modrm(DisasContext *s, int modrm)
+{
+    int havesib;
+    int base, disp;
+    int index;
+    int scale;
+    int mod, rm, code;
+
+    mod = (modrm >> 6) & 3;
+    rm = modrm & 7;
+
+    if (s->aflag) {
+
+        havesib = 0;
+        base = rm;
+        index = 0;
+        scale = 0;
+        
+        if (base == 4) {
+            havesib = 1;
+            code = ldub_code(s->pc++);
+            scale = (code >> 6) & 3;
+            index = (code >> 3) & 7;
+            base = code & 7;
+        }
+
+        switch (mod) {
+        case 0:
+            if (base == 5) {
+                base = -1;
+                disp = ldl_code(s->pc);
+                s->pc += 4;
+            } else {
+                disp = 0;
+            }
+            break;
+        case 1:
+            disp = (int8_t)ldub_code(s->pc++);
+            break;
+        default:
+        case 2:
+            disp = ldl_code(s->pc);
+            s->pc += 4;
+            break;
+        }
+        
+    } else {
+        switch (mod) {
+        case 0:
+            if (rm == 6) {
+                disp = lduw_code(s->pc);
+                s->pc += 2;
+            } else {
+                disp = 0;
+            }
+            break;
+        case 1:
+            disp = (int8_t)ldub_code(s->pc++);
+            break;
+        default:
+        case 2:
+            disp = lduw_code(s->pc);
+            s->pc += 2;
+            break;
+        }
+    }
+}
+
+static inline void parse_modrm(DisasContext *s, int modrm)
+{
+    if ((modrm & 0xc0) != 0xc0)
+        gen_lea_modrm(s, modrm);        
+}
+
+static inline uint32_t insn_get(DisasContext *s, int ot)
+{
+    uint32_t ret;
+
+    switch(ot) {
+    case OT_BYTE:
+        ret = ldub_code(s->pc);
+        s->pc++;
+        break;
+    case OT_WORD:
+        ret = lduw_code(s->pc);
+        s->pc += 2;
+        break;
+    default:
+    case OT_LONG:
+        ret = ldl_code(s->pc);
+        s->pc += 4;
+        break;
+    }
+    return ret;
+}
+
+/* convert one instruction. s->is_jmp is set if the translation must
+   be stopped. Returns 0 on success, -1 if the insn cannot be copied
+   and the slower code generator must be used. */
+static int disas_insn(DisasContext *s)
+{
+    target_ulong pc_start, pc_tmp, pc_start_insn;
+    int b, prefixes, aflag, dflag, next_eip, val;
+    int ot;
+    int modrm, mod, op, rm;
+
+    pc_start = s->pc;
+    prefixes = 0;
+    aflag = s->code32;
+    dflag = s->code32;
+    s->override = -1;
+ next_byte:
+    b = ldub_code(s->pc);
+    s->pc++;
+    /* check prefixes */
+    switch (b) {
+    case 0xf3:
+        prefixes |= PREFIX_REPZ;
+        goto next_byte;
+    case 0xf2:
+        prefixes |= PREFIX_REPNZ;
+        goto next_byte;
+    case 0xf0:
+        prefixes |= PREFIX_LOCK;
+        goto next_byte;
+    case 0x2e:
+        s->override = R_CS;
+        goto next_byte;
+    case 0x36:
+        s->override = R_SS;
+        goto next_byte;
+    case 0x3e:
+        s->override = R_DS;
+        goto next_byte;
+    case 0x26:
+        s->override = R_ES;
+        goto next_byte;
+    case 0x64:
+        s->override = R_FS;
+        goto next_byte;
+    case 0x65:
+        s->override = R_GS;
+        goto next_byte;
+    case 0x66:
+        prefixes |= PREFIX_DATA;
+        goto next_byte;
+    case 0x67:
+        prefixes |= PREFIX_ADR;
+        goto next_byte;
+    }
+
+    if (prefixes & PREFIX_DATA)
+        dflag ^= 1;
+    if (prefixes & PREFIX_ADR)
+        aflag ^= 1;
+
+    s->prefix = prefixes;
+    s->aflag = aflag;
+    s->dflag = dflag;
+
+    /* lock generation */
+    if (prefixes & PREFIX_LOCK)
+        goto unsupported_op;
+    if (s->override == R_FS || s->override == R_GS || s->override == R_CS)
+        goto unsupported_op;
+
+    pc_start_insn = s->pc - 1;
+    /* now check op code */
+ reswitch:
+    switch(b) {
+    case 0x0f:
+        /**************************/
+        /* extended op code */
+        b = ldub_code(s->pc++) | 0x100;
+        goto reswitch;
+        
+        /**************************/
+        /* arith & logic */
+    case 0x00 ... 0x05:
+    case 0x08 ... 0x0d:
+    case 0x10 ... 0x15:
+    case 0x18 ... 0x1d:
+    case 0x20 ... 0x25:
+    case 0x28 ... 0x2d:
+    case 0x30 ... 0x35:
+    case 0x38 ... 0x3d:
+        {
+            int f;
+            f = (b >> 1) & 3;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag ? OT_LONG : OT_WORD;
+            
+            switch(f) {
+            case 0: /* OP Ev, Gv */
+                modrm = ldub_code(s->pc++);
+                parse_modrm(s, modrm);
+                break;
+            case 1: /* OP Gv, Ev */
+                modrm = ldub_code(s->pc++);
+                parse_modrm(s, modrm);
+                break;
+            case 2: /* OP A, Iv */
+                insn_get(s, ot);
+                break;
+            }
+        }
+        break;
+
+    case 0x80: /* GRP1 */
+    case 0x81:
+    case 0x82:
+    case 0x83:
+        {
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag ? OT_LONG : OT_WORD;
+            
+            modrm = ldub_code(s->pc++);
+            parse_modrm(s, modrm);
+
+            switch(b) {
+            default:
+            case 0x80:
+            case 0x81:
+            case 0x82:
+                insn_get(s, ot);
+                break;
+            case 0x83:
+                insn_get(s, OT_BYTE);
+                break;
+            }
+        }
+        break;
+
+        /**************************/
+        /* inc, dec, and other misc arith */
+    case 0x40 ... 0x47: /* inc Gv */
+        break;
+    case 0x48 ... 0x4f: /* dec Gv */
+        break;
+    case 0xf6: /* GRP3 */
+    case 0xf7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+
+        modrm = ldub_code(s->pc++);
+        op = (modrm >> 3) & 7;
+        parse_modrm(s, modrm);
+
+        switch(op) {
+        case 0: /* test */
+            insn_get(s, ot);
+            break;
+        case 2: /* not */
+            break;
+        case 3: /* neg */
+            break;
+        case 4: /* mul */
+            break;
+        case 5: /* imul */
+            break;
+        case 6: /* div */
+            break;
+        case 7: /* idiv */
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+
+    case 0xfe: /* GRP4 */
+    case 0xff: /* GRP5 */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        op = (modrm >> 3) & 7;
+        if (op >= 2 && b == 0xfe) {
+            goto illegal_op;
+        }
+        pc_tmp = s->pc;
+        parse_modrm(s, modrm);
+
+        switch(op) {
+        case 0: /* inc Ev */
+            break;
+        case 1: /* dec Ev */
+            break;
+        case 2: /* call Ev */
+            /* XXX: optimize and handle MEM exceptions specifically
+               fs movl %eax, regs[0] 
+               movl Ev, %eax 
+               pushl next_eip
+               fs movl %eax, eip
+            */
+            goto unsupported_op;
+        case 3: /* lcall Ev */
+            goto unsupported_op;
+        case 4: /* jmp Ev */
+            /* XXX: optimize and handle MEM exceptions specifically
+               fs movl %eax, regs[0] 
+               movl Ev, %eax 
+               fs movl %eax, eip
+            */
+            goto unsupported_op;
+        case 5: /* ljmp Ev */
+            goto unsupported_op;
+        case 6: /* push Ev */
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+    case 0xa8: /* test eAX, Iv */
+    case 0xa9:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        insn_get(s, ot);
+        break;
+        
+    case 0x98: /* CWDE/CBW */
+        break;
+    case 0x99: /* CDQ/CWD */
+        break;
+    case 0x1af: /* imul Gv, Ev */
+    case 0x69: /* imul Gv, Ev, I */
+    case 0x6b:
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub_code(s->pc++);
+        parse_modrm(s, modrm);
+        if (b == 0x69) {
+            insn_get(s, ot);
+        } else if (b == 0x6b) {
+            insn_get(s, OT_BYTE);
+        } else {
+        }
+        break;
+
+    case 0x84: /* test Ev, Gv */
+    case 0x85: 
+        
+    case 0x1c0:
+    case 0x1c1: /* xadd Ev, Gv */
+
+    case 0x1b0:
+    case 0x1b1: /* cmpxchg Ev, Gv */
+
+    case 0x8f: /* pop Ev */
+
+    case 0x88:
+    case 0x89: /* mov Gv, Ev */
+
+    case 0x8a:
+    case 0x8b: /* mov Ev, Gv */
+
+    case 0x1b6: /* movzbS Gv, Eb */
+    case 0x1b7: /* movzwS Gv, Eb */
+    case 0x1be: /* movsbS Gv, Eb */
+    case 0x1bf: /* movswS Gv, Eb */
+
+    case 0x86:
+    case 0x87: /* xchg Ev, Gv */
+
+    case 0xd0:
+    case 0xd1: /* shift Ev,1 */
+
+    case 0xd2:
+    case 0xd3: /* shift Ev,cl */
+
+    case 0x1a5: /* shld cl */
+    case 0x1ad: /* shrd cl */
+
+    case 0x190 ... 0x19f: /* setcc Gv */
+
+    /* XXX: emulate cmov if not available ? */
+    case 0x140 ... 0x14f: /* cmov Gv, Ev */
+
+    case 0x1a3: /* bt Gv, Ev */
+    case 0x1ab: /* bts */
+    case 0x1b3: /* btr */
+    case 0x1bb: /* btc */
+
+    case 0x1bc: /* bsf */
+    case 0x1bd: /* bsr */
+
+        modrm = ldub_code(s->pc++);
+        parse_modrm(s, modrm);
+        break;
+
+    case 0x1c7: /* cmpxchg8b */
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        parse_modrm(s, modrm);
+        break;
+        
+        /**************************/
+        /* push/pop */
+    case 0x50 ... 0x57: /* push */
+    case 0x58 ... 0x5f: /* pop */
+    case 0x60: /* pusha */
+    case 0x61: /* popa */
+        break;
+
+    case 0x68: /* push Iv */
+    case 0x6a:
+        ot = dflag ? OT_LONG : OT_WORD;
+        if (b == 0x68)
+            insn_get(s, ot);
+        else
+            insn_get(s, OT_BYTE);
+        break;
+    case 0xc8: /* enter */
+        lduw_code(s->pc);
+        s->pc += 2;
+        ldub_code(s->pc++);
+        break;
+    case 0xc9: /* leave */
+        break;
+
+    case 0x06: /* push es */
+    case 0x0e: /* push cs */
+    case 0x16: /* push ss */
+    case 0x1e: /* push ds */
+        /* XXX: optimize:
+         push segs[n].selector
+        */
+        goto unsupported_op;
+    case 0x1a0: /* push fs */
+    case 0x1a8: /* push gs */
+        goto unsupported_op;
+    case 0x07: /* pop es */
+    case 0x17: /* pop ss */
+    case 0x1f: /* pop ds */
+        goto unsupported_op;
+    case 0x1a1: /* pop fs */
+    case 0x1a9: /* pop gs */
+        goto unsupported_op;
+    case 0x8e: /* mov seg, Gv */
+        /* XXX: optimize:
+           fs movl r, regs[]
+           movl segs[].selector, r
+           mov r, Gv
+           fs movl regs[], r
+        */
+        goto unsupported_op;
+    case 0x8c: /* mov Gv, seg */
+        goto unsupported_op;
+    case 0xc4: /* les Gv */
+        op = R_ES;
+        goto do_lxx;
+    case 0xc5: /* lds Gv */
+        op = R_DS;
+        goto do_lxx;
+    case 0x1b2: /* lss Gv */
+        op = R_SS;
+        goto do_lxx;
+    case 0x1b4: /* lfs Gv */
+        op = R_FS;
+        goto do_lxx;
+    case 0x1b5: /* lgs Gv */
+        op = R_GS;
+    do_lxx:
+        goto unsupported_op;
+        /************************/
+        /* floats */
+    case 0xd8 ... 0xdf: 
+#if 1
+        /* currently not stable enough */
+        goto unsupported_op;
+#else
+        if (s->flags & (HF_EM_MASK | HF_TS_MASK))
+            goto unsupported_op;
+#endif
+#if 0
+        /* for testing FPU context switch */
+        {
+            static int count;
+            count = (count + 1) % 3;
+            if (count != 0)
+                goto unsupported_op;
+        }
+#endif
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        if (mod != 3) {
+            /* memory op */
+            parse_modrm(s, modrm);
+            switch(op) {
+            case 0x00 ... 0x07: /* fxxxs */
+            case 0x10 ... 0x17: /* fixxxl */
+            case 0x20 ... 0x27: /* fxxxl */
+            case 0x30 ... 0x37: /* fixxx */
+                break;
+            case 0x08: /* flds */
+            case 0x0a: /* fsts */
+            case 0x0b: /* fstps */
+            case 0x18: /* fildl */
+            case 0x1a: /* fistl */
+            case 0x1b: /* fistpl */
+            case 0x28: /* fldl */
+            case 0x2a: /* fstl */
+            case 0x2b: /* fstpl */
+            case 0x38: /* filds */
+            case 0x3a: /* fists */
+            case 0x3b: /* fistps */
+            case 0x0c: /* fldenv mem */
+            case 0x0d: /* fldcw mem */
+            case 0x0e: /* fnstenv mem */
+            case 0x0f: /* fnstcw mem */
+            case 0x1d: /* fldt mem */
+            case 0x1f: /* fstpt mem */
+            case 0x2c: /* frstor mem */
+            case 0x2e: /* fnsave mem */
+            case 0x2f: /* fnstsw mem */
+            case 0x3c: /* fbld */
+            case 0x3e: /* fbstp */
+            case 0x3d: /* fildll */
+            case 0x3f: /* fistpll */
+                break;
+            default:
+                goto illegal_op;
+            }
+        } else {
+            /* register float ops */
+            switch(op) {
+            case 0x08: /* fld sti */
+            case 0x09: /* fxchg sti */
+                break;
+            case 0x0a: /* grp d9/2 */
+                switch(rm) {
+                case 0: /* fnop */
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x0c: /* grp d9/4 */
+                switch(rm) {
+                case 0: /* fchs */
+                case 1: /* fabs */
+                case 4: /* ftst */
+                case 5: /* fxam */
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x0d: /* grp d9/5 */
+                switch(rm) {
+                case 0:
+                case 1:
+                case 2:
+                case 3:
+                case 4:
+                case 5:
+                case 6:
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x0e: /* grp d9/6 */
+                break;
+            case 0x0f: /* grp d9/7 */
+                break;
+            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
+            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
+            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
+                break;
+            case 0x02: /* fcom */
+                break;
+            case 0x03: /* fcomp */
+                break;
+            case 0x15: /* da/5 */
+                switch(rm) {
+                case 1: /* fucompp */
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x1c:
+                switch(rm) {
+                case 0: /* feni (287 only, just do nop here) */
+                case 1: /* fdisi (287 only, just do nop here) */
+                    goto unsupported_op;
+                case 2: /* fclex */
+                case 3: /* fninit */
+                case 4: /* fsetpm (287 only, just do nop here) */
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x1d: /* fucomi */
+                break;
+            case 0x1e: /* fcomi */
+                break;
+            case 0x28: /* ffree sti */
+                break;
+            case 0x2a: /* fst sti */
+                break;
+            case 0x2b: /* fstp sti */
+                break;
+            case 0x2c: /* fucom st(i) */
+                break;
+            case 0x2d: /* fucomp st(i) */
+                break;
+            case 0x33: /* de/3 */
+                switch(rm) {
+                case 1: /* fcompp */
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x3c: /* df/4 */
+                switch(rm) {
+                case 0:
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x3d: /* fucomip */
+                break;
+            case 0x3e: /* fcomip */
+                break;
+            case 0x10 ... 0x13: /* fcmovxx */
+            case 0x18 ... 0x1b:
+                break;
+            default:
+                goto illegal_op;
+            }
+        }
+        s->tb->cflags |= CF_TB_FP_USED;
+        break;
+
+        /**************************/
+        /* mov */
+    case 0xc6:
+    case 0xc7: /* mov Ev, Iv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub_code(s->pc++);
+        parse_modrm(s, modrm);
+        insn_get(s, ot);
+        break;
+
+    case 0x8d: /* lea */
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        parse_modrm(s, modrm);
+        break;
+        
+    case 0xa0: /* mov EAX, Ov */
+    case 0xa1:
+    case 0xa2: /* mov Ov, EAX */
+    case 0xa3:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (s->aflag)
+            insn_get(s, OT_LONG);
+        else
+            insn_get(s, OT_WORD);
+        break;
+    case 0xd7: /* xlat */
+        break;
+    case 0xb0 ... 0xb7: /* mov R, Ib */
+        insn_get(s, OT_BYTE);
+        break;
+    case 0xb8 ... 0xbf: /* mov R, Iv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        insn_get(s, ot);
+        break;
+
+    case 0x91 ... 0x97: /* xchg R, EAX */
+        break;
+
+        /************************/
+        /* shifts */
+    case 0xc0:
+    case 0xc1: /* shift Ev,imm */
+
+    case 0x1a4: /* shld imm */
+    case 0x1ac: /* shrd imm */
+        modrm = ldub_code(s->pc++);
+        parse_modrm(s, modrm);
+        ldub_code(s->pc++);
+        break;
+        
+        /************************/
+        /* string ops */
+
+    case 0xa4: /* movsS */
+    case 0xa5:
+        break;
+        
+    case 0xaa: /* stosS */
+    case 0xab:
+        break;
+
+    case 0xac: /* lodsS */
+    case 0xad:
+        break;
+
+    case 0xae: /* scasS */
+    case 0xaf:
+        break;
+
+    case 0xa6: /* cmpsS */
+    case 0xa7:
+        break;
+
+    case 0x6c: /* insS */
+    case 0x6d:
+        goto unsupported_op;
+
+    case 0x6e: /* outsS */
+    case 0x6f:
+        goto unsupported_op;
+
+        /************************/
+        /* port I/O */
+    case 0xe4:
+    case 0xe5:
+        goto unsupported_op;
+
+    case 0xe6:
+    case 0xe7:
+        goto unsupported_op;
+
+    case 0xec:
+    case 0xed:
+        goto unsupported_op;
+
+    case 0xee:
+    case 0xef:
+        goto unsupported_op;
+
+        /************************/
+        /* control */
+#if 0
+    case 0xc2: /* ret im */
+        val = ldsw_code(s->pc);
+        s->pc += 2;
+        gen_pop_T0(s);
+        gen_stack_update(s, val + (2 << s->dflag));
+        if (s->dflag == 0)
+            gen_op_andl_T0_ffff();
+        gen_op_jmp_T0();
+        gen_eob(s);
+        break;
+#endif
+
+    case 0xc3: /* ret */
+        gb(s, CPU_SEG);
+        if (!s->dflag)  
+            gb(s, 0x66); /* d16 */
+        gb(s, 0x8f); /* pop addr */
+        gb(s, 0x05);
+        gl(s, CPU_FIELD_OFFSET(eip));
+        if (!s->dflag) {
+            /* reset high bits of EIP */
+            gen_movw_addr_im(s, CPU_FIELD_OFFSET(eip) + 2, 0);
+        }
+        gen_eob(s);
+        goto no_copy;
+    case 0xca: /* lret im */
+    case 0xcb: /* lret */
+    case 0xcf: /* iret */
+    case 0x9a: /* lcall im */
+    case 0xea: /* ljmp im */
+        goto unsupported_op;
+
+    case 0xe8: /* call im */
+        ot = dflag ? OT_LONG : OT_WORD;
+        val = insn_get(s, ot);
+        next_eip = s->pc - s->cs_base;
+        val += next_eip;
+        if (s->dflag) {
+            gb(s, 0x68); /* pushl imm */
+            gl(s, next_eip);
+        } else {
+            gb(s, 0x66); /* pushw imm */
+            gb(s, 0x68);
+            gw(s, next_eip);
+            val &= 0xffff;
+        }
+        gen_jmp(s, val);
+        goto no_copy;
+    case 0xe9: /* jmp */
+        ot = dflag ? OT_LONG : OT_WORD;
+        val = insn_get(s, ot);
+        val += s->pc - s->cs_base;
+        if (s->dflag == 0)
+            val = val & 0xffff;
+        gen_jmp(s, val);
+        goto no_copy;
+    case 0xeb: /* jmp Jb */
+        val = (int8_t)insn_get(s, OT_BYTE);
+        val += s->pc - s->cs_base;
+        if (s->dflag == 0)
+            val = val & 0xffff;
+        gen_jmp(s, val);
+        goto no_copy;
+    case 0x70 ... 0x7f: /* jcc Jb */
+        val = (int8_t)insn_get(s, OT_BYTE);
+        goto do_jcc;
+    case 0x180 ... 0x18f: /* jcc Jv */
+        if (dflag) {
+            val = insn_get(s, OT_LONG);
+        } else {
+            val = (int16_t)insn_get(s, OT_WORD); 
+        }
+    do_jcc:
+        next_eip = s->pc - s->cs_base;
+        val += next_eip;
+        if (s->dflag == 0)
+            val &= 0xffff;
+        gen_jcc(s, b & 0xf, val, next_eip);
+        goto no_copy;
+
+        /************************/
+        /* flags */
+    case 0x9c: /* pushf */
+        /* XXX: put specific code ? */
+        goto unsupported_op;
+    case 0x9d: /* popf */
+        goto unsupported_op;
+
+    case 0x9e: /* sahf */
+    case 0x9f: /* lahf */
+    case 0xf5: /* cmc */
+    case 0xf8: /* clc */
+    case 0xf9: /* stc */
+    case 0xfc: /* cld */
+    case 0xfd: /* std */
+        break;
+
+        /************************/
+        /* bit operations */
+    case 0x1ba: /* bt/bts/btr/btc Gv, im */
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub_code(s->pc++);
+        op = (modrm >> 3) & 7;
+        parse_modrm(s, modrm);
+        /* load shift */
+        ldub_code(s->pc++);
+        if (op < 4)
+            goto illegal_op;
+        break;
+        /************************/
+        /* bcd */
+    case 0x27: /* daa */
+        break;
+    case 0x2f: /* das */
+        break;
+    case 0x37: /* aaa */
+        break;
+    case 0x3f: /* aas */
+        break;
+    case 0xd4: /* aam */
+        ldub_code(s->pc++);
+        break;
+    case 0xd5: /* aad */
+        ldub_code(s->pc++);
+        break;
+        /************************/
+        /* misc */
+    case 0x90: /* nop */
+        break;
+    case 0x9b: /* fwait */
+        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == 
+            (HF_MP_MASK | HF_TS_MASK)) {
+            goto unsupported_op;
+        }
+        break;
+    case 0xcc: /* int3 */
+        goto unsupported_op;
+    case 0xcd: /* int N */
+        goto unsupported_op;
+    case 0xce: /* into */
+        goto unsupported_op;
+    case 0xf1: /* icebp (undocumented, exits to external debugger) */
+        goto unsupported_op;
+    case 0xfa: /* cli */
+        goto unsupported_op;
+    case 0xfb: /* sti */
+        goto unsupported_op;
+    case 0x62: /* bound */
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        parse_modrm(s, modrm);
+        break;
+    case 0x1c8 ... 0x1cf: /* bswap reg */
+        break;
+    case 0xd6: /* salc */
+        break;
+    case 0xe0: /* loopnz */
+    case 0xe1: /* loopz */
+    case 0xe2: /* loop */
+    case 0xe3: /* jecxz */
+        goto unsupported_op;
+
+    case 0x130: /* wrmsr */
+    case 0x132: /* rdmsr */
+        goto unsupported_op;
+    case 0x131: /* rdtsc */
+        goto unsupported_op;
+    case 0x1a2: /* cpuid */
+        goto unsupported_op;
+    case 0xf4: /* hlt */
+        goto unsupported_op;
+    case 0x100:
+        goto unsupported_op;
+    case 0x101:
+        goto unsupported_op;
+    case 0x108: /* invd */
+    case 0x109: /* wbinvd */
+        goto unsupported_op;
+    case 0x63: /* arpl */
+        goto unsupported_op;
+    case 0x102: /* lar */
+    case 0x103: /* lsl */
+        goto unsupported_op;
+    case 0x118:
+        goto unsupported_op;
+    case 0x120: /* mov reg, crN */
+    case 0x122: /* mov crN, reg */
+        goto unsupported_op;
+    case 0x121: /* mov reg, drN */
+    case 0x123: /* mov drN, reg */
+        goto unsupported_op;
+    case 0x106: /* clts */
+        goto unsupported_op;
+    default:
+        goto illegal_op;
+    }
+
+    /* just copy the code */
+
+    /* no override yet */
+    if (!s->dflag)
+        gb(s, 0x66);
+    if (!s->aflag)
+        gb(s, 0x67);
+    if (prefixes & PREFIX_REPZ)
+        gb(s, 0xf3);
+    else if (prefixes & PREFIX_REPNZ)
+        gb(s, 0xf2);
+    {
+        int len, i;
+        len = s->pc - pc_start_insn;
+        for(i = 0; i < len; i++) {
+            *s->gen_code_ptr++ = ldub_code(pc_start_insn + i);
+        }
+    }
+ no_copy:
+    return 0;
+ illegal_op:
+ unsupported_op:
+    /* falling back to the slower code generator is necessary */
+    s->pc = pc_start;
+    return -1;
+}
+
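+/* Per-TB output buffer limit and worst-case size of a single copied insn;
+   translation is stopped early enough that the buffer cannot overflow. */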
+#define GEN_CODE_MAX_SIZE      8192
+#define GEN_CODE_MAX_INSN_SIZE 512
+
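+/* Two modes: with search_pc == 0 code is emitted into the TB; with
+   search_pc == 1 the TB is re-translated into a scratch buffer and the
+   guest EIP corresponding to the host code pointer tc_ptr is recovered. */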
+static inline int gen_intermediate_code_internal(CPUState *env,
+                                                 TranslationBlock *tb, 
+                                                 uint8_t *gen_code_ptr,
+                                                 int *gen_code_size_ptr,
+                                                 int search_pc,
+                                                 uint8_t *tc_ptr)
+{
+    DisasContext dc1, *dc = &dc1;
+    target_ulong pc_insn, pc_start, cs_base;
+    uint8_t *gen_code_end;
+    int flags, ret;
+
+    if (env->nb_breakpoints > 0 ||
+        env->singlestep_enabled)
+        return -1;
+    flags = tb->flags;
+    if (flags & (HF_TF_MASK | HF_ADDSEG_MASK | 
+                 HF_SOFTMMU_MASK | HF_INHIBIT_IRQ_MASK))
+        return -1;
+    if (!(flags & HF_SS32_MASK))
+        return -1;
+    if (tb->cflags & CF_SINGLE_INSN)
+        return -1;
+    gen_code_end = gen_code_ptr + 
+        GEN_CODE_MAX_SIZE - GEN_CODE_MAX_INSN_SIZE;
+    dc->gen_code_ptr = gen_code_ptr;
+    dc->gen_code_start = gen_code_ptr;
+
+    /* generate intermediate code */
+    pc_start = tb->pc;
+    cs_base = tb->cs_base;
+    dc->pc = pc_start;
+    dc->cs_base = cs_base;
+    dc->pe = (flags >> HF_PE_SHIFT) & 1;
+    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
+    dc->f_st = 0;
+    dc->vm86 = (flags >> VM_SHIFT) & 1;
+    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
+    dc->iopl = (flags >> IOPL_SHIFT) & 3;
+    dc->tb = tb;
+    dc->flags = flags;
+
+    dc->is_jmp = 0;
+
+    for(;;) {
+        pc_insn = dc->pc;
+        ret = disas_insn(dc);
+        if (ret < 0) {
+            /* unsupported insn */
+            if (dc->pc == pc_start) {
+                /* if first instruction, signal that no copying was done */
+                return -1;
+            } else {
+                gen_jmp(dc, dc->pc - dc->cs_base);
+                dc->is_jmp = 1;
+            }
+        }
+        if (search_pc) {
+            /* search pc mode */
+            if (tc_ptr < dc->gen_code_ptr) {
+                env->eip = pc_insn - cs_base;
+                return 0;
+            }
+        }
+        /* stop translation if indicated */
+        if (dc->is_jmp)
+            break;
+        /* if too long translation, stop generation */
+        if (dc->gen_code_ptr >= gen_code_end ||
+            (dc->pc - pc_start) >= (TARGET_PAGE_SIZE - 32)) {
+            gen_jmp(dc, dc->pc - dc->cs_base);
+            break;
+        }
+    }
+    
+#ifdef DEBUG_DISAS
+    if (loglevel & CPU_LOG_TB_IN_ASM) {
+        fprintf(logfile, "----------------\n");
+        fprintf(logfile, "IN: COPY: %s fpu=%d\n", 
+                lookup_symbol(pc_start),
+                tb->cflags & CF_TB_FP_USED ? 1 : 0);
+	target_disas(logfile, pc_start, dc->pc - pc_start, !dc->code32);
+        fprintf(logfile, "\n");
+    }
+#endif
+
+    if (!search_pc) {
+        *gen_code_size_ptr = dc->gen_code_ptr - dc->gen_code_start;
+        tb->size = dc->pc - pc_start;
+        tb->cflags |= CF_CODE_COPY;
+        return 0;
+    } else {
+        return -1;
+    }
+}
+
+/* generate code by just copying data. Returns -1 if no code could be
+   generated, 0 if code was generated. */
+int cpu_gen_code_copy(CPUState *env, TranslationBlock *tb,
+                      int max_code_size, int *gen_code_size_ptr)
+{
+    /* generate machine code */
+    tb->tb_next_offset[0] = 0xffff;
+    tb->tb_next_offset[1] = 0xffff;
+#ifdef USE_DIRECT_JUMP
+    /* the following two entries are optional (only used for string ops) */
+    tb->tb_jmp_offset[2] = 0xffff;
+    tb->tb_jmp_offset[3] = 0xffff;
+#endif
+    return gen_intermediate_code_internal(env, tb, 
+                                          tb->tc_ptr, gen_code_size_ptr,
+                                          0, NULL);
+}
+
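+/* Scratch output buffer used when a TB is re-translated only to map a
+   faulting host PC back to a guest EIP. */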
+static uint8_t dummy_gen_code_buf[GEN_CODE_MAX_SIZE];
+
+int cpu_restore_state_copy(TranslationBlock *tb, 
+                           CPUState *env, unsigned long searched_pc,
+                           void *puc)
+{
+    struct ucontext *uc = puc;
+    int ret, eflags;
+
+    /* find opc index corresponding to search_pc */
+    if (searched_pc < (unsigned long)tb->tc_ptr)
+        return -1;
+    searched_pc = searched_pc - (long)tb->tc_ptr + (long)dummy_gen_code_buf;
+    ret = gen_intermediate_code_internal(env, tb, 
+                                         dummy_gen_code_buf, NULL,
+                                         1, (uint8_t *)searched_pc);
+    if (ret < 0)
+        return ret;
+    /* restore all the CPU state from the CPU context provided by the
+       signal. The FPU context stays in the host CPU. */
+    
+    env->regs[R_EAX] = uc->uc_mcontext.gregs[REG_EAX];
+    env->regs[R_ECX] = uc->uc_mcontext.gregs[REG_ECX];
+    env->regs[R_EDX] = uc->uc_mcontext.gregs[REG_EDX];
+    env->regs[R_EBX] = uc->uc_mcontext.gregs[REG_EBX];
+    env->regs[R_ESP] = uc->uc_mcontext.gregs[REG_ESP];
+    env->regs[R_EBP] = uc->uc_mcontext.gregs[REG_EBP];
+    env->regs[R_ESI] = uc->uc_mcontext.gregs[REG_ESI];
+    env->regs[R_EDI] = uc->uc_mcontext.gregs[REG_EDI];
+    eflags = uc->uc_mcontext.gregs[REG_EFL];
+    env->df = 1 - (2 * ((eflags >> 10) & 1));
+    env->cc_src = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    env->cc_op = CC_OP_EFLAGS;
+    return 0;
+}
+
+#endif /* USE_CODE_COPY */
Index: /trunk/src/recompiler_new/target-i386/translate.c
===================================================================
--- /trunk/src/recompiler_new/target-i386/translate.c	(revision 13168)
+++ /trunk/src/recompiler_new/target-i386/translate.c	(revision 13168)
@@ -0,0 +1,6944 @@
+/*
+ *  i386 translation
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#ifndef VBOX
+#include <signal.h>
+#include <assert.h>
+#endif /* !VBOX */
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "disas.h"
+
+/* XXX: move that elsewhere */
+static uint16_t *gen_opc_ptr;
+static uint32_t *gen_opparam_ptr;
+
+#define PREFIX_REPZ   0x01
+#define PREFIX_REPNZ  0x02
+#define PREFIX_LOCK   0x04
+#define PREFIX_DATA   0x08
+#define PREFIX_ADR    0x10
+
+#ifdef TARGET_X86_64
+#define X86_64_ONLY(x) x
+#define X86_64_DEF(x...) x
+#define CODE64(s) ((s)->code64)
+#define REX_X(s) ((s)->rex_x)
+#define REX_B(s) ((s)->rex_b)
+/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */
+#if 1
+#define BUGGY_64(x) NULL
+#endif
+#else
+#define X86_64_ONLY(x) NULL
+#define X86_64_DEF(x...)
+#define CODE64(s) 0
+#define REX_X(s) 0
+#define REX_B(s) 0
+#endif
+
+#ifdef TARGET_X86_64
+static int x86_64_hregs;
+#endif
+
+#ifdef USE_DIRECT_JUMP
+#define TBPARAM(x)
+#else
+#define TBPARAM(x) (long)(x)
+#endif
+
+#ifdef VBOX
+/* Special/override code readers to hide patched code. */
+
+uint8_t ldub_code_raw(target_ulong pc)
+{
+    uint8_t b;
+
+    if (!remR3GetOpcode(cpu_single_env, pc, &b))
+        b = ldub_code(pc);
+    return b;
+}
+#define ldub_code(a) ldub_code_raw(a)
+
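+/* The 16- and 32-bit readers are built from byte reads so that every code
+   byte passes through the patch-hiding filter above. */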
+uint16_t lduw_code_raw(target_ulong pc)
+{
+    return (ldub_code(pc+1) << 8) | ldub_code(pc);
+}
+#define lduw_code(a) lduw_code_raw(a)
+
+
+uint32_t ldl_code_raw(target_ulong pc)
+{
+    return (ldub_code(pc+3) << 24) | (ldub_code(pc+2) << 16) | (ldub_code(pc+1) << 8) | ldub_code(pc);
+}
+#define ldl_code(a) ldl_code_raw(a)
+
+#endif /* VBOX */
+
+
+typedef struct DisasContext {
+    /* current insn context */
+    int override; /* -1 if no override */
+    int prefix;
+    int aflag, dflag;
+    target_ulong pc; /* pc = eip + cs_base */
+    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
+                   static state change (stop translation) */
+    /* current block context */
+    target_ulong cs_base; /* base of CS segment */
+    int pe;     /* protected mode */
+    int code32; /* 32 bit code segment */
+#ifdef TARGET_X86_64
+    int lma;    /* long mode active */
+    int code64; /* 64 bit code segment */
+    int rex_x, rex_b;
+#endif
+    int ss32;   /* 32 bit stack segment */
+    int cc_op;  /* current CC operation */
+    int addseg; /* non zero if either DS/ES/SS have a non zero base */
+    int f_st;   /* currently unused */
+    int vm86;   /* vm86 mode */
+#ifdef VBOX
+    int vme;    /* CR4.VME */
+    int record_call;    /* record calls for CSAM or not? */
+#endif
+    int cpl;
+    int iopl;
+    int tf;     /* TF cpu flag */
+    int singlestep_enabled; /* "hardware" single step enabled */
+    int jmp_opt; /* use direct block chaining for direct jumps */
+    int mem_index; /* select memory access functions */
+    int flags; /* all execution flags */
+    struct TranslationBlock *tb;
+    int popl_esp_hack; /* for correct popl with esp base handling */
+    int rip_offset; /* only used in x86_64, but left for simplicity */
+    int cpuid_features;
+    int cpuid_ext_features;
+    int cpuid_ext2_features;
+    int cpuid_ext3_features;
+} DisasContext;
+
+static void gen_eob(DisasContext *s);
+static void gen_jmp(DisasContext *s, target_ulong eip);
+static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
+
+/* i386 arith/logic operations */
+enum {
+    OP_ADDL,
+    OP_ORL,
+    OP_ADCL,
+    OP_SBBL,
+    OP_ANDL,
+    OP_SUBL,
+    OP_XORL,
+    OP_CMPL,
+};
+
+/* i386 shift ops */
+enum {
+    OP_ROL,
+    OP_ROR,
+    OP_RCL,
+    OP_RCR,
+    OP_SHL,
+    OP_SHR,
+    OP_SHL1, /* undocumented */
+    OP_SAR = 7,
+};
+
+enum {
+#define DEF(s, n, copy_size) INDEX_op_ ## s,
+#include "opc.h"
+#undef DEF
+    NB_OPS,
+};
+
+#include "gen-op.h"
+
+/* operand size */
+enum {
+    OT_BYTE = 0,
+    OT_WORD,
+    OT_LONG,
+    OT_QUAD,
+};
+
+enum {
+    /* I386 int registers */
+    OR_EAX,   /* MUST be even numbered */
+    OR_ECX,
+    OR_EDX,
+    OR_EBX,
+    OR_ESP,
+    OR_EBP,
+    OR_ESI,
+    OR_EDI,
+
+    OR_TMP0 = 16,    /* temporary operand register */
+    OR_TMP1,
+    OR_A0, /* temporary register used when doing address evaluation */
+};
+
+#ifdef TARGET_X86_64
+
+#define NB_OP_SIZES 4
+
+#define DEF_REGS(prefix, suffix) \
+  prefix ## EAX ## suffix,\
+  prefix ## ECX ## suffix,\
+  prefix ## EDX ## suffix,\
+  prefix ## EBX ## suffix,\
+  prefix ## ESP ## suffix,\
+  prefix ## EBP ## suffix,\
+  prefix ## ESI ## suffix,\
+  prefix ## EDI ## suffix,\
+  prefix ## R8 ## suffix,\
+  prefix ## R9 ## suffix,\
+  prefix ## R10 ## suffix,\
+  prefix ## R11 ## suffix,\
+  prefix ## R12 ## suffix,\
+  prefix ## R13 ## suffix,\
+  prefix ## R14 ## suffix,\
+  prefix ## R15 ## suffix,
+
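+/* Byte-register wrappers: encodings 4-7 select AH/CH/DH/BH without a REX
+   prefix but SPL/BPL/SIL/DIL with one, so x86_64_hregs picks the right
+   micro-op at runtime. */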
+#define DEF_BREGS(prefixb, prefixh, suffix)             \
+                                                        \
+static void prefixb ## ESP ## suffix ## _wrapper(void)  \
+{                                                       \
+    if (x86_64_hregs)                                 \
+        prefixb ## ESP ## suffix ();                    \
+    else                                                \
+        prefixh ## EAX ## suffix ();                    \
+}                                                       \
+                                                        \
+static void prefixb ## EBP ## suffix ## _wrapper(void)  \
+{                                                       \
+    if (x86_64_hregs)                                 \
+        prefixb ## EBP ## suffix ();                    \
+    else                                                \
+        prefixh ## ECX ## suffix ();                    \
+}                                                       \
+                                                        \
+static void prefixb ## ESI ## suffix ## _wrapper(void)  \
+{                                                       \
+    if (x86_64_hregs)                                 \
+        prefixb ## ESI ## suffix ();                    \
+    else                                                \
+        prefixh ## EDX ## suffix ();                    \
+}                                                       \
+                                                        \
+static void prefixb ## EDI ## suffix ## _wrapper(void)  \
+{                                                       \
+    if (x86_64_hregs)                                 \
+        prefixb ## EDI ## suffix ();                    \
+    else                                                \
+        prefixh ## EBX ## suffix ();                    \
+}
+
+DEF_BREGS(gen_op_movb_, gen_op_movh_, _T0)
+DEF_BREGS(gen_op_movb_, gen_op_movh_, _T1)
+DEF_BREGS(gen_op_movl_T0_, gen_op_movh_T0_, )
+DEF_BREGS(gen_op_movl_T1_, gen_op_movh_T1_, )
+
+#else /* !TARGET_X86_64 */
+
+#define NB_OP_SIZES 3
+
+#define DEF_REGS(prefix, suffix) \
+  prefix ## EAX ## suffix,\
+  prefix ## ECX ## suffix,\
+  prefix ## EDX ## suffix,\
+  prefix ## EBX ## suffix,\
+  prefix ## ESP ## suffix,\
+  prefix ## EBP ## suffix,\
+  prefix ## ESI ## suffix,\
+  prefix ## EDI ## suffix,
+
+#endif /* !TARGET_X86_64 */
+
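+/* The dispatch tables below are indexed by operand size (OT_*) and register
+   number; the DEF_REGS expansions supply one micro-op per combination. */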
+static GenOpFunc *gen_op_mov_reg_T0[NB_OP_SIZES][CPU_NB_REGS] = {
+    [OT_BYTE] = {
+        gen_op_movb_EAX_T0,
+        gen_op_movb_ECX_T0,
+        gen_op_movb_EDX_T0,
+        gen_op_movb_EBX_T0,
+#ifdef TARGET_X86_64
+        gen_op_movb_ESP_T0_wrapper,
+        gen_op_movb_EBP_T0_wrapper,
+        gen_op_movb_ESI_T0_wrapper,
+        gen_op_movb_EDI_T0_wrapper,
+        gen_op_movb_R8_T0,
+        gen_op_movb_R9_T0,
+        gen_op_movb_R10_T0,
+        gen_op_movb_R11_T0,
+        gen_op_movb_R12_T0,
+        gen_op_movb_R13_T0,
+        gen_op_movb_R14_T0,
+        gen_op_movb_R15_T0,
+#else
+        gen_op_movh_EAX_T0,
+        gen_op_movh_ECX_T0,
+        gen_op_movh_EDX_T0,
+        gen_op_movh_EBX_T0,
+#endif
+    },
+    [OT_WORD] = {
+        DEF_REGS(gen_op_movw_, _T0)
+    },
+    [OT_LONG] = {
+        DEF_REGS(gen_op_movl_, _T0)
+    },
+#ifdef TARGET_X86_64
+    [OT_QUAD] = {
+        DEF_REGS(gen_op_movq_, _T0)
+    },
+#endif
+};
+
+static GenOpFunc *gen_op_mov_reg_T1[NB_OP_SIZES][CPU_NB_REGS] = {
+    [OT_BYTE] = {
+        gen_op_movb_EAX_T1,
+        gen_op_movb_ECX_T1,
+        gen_op_movb_EDX_T1,
+        gen_op_movb_EBX_T1,
+#ifdef TARGET_X86_64
+        gen_op_movb_ESP_T1_wrapper,
+        gen_op_movb_EBP_T1_wrapper,
+        gen_op_movb_ESI_T1_wrapper,
+        gen_op_movb_EDI_T1_wrapper,
+        gen_op_movb_R8_T1,
+        gen_op_movb_R9_T1,
+        gen_op_movb_R10_T1,
+        gen_op_movb_R11_T1,
+        gen_op_movb_R12_T1,
+        gen_op_movb_R13_T1,
+        gen_op_movb_R14_T1,
+        gen_op_movb_R15_T1,
+#else
+        gen_op_movh_EAX_T1,
+        gen_op_movh_ECX_T1,
+        gen_op_movh_EDX_T1,
+        gen_op_movh_EBX_T1,
+#endif
+    },
+    [OT_WORD] = {
+        DEF_REGS(gen_op_movw_, _T1)
+    },
+    [OT_LONG] = {
+        DEF_REGS(gen_op_movl_, _T1)
+    },
+#ifdef TARGET_X86_64
+    [OT_QUAD] = {
+        DEF_REGS(gen_op_movq_, _T1)
+    },
+#endif
+};
+
+static GenOpFunc *gen_op_mov_reg_A0[NB_OP_SIZES - 1][CPU_NB_REGS] = {
+    [0] = {
+        DEF_REGS(gen_op_movw_, _A0)
+    },
+    [1] = {
+        DEF_REGS(gen_op_movl_, _A0)
+    },
+#ifdef TARGET_X86_64
+    [2] = {
+        DEF_REGS(gen_op_movq_, _A0)
+    },
+#endif
+};
+
+static GenOpFunc *gen_op_mov_TN_reg[NB_OP_SIZES][2][CPU_NB_REGS] =
+{
+    [OT_BYTE] = {
+        {
+            gen_op_movl_T0_EAX,
+            gen_op_movl_T0_ECX,
+            gen_op_movl_T0_EDX,
+            gen_op_movl_T0_EBX,
+#ifdef TARGET_X86_64
+            gen_op_movl_T0_ESP_wrapper,
+            gen_op_movl_T0_EBP_wrapper,
+            gen_op_movl_T0_ESI_wrapper,
+            gen_op_movl_T0_EDI_wrapper,
+            gen_op_movl_T0_R8,
+            gen_op_movl_T0_R9,
+            gen_op_movl_T0_R10,
+            gen_op_movl_T0_R11,
+            gen_op_movl_T0_R12,
+            gen_op_movl_T0_R13,
+            gen_op_movl_T0_R14,
+            gen_op_movl_T0_R15,
+#else
+            gen_op_movh_T0_EAX,
+            gen_op_movh_T0_ECX,
+            gen_op_movh_T0_EDX,
+            gen_op_movh_T0_EBX,
+#endif
+        },
+        {
+            gen_op_movl_T1_EAX,
+            gen_op_movl_T1_ECX,
+            gen_op_movl_T1_EDX,
+            gen_op_movl_T1_EBX,
+#ifdef TARGET_X86_64
+            gen_op_movl_T1_ESP_wrapper,
+            gen_op_movl_T1_EBP_wrapper,
+            gen_op_movl_T1_ESI_wrapper,
+            gen_op_movl_T1_EDI_wrapper,
+            gen_op_movl_T1_R8,
+            gen_op_movl_T1_R9,
+            gen_op_movl_T1_R10,
+            gen_op_movl_T1_R11,
+            gen_op_movl_T1_R12,
+            gen_op_movl_T1_R13,
+            gen_op_movl_T1_R14,
+            gen_op_movl_T1_R15,
+#else
+            gen_op_movh_T1_EAX,
+            gen_op_movh_T1_ECX,
+            gen_op_movh_T1_EDX,
+            gen_op_movh_T1_EBX,
+#endif
+        },
+    },
+    [OT_WORD] = {
+        {
+            DEF_REGS(gen_op_movl_T0_, )
+        },
+        {
+            DEF_REGS(gen_op_movl_T1_, )
+        },
+    },
+    [OT_LONG] = {
+        {
+            DEF_REGS(gen_op_movl_T0_, )
+        },
+        {
+            DEF_REGS(gen_op_movl_T1_, )
+        },
+    },
+#ifdef TARGET_X86_64
+    [OT_QUAD] = {
+        {
+            DEF_REGS(gen_op_movl_T0_, )
+        },
+        {
+            DEF_REGS(gen_op_movl_T1_, )
+        },
+    },
+#endif
+};
+
+static GenOpFunc *gen_op_movl_A0_reg[CPU_NB_REGS] = {
+    DEF_REGS(gen_op_movl_A0_, )
+};
+
+static GenOpFunc *gen_op_addl_A0_reg_sN[4][CPU_NB_REGS] = {
+    [0] = {
+        DEF_REGS(gen_op_addl_A0_, )
+    },
+    [1] = {
+        DEF_REGS(gen_op_addl_A0_, _s1)
+    },
+    [2] = {
+        DEF_REGS(gen_op_addl_A0_, _s2)
+    },
+    [3] = {
+        DEF_REGS(gen_op_addl_A0_, _s3)
+    },
+};
+
+#ifdef TARGET_X86_64
+static GenOpFunc *gen_op_movq_A0_reg[CPU_NB_REGS] = {
+    DEF_REGS(gen_op_movq_A0_, )
+};
+
+static GenOpFunc *gen_op_addq_A0_reg_sN[4][CPU_NB_REGS] = {
+    [0] = {
+        DEF_REGS(gen_op_addq_A0_, )
+    },
+    [1] = {
+        DEF_REGS(gen_op_addq_A0_, _s1)
+    },
+    [2] = {
+        DEF_REGS(gen_op_addq_A0_, _s2)
+    },
+    [3] = {
+        DEF_REGS(gen_op_addq_A0_, _s3)
+    },
+};
+#endif
+
+static GenOpFunc *gen_op_cmov_reg_T1_T0[NB_OP_SIZES - 1][CPU_NB_REGS] = {
+    [0] = {
+        DEF_REGS(gen_op_cmovw_, _T1_T0)
+    },
+    [1] = {
+        DEF_REGS(gen_op_cmovl_, _T1_T0)
+    },
+#ifdef TARGET_X86_64
+    [2] = {
+        DEF_REGS(gen_op_cmovq_, _T1_T0)
+    },
+#endif
+};
+
+static GenOpFunc *gen_op_arith_T0_T1_cc[8] = {
+    NULL,
+    gen_op_orl_T0_T1,
+    NULL,
+    NULL,
+    gen_op_andl_T0_T1,
+    NULL,
+    gen_op_xorl_T0_T1,
+    NULL,
+};
+
+#define DEF_ARITHC(SUFFIX)\
+    {\
+        gen_op_adcb ## SUFFIX ## _T0_T1_cc,\
+        gen_op_sbbb ## SUFFIX ## _T0_T1_cc,\
+    },\
+    {\
+        gen_op_adcw ## SUFFIX ## _T0_T1_cc,\
+        gen_op_sbbw ## SUFFIX ## _T0_T1_cc,\
+    },\
+    {\
+        gen_op_adcl ## SUFFIX ## _T0_T1_cc,\
+        gen_op_sbbl ## SUFFIX ## _T0_T1_cc,\
+    },\
+    {\
+        X86_64_ONLY(gen_op_adcq ## SUFFIX ## _T0_T1_cc),\
+        X86_64_ONLY(gen_op_sbbq ## SUFFIX ## _T0_T1_cc),\
+    },
+
+static GenOpFunc *gen_op_arithc_T0_T1_cc[4][2] = {
+    DEF_ARITHC( )
+};
+
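+/* The _mem tables have 3 * 4 rows: the four operand sizes for each of the
+   _raw, _kernel and _user access variants (only _raw exists in
+   CONFIG_USER_ONLY builds). */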
+static GenOpFunc *gen_op_arithc_mem_T0_T1_cc[3 * 4][2] = {
+    DEF_ARITHC(_raw)
+#ifndef CONFIG_USER_ONLY
+    DEF_ARITHC(_kernel)
+    DEF_ARITHC(_user)
+#endif
+};
+
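+/* Flag-computation class for each arith op (indexed OP_ADDL..OP_CMPL),
+   given as the byte-sized CC_OP constant; the operand size is presumably
+   added in by the user of this table. */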
+static const int cc_op_arithb[8] = {
+    CC_OP_ADDB,
+    CC_OP_LOGICB,
+    CC_OP_ADDB,
+    CC_OP_SUBB,
+    CC_OP_LOGICB,
+    CC_OP_SUBB,
+    CC_OP_LOGICB,
+    CC_OP_SUBB,
+};
+
+#define DEF_CMPXCHG(SUFFIX)\
+    gen_op_cmpxchgb ## SUFFIX ## _T0_T1_EAX_cc,\
+    gen_op_cmpxchgw ## SUFFIX ## _T0_T1_EAX_cc,\
+    gen_op_cmpxchgl ## SUFFIX ## _T0_T1_EAX_cc,\
+    X86_64_ONLY(gen_op_cmpxchgq ## SUFFIX ## _T0_T1_EAX_cc),
+
+static GenOpFunc *gen_op_cmpxchg_T0_T1_EAX_cc[4] = {
+    DEF_CMPXCHG( )
+};
+
+static GenOpFunc *gen_op_cmpxchg_mem_T0_T1_EAX_cc[3 * 4] = {
+    DEF_CMPXCHG(_raw)
+#ifndef CONFIG_USER_ONLY
+    DEF_CMPXCHG(_kernel)
+    DEF_CMPXCHG(_user)
+#endif
+};
+
+#define DEF_SHIFT(SUFFIX)\
+    {\
+        gen_op_rolb ## SUFFIX ## _T0_T1_cc,\
+        gen_op_rorb ## SUFFIX ## _T0_T1_cc,\
+        gen_op_rclb ## SUFFIX ## _T0_T1_cc,\
+        gen_op_rcrb ## SUFFIX ## _T0_T1_cc,\
+        gen_op_shlb ## SUFFIX ## _T0_T1_cc,\
+        gen_op_shrb ## SUFFIX ## _T0_T1_cc,\
+        gen_op_shlb ## SUFFIX ## _T0_T1_cc,\
+        gen_op_sarb ## SUFFIX ## _T0_T1_cc,\
+    },\
+    {\
+        gen_op_rolw ## SUFFIX ## _T0_T1_cc,\
+        gen_op_rorw ## SUFFIX ## _T0_T1_cc,\
+        gen_op_rclw ## SUFFIX ## _T0_T1_cc,\
+        gen_op_rcrw ## SUFFIX ## _T0_T1_cc,\
+        gen_op_shlw ## SUFFIX ## _T0_T1_cc,\
+        gen_op_shrw ## SUFFIX ## _T0_T1_cc,\
+        gen_op_shlw ## SUFFIX ## _T0_T1_cc,\
+        gen_op_sarw ## SUFFIX ## _T0_T1_cc,\
+    },\
+    {\
+        gen_op_roll ## SUFFIX ## _T0_T1_cc,\
+        gen_op_rorl ## SUFFIX ## _T0_T1_cc,\
+        gen_op_rcll ## SUFFIX ## _T0_T1_cc,\
+        gen_op_rcrl ## SUFFIX ## _T0_T1_cc,\
+        gen_op_shll ## SUFFIX ## _T0_T1_cc,\
+        gen_op_shrl ## SUFFIX ## _T0_T1_cc,\
+        gen_op_shll ## SUFFIX ## _T0_T1_cc,\
+        gen_op_sarl ## SUFFIX ## _T0_T1_cc,\
+    },\
+    {\
+        X86_64_ONLY(gen_op_rolq ## SUFFIX ## _T0_T1_cc),\
+        X86_64_ONLY(gen_op_rorq ## SUFFIX ## _T0_T1_cc),\
+        X86_64_ONLY(gen_op_rclq ## SUFFIX ## _T0_T1_cc),\
+        X86_64_ONLY(gen_op_rcrq ## SUFFIX ## _T0_T1_cc),\
+        X86_64_ONLY(gen_op_shlq ## SUFFIX ## _T0_T1_cc),\
+        X86_64_ONLY(gen_op_shrq ## SUFFIX ## _T0_T1_cc),\
+        X86_64_ONLY(gen_op_shlq ## SUFFIX ## _T0_T1_cc),\
+        X86_64_ONLY(gen_op_sarq ## SUFFIX ## _T0_T1_cc),\
+    },
+
+static GenOpFunc *gen_op_shift_T0_T1_cc[4][8] = {
+    DEF_SHIFT( )
+};
+
+static GenOpFunc *gen_op_shift_mem_T0_T1_cc[3 * 4][8] = {
+    DEF_SHIFT(_raw)
+#ifndef CONFIG_USER_ONLY
+    DEF_SHIFT(_kernel)
+    DEF_SHIFT(_user)
+#endif
+};
+
+#define DEF_SHIFTD(SUFFIX, op)\
+    {\
+        NULL,\
+        NULL,\
+    },\
+    {\
+        gen_op_shldw ## SUFFIX ## _T0_T1_ ## op ## _cc,\
+        gen_op_shrdw ## SUFFIX ## _T0_T1_ ## op ## _cc,\
+     },\
+    {\
+        gen_op_shldl ## SUFFIX ## _T0_T1_ ## op ## _cc,\
+        gen_op_shrdl ## SUFFIX ## _T0_T1_ ## op ## _cc,\
+    },\
+    {\
+X86_64_DEF(gen_op_shldq ## SUFFIX ## _T0_T1_ ## op ## _cc,\
+           gen_op_shrdq ## SUFFIX ## _T0_T1_ ## op ## _cc,)\
+    },
+
+static GenOpFunc1 *gen_op_shiftd_T0_T1_im_cc[4][2] = {
+    DEF_SHIFTD(, im)
+};
+
+static GenOpFunc *gen_op_shiftd_T0_T1_ECX_cc[4][2] = {
+    DEF_SHIFTD(, ECX)
+};
+
+static GenOpFunc1 *gen_op_shiftd_mem_T0_T1_im_cc[3 * 4][2] = {
+    DEF_SHIFTD(_raw, im)
+#ifndef CONFIG_USER_ONLY
+    DEF_SHIFTD(_kernel, im)
+    DEF_SHIFTD(_user, im)
+#endif
+};
+
+static GenOpFunc *gen_op_shiftd_mem_T0_T1_ECX_cc[3 * 4][2] = {
+    DEF_SHIFTD(_raw, ECX)
+#ifndef CONFIG_USER_ONLY
+    DEF_SHIFTD(_kernel, ECX)
+    DEF_SHIFTD(_user, ECX)
+#endif
+};
+
+static GenOpFunc *gen_op_btx_T0_T1_cc[3][4] = {
+    [0] = {
+        gen_op_btw_T0_T1_cc,
+        gen_op_btsw_T0_T1_cc,
+        gen_op_btrw_T0_T1_cc,
+        gen_op_btcw_T0_T1_cc,
+    },
+    [1] = {
+        gen_op_btl_T0_T1_cc,
+        gen_op_btsl_T0_T1_cc,
+        gen_op_btrl_T0_T1_cc,
+        gen_op_btcl_T0_T1_cc,
+    },
+#ifdef TARGET_X86_64
+    [2] = {
+        gen_op_btq_T0_T1_cc,
+        gen_op_btsq_T0_T1_cc,
+        gen_op_btrq_T0_T1_cc,
+        gen_op_btcq_T0_T1_cc,
+    },
+#endif
+};
+
+static GenOpFunc *gen_op_add_bit_A0_T1[3] = {
+    gen_op_add_bitw_A0_T1,
+    gen_op_add_bitl_A0_T1,
+    X86_64_ONLY(gen_op_add_bitq_A0_T1),
+};
+
+static GenOpFunc *gen_op_bsx_T0_cc[3][2] = {
+    [0] = {
+        gen_op_bsfw_T0_cc,
+        gen_op_bsrw_T0_cc,
+    },
+    [1] = {
+        gen_op_bsfl_T0_cc,
+        gen_op_bsrl_T0_cc,
+    },
+#ifdef TARGET_X86_64
+    [2] = {
+        gen_op_bsfq_T0_cc,
+        gen_op_bsrq_T0_cc,
+    },
+#endif
+};
+
+static GenOpFunc *gen_op_lds_T0_A0[3 * 4] = {
+    gen_op_ldsb_raw_T0_A0,
+    gen_op_ldsw_raw_T0_A0,
+    X86_64_ONLY(gen_op_ldsl_raw_T0_A0),
+    NULL,
+#ifndef CONFIG_USER_ONLY
+    gen_op_ldsb_kernel_T0_A0,
+    gen_op_ldsw_kernel_T0_A0,
+    X86_64_ONLY(gen_op_ldsl_kernel_T0_A0),
+    NULL,
+
+    gen_op_ldsb_user_T0_A0,
+    gen_op_ldsw_user_T0_A0,
+    X86_64_ONLY(gen_op_ldsl_user_T0_A0),
+    NULL,
+#endif
+};
+
+static GenOpFunc *gen_op_ldu_T0_A0[3 * 4] = {
+    gen_op_ldub_raw_T0_A0,
+    gen_op_lduw_raw_T0_A0,
+    NULL,
+    NULL,
+
+#ifndef CONFIG_USER_ONLY
+    gen_op_ldub_kernel_T0_A0,
+    gen_op_lduw_kernel_T0_A0,
+    NULL,
+    NULL,
+
+    gen_op_ldub_user_T0_A0,
+    gen_op_lduw_user_T0_A0,
+    NULL,
+    NULL,
+#endif
+};
+
+/* sign does not matter, except for lidt/lgdt call (TODO: fix it) */
+static GenOpFunc *gen_op_ld_T0_A0[3 * 4] = {
+    gen_op_ldub_raw_T0_A0,
+    gen_op_lduw_raw_T0_A0,
+    gen_op_ldl_raw_T0_A0,
+    X86_64_ONLY(gen_op_ldq_raw_T0_A0),
+
+#ifndef CONFIG_USER_ONLY
+    gen_op_ldub_kernel_T0_A0,
+    gen_op_lduw_kernel_T0_A0,
+    gen_op_ldl_kernel_T0_A0,
+    X86_64_ONLY(gen_op_ldq_kernel_T0_A0),
+
+    gen_op_ldub_user_T0_A0,
+    gen_op_lduw_user_T0_A0,
+    gen_op_ldl_user_T0_A0,
+    X86_64_ONLY(gen_op_ldq_user_T0_A0),
+#endif
+};
+
+static GenOpFunc *gen_op_ld_T1_A0[3 * 4] = {
+    gen_op_ldub_raw_T1_A0,
+    gen_op_lduw_raw_T1_A0,
+    gen_op_ldl_raw_T1_A0,
+    X86_64_ONLY(gen_op_ldq_raw_T1_A0),
+
+#ifndef CONFIG_USER_ONLY
+    gen_op_ldub_kernel_T1_A0,
+    gen_op_lduw_kernel_T1_A0,
+    gen_op_ldl_kernel_T1_A0,
+    X86_64_ONLY(gen_op_ldq_kernel_T1_A0),
+
+    gen_op_ldub_user_T1_A0,
+    gen_op_lduw_user_T1_A0,
+    gen_op_ldl_user_T1_A0,
+    X86_64_ONLY(gen_op_ldq_user_T1_A0),
+#endif
+};
+
+static GenOpFunc *gen_op_st_T0_A0[3 * 4] = {
+    gen_op_stb_raw_T0_A0,
+    gen_op_stw_raw_T0_A0,
+    gen_op_stl_raw_T0_A0,
+    X86_64_ONLY(gen_op_stq_raw_T0_A0),
+
+#ifndef CONFIG_USER_ONLY
+    gen_op_stb_kernel_T0_A0,
+    gen_op_stw_kernel_T0_A0,
+    gen_op_stl_kernel_T0_A0,
+    X86_64_ONLY(gen_op_stq_kernel_T0_A0),
+
+    gen_op_stb_user_T0_A0,
+    gen_op_stw_user_T0_A0,
+    gen_op_stl_user_T0_A0,
+    X86_64_ONLY(gen_op_stq_user_T0_A0),
+#endif
+};
+
+static GenOpFunc *gen_op_st_T1_A0[3 * 4] = {
+    NULL,
+    gen_op_stw_raw_T1_A0,
+    gen_op_stl_raw_T1_A0,
+    X86_64_ONLY(gen_op_stq_raw_T1_A0),
+
+#ifndef CONFIG_USER_ONLY
+    NULL,
+    gen_op_stw_kernel_T1_A0,
+    gen_op_stl_kernel_T1_A0,
+    X86_64_ONLY(gen_op_stq_kernel_T1_A0),
+
+    NULL,
+    gen_op_stw_user_T1_A0,
+    gen_op_stl_user_T1_A0,
+    X86_64_ONLY(gen_op_stq_user_T1_A0),
+#endif
+};
+
+#ifdef VBOX
+static void gen_check_external_event()
+{
+    gen_op_check_external_event();
+}
+
+static inline void gen_update_eip(target_ulong pc)
+{
+#ifdef TARGET_X86_64
+    if (pc == (uint32_t)pc) {
+        gen_op_movl_eip_im(pc);
+    } else if (pc == (int32_t)pc) {
+        gen_op_movq_eip_im(pc);
+    } else {
+        gen_op_movq_eip_im64(pc >> 32, pc);
+    }
+#else
+    gen_op_movl_eip_im(pc);
+#endif
+}
+
+#endif /* VBOX */
+
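+/* Load a constant EIP/RIP into the CPU state, picking the shortest form that
+   can encode the value on x86-64; under VBOX the pending external event check
+   is emitted first. */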
+static inline void gen_jmp_im(target_ulong pc)
+{
+#ifdef VBOX
+    gen_check_external_event();
+#endif /* VBOX */
+#ifdef TARGET_X86_64
+    if (pc == (uint32_t)pc) {
+        gen_op_movl_eip_im(pc);
+    } else if (pc == (int32_t)pc) {
+        gen_op_movq_eip_im(pc);
+    } else {
+        gen_op_movq_eip_im64(pc >> 32, pc);
+    }
+#else
+    gen_op_movl_eip_im(pc);
+#endif
+}
+
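+/* Compute in A0 the address of the string source operand (xSI plus the
+   appropriate segment base, honouring address size and segment overrides);
+   gen_string_movl_A0_EDI below does the same for the ES:xDI destination. */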
+static inline void gen_string_movl_A0_ESI(DisasContext *s)
+{
+    int override;
+
+    override = s->override;
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        if (override >= 0) {
+            gen_op_movq_A0_seg(offsetof(CPUX86State,segs[override].base));
+            gen_op_addq_A0_reg_sN[0][R_ESI]();
+        } else {
+            gen_op_movq_A0_reg[R_ESI]();
+        }
+    } else
+#endif
+    if (s->aflag) {
+        /* 32 bit address */
+        if (s->addseg && override < 0)
+            override = R_DS;
+        if (override >= 0) {
+            gen_op_movl_A0_seg(offsetof(CPUX86State,segs[override].base));
+            gen_op_addl_A0_reg_sN[0][R_ESI]();
+        } else {
+            gen_op_movl_A0_reg[R_ESI]();
+        }
+    } else {
+        /* 16 bit address, always override */
+        if (override < 0)
+            override = R_DS;
+        gen_op_movl_A0_reg[R_ESI]();
+        gen_op_andl_A0_ffff();
+        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base));
+    }
+}
+
+static inline void gen_string_movl_A0_EDI(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        gen_op_movq_A0_reg[R_EDI]();
+    } else
+#endif
+    if (s->aflag) {
+        if (s->addseg) {
+            gen_op_movl_A0_seg(offsetof(CPUX86State,segs[R_ES].base));
+            gen_op_addl_A0_reg_sN[0][R_EDI]();
+        } else {
+            gen_op_movl_A0_reg[R_EDI]();
+        }
+    } else {
+        gen_op_movl_A0_reg[R_EDI]();
+        gen_op_andl_A0_ffff();
+        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_ES].base));
+    }
+}
+
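+/* Load into T0 the per-element string increment (+/-1, 2, 4 or 8 according to
+   the direction flag and operand size); the string helpers add it to xSI/xDI. */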
+static GenOpFunc *gen_op_movl_T0_Dshift[4] = {
+    gen_op_movl_T0_Dshiftb,
+    gen_op_movl_T0_Dshiftw,
+    gen_op_movl_T0_Dshiftl,
+    X86_64_ONLY(gen_op_movl_T0_Dshiftq),
+};
+
+static GenOpFunc1 *gen_op_jnz_ecx[3] = {
+    gen_op_jnz_ecxw,
+    gen_op_jnz_ecxl,
+    X86_64_ONLY(gen_op_jnz_ecxq),
+};
+
+static GenOpFunc1 *gen_op_jz_ecx[3] = {
+    gen_op_jz_ecxw,
+    gen_op_jz_ecxl,
+    X86_64_ONLY(gen_op_jz_ecxq),
+};
+
+static GenOpFunc *gen_op_dec_ECX[3] = {
+    gen_op_decw_ECX,
+    gen_op_decl_ECX,
+    X86_64_ONLY(gen_op_decq_ECX),
+};
+
+static GenOpFunc1 *gen_op_string_jnz_sub[2][4] = {
+    {
+        gen_op_jnz_subb,
+        gen_op_jnz_subw,
+        gen_op_jnz_subl,
+        X86_64_ONLY(gen_op_jnz_subq),
+    },
+    {
+        gen_op_jz_subb,
+        gen_op_jz_subw,
+        gen_op_jz_subl,
+        X86_64_ONLY(gen_op_jz_subq),
+    },
+};
+
+static GenOpFunc *gen_op_in_DX_T0[3] = {
+    gen_op_inb_DX_T0,
+    gen_op_inw_DX_T0,
+    gen_op_inl_DX_T0,
+};
+
+static GenOpFunc *gen_op_out_DX_T0[3] = {
+    gen_op_outb_DX_T0,
+    gen_op_outw_DX_T0,
+    gen_op_outl_DX_T0,
+};
+
+static GenOpFunc *gen_op_in[3] = {
+    gen_op_inb_T0_T1,
+    gen_op_inw_T0_T1,
+    gen_op_inl_T0_T1,
+};
+
+static GenOpFunc *gen_op_out[3] = {
+    gen_op_outb_T0_T1,
+    gen_op_outw_T0_T1,
+    gen_op_outl_T0_T1,
+};
+
+static GenOpFunc *gen_check_io_T0[3] = {
+    gen_op_check_iob_T0,
+    gen_op_check_iow_T0,
+    gen_op_check_iol_T0,
+};
+
+static GenOpFunc *gen_check_io_DX[3] = {
+    gen_op_check_iob_DX,
+    gen_op_check_iow_DX,
+    gen_op_check_iol_DX,
+};
+
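+/* Emit an I/O permission bitmap check when one is required, i.e. in protected
+   mode with CPL > IOPL or in vm86 mode. */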
+static void gen_check_io(DisasContext *s, int ot, int use_dx, target_ulong cur_eip)
+{
+    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_jmp_im(cur_eip);
+        if (use_dx)
+            gen_check_io_DX[ot]();
+        else
+            gen_check_io_T0[ot]();
+    }
+}
+
+static inline void gen_movs(DisasContext *s, int ot)
+{
+    gen_string_movl_A0_ESI(s);
+    gen_op_ld_T0_A0[ot + s->mem_index]();
+    gen_string_movl_A0_EDI(s);
+    gen_op_st_T0_A0[ot + s->mem_index]();
+    gen_op_movl_T0_Dshift[ot]();
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        gen_op_addq_ESI_T0();
+        gen_op_addq_EDI_T0();
+    } else
+#endif
+    if (s->aflag) {
+        gen_op_addl_ESI_T0();
+        gen_op_addl_EDI_T0();
+    } else {
+        gen_op_addw_ESI_T0();
+        gen_op_addw_EDI_T0();
+    }
+}
+
+static inline void gen_update_cc_op(DisasContext *s)
+{
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+        s->cc_op = CC_OP_DYNAMIC;
+    }
+}
+
+/* XXX: does not work with gdbstub "ice" single step - not a
+   serious problem */
+static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
+{
+    int l1, l2;
+
+    l1 = gen_new_label();
+    l2 = gen_new_label();
+    gen_op_jnz_ecx[s->aflag](l1);
+    gen_set_label(l2);
+    gen_jmp_tb(s, next_eip, 1);
+    gen_set_label(l1);
+    return l2;
+}
+
+static inline void gen_stos(DisasContext *s, int ot)
+{
+    gen_op_mov_TN_reg[OT_LONG][0][R_EAX]();
+    gen_string_movl_A0_EDI(s);
+    gen_op_st_T0_A0[ot + s->mem_index]();
+    gen_op_movl_T0_Dshift[ot]();
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        gen_op_addq_EDI_T0();
+    } else
+#endif
+    if (s->aflag) {
+        gen_op_addl_EDI_T0();
+    } else {
+        gen_op_addw_EDI_T0();
+    }
+}
+
+static inline void gen_lods(DisasContext *s, int ot)
+{
+    gen_string_movl_A0_ESI(s);
+    gen_op_ld_T0_A0[ot + s->mem_index]();
+    gen_op_mov_reg_T0[ot][R_EAX]();
+    gen_op_movl_T0_Dshift[ot]();
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        gen_op_addq_ESI_T0();
+    } else
+#endif
+    if (s->aflag) {
+        gen_op_addl_ESI_T0();
+    } else {
+        gen_op_addw_ESI_T0();
+    }
+}
+
+static inline void gen_scas(DisasContext *s, int ot)
+{
+    gen_op_mov_TN_reg[OT_LONG][0][R_EAX]();
+    gen_string_movl_A0_EDI(s);
+    gen_op_ld_T1_A0[ot + s->mem_index]();
+    gen_op_cmpl_T0_T1_cc();
+    gen_op_movl_T0_Dshift[ot]();
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        gen_op_addq_EDI_T0();
+    } else
+#endif
+    if (s->aflag) {
+        gen_op_addl_EDI_T0();
+    } else {
+        gen_op_addw_EDI_T0();
+    }
+}
+
+static inline void gen_cmps(DisasContext *s, int ot)
+{
+    gen_string_movl_A0_ESI(s);
+    gen_op_ld_T0_A0[ot + s->mem_index]();
+    gen_string_movl_A0_EDI(s);
+    gen_op_ld_T1_A0[ot + s->mem_index]();
+    gen_op_cmpl_T0_T1_cc();
+    gen_op_movl_T0_Dshift[ot]();
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        gen_op_addq_ESI_T0();
+        gen_op_addq_EDI_T0();
+    } else
+#endif
+    if (s->aflag) {
+        gen_op_addl_ESI_T0();
+        gen_op_addl_EDI_T0();
+    } else {
+        gen_op_addw_ESI_T0();
+        gen_op_addw_EDI_T0();
+    }
+}
+
+static inline void gen_ins(DisasContext *s, int ot)
+{
+    gen_string_movl_A0_EDI(s);
+    gen_op_movl_T0_0();
+    gen_op_st_T0_A0[ot + s->mem_index]();
+    gen_op_in_DX_T0[ot]();
+    gen_op_st_T0_A0[ot + s->mem_index]();
+    gen_op_movl_T0_Dshift[ot]();
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        gen_op_addq_EDI_T0();
+    } else
+#endif
+    if (s->aflag) {
+        gen_op_addl_EDI_T0();
+    } else {
+        gen_op_addw_EDI_T0();
+    }
+}
+
+static inline void gen_outs(DisasContext *s, int ot)
+{
+    gen_string_movl_A0_ESI(s);
+    gen_op_ld_T0_A0[ot + s->mem_index]();
+    gen_op_out_DX_T0[ot]();
+    gen_op_movl_T0_Dshift[ot]();
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        gen_op_addq_ESI_T0();
+    } else
+#endif
+    if (s->aflag) {
+        gen_op_addl_ESI_T0();
+    } else {
+        gen_op_addw_ESI_T0();
+    }
+}
+
+/* same method as Valgrind: we generate jumps to the current or next
+   instruction */
+#define GEN_REPZ(op)                                                          \
+static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
+                                 target_ulong cur_eip, target_ulong next_eip) \
+{                                                                             \
+    int l2;\
+    gen_update_cc_op(s);                                                      \
+    l2 = gen_jz_ecx_string(s, next_eip);                                      \
+    gen_ ## op(s, ot);                                                        \
+    gen_op_dec_ECX[s->aflag]();                                               \
+    /* a loop would cause two single step exceptions if ECX = 1               \
+       before rep string_insn */                                              \
+    if (!s->jmp_opt)                                                          \
+        gen_op_jz_ecx[s->aflag](l2);                                          \
+    gen_jmp(s, cur_eip);                                                      \
+}
+
+#define GEN_REPZ2(op)                                                         \
+static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
+                                   target_ulong cur_eip,                      \
+                                   target_ulong next_eip,                     \
+                                   int nz)                                    \
+{                                                                             \
+    int l2;\
+    gen_update_cc_op(s);                                                      \
+    l2 = gen_jz_ecx_string(s, next_eip);                                      \
+    gen_ ## op(s, ot);                                                        \
+    gen_op_dec_ECX[s->aflag]();                                               \
+    gen_op_set_cc_op(CC_OP_SUBB + ot);                                        \
+    gen_op_string_jnz_sub[nz][ot](l2);\
+    if (!s->jmp_opt)                                                          \
+        gen_op_jz_ecx[s->aflag](l2);                                          \
+    gen_jmp(s, cur_eip);                                                      \
+}
+
+GEN_REPZ(movs)
+GEN_REPZ(stos)
+GEN_REPZ(lods)
+GEN_REPZ(ins)
+GEN_REPZ(outs)
+GEN_REPZ2(scas)
+GEN_REPZ2(cmps)
+
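+/* Condition kinds taken from bits 3..1 of the Jcc/SETcc opcodes; the low
+   opcode bit selects the inverted condition. */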
+enum {
+    JCC_O,
+    JCC_B,
+    JCC_Z,
+    JCC_BE,
+    JCC_S,
+    JCC_P,
+    JCC_L,
+    JCC_LE,
+};
+
+static GenOpFunc1 *gen_jcc_sub[4][8] = {
+    [OT_BYTE] = {
+        NULL,
+        gen_op_jb_subb,
+        gen_op_jz_subb,
+        gen_op_jbe_subb,
+        gen_op_js_subb,
+        NULL,
+        gen_op_jl_subb,
+        gen_op_jle_subb,
+    },
+    [OT_WORD] = {
+        NULL,
+        gen_op_jb_subw,
+        gen_op_jz_subw,
+        gen_op_jbe_subw,
+        gen_op_js_subw,
+        NULL,
+        gen_op_jl_subw,
+        gen_op_jle_subw,
+    },
+    [OT_LONG] = {
+        NULL,
+        gen_op_jb_subl,
+        gen_op_jz_subl,
+        gen_op_jbe_subl,
+        gen_op_js_subl,
+        NULL,
+        gen_op_jl_subl,
+        gen_op_jle_subl,
+    },
+#ifdef TARGET_X86_64
+    [OT_QUAD] = {
+        NULL,
+        BUGGY_64(gen_op_jb_subq),
+        gen_op_jz_subq,
+        BUGGY_64(gen_op_jbe_subq),
+        gen_op_js_subq,
+        NULL,
+        BUGGY_64(gen_op_jl_subq),
+        BUGGY_64(gen_op_jle_subq),
+    },
+#endif
+};
+
+static GenOpFunc1 *gen_op_loop[3][4] = {
+    [0] = {
+        gen_op_loopnzw,
+        gen_op_loopzw,
+        gen_op_jnz_ecxw,
+    },
+    [1] = {
+        gen_op_loopnzl,
+        gen_op_loopzl,
+        gen_op_jnz_ecxl,
+    },
+#ifdef TARGET_X86_64
+    [2] = {
+        gen_op_loopnzq,
+        gen_op_loopzq,
+        gen_op_jnz_ecxq,
+    },
+#endif
+};
+
+static GenOpFunc *gen_setcc_slow[8] = {
+    gen_op_seto_T0_cc,
+    gen_op_setb_T0_cc,
+    gen_op_setz_T0_cc,
+    gen_op_setbe_T0_cc,
+    gen_op_sets_T0_cc,
+    gen_op_setp_T0_cc,
+    gen_op_setl_T0_cc,
+    gen_op_setle_T0_cc,
+};
+
+static GenOpFunc *gen_setcc_sub[4][8] = {
+    [OT_BYTE] = {
+        NULL,
+        gen_op_setb_T0_subb,
+        gen_op_setz_T0_subb,
+        gen_op_setbe_T0_subb,
+        gen_op_sets_T0_subb,
+        NULL,
+        gen_op_setl_T0_subb,
+        gen_op_setle_T0_subb,
+    },
+    [OT_WORD] = {
+        NULL,
+        gen_op_setb_T0_subw,
+        gen_op_setz_T0_subw,
+        gen_op_setbe_T0_subw,
+        gen_op_sets_T0_subw,
+        NULL,
+        gen_op_setl_T0_subw,
+        gen_op_setle_T0_subw,
+    },
+    [OT_LONG] = {
+        NULL,
+        gen_op_setb_T0_subl,
+        gen_op_setz_T0_subl,
+        gen_op_setbe_T0_subl,
+        gen_op_sets_T0_subl,
+        NULL,
+        gen_op_setl_T0_subl,
+        gen_op_setle_T0_subl,
+    },
+#ifdef TARGET_X86_64
+    [OT_QUAD] = {
+        NULL,
+        gen_op_setb_T0_subq,
+        gen_op_setz_T0_subq,
+        gen_op_setbe_T0_subq,
+        gen_op_sets_T0_subq,
+        NULL,
+        gen_op_setl_T0_subq,
+        gen_op_setle_T0_subq,
+    },
+#endif
+};
+
+static GenOpFunc *gen_op_fp_arith_ST0_FT0[8] = {
+    gen_op_fadd_ST0_FT0,
+    gen_op_fmul_ST0_FT0,
+    gen_op_fcom_ST0_FT0,
+    gen_op_fcom_ST0_FT0,
+    gen_op_fsub_ST0_FT0,
+    gen_op_fsubr_ST0_FT0,
+    gen_op_fdiv_ST0_FT0,
+    gen_op_fdivr_ST0_FT0,
+};
+
+/* NOTE the exception in "r" op ordering */
+static GenOpFunc1 *gen_op_fp_arith_STN_ST0[8] = {
+    gen_op_fadd_STN_ST0,
+    gen_op_fmul_STN_ST0,
+    NULL,
+    NULL,
+    gen_op_fsubr_STN_ST0,
+    gen_op_fsub_STN_ST0,
+    gen_op_fdivr_STN_ST0,
+    gen_op_fdiv_STN_ST0,
+};
+
+/* if d == OR_TMP0, it means memory operand (address in A0) */
+static void gen_op(DisasContext *s1, int op, int ot, int d)
+{
+    GenOpFunc *gen_update_cc;
+
+    if (d != OR_TMP0) {
+        gen_op_mov_TN_reg[ot][0][d]();
+    } else {
+        gen_op_ld_T0_A0[ot + s1->mem_index]();
+    }
+    switch(op) {
+    case OP_ADCL:
+    case OP_SBBL:
+        if (s1->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s1->cc_op);
+        if (d != OR_TMP0) {
+            gen_op_arithc_T0_T1_cc[ot][op - OP_ADCL]();
+            gen_op_mov_reg_T0[ot][d]();
+        } else {
+            gen_op_arithc_mem_T0_T1_cc[ot + s1->mem_index][op - OP_ADCL]();
+        }
+        s1->cc_op = CC_OP_DYNAMIC;
+        goto the_end;
+    case OP_ADDL:
+        gen_op_addl_T0_T1();
+        s1->cc_op = CC_OP_ADDB + ot;
+        gen_update_cc = gen_op_update2_cc;
+        break;
+    case OP_SUBL:
+        gen_op_subl_T0_T1();
+        s1->cc_op = CC_OP_SUBB + ot;
+        gen_update_cc = gen_op_update2_cc;
+        break;
+    default:
+    case OP_ANDL:
+    case OP_ORL:
+    case OP_XORL:
+        gen_op_arith_T0_T1_cc[op]();
+        s1->cc_op = CC_OP_LOGICB + ot;
+        gen_update_cc = gen_op_update1_cc;
+        break;
+    case OP_CMPL:
+        gen_op_cmpl_T0_T1_cc();
+        s1->cc_op = CC_OP_SUBB + ot;
+        gen_update_cc = NULL;
+        break;
+    }
+    if (op != OP_CMPL) {
+        if (d != OR_TMP0)
+            gen_op_mov_reg_T0[ot][d]();
+        else
+            gen_op_st_T0_A0[ot + s1->mem_index]();
+    }
+    /* the flags update must happen after the memory write (precise
+       exception support) */
+    if (gen_update_cc)
+        gen_update_cc();
+ the_end: ;
+}
+
+/* if d == OR_TMP0, it means memory operand (address in A0) */
+static void gen_inc(DisasContext *s1, int ot, int d, int c)
+{
+    if (d != OR_TMP0)
+        gen_op_mov_TN_reg[ot][0][d]();
+    else
+        gen_op_ld_T0_A0[ot + s1->mem_index]();
+    if (s1->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s1->cc_op);
+    if (c > 0) {
+        gen_op_incl_T0();
+        s1->cc_op = CC_OP_INCB + ot;
+    } else {
+        gen_op_decl_T0();
+        s1->cc_op = CC_OP_DECB + ot;
+    }
+    if (d != OR_TMP0)
+        gen_op_mov_reg_T0[ot][d]();
+    else
+        gen_op_st_T0_A0[ot + s1->mem_index]();
+    gen_op_update_inc_cc();
+}
+
+static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
+{
+    if (d != OR_TMP0)
+        gen_op_mov_TN_reg[ot][0][d]();
+    else
+        gen_op_ld_T0_A0[ot + s1->mem_index]();
+    if (s != OR_TMP1)
+        gen_op_mov_TN_reg[ot][1][s]();
+    /* for zero counts, flags are not updated, so must do it dynamically */
+    if (s1->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s1->cc_op);
+
+    if (d != OR_TMP0)
+        gen_op_shift_T0_T1_cc[ot][op]();
+    else
+        gen_op_shift_mem_T0_T1_cc[ot + s1->mem_index][op]();
+    if (d != OR_TMP0)
+        gen_op_mov_reg_T0[ot][d]();
+    s1->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+}
+
+static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
+{
+    /* currently not optimized */
+    gen_op_movl_T1_im(c);
+    gen_shift(s1, op, ot, d, OR_TMP1);
+}
+
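+/* Decode the ModRM, SIB and displacement bytes of a memory operand and emit
+   the code that leaves its effective address (segment base included) in A0. */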
+static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
+{
+    target_long disp;
+    int havesib;
+    int base;
+    int index;
+    int scale;
+    int opreg;
+    int mod, rm, code, override, must_add_seg;
+
+    override = s->override;
+    must_add_seg = s->addseg;
+    if (override >= 0)
+        must_add_seg = 1;
+    mod = (modrm >> 6) & 3;
+    rm = modrm & 7;
+
+    if (s->aflag) {
+
+        havesib = 0;
+        base = rm;
+        index = 0;
+        scale = 0;
+
+        if (base == 4) {
+            havesib = 1;
+            code = ldub_code(s->pc++);
+            scale = (code >> 6) & 3;
+            index = ((code >> 3) & 7) | REX_X(s);
+            base = (code & 7);
+        }
+        base |= REX_B(s);
+
+        switch (mod) {
+        case 0:
+            if ((base & 7) == 5) {
+                base = -1;
+                disp = (int32_t)ldl_code(s->pc);
+                s->pc += 4;
+                if (CODE64(s) && !havesib) {
+                    disp += s->pc + s->rip_offset;
+                }
+            } else {
+                disp = 0;
+            }
+            break;
+        case 1:
+            disp = (int8_t)ldub_code(s->pc++);
+            break;
+        default:
+        case 2:
+            disp = ldl_code(s->pc);
+            s->pc += 4;
+            break;
+        }
+
+        if (base >= 0) {
+            /* for correct popl handling with esp */
+            if (base == 4 && s->popl_esp_hack)
+                disp += s->popl_esp_hack;
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_movq_A0_reg[base]();
+                if (disp != 0) {
+                    gen_op_addq_A0_im(disp);
+                }
+            } else
+#endif
+            {
+                gen_op_movl_A0_reg[base]();
+                if (disp != 0)
+                    gen_op_addl_A0_im(disp);
+            }
+        } else {
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_movq_A0_im(disp);
+            } else
+#endif
+            {
+                gen_op_movl_A0_im(disp);
+            }
+        }
+        /* XXX: index == 4 is always invalid */
+        if (havesib && (index != 4 || scale != 0)) {
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_addq_A0_reg_sN[scale][index]();
+            } else
+#endif
+            {
+                gen_op_addl_A0_reg_sN[scale][index]();
+            }
+        }
+        if (must_add_seg) {
+            if (override < 0) {
+                if (base == R_EBP || base == R_ESP)
+                    override = R_SS;
+                else
+                    override = R_DS;
+            }
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_addq_A0_seg(offsetof(CPUX86State,segs[override].base));
+            } else
+#endif
+            {
+                gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base));
+            }
+        }
+    } else {
+        switch (mod) {
+        case 0:
+            if (rm == 6) {
+                disp = lduw_code(s->pc);
+                s->pc += 2;
+                gen_op_movl_A0_im(disp);
+                rm = 0; /* avoid SS override */
+                goto no_rm;
+            } else {
+                disp = 0;
+            }
+            break;
+        case 1:
+            disp = (int8_t)ldub_code(s->pc++);
+            break;
+        default:
+        case 2:
+            disp = lduw_code(s->pc);
+            s->pc += 2;
+            break;
+        }
+        switch(rm) {
+        case 0:
+            gen_op_movl_A0_reg[R_EBX]();
+            gen_op_addl_A0_reg_sN[0][R_ESI]();
+            break;
+        case 1:
+            gen_op_movl_A0_reg[R_EBX]();
+            gen_op_addl_A0_reg_sN[0][R_EDI]();
+            break;
+        case 2:
+            gen_op_movl_A0_reg[R_EBP]();
+            gen_op_addl_A0_reg_sN[0][R_ESI]();
+            break;
+        case 3:
+            gen_op_movl_A0_reg[R_EBP]();
+            gen_op_addl_A0_reg_sN[0][R_EDI]();
+            break;
+        case 4:
+            gen_op_movl_A0_reg[R_ESI]();
+            break;
+        case 5:
+            gen_op_movl_A0_reg[R_EDI]();
+            break;
+        case 6:
+            gen_op_movl_A0_reg[R_EBP]();
+            break;
+        default:
+        case 7:
+            gen_op_movl_A0_reg[R_EBX]();
+            break;
+        }
+        if (disp != 0)
+            gen_op_addl_A0_im(disp);
+        gen_op_andl_A0_ffff();
+    no_rm:
+        if (must_add_seg) {
+            if (override < 0) {
+                if (rm == 2 || rm == 3 || rm == 6)
+                    override = R_SS;
+                else
+                    override = R_DS;
+            }
+            gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base));
+        }
+    }
+
+    opreg = OR_A0;
+    disp = 0;
+    *reg_ptr = opreg;
+    *offset_ptr = disp;
+}
+
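+/* Parse and skip the bytes of a ModRM memory operand without generating any
+   address computation. */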
+static void gen_nop_modrm(DisasContext *s, int modrm)
+{
+    int mod, rm, base, code;
+
+    mod = (modrm >> 6) & 3;
+    if (mod == 3)
+        return;
+    rm = modrm & 7;
+
+    if (s->aflag) {
+
+        base = rm;
+
+        if (base == 4) {
+            code = ldub_code(s->pc++);
+            base = (code & 7);
+        }
+
+        switch (mod) {
+        case 0:
+            if (base == 5) {
+                s->pc += 4;
+            }
+            break;
+        case 1:
+            s->pc++;
+            break;
+        default:
+        case 2:
+            s->pc += 4;
+            break;
+        }
+    } else {
+        switch (mod) {
+        case 0:
+            if (rm == 6) {
+                s->pc += 2;
+            }
+            break;
+        case 1:
+            s->pc++;
+            break;
+        default:
+        case 2:
+            s->pc += 2;
+            break;
+        }
+    }
+}
+
+/* used for LEA and MOV AX, mem */
+static void gen_add_A0_ds_seg(DisasContext *s)
+{
+    int override, must_add_seg;
+    must_add_seg = s->addseg;
+    override = R_DS;
+    if (s->override >= 0) {
+        override = s->override;
+        must_add_seg = 1;
+    } else {
+        override = R_DS;
+    }
+    if (must_add_seg) {
+#ifdef TARGET_X86_64
+        if (CODE64(s)) {
+            gen_op_addq_A0_seg(offsetof(CPUX86State,segs[override].base));
+        } else
+#endif
+        {
+            gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base));
+        }
+    }
+}
+
+/* generate modrm memory load or store of 'reg'. TMP0 holds the value when
+   reg == OR_TMP0 */
+static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
+{
+    int mod, rm, opreg, disp;
+
+    mod = (modrm >> 6) & 3;
+    rm = (modrm & 7) | REX_B(s);
+    if (mod == 3) {
+        if (is_store) {
+            if (reg != OR_TMP0)
+                gen_op_mov_TN_reg[ot][0][reg]();
+            gen_op_mov_reg_T0[ot][rm]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][rm]();
+            if (reg != OR_TMP0)
+                gen_op_mov_reg_T0[ot][reg]();
+        }
+    } else {
+        gen_lea_modrm(s, modrm, &opreg, &disp);
+        if (is_store) {
+            if (reg != OR_TMP0)
+                gen_op_mov_TN_reg[ot][0][reg]();
+            gen_op_st_T0_A0[ot + s->mem_index]();
+        } else {
+            gen_op_ld_T0_A0[ot + s->mem_index]();
+            if (reg != OR_TMP0)
+                gen_op_mov_reg_T0[ot][reg]();
+        }
+    }
+}
+
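+/* Fetch an immediate of the given operand size from the code stream and
+   advance s->pc past it. */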
+static inline uint32_t insn_get(DisasContext *s, int ot)
+{
+    uint32_t ret;
+
+    switch(ot) {
+    case OT_BYTE:
+        ret = ldub_code(s->pc);
+        s->pc++;
+        break;
+    case OT_WORD:
+        ret = lduw_code(s->pc);
+        s->pc += 2;
+        break;
+    default:
+    case OT_LONG:
+        ret = ldl_code(s->pc);
+        s->pc += 4;
+        break;
+    }
+    return ret;
+}
+
+static inline int insn_const_size(unsigned int ot)
+{
+    if (ot <= OT_LONG)
+        return 1 << ot;
+    else
+        return 4;
+}
+
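+/* Jump to eip: chain directly to the target TB when it lies on a page already
+   covered by the current one, otherwise exit through the slow path. */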
+static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
+{
+    TranslationBlock *tb;
+    target_ulong pc;
+
+    pc = s->cs_base + eip;
+    tb = s->tb;
+    /* NOTE: we handle the case where the TB spans two pages here */
+    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
+        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK))  {
+        /* jump to same page: we can use a direct jump */
+        if (tb_num == 0)
+            gen_op_goto_tb0(TBPARAM(tb));
+        else
+            gen_op_goto_tb1(TBPARAM(tb));
+        gen_jmp_im(eip);
+        gen_op_movl_T0_im((long)tb + tb_num);
+        gen_op_exit_tb();
+    } else {
+        /* jump to another page: currently not optimized */
+        gen_jmp_im(eip);
+        gen_eob(s);
+    }
+}
+
+static inline void gen_jcc(DisasContext *s, int b,
+                           target_ulong val, target_ulong next_eip)
+{
+    TranslationBlock *tb;
+    int inv, jcc_op;
+    GenOpFunc1 *func;
+    target_ulong tmp;
+    int l1, l2;
+
+    inv = b & 1;
+    jcc_op = (b >> 1) & 7;
+
+    if (s->jmp_opt) {
+#ifdef VBOX
+        gen_check_external_event();
+#endif /* VBOX */
+        switch(s->cc_op) {
+            /* we optimize the cmp/jcc case */
+        case CC_OP_SUBB:
+        case CC_OP_SUBW:
+        case CC_OP_SUBL:
+        case CC_OP_SUBQ:
+            func = gen_jcc_sub[s->cc_op - CC_OP_SUBB][jcc_op];
+            break;
+
+            /* some jumps are easy to compute */
+        case CC_OP_ADDB:
+        case CC_OP_ADDW:
+        case CC_OP_ADDL:
+        case CC_OP_ADDQ:
+
+        case CC_OP_ADCB:
+        case CC_OP_ADCW:
+        case CC_OP_ADCL:
+        case CC_OP_ADCQ:
+
+        case CC_OP_SBBB:
+        case CC_OP_SBBW:
+        case CC_OP_SBBL:
+        case CC_OP_SBBQ:
+
+        case CC_OP_LOGICB:
+        case CC_OP_LOGICW:
+        case CC_OP_LOGICL:
+        case CC_OP_LOGICQ:
+
+        case CC_OP_INCB:
+        case CC_OP_INCW:
+        case CC_OP_INCL:
+        case CC_OP_INCQ:
+
+        case CC_OP_DECB:
+        case CC_OP_DECW:
+        case CC_OP_DECL:
+        case CC_OP_DECQ:
+
+        case CC_OP_SHLB:
+        case CC_OP_SHLW:
+        case CC_OP_SHLL:
+        case CC_OP_SHLQ:
+
+        case CC_OP_SARB:
+        case CC_OP_SARW:
+        case CC_OP_SARL:
+        case CC_OP_SARQ:
+            switch(jcc_op) {
+            case JCC_Z:
+                func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 4][jcc_op];
+                break;
+            case JCC_S:
+                func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 4][jcc_op];
+                break;
+            default:
+                func = NULL;
+                break;
+            }
+            break;
+        default:
+            func = NULL;
+            break;
+        }
+
+        if (s->cc_op != CC_OP_DYNAMIC) {
+            gen_op_set_cc_op(s->cc_op);
+            s->cc_op = CC_OP_DYNAMIC;
+        }
+
+        if (!func) {
+            gen_setcc_slow[jcc_op]();
+            func = gen_op_jnz_T0_label;
+        }
+
+        if (inv) {
+            tmp = val;
+            val = next_eip;
+            next_eip = tmp;
+        }
+        tb = s->tb;
+
+        l1 = gen_new_label();
+        func(l1);
+
+        gen_goto_tb(s, 0, next_eip);
+
+        gen_set_label(l1);
+        gen_goto_tb(s, 1, val);
+
+        s->is_jmp = 3;
+    } else {
+
+        if (s->cc_op != CC_OP_DYNAMIC) {
+            gen_op_set_cc_op(s->cc_op);
+            s->cc_op = CC_OP_DYNAMIC;
+        }
+        gen_setcc_slow[jcc_op]();
+        if (inv) {
+            tmp = val;
+            val = next_eip;
+            next_eip = tmp;
+        }
+        l1 = gen_new_label();
+        l2 = gen_new_label();
+        gen_op_jnz_T0_label(l1);
+        gen_jmp_im(next_eip);
+        gen_op_jmp_label(l2);
+        gen_set_label(l1);
+        gen_jmp_im(val);
+        gen_set_label(l2);
+        gen_eob(s);
+    }
+}
+
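+/* Compute condition 'b' into T0 (0 or 1), using the cc_op specific helpers
+   when possible and the generic slow helpers otherwise. */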
+static void gen_setcc(DisasContext *s, int b)
+{
+    int inv, jcc_op;
+    GenOpFunc *func;
+
+    inv = b & 1;
+    jcc_op = (b >> 1) & 7;
+    switch(s->cc_op) {
+        /* we optimize the cmp/jcc case */
+    case CC_OP_SUBB:
+    case CC_OP_SUBW:
+    case CC_OP_SUBL:
+    case CC_OP_SUBQ:
+        func = gen_setcc_sub[s->cc_op - CC_OP_SUBB][jcc_op];
+        if (!func)
+            goto slow_jcc;
+        break;
+
+        /* some jumps are easy to compute */
+    case CC_OP_ADDB:
+    case CC_OP_ADDW:
+    case CC_OP_ADDL:
+    case CC_OP_ADDQ:
+
+    case CC_OP_LOGICB:
+    case CC_OP_LOGICW:
+    case CC_OP_LOGICL:
+    case CC_OP_LOGICQ:
+
+    case CC_OP_INCB:
+    case CC_OP_INCW:
+    case CC_OP_INCL:
+    case CC_OP_INCQ:
+
+    case CC_OP_DECB:
+    case CC_OP_DECW:
+    case CC_OP_DECL:
+    case CC_OP_DECQ:
+
+    case CC_OP_SHLB:
+    case CC_OP_SHLW:
+    case CC_OP_SHLL:
+    case CC_OP_SHLQ:
+        switch(jcc_op) {
+        case JCC_Z:
+            func = gen_setcc_sub[(s->cc_op - CC_OP_ADDB) % 4][jcc_op];
+            break;
+        case JCC_S:
+            func = gen_setcc_sub[(s->cc_op - CC_OP_ADDB) % 4][jcc_op];
+            break;
+        default:
+            goto slow_jcc;
+        }
+        break;
+    default:
+    slow_jcc:
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        func = gen_setcc_slow[jcc_op];
+        break;
+    }
+    func();
+    if (inv) {
+        gen_op_xor_T0_1();
+    }
+}
+
+/* move T0 to seg_reg and compute if the CPU state may change. Never
+   call this function with seg_reg == R_CS */
+static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
+{
+    if (s->pe && !s->vm86) {
+        /* XXX: optimize by finding processor state dynamically */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_jmp_im(cur_eip);
+        gen_op_movl_seg_T0(seg_reg);
+        /* abort translation because the addseg value may change or
+           because ss32 may change. For R_SS, translation must always
+           stop, as special handling is needed to disable hardware
+           interrupts for the next instruction */
+        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
+            s->is_jmp = 3;
+    } else {
+        gen_op_movl_seg_T0_vm(offsetof(CPUX86State,segs[seg_reg]));
+        if (seg_reg == R_SS)
+            s->is_jmp = 3;
+    }
+}
+
+static inline void gen_stack_update(DisasContext *s, int addend)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        if (addend == 8)
+            gen_op_addq_ESP_8();
+        else
+            gen_op_addq_ESP_im(addend);
+    } else
+#endif
+    if (s->ss32) {
+        if (addend == 2)
+            gen_op_addl_ESP_2();
+        else if (addend == 4)
+            gen_op_addl_ESP_4();
+        else
+            gen_op_addl_ESP_im(addend);
+    } else {
+        if (addend == 2)
+            gen_op_addw_ESP_2();
+        else if (addend == 4)
+            gen_op_addw_ESP_4();
+        else
+            gen_op_addw_ESP_im(addend);
+    }
+}
+
+/* generate a push. It depends on ss32, addseg and dflag */
+static void gen_push_T0(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_movq_A0_reg[R_ESP]();
+        if (s->dflag) {
+            gen_op_subq_A0_8();
+            gen_op_st_T0_A0[OT_QUAD + s->mem_index]();
+        } else {
+            gen_op_subq_A0_2();
+            gen_op_st_T0_A0[OT_WORD + s->mem_index]();
+        }
+        gen_op_movq_ESP_A0();
+    } else
+#endif
+    {
+        gen_op_movl_A0_reg[R_ESP]();
+        if (!s->dflag)
+            gen_op_subl_A0_2();
+        else
+            gen_op_subl_A0_4();
+        if (s->ss32) {
+            if (s->addseg) {
+                gen_op_movl_T1_A0();
+                gen_op_addl_A0_SS();
+            }
+        } else {
+            gen_op_andl_A0_ffff();
+            gen_op_movl_T1_A0();
+            gen_op_addl_A0_SS();
+        }
+        gen_op_st_T0_A0[s->dflag + 1 + s->mem_index]();
+        if (s->ss32 && !s->addseg)
+            gen_op_movl_ESP_A0();
+        else
+            gen_op_mov_reg_T1[s->ss32 + 1][R_ESP]();
+    }
+}
+
+/* generate a push. It depends on ss32, addseg and dflag */
+/* slower version for T1, only used for call Ev */
+static void gen_push_T1(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_movq_A0_reg[R_ESP]();
+        if (s->dflag) {
+            gen_op_subq_A0_8();
+            gen_op_st_T1_A0[OT_QUAD + s->mem_index]();
+        } else {
+            gen_op_subq_A0_2();
+            gen_op_st_T1_A0[OT_WORD + s->mem_index]();
+        }
+        gen_op_movq_ESP_A0();
+    } else
+#endif
+    {
+        gen_op_movl_A0_reg[R_ESP]();
+        if (!s->dflag)
+            gen_op_subl_A0_2();
+        else
+            gen_op_subl_A0_4();
+        if (s->ss32) {
+            if (s->addseg) {
+                gen_op_addl_A0_SS();
+            }
+        } else {
+            gen_op_andl_A0_ffff();
+            gen_op_addl_A0_SS();
+        }
+        gen_op_st_T1_A0[s->dflag + 1 + s->mem_index]();
+
+        if (s->ss32 && !s->addseg)
+            gen_op_movl_ESP_A0();
+        else
+            gen_stack_update(s, (-2) << s->dflag);
+    }
+}
+
+/* two step pop is necessary for precise exceptions */
+static void gen_pop_T0(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_movq_A0_reg[R_ESP]();
+        gen_op_ld_T0_A0[(s->dflag ? OT_QUAD : OT_WORD) + s->mem_index]();
+    } else
+#endif
+    {
+        gen_op_movl_A0_reg[R_ESP]();
+        if (s->ss32) {
+            if (s->addseg)
+                gen_op_addl_A0_SS();
+        } else {
+            gen_op_andl_A0_ffff();
+            gen_op_addl_A0_SS();
+        }
+        gen_op_ld_T0_A0[s->dflag + 1 + s->mem_index]();
+    }
+}
+
+static void gen_pop_update(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s) && s->dflag) {
+        gen_stack_update(s, 8);
+    } else
+#endif
+    {
+        gen_stack_update(s, 2 << s->dflag);
+    }
+}
+
+static void gen_stack_A0(DisasContext *s)
+{
+    gen_op_movl_A0_ESP();
+    if (!s->ss32)
+        gen_op_andl_A0_ffff();
+    gen_op_movl_T1_A0();
+    if (s->addseg)
+        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_SS].base));
+}
+
+/* NOTE: wrap around in 16 bit not fully handled */
+static void gen_pusha(DisasContext *s)
+{
+    int i;
+    gen_op_movl_A0_ESP();
+    gen_op_addl_A0_im(-16 <<  s->dflag);
+    if (!s->ss32)
+        gen_op_andl_A0_ffff();
+    gen_op_movl_T1_A0();
+    if (s->addseg)
+        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_SS].base));
+    for(i = 0;i < 8; i++) {
+        gen_op_mov_TN_reg[OT_LONG][0][7 - i]();
+        gen_op_st_T0_A0[OT_WORD + s->dflag + s->mem_index]();
+        gen_op_addl_A0_im(2 <<  s->dflag);
+    }
+    gen_op_mov_reg_T1[OT_WORD + s->ss32][R_ESP]();
+}
+
+/* NOTE: wrap around in 16 bit not fully handled */
+static void gen_popa(DisasContext *s)
+{
+    int i;
+    gen_op_movl_A0_ESP();
+    if (!s->ss32)
+        gen_op_andl_A0_ffff();
+    gen_op_movl_T1_A0();
+    gen_op_addl_T1_im(16 <<  s->dflag);
+    if (s->addseg)
+        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_SS].base));
+    for(i = 0;i < 8; i++) {
+        /* ESP is not reloaded */
+        if (i != 3) {
+            gen_op_ld_T0_A0[OT_WORD + s->dflag + s->mem_index]();
+            gen_op_mov_reg_T0[OT_WORD + s->dflag][7 - i]();
+        }
+        gen_op_addl_A0_im(2 <<  s->dflag);
+    }
+    gen_op_mov_reg_T1[OT_WORD + s->ss32][R_ESP]();
+}
+
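+/* Translate ENTER: push the old frame pointer, copy the outer frame pointers
+   when level != 0, set up the new EBP and lower the stack pointer by
+   esp_addend plus the copied frame area. */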
+static void gen_enter(DisasContext *s, int esp_addend, int level)
+{
+    int ot, opsize;
+
+    level &= 0x1f;
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        ot = s->dflag ? OT_QUAD : OT_WORD;
+        opsize = 1 << ot;
+
+        gen_op_movl_A0_ESP();
+        gen_op_addq_A0_im(-opsize);
+        gen_op_movl_T1_A0();
+
+        /* push bp */
+        gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
+        gen_op_st_T0_A0[ot + s->mem_index]();
+        if (level) {
+            gen_op_enter64_level(level, (ot == OT_QUAD));
+        }
+        gen_op_mov_reg_T1[ot][R_EBP]();
+        gen_op_addl_T1_im( -esp_addend + (-opsize * level) );
+        gen_op_mov_reg_T1[OT_QUAD][R_ESP]();
+    } else
+#endif
+    {
+        ot = s->dflag + OT_WORD;
+        opsize = 2 << s->dflag;
+
+        gen_op_movl_A0_ESP();
+        gen_op_addl_A0_im(-opsize);
+        if (!s->ss32)
+            gen_op_andl_A0_ffff();
+        gen_op_movl_T1_A0();
+        if (s->addseg)
+            gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_SS].base));
+        /* push bp */
+        gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
+        gen_op_st_T0_A0[ot + s->mem_index]();
+        if (level) {
+            gen_op_enter_level(level, s->dflag);
+        }
+        gen_op_mov_reg_T1[ot][R_EBP]();
+        gen_op_addl_T1_im( -esp_addend + (-opsize * level) );
+        gen_op_mov_reg_T1[OT_WORD + s->ss32][R_ESP]();
+    }
+}
+
+static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
+{
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op);
+    gen_jmp_im(cur_eip);
+    gen_op_raise_exception(trapno);
+    s->is_jmp = 3;
+}
+
+/* an interrupt is different from an exception because of the
+   privilege checks */
+static void gen_interrupt(DisasContext *s, int intno,
+                          target_ulong cur_eip, target_ulong next_eip)
+{
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op);
+    gen_jmp_im(cur_eip);
+    gen_op_raise_interrupt(intno, (int)(next_eip - cur_eip));
+    s->is_jmp = 3;
+}
+
+static void gen_debug(DisasContext *s, target_ulong cur_eip)
+{
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op);
+    gen_jmp_im(cur_eip);
+    gen_op_debug();
+    s->is_jmp = 3;
+}
+
+/* generate a generic end of block. Trace exception is also generated
+   if needed */
+static void gen_eob(DisasContext *s)
+{
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op);
+    if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
+        gen_op_reset_inhibit_irq();
+    }
+    if (s->singlestep_enabled) {
+        gen_op_debug();
+    } else if (s->tf) {
+        gen_op_single_step();
+    } else {
+        gen_op_movl_T0_0();
+        gen_op_exit_tb();
+    }
+    s->is_jmp = 3;
+}
+
+/* generate a jump to eip. No segment change must happen before as a
+   direct call to the next block may occur */
+static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
+{
+    if (s->jmp_opt) {
+#ifdef VBOX
+        gen_check_external_event();
+#endif /* VBOX */
+        if (s->cc_op != CC_OP_DYNAMIC) {
+            gen_op_set_cc_op(s->cc_op);
+            s->cc_op = CC_OP_DYNAMIC;
+        }
+        gen_goto_tb(s, tb_num, eip);
+        s->is_jmp = 3;
+    } else {
+        gen_jmp_im(eip);
+        gen_eob(s);
+    }
+}
+
+static void gen_jmp(DisasContext *s, target_ulong eip)
+{
+    gen_jmp_tb(s, eip, 0);
+}
+
+static void gen_movtl_T0_im(target_ulong val)
+{
+#ifdef TARGET_X86_64
+    if ((int32_t)val == val) {
+        gen_op_movl_T0_im(val);
+    } else {
+        gen_op_movq_T0_im64(val >> 32, val);
+    }
+#else
+    gen_op_movl_T0_im(val);
+#endif
+}
+
+static void gen_movtl_T1_im(target_ulong val)
+{
+#ifdef TARGET_X86_64
+    if ((int32_t)val == val) {
+        gen_op_movl_T1_im(val);
+    } else {
+        gen_op_movq_T1_im64(val >> 32, val);
+    }
+#else
+    gen_op_movl_T1_im(val);
+#endif
+}
+
+static void gen_add_A0_im(DisasContext *s, int val)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s))
+        gen_op_addq_A0_im(val);
+    else
+#endif
+        gen_op_addl_A0_im(val);
+}
+
+static GenOpFunc1 *gen_ldq_env_A0[3] = {
+    gen_op_ldq_raw_env_A0,
+#ifndef CONFIG_USER_ONLY
+    gen_op_ldq_kernel_env_A0,
+    gen_op_ldq_user_env_A0,
+#endif
+};
+
+static GenOpFunc1 *gen_stq_env_A0[3] = {
+    gen_op_stq_raw_env_A0,
+#ifndef CONFIG_USER_ONLY
+    gen_op_stq_kernel_env_A0,
+    gen_op_stq_user_env_A0,
+#endif
+};
+
+static GenOpFunc1 *gen_ldo_env_A0[3] = {
+    gen_op_ldo_raw_env_A0,
+#ifndef CONFIG_USER_ONLY
+    gen_op_ldo_kernel_env_A0,
+    gen_op_ldo_user_env_A0,
+#endif
+};
+
+static GenOpFunc1 *gen_sto_env_A0[3] = {
+    gen_op_sto_raw_env_A0,
+#ifndef CONFIG_USER_ONLY
+    gen_op_sto_kernel_env_A0,
+    gen_op_sto_user_env_A0,
+#endif
+};
+
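+/* Marker used in the tables below for opcodes that need dedicated handling in
+   gen_sse() rather than a plain two-operand helper. */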
+#define SSE_SPECIAL ((GenOpFunc2 *)1)
+
+#define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm }
+#define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \
+                     gen_op_ ## x ## ss, gen_op_ ## x ## sd, }
+
+static GenOpFunc2 *sse_op_table1[256][4] = {
+    /* pure SSE operations */
+    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
+    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
+    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
+    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
+    [0x14] = { gen_op_punpckldq_xmm, gen_op_punpcklqdq_xmm },
+    [0x15] = { gen_op_punpckhdq_xmm, gen_op_punpckhqdq_xmm },
+    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
+    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
+
+    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
+    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
+    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
+    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntps, movntpd */
+    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
+    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
+    [0x2e] = { gen_op_ucomiss, gen_op_ucomisd },
+    [0x2f] = { gen_op_comiss, gen_op_comisd },
+    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
+    [0x51] = SSE_FOP(sqrt),
+    [0x52] = { gen_op_rsqrtps, NULL, gen_op_rsqrtss, NULL },
+    [0x53] = { gen_op_rcpps, NULL, gen_op_rcpss, NULL },
+    [0x54] = { gen_op_pand_xmm, gen_op_pand_xmm }, /* andps, andpd */
+    [0x55] = { gen_op_pandn_xmm, gen_op_pandn_xmm }, /* andnps, andnpd */
+    [0x56] = { gen_op_por_xmm, gen_op_por_xmm }, /* orps, orpd */
+    [0x57] = { gen_op_pxor_xmm, gen_op_pxor_xmm }, /* xorps, xorpd */
+    [0x58] = SSE_FOP(add),
+    [0x59] = SSE_FOP(mul),
+    [0x5a] = { gen_op_cvtps2pd, gen_op_cvtpd2ps,
+               gen_op_cvtss2sd, gen_op_cvtsd2ss },
+    [0x5b] = { gen_op_cvtdq2ps, gen_op_cvtps2dq, gen_op_cvttps2dq },
+    [0x5c] = SSE_FOP(sub),
+    [0x5d] = SSE_FOP(min),
+    [0x5e] = SSE_FOP(div),
+    [0x5f] = SSE_FOP(max),
+
+    [0xc2] = SSE_FOP(cmpeq),
+    [0xc6] = { (GenOpFunc2 *)gen_op_shufps, (GenOpFunc2 *)gen_op_shufpd },
+
+    /* MMX ops and their SSE extensions */
+    [0x60] = MMX_OP2(punpcklbw),
+    [0x61] = MMX_OP2(punpcklwd),
+    [0x62] = MMX_OP2(punpckldq),
+    [0x63] = MMX_OP2(packsswb),
+    [0x64] = MMX_OP2(pcmpgtb),
+    [0x65] = MMX_OP2(pcmpgtw),
+    [0x66] = MMX_OP2(pcmpgtl),
+    [0x67] = MMX_OP2(packuswb),
+    [0x68] = MMX_OP2(punpckhbw),
+    [0x69] = MMX_OP2(punpckhwd),
+    [0x6a] = MMX_OP2(punpckhdq),
+    [0x6b] = MMX_OP2(packssdw),
+    [0x6c] = { NULL, gen_op_punpcklqdq_xmm },
+    [0x6d] = { NULL, gen_op_punpckhqdq_xmm },
+    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
+    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
+    [0x70] = { (GenOpFunc2 *)gen_op_pshufw_mmx,
+               (GenOpFunc2 *)gen_op_pshufd_xmm,
+               (GenOpFunc2 *)gen_op_pshufhw_xmm,
+               (GenOpFunc2 *)gen_op_pshuflw_xmm },
+    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
+    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
+    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
+    [0x74] = MMX_OP2(pcmpeqb),
+    [0x75] = MMX_OP2(pcmpeqw),
+    [0x76] = MMX_OP2(pcmpeql),
+    [0x77] = { SSE_SPECIAL }, /* emms */
+    [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps },
+    [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps },
+    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
+    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
+    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
+    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
+    [0xd0] = { NULL, gen_op_addsubpd, NULL, gen_op_addsubps },
+    [0xd1] = MMX_OP2(psrlw),
+    [0xd2] = MMX_OP2(psrld),
+    [0xd3] = MMX_OP2(psrlq),
+    [0xd4] = MMX_OP2(paddq),
+    [0xd5] = MMX_OP2(pmullw),
+    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
+    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
+    [0xd8] = MMX_OP2(psubusb),
+    [0xd9] = MMX_OP2(psubusw),
+    [0xda] = MMX_OP2(pminub),
+    [0xdb] = MMX_OP2(pand),
+    [0xdc] = MMX_OP2(paddusb),
+    [0xdd] = MMX_OP2(paddusw),
+    [0xde] = MMX_OP2(pmaxub),
+    [0xdf] = MMX_OP2(pandn),
+    [0xe0] = MMX_OP2(pavgb),
+    [0xe1] = MMX_OP2(psraw),
+    [0xe2] = MMX_OP2(psrad),
+    [0xe3] = MMX_OP2(pavgw),
+    [0xe4] = MMX_OP2(pmulhuw),
+    [0xe5] = MMX_OP2(pmulhw),
+    [0xe6] = { NULL, gen_op_cvttpd2dq, gen_op_cvtdq2pd, gen_op_cvtpd2dq },
+    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
+    [0xe8] = MMX_OP2(psubsb),
+    [0xe9] = MMX_OP2(psubsw),
+    [0xea] = MMX_OP2(pminsw),
+    [0xeb] = MMX_OP2(por),
+    [0xec] = MMX_OP2(paddsb),
+    [0xed] = MMX_OP2(paddsw),
+    [0xee] = MMX_OP2(pmaxsw),
+    [0xef] = MMX_OP2(pxor),
+    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
+    [0xf1] = MMX_OP2(psllw),
+    [0xf2] = MMX_OP2(pslld),
+    [0xf3] = MMX_OP2(psllq),
+    [0xf4] = MMX_OP2(pmuludq),
+    [0xf5] = MMX_OP2(pmaddwd),
+    [0xf6] = MMX_OP2(psadbw),
+    [0xf7] = MMX_OP2(maskmov),
+    [0xf8] = MMX_OP2(psubb),
+    [0xf9] = MMX_OP2(psubw),
+    [0xfa] = MMX_OP2(psubl),
+    [0xfb] = MMX_OP2(psubq),
+    [0xfc] = MMX_OP2(paddb),
+    [0xfd] = MMX_OP2(paddw),
+    [0xfe] = MMX_OP2(paddl),
+};
+
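+/* Immediate-count MMX/SSE shift group (0f 71/72/73): first index is
+   (size group * 8) + ModRM reg field, second index selects MMX or XMM. */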
+static GenOpFunc2 *sse_op_table2[3 * 8][2] = {
+    [0 + 2] = MMX_OP2(psrlw),
+    [0 + 4] = MMX_OP2(psraw),
+    [0 + 6] = MMX_OP2(psllw),
+    [8 + 2] = MMX_OP2(psrld),
+    [8 + 4] = MMX_OP2(psrad),
+    [8 + 6] = MMX_OP2(pslld),
+    [16 + 2] = MMX_OP2(psrlq),
+    [16 + 3] = { NULL, gen_op_psrldq_xmm },
+    [16 + 6] = MMX_OP2(psllq),
+    [16 + 7] = { NULL, gen_op_pslldq_xmm },
+};
+
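+/* Scalar integer<->float conversions in groups of four: cvtsi2ss/sd plus
+   their 64-bit (cvtsq2*) forms, then the truncating cvtt*2si variants,
+   then the rounding cvt*2si variants. */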
+static GenOpFunc1 *sse_op_table3[4 * 3] = {
+    gen_op_cvtsi2ss,
+    gen_op_cvtsi2sd,
+    X86_64_ONLY(gen_op_cvtsq2ss),
+    X86_64_ONLY(gen_op_cvtsq2sd),
+
+    gen_op_cvttss2si,
+    gen_op_cvttsd2si,
+    X86_64_ONLY(gen_op_cvttss2sq),
+    X86_64_ONLY(gen_op_cvttsd2sq),
+
+    gen_op_cvtss2si,
+    gen_op_cvtsd2si,
+    X86_64_ONLY(gen_op_cvtss2sq),
+    X86_64_ONLY(gen_op_cvtsd2sq),
+};
+
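+/* Compare handlers for 0x0F C2, indexed by the imm8 predicate
+   (0 = eq, 1 = lt, 2 = le, 3 = unord, 4 = neq, 5 = nlt, 6 = nle, 7 = ord)
+   and by the prefix column. */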
+static GenOpFunc2 *sse_op_table4[8][4] = {
+    SSE_FOP(cmpeq),
+    SSE_FOP(cmplt),
+    SSE_FOP(cmple),
+    SSE_FOP(cmpunord),
+    SSE_FOP(cmpneq),
+    SSE_FOP(cmpnlt),
+    SSE_FOP(cmpnle),
+    SSE_FOP(cmpord),
+};
+
+static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
+{
+    int b1, op1_offset, op2_offset, is_xmm, val, ot;
+    int modrm, mod, rm, reg, reg_addr, offset_addr;
+    GenOpFunc2 *sse_op2;
+    GenOpFunc3 *sse_op3;
+
+    b &= 0xff;
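+    /* Map the mandatory prefix onto the table column:
+       0 = none (MMX form), 1 = 0x66, 2 = 0xF3, 3 = 0xF2. */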
+    if (s->prefix & PREFIX_DATA)
+        b1 = 1;
+    else if (s->prefix & PREFIX_REPZ)
+        b1 = 2;
+    else if (s->prefix & PREFIX_REPNZ)
+        b1 = 3;
+    else
+        b1 = 0;
+    sse_op2 = sse_op_table1[b][b1];
+    if (!sse_op2)
+        goto illegal_op;
+    if (b <= 0x5f || b == 0xc6 || b == 0xc2) {
+        is_xmm = 1;
+    } else {
+        if (b1 == 0) {
+            /* MMX case */
+            is_xmm = 0;
+        } else {
+            is_xmm = 1;
+        }
+    }
+    /* simple MMX/SSE operation */
+    if (s->flags & HF_TS_MASK) {
+        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+        return;
+    }
+    if (s->flags & HF_EM_MASK) {
+    illegal_op:
+        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+        return;
+    }
+    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
+        goto illegal_op;
+    if (b == 0x77) {
+        /* emms */
+        gen_op_emms();
+        return;
+    }
+    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
+       the static cpu state) */
+    if (!is_xmm) {
+        gen_op_enter_mmx();
+    }
+
+    modrm = ldub_code(s->pc++);
+    reg = ((modrm >> 3) & 7);
+    if (is_xmm)
+        reg |= rex_r;
+    mod = (modrm >> 6) & 3;
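+    /* SSE_SPECIAL entries need per-instruction handling; everything else
+       is dispatched through the generic two-operand path further down. */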
+    if (sse_op2 == SSE_SPECIAL) {
+        b |= (b1 << 8);
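+        /* From here on b also carries the prefix column in bits 8-9,
+           so e.g. case 0x1e7 below is the 0x66-prefixed form of 0x0F E7. */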
+        switch(b) {
+        case 0x0e7: /* movntq */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,fpregs[reg].mmx));
+            break;
+        case 0x1e7: /* movntdq */
+        case 0x02b: /* movntps */
+        case 0x12b: /* movntpd */
+        case 0x3f0: /* lddqu */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
+            break;
+        case 0x6e: /* movd mm, ea */
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                gen_op_movq_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
+            } else
+#endif
+            {
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+                gen_op_movl_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
+            }
+            break;
+        case 0x16e: /* movd xmm, ea */
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                gen_op_movq_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg]));
+            } else
+#endif
+            {
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+                gen_op_movl_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg]));
+            }
+            break;
+        case 0x6f: /* movq mm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,fpregs[reg].mmx));
+            } else {
+                rm = (modrm & 7);
+                gen_op_movq(offsetof(CPUX86State,fpregs[reg].mmx),
+                            offsetof(CPUX86State,fpregs[rm].mmx));
+            }
+            break;
+        case 0x010: /* movups */
+        case 0x110: /* movupd */
+        case 0x028: /* movaps */
+        case 0x128: /* movapd */
+        case 0x16f: /* movdqa xmm, ea */
+        case 0x26f: /* movdqu xmm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
+                            offsetof(CPUX86State,xmm_regs[rm]));
+            }
+            break;
+        case 0x210: /* movss xmm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+                gen_op_movl_T0_0();
+                gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
+                gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+                gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+            }
+            break;
+        case 0x310: /* movsd xmm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_op_movl_T0_0();
+                gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+                gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            }
+            break;
+        case 0x012: /* movlps */
+        case 0x112: /* movlpd */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                /* movhlps */
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+            }
+            break;
+        case 0x212: /* movsldup */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
+            }
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+            break;
+        case 0x312: /* movddup */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            }
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            break;
+        case 0x016: /* movhps */
+        case 0x116: /* movhpd */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+            } else {
+                /* movlhps */
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            }
+            break;
+        case 0x216: /* movshdup */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
+            }
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+            break;
+        case 0x7e: /* movd ea, mm */
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_op_movq_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+            } else
+#endif
+            {
+                gen_op_movl_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+            }
+            break;
+        case 0x17e: /* movd ea, xmm */
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_op_movq_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg]));
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+            } else
+#endif
+            {
+                gen_op_movl_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg]));
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+            }
+            break;
+        case 0x27e: /* movq xmm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            }
+            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+            break;
+        case 0x7f: /* movq ea, mm */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,fpregs[reg].mmx));
+            } else {
+                rm = (modrm & 7);
+                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
+                            offsetof(CPUX86State,fpregs[reg].mmx));
+            }
+            break;
+        case 0x011: /* movups */
+        case 0x111: /* movupd */
+        case 0x029: /* movaps */
+        case 0x129: /* movapd */
+        case 0x17f: /* movdqa ea, xmm */
+        case 0x27f: /* movdqu ea, xmm */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
+                            offsetof(CPUX86State,xmm_regs[reg]));
+            }
+            break;
+        case 0x211: /* movss ea, xmm */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_movl_T0_env(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+                gen_op_st_T0_A0[OT_LONG + s->mem_index]();
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+            }
+            break;
+        case 0x311: /* movsd ea, xmm */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            }
+            break;
+        case 0x013: /* movlps */
+        case 0x113: /* movlpd */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                goto illegal_op;
+            }
+            break;
+        case 0x017: /* movhps */
+        case 0x117: /* movhpd */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+            } else {
+                goto illegal_op;
+            }
+            break;
+        case 0x71: /* shift mm, im */
+        case 0x72:
+        case 0x73:
+        case 0x171: /* shift xmm, im */
+        case 0x172:
+        case 0x173:
+            val = ldub_code(s->pc++);
+            if (is_xmm) {
+                gen_op_movl_T0_im(val);
+                gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+                gen_op_movl_T0_0();
+                gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(1)));
+                op1_offset = offsetof(CPUX86State,xmm_t0);
+            } else {
+                gen_op_movl_T0_im(val);
+                gen_op_movl_env_T0(offsetof(CPUX86State,mmx_t0.MMX_L(0)));
+                gen_op_movl_T0_0();
+                gen_op_movl_env_T0(offsetof(CPUX86State,mmx_t0.MMX_L(1)));
+                op1_offset = offsetof(CPUX86State,mmx_t0);
+            }
+            sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + ((modrm >> 3) & 7)][b1];
+            if (!sse_op2)
+                goto illegal_op;
+            if (is_xmm) {
+                rm = (modrm & 7) | REX_B(s);
+                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+            } else {
+                rm = (modrm & 7);
+                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+            }
+            sse_op2(op2_offset, op1_offset);
+            break;
+        case 0x050: /* movmskps */
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_movmskps(offsetof(CPUX86State,xmm_regs[rm]));
+            gen_op_mov_reg_T0[OT_LONG][reg]();
+            break;
+        case 0x150: /* movmskpd */
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_movmskpd(offsetof(CPUX86State,xmm_regs[rm]));
+            gen_op_mov_reg_T0[OT_LONG][reg]();
+            break;
+        case 0x02a: /* cvtpi2ps */
+        case 0x12a: /* cvtpi2pd */
+            gen_op_enter_mmx();
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                op2_offset = offsetof(CPUX86State,mmx_t0);
+                gen_ldq_env_A0[s->mem_index >> 2](op2_offset);
+            } else {
+                rm = (modrm & 7);
+                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+            }
+            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+            switch(b >> 8) {
+            case 0x0:
+                gen_op_cvtpi2ps(op1_offset, op2_offset);
+                break;
+            default:
+            case 0x1:
+                gen_op_cvtpi2pd(op1_offset, op2_offset);
+                break;
+            }
+            break;
+        case 0x22a: /* cvtsi2ss */
+        case 0x32a: /* cvtsi2sd */
+            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+            sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)](op1_offset);
+            break;
+        case 0x02c: /* cvttps2pi */
+        case 0x12c: /* cvttpd2pi */
+        case 0x02d: /* cvtps2pi */
+        case 0x12d: /* cvtpd2pi */
+            gen_op_enter_mmx();
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                op2_offset = offsetof(CPUX86State,xmm_t0);
+                gen_ldo_env_A0[s->mem_index >> 2](op2_offset);
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+            }
+            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
+            switch(b) {
+            case 0x02c:
+                gen_op_cvttps2pi(op1_offset, op2_offset);
+                break;
+            case 0x12c:
+                gen_op_cvttpd2pi(op1_offset, op2_offset);
+                break;
+            case 0x02d:
+                gen_op_cvtps2pi(op1_offset, op2_offset);
+                break;
+            case 0x12d:
+                gen_op_cvtpd2pi(op1_offset, op2_offset);
+                break;
+            }
+            break;
+        case 0x22c: /* cvttss2si */
+        case 0x32c: /* cvttsd2si */
+        case 0x22d: /* cvtss2si */
+        case 0x32d: /* cvtsd2si */
+            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                if ((b >> 8) & 1) {
+                    gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
+                } else {
+                    gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                    gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+                }
+                op2_offset = offsetof(CPUX86State,xmm_t0);
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+            }
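+            /* Index into sse_op_table3: the prefix column selects ss vs sd,
+               bit 0 of the opcode selects the truncating (cvtt) vs rounding
+               form, and dflag == 2 picks the 64-bit destination variant. */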
+            sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
+                          (b & 1) * 4](op2_offset);
+            gen_op_mov_reg_T0[ot][reg]();
+            break;
+        case 0xc4: /* pinsrw */
+        case 0x1c4:
+            s->rip_offset = 1;
+            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+            val = ldub_code(s->pc++);
+            if (b1) {
+                val &= 7;
+                gen_op_pinsrw_xmm(offsetof(CPUX86State,xmm_regs[reg]), val);
+            } else {
+                val &= 3;
+                gen_op_pinsrw_mmx(offsetof(CPUX86State,fpregs[reg].mmx), val);
+            }
+            break;
+        case 0xc5: /* pextrw */
+        case 0x1c5:
+            if (mod != 3)
+                goto illegal_op;
+            val = ldub_code(s->pc++);
+            if (b1) {
+                val &= 7;
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_pextrw_xmm(offsetof(CPUX86State,xmm_regs[rm]), val);
+            } else {
+                val &= 3;
+                rm = (modrm & 7);
+                gen_op_pextrw_mmx(offsetof(CPUX86State,fpregs[rm].mmx), val);
+            }
+            reg = ((modrm >> 3) & 7) | rex_r;
+            gen_op_mov_reg_T0[OT_LONG][reg]();
+            break;
+        case 0x1d6: /* movq ea, xmm */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_stq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+            }
+            break;
+        case 0x2d6: /* movq2dq */
+            gen_op_enter_mmx();
+            rm = (modrm & 7);
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                        offsetof(CPUX86State,fpregs[rm].mmx));
+            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+            break;
+        case 0x3d6: /* movdq2q */
+            gen_op_enter_mmx();
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
+                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            break;
+        case 0xd7: /* pmovmskb */
+        case 0x1d7:
+            if (mod != 3)
+                goto illegal_op;
+            if (b1) {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_pmovmskb_xmm(offsetof(CPUX86State,xmm_regs[rm]));
+            } else {
+                rm = (modrm & 7);
+                gen_op_pmovmskb_mmx(offsetof(CPUX86State,fpregs[rm].mmx));
+            }
+            reg = ((modrm >> 3) & 7) | rex_r;
+            gen_op_mov_reg_T0[OT_LONG][reg]();
+            break;
+        default:
+            goto illegal_op;
+        }
+    } else {
+        /* generic MMX or SSE operation */
+        switch(b) {
+        case 0xf7:
+            /* maskmov: we must prepare A0 */
+            if (mod != 3)
+                goto illegal_op;
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_movq_A0_reg[R_EDI]();
+            } else
+#endif
+            {
+                gen_op_movl_A0_reg[R_EDI]();
+                if (s->aflag == 0)
+                    gen_op_andl_A0_ffff();
+            }
+            gen_add_A0_ds_seg(s);
+            break;
+        case 0x70: /* pshufx insn */
+        case 0xc6: /* pshufx insn */
+        case 0xc2: /* compare insns */
+            s->rip_offset = 1;
+            break;
+        default:
+            break;
+        }
+        if (is_xmm) {
+            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                op2_offset = offsetof(CPUX86State,xmm_t0);
+                if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
+                                b == 0xc2)) {
+                    /* specific case for SSE single instructions */
+                    if (b1 == 2) {
+                        /* 32 bit access */
+                        gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                        gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+                    } else {
+                        /* 64 bit access */
+                        gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_t0.XMM_D(0)));
+                    }
+                } else {
+                    gen_ldo_env_A0[s->mem_index >> 2](op2_offset);
+                }
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+            }
+        } else {
+            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                op2_offset = offsetof(CPUX86State,mmx_t0);
+                gen_ldq_env_A0[s->mem_index >> 2](op2_offset);
+            } else {
+                rm = (modrm & 7);
+                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+            }
+        }
+        switch(b) {
+        case 0x70: /* pshufx insn */
+        case 0xc6: /* pshufx insn */
+            val = ldub_code(s->pc++);
+            sse_op3 = (GenOpFunc3 *)sse_op2;
+            sse_op3(op1_offset, op2_offset, val);
+            break;
+        case 0xc2:
+            /* compare insns */
+            val = ldub_code(s->pc++);
+            if (val >= 8)
+                goto illegal_op;
+            sse_op2 = sse_op_table4[val][b1];
+            sse_op2(op1_offset, op2_offset);
+            break;
+        default:
+            sse_op2(op1_offset, op2_offset);
+            break;
+        }
+        if (b == 0x2e || b == 0x2f) {
+            s->cc_op = CC_OP_EFLAGS;
+        }
+    }
+}
+
+#ifdef VBOX
+/* Checks whether this is an invalid lock sequence. Only a few instructions
+   can be used together with the lock prefix, and of those only the forms
+   that write a memory operand. So this is kind of annoying work to do...
+   The AMD manual lists the following instructions.
+        ADC
+        ADD
+        AND
+        BTC
+        BTR
+        BTS
+        CMPXCHG
+        CMPXCHG8B
+        CMPXCHG16B
+        DEC
+        INC
+        NEG
+        NOT
+        OR
+        SBB
+        SUB
+        XADD
+        XCHG
+        XOR */
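+/* For example, "lock add dword [mem], 1" (0x81 /0 with a memory destination)
+   is accepted by the checks below, while "lock add eax, 1" (register
+   destination) or a lock-prefixed mov falls through and is flagged as
+   invalid. */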
+static bool is_invalid_lock_sequence(DisasContext *s, target_ulong pc_start, int b)
+{
+    target_ulong pc = s->pc;
+    int modrm, mod, op;
+
+    /* X={8,16,32,64}  Y={16,32,64} */
+    switch (b)
+    {
+        /* /2: ADC reg/memX, immX */
+        /* /0: ADD reg/memX, immX */
+        /* /4: AND reg/memX, immX */
+        /* /1: OR  reg/memX, immX */
+        /* /3: SBB reg/memX, immX */
+        /* /5: SUB reg/memX, immX */
+        /* /6: XOR reg/memX, immX */
+        case 0x80:
+        case 0x81:
+        case 0x83:
+            modrm = ldub_code(pc++);
+            op = (modrm >> 3) & 7;
+            if (op == 7) /* /7: CMP */
+                break;
+            mod = (modrm >> 6) & 3;
+            if (mod == 3) /* register destination */
+                break;
+            return false;
+
+        case 0x10: /* /r: ADC reg/mem8, reg8 */
+        case 0x11: /* /r: ADC reg/memY, regY */
+        case 0x00: /* /r: ADD reg/mem8, reg8 */
+        case 0x01: /* /r: ADD reg/memY, regY */
+        case 0x20: /* /r: AND reg/mem8, reg8 */
+        case 0x21: /* /r: AND reg/memY, regY */
+        case 0x08: /* /r: OR  reg/mem8, reg8 */
+        case 0x09: /* /r: OR  reg/memY, regY */
+        case 0x18: /* /r: SBB reg/mem8, reg8 */
+        case 0x19: /* /r: SBB reg/memY, regY */
+        case 0x28: /* /r: SUB reg/mem8, reg8 */
+        case 0x29: /* /r: SUB reg/memY, regY */
+        case 0x86: /* /r: XCHG reg/mem8, reg8 or XCHG reg8, reg/mem8 */
+        case 0x87: /* /r: XCHG reg/memY, regY or XCHG regY, reg/memY */
+        case 0x30: /* /r: XOR reg/mem8, reg8 */
+        case 0x31: /* /r: XOR reg/memY, regY */
+            modrm = ldub_code(pc++);
+            mod = (modrm >> 6) & 3;
+            if (mod == 3) /* register destination */
+                break;
+            return false;
+
+        /* /1: DEC reg/memX */
+        /* /0: INC reg/memX */
+        case 0xfe:
+        case 0xff:
+            modrm = ldub_code(pc++);
+            mod = (modrm >> 6) & 3;
+            if (mod == 3) /* register destination */
+                break;
+            return false;
+
+        /* /3: NEG reg/memX */
+        /* /2: NOT reg/memX */
+        case 0xf6:
+        case 0xf7:
+            modrm = ldub_code(pc++);
+            mod = (modrm >> 6) & 3;
+            if (mod == 3) /* register destination */
+                break;
+            return false;
+
+        case 0x0f:
+            b = ldub_code(pc++);
+            switch (b)
+            {
+                /* /7: BTC reg/memY, imm8 */
+                /* /6: BTR reg/memY, imm8 */
+                /* /5: BTS reg/memY, imm8 */
+                case 0xba:
+                    modrm = ldub_code(pc++);
+                    op = (modrm >> 3) & 7;
+                    if (op < 5)
+                        break;
+                    mod = (modrm >> 6) & 3;
+                    if (mod == 3) /* register destination */
+                        break;
+                    return false;
+
+                case 0xbb: /* /r: BTC reg/memY, regY */
+                case 0xb3: /* /r: BTR reg/memY, regY */
+                case 0xab: /* /r: BTS reg/memY, regY */
+                case 0xb0: /* /r: CMPXCHG reg/mem8, reg8 */
+                case 0xb1: /* /r: CMPXCHG reg/memY, regY */
+                case 0xc0: /* /r: XADD reg/mem8, reg8 */
+                case 0xc1: /* /r: XADD reg/memY, regY */
+                    modrm = ldub_code(pc++);
+                    mod = (modrm >> 6) & 3;
+                    if (mod == 3) /* register destination */
+                        break;
+                    return false;
+
+                /* /1: CMPXCHG8B mem64 or CMPXCHG16B mem128 */
+                case 0xc7:
+                    modrm = ldub_code(pc++);
+                    op = (modrm >> 3) & 7;
+                    if (op != 1)
+                        break;
+                    return false;
+            }
+            break;
+    }
+
+    /* illegal sequence. The s->pc is past the lock prefix and that
+       is sufficient for the TB, I think. */
+    Log(("illegal lock sequence %VGv (b=%#x)\n", pc_start, b));
+    return true;
+}
+#endif /* VBOX */
+
+
+/* convert one instruction. s->is_jmp is set if the translation must
+   be stopped. Return the next pc value */
+static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
+{
+    int b, prefixes, aflag, dflag;
+    int shift, ot;
+    int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
+    target_ulong next_eip, tval;
+    int rex_w, rex_r;
+
+    s->pc = pc_start;
+    prefixes = 0;
+    aflag = s->code32;
+    dflag = s->code32;
+    s->override = -1;
+    rex_w = -1;
+    rex_r = 0;
+#ifdef TARGET_X86_64
+    s->rex_x = 0;
+    s->rex_b = 0;
+    x86_64_hregs = 0;
+#endif
+    s->rip_offset = 0; /* for relative ip address */
+
+#ifdef VBOX
+    /* Always update EIP. Otherwise one must be very careful with generated code that can raise exceptions. */
+    gen_update_eip(pc_start - s->cs_base);
+#endif
+
+ next_byte:
+    b = ldub_code(s->pc);
+    s->pc++;
+    /* check prefixes */
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        switch (b) {
+        case 0xf3:
+            prefixes |= PREFIX_REPZ;
+            goto next_byte;
+        case 0xf2:
+            prefixes |= PREFIX_REPNZ;
+            goto next_byte;
+        case 0xf0:
+            prefixes |= PREFIX_LOCK;
+            goto next_byte;
+        case 0x2e:
+            s->override = R_CS;
+            goto next_byte;
+        case 0x36:
+            s->override = R_SS;
+            goto next_byte;
+        case 0x3e:
+            s->override = R_DS;
+            goto next_byte;
+        case 0x26:
+            s->override = R_ES;
+            goto next_byte;
+        case 0x64:
+            s->override = R_FS;
+            goto next_byte;
+        case 0x65:
+            s->override = R_GS;
+            goto next_byte;
+        case 0x66:
+            prefixes |= PREFIX_DATA;
+            goto next_byte;
+        case 0x67:
+            prefixes |= PREFIX_ADR;
+            goto next_byte;
+        case 0x40 ... 0x4f:
+            /* REX prefix */
+            rex_w = (b >> 3) & 1;
+            rex_r = (b & 0x4) << 1;
+            s->rex_x = (b & 0x2) << 2;
+            REX_B(s) = (b & 0x1) << 3;
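+            /* R, X and B are pre-shifted to bit 3 so they can simply be
+               OR'ed into the 3-bit register numbers from ModRM/SIB. */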
+            x86_64_hregs = 1; /* select uniform byte register addressing */
+            goto next_byte;
+        }
+        if (rex_w == 1) {
+            /* 0x66 is ignored if rex.w is set */
+            dflag = 2;
+        } else {
+            if (prefixes & PREFIX_DATA)
+                dflag ^= 1;
+        }
+        if (!(prefixes & PREFIX_ADR))
+            aflag = 2;
+    } else
+#endif
+    {
+        switch (b) {
+        case 0xf3:
+            prefixes |= PREFIX_REPZ;
+            goto next_byte;
+        case 0xf2:
+            prefixes |= PREFIX_REPNZ;
+            goto next_byte;
+        case 0xf0:
+            prefixes |= PREFIX_LOCK;
+            goto next_byte;
+        case 0x2e:
+            s->override = R_CS;
+            goto next_byte;
+        case 0x36:
+            s->override = R_SS;
+            goto next_byte;
+        case 0x3e:
+            s->override = R_DS;
+            goto next_byte;
+        case 0x26:
+            s->override = R_ES;
+            goto next_byte;
+        case 0x64:
+            s->override = R_FS;
+            goto next_byte;
+        case 0x65:
+            s->override = R_GS;
+            goto next_byte;
+        case 0x66:
+            prefixes |= PREFIX_DATA;
+            goto next_byte;
+        case 0x67:
+            prefixes |= PREFIX_ADR;
+            goto next_byte;
+        }
+        if (prefixes & PREFIX_DATA)
+            dflag ^= 1;
+        if (prefixes & PREFIX_ADR)
+            aflag ^= 1;
+    }
+
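+    /* At this point aflag/dflag encode the effective address/operand size:
+       0 = 16 bit, 1 = 32 bit, 2 = 64 bit (long mode only). */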
+    s->prefix = prefixes;
+    s->aflag = aflag;
+    s->dflag = dflag;
+
+    /* lock generation */
+#ifndef VBOX
+    if (prefixes & PREFIX_LOCK)
+        gen_op_lock();
+#else /* VBOX */
+    if (prefixes & PREFIX_LOCK) {
+        if (is_invalid_lock_sequence(s, pc_start, b)) {
+            gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+            return s->pc;
+        }
+        gen_op_lock();
+    }
+#endif /* VBOX */
+
+    /* now check op code */
+ reswitch:
+    switch(b) {
+    case 0x0f:
+        /**************************/
+        /* extended op code */
+        b = ldub_code(s->pc++) | 0x100;
+        goto reswitch;
+
+        /**************************/
+        /* arith & logic */
+    case 0x00 ... 0x05:
+    case 0x08 ... 0x0d:
+    case 0x10 ... 0x15:
+    case 0x18 ... 0x1d:
+    case 0x20 ... 0x25:
+    case 0x28 ... 0x2d:
+    case 0x30 ... 0x35:
+    case 0x38 ... 0x3d:
+        {
+            int op, f, val;
+            op = (b >> 3) & 7;
+            f = (b >> 1) & 3;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag + OT_WORD;
+
+            switch(f) {
+            case 0: /* OP Ev, Gv */
+                modrm = ldub_code(s->pc++);
+                reg = ((modrm >> 3) & 7) | rex_r;
+                mod = (modrm >> 6) & 3;
+                rm = (modrm & 7) | REX_B(s);
+                if (mod != 3) {
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    opreg = OR_TMP0;
+                } else if (op == OP_XORL && rm == reg) {
+                xor_zero:
+                    /* xor reg, reg optimisation */
+                    gen_op_movl_T0_0();
+                    s->cc_op = CC_OP_LOGICB + ot;
+                    gen_op_mov_reg_T0[ot][reg]();
+                    gen_op_update1_cc();
+                    break;
+                } else {
+                    opreg = rm;
+                }
+                gen_op_mov_TN_reg[ot][1][reg]();
+                gen_op(s, op, ot, opreg);
+                break;
+            case 1: /* OP Gv, Ev */
+                modrm = ldub_code(s->pc++);
+                mod = (modrm >> 6) & 3;
+                reg = ((modrm >> 3) & 7) | rex_r;
+                rm = (modrm & 7) | REX_B(s);
+                if (mod != 3) {
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_op_ld_T1_A0[ot + s->mem_index]();
+                } else if (op == OP_XORL && rm == reg) {
+                    goto xor_zero;
+                } else {
+                    gen_op_mov_TN_reg[ot][1][rm]();
+                }
+                gen_op(s, op, ot, reg);
+                break;
+            case 2: /* OP A, Iv */
+                val = insn_get(s, ot);
+                gen_op_movl_T1_im(val);
+                gen_op(s, op, ot, OR_EAX);
+                break;
+            }
+        }
+        break;
+
+    case 0x80: /* GRP1 */
+    case 0x81:
+    case 0x82:
+    case 0x83:
+        {
+            int val;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag + OT_WORD;
+
+            modrm = ldub_code(s->pc++);
+            mod = (modrm >> 6) & 3;
+            rm = (modrm & 7) | REX_B(s);
+            op = (modrm >> 3) & 7;
+
+            if (mod != 3) {
+                if (b == 0x83)
+                    s->rip_offset = 1;
+                else
+                    s->rip_offset = insn_const_size(ot);
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                opreg = OR_TMP0;
+            } else {
+                opreg = rm;
+            }
+
+            switch(b) {
+            default:
+            case 0x80:
+            case 0x81:
+            case 0x82:
+                val = insn_get(s, ot);
+                break;
+            case 0x83:
+                val = (int8_t)insn_get(s, OT_BYTE);
+                break;
+            }
+            gen_op_movl_T1_im(val);
+            gen_op(s, op, ot, opreg);
+        }
+        break;
+
+        /**************************/
+        /* inc, dec, and other misc arith */
+    case 0x40 ... 0x47: /* inc Gv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        gen_inc(s, ot, OR_EAX + (b & 7), 1);
+        break;
+    case 0x48 ... 0x4f: /* dec Gv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        gen_inc(s, ot, OR_EAX + (b & 7), -1);
+        break;
+    case 0xf6: /* GRP3 */
+    case 0xf7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        op = (modrm >> 3) & 7;
+        if (mod != 3) {
+            if (op == 0)
+                s->rip_offset = insn_const_size(ot);
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0[ot + s->mem_index]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][rm]();
+        }
+
+        switch(op) {
+        case 0: /* test */
+            val = insn_get(s, ot);
+            gen_op_movl_T1_im(val);
+            gen_op_testl_T0_T1_cc();
+            s->cc_op = CC_OP_LOGICB + ot;
+            break;
+        case 2: /* not */
+            gen_op_notl_T0();
+            if (mod != 3) {
+                gen_op_st_T0_A0[ot + s->mem_index]();
+            } else {
+                gen_op_mov_reg_T0[ot][rm]();
+            }
+            break;
+        case 3: /* neg */
+            gen_op_negl_T0();
+            if (mod != 3) {
+                gen_op_st_T0_A0[ot + s->mem_index]();
+            } else {
+                gen_op_mov_reg_T0[ot][rm]();
+            }
+            gen_op_update_neg_cc();
+            s->cc_op = CC_OP_SUBB + ot;
+            break;
+        case 4: /* mul */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_mulb_AL_T0();
+                s->cc_op = CC_OP_MULB;
+                break;
+            case OT_WORD:
+                gen_op_mulw_AX_T0();
+                s->cc_op = CC_OP_MULW;
+                break;
+            default:
+            case OT_LONG:
+                gen_op_mull_EAX_T0();
+                s->cc_op = CC_OP_MULL;
+                break;
+#ifdef TARGET_X86_64
+            case OT_QUAD:
+                gen_op_mulq_EAX_T0();
+                s->cc_op = CC_OP_MULQ;
+                break;
+#endif
+            }
+            break;
+        case 5: /* imul */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_imulb_AL_T0();
+                s->cc_op = CC_OP_MULB;
+                break;
+            case OT_WORD:
+                gen_op_imulw_AX_T0();
+                s->cc_op = CC_OP_MULW;
+                break;
+            default:
+            case OT_LONG:
+                gen_op_imull_EAX_T0();
+                s->cc_op = CC_OP_MULL;
+                break;
+#ifdef TARGET_X86_64
+            case OT_QUAD:
+                gen_op_imulq_EAX_T0();
+                s->cc_op = CC_OP_MULQ;
+                break;
+#endif
+            }
+            break;
+        case 6: /* div */
+            switch(ot) {
+            case OT_BYTE:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_divb_AL_T0();
+                break;
+            case OT_WORD:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_divw_AX_T0();
+                break;
+            default:
+            case OT_LONG:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_divl_EAX_T0();
+                break;
+#ifdef TARGET_X86_64
+            case OT_QUAD:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_divq_EAX_T0();
+                break;
+#endif
+            }
+            break;
+        case 7: /* idiv */
+            switch(ot) {
+            case OT_BYTE:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_idivb_AL_T0();
+                break;
+            case OT_WORD:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_idivw_AX_T0();
+                break;
+            default:
+            case OT_LONG:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_idivl_EAX_T0();
+                break;
+#ifdef TARGET_X86_64
+            case OT_QUAD:
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_idivq_EAX_T0();
+                break;
+#endif
+            }
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+
+    case 0xfe: /* GRP4 */
+    case 0xff: /* GRP5 */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        op = (modrm >> 3) & 7;
+        if (op >= 2 && b == 0xfe) {
+            goto illegal_op;
+        }
+        if (CODE64(s)) {
+            if (op == 2 || op == 4) {
+                /* operand size for jumps is 64 bit */
+                ot = OT_QUAD;
+            } else if (op == 3 || op == 5) {
+                /* for far calls and far jumps, the operand is 16 or 32 bit,
+                   even in long mode */
+                ot = dflag ? OT_LONG : OT_WORD;
+            } else if (op == 6) {
+                /* default push size is 64 bit */
+                ot = dflag ? OT_QUAD : OT_WORD;
+            }
+        }
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            if (op >= 2 && op != 3 && op != 5)
+                gen_op_ld_T0_A0[ot + s->mem_index]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][rm]();
+        }
+
+        switch(op) {
+        case 0: /* inc Ev */
+            if (mod != 3)
+                opreg = OR_TMP0;
+            else
+                opreg = rm;
+            gen_inc(s, ot, opreg, 1);
+            break;
+        case 1: /* dec Ev */
+            if (mod != 3)
+                opreg = OR_TMP0;
+            else
+                opreg = rm;
+            gen_inc(s, ot, opreg, -1);
+            break;
+        case 2: /* call Ev */
+            /* XXX: optimize if memory (no 'and' is necessary) */
+#ifdef VBOX_WITH_CALL_RECORD
+            if (s->record_call)
+                gen_op_record_call();
+#endif
+            if (s->dflag == 0)
+                gen_op_andl_T0_ffff();
+            next_eip = s->pc - s->cs_base;
+            gen_movtl_T1_im(next_eip);
+            gen_push_T1(s);
+            gen_op_jmp_T0();
+            gen_eob(s);
+            break;
+        case 3: /* lcall Ev */
+            gen_op_ld_T1_A0[ot + s->mem_index]();
+            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+            gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
+        do_lcall:
+            if (s->pe && !s->vm86) {
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_lcall_protected_T0_T1(dflag, s->pc - pc_start);
+            } else {
+                gen_op_lcall_real_T0_T1(dflag, s->pc - s->cs_base);
+            }
+            gen_eob(s);
+            break;
+        case 4: /* jmp Ev */
+            if (s->dflag == 0)
+                gen_op_andl_T0_ffff();
+            gen_op_jmp_T0();
+            gen_eob(s);
+            break;
+        case 5: /* ljmp Ev */
+            gen_op_ld_T1_A0[ot + s->mem_index]();
+            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+            gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
+        do_ljmp:
+            if (s->pe && !s->vm86) {
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_ljmp_protected_T0_T1(s->pc - pc_start);
+            } else {
+                gen_op_movl_seg_T0_vm(offsetof(CPUX86State,segs[R_CS]));
+                gen_op_movl_T0_T1();
+                gen_op_jmp_T0();
+            }
+            gen_eob(s);
+            break;
+        case 6: /* push Ev */
+            gen_push_T0(s);
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+
+    case 0x84: /* test Ev, Gv */
+    case 0x85:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        reg = ((modrm >> 3) & 7) | rex_r;
+
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_op_mov_TN_reg[ot][1][reg]();
+        gen_op_testl_T0_T1_cc();
+        s->cc_op = CC_OP_LOGICB + ot;
+        break;
+
+    case 0xa8: /* test eAX, Iv */
+    case 0xa9:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        val = insn_get(s, ot);
+
+        gen_op_mov_TN_reg[ot][0][OR_EAX]();
+        gen_op_movl_T1_im(val);
+        gen_op_testl_T0_T1_cc();
+        s->cc_op = CC_OP_LOGICB + ot;
+        break;
+
+    case 0x98: /* CWDE/CBW */
+#ifdef TARGET_X86_64
+        if (dflag == 2) {
+            gen_op_movslq_RAX_EAX();
+        } else
+#endif
+        if (dflag == 1)
+            gen_op_movswl_EAX_AX();
+        else
+            gen_op_movsbw_AX_AL();
+        break;
+    case 0x99: /* CDQ/CWD */
+#ifdef TARGET_X86_64
+        if (dflag == 2) {
+            gen_op_movsqo_RDX_RAX();
+        } else
+#endif
+        if (dflag == 1)
+            gen_op_movslq_EDX_EAX();
+        else
+            gen_op_movswl_DX_AX();
+        break;
+    case 0x1af: /* imul Gv, Ev */
+    case 0x69: /* imul Gv, Ev, I */
+    case 0x6b:
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        if (b == 0x69)
+            s->rip_offset = insn_const_size(ot);
+        else if (b == 0x6b)
+            s->rip_offset = 1;
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        if (b == 0x69) {
+            val = insn_get(s, ot);
+            gen_op_movl_T1_im(val);
+        } else if (b == 0x6b) {
+            val = (int8_t)insn_get(s, OT_BYTE);
+            gen_op_movl_T1_im(val);
+        } else {
+            gen_op_mov_TN_reg[ot][1][reg]();
+        }
+
+#ifdef TARGET_X86_64
+        if (ot == OT_QUAD) {
+            gen_op_imulq_T0_T1();
+        } else
+#endif
+        if (ot == OT_LONG) {
+            gen_op_imull_T0_T1();
+        } else {
+            gen_op_imulw_T0_T1();
+        }
+        gen_op_mov_reg_T0[ot][reg]();
+        s->cc_op = CC_OP_MULB + ot;
+        break;
+    case 0x1c0:
+    case 0x1c1: /* xadd Ev, Gv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        if (mod == 3) {
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_mov_TN_reg[ot][0][reg]();
+            gen_op_mov_TN_reg[ot][1][rm]();
+            gen_op_addl_T0_T1();
+            gen_op_mov_reg_T1[ot][reg]();
+            gen_op_mov_reg_T0[ot][rm]();
+        } else {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_mov_TN_reg[ot][0][reg]();
+            gen_op_ld_T1_A0[ot + s->mem_index]();
+            gen_op_addl_T0_T1();
+            gen_op_st_T0_A0[ot + s->mem_index]();
+            gen_op_mov_reg_T1[ot][reg]();
+        }
+        gen_op_update2_cc();
+        s->cc_op = CC_OP_ADDB + ot;
+        break;
+    case 0x1b0:
+    case 0x1b1: /* cmpxchg Ev, Gv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        gen_op_mov_TN_reg[ot][1][reg]();
+        if (mod == 3) {
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_mov_TN_reg[ot][0][rm]();
+            gen_op_cmpxchg_T0_T1_EAX_cc[ot]();
+            gen_op_mov_reg_T0[ot][rm]();
+        } else {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0[ot + s->mem_index]();
+            gen_op_cmpxchg_mem_T0_T1_EAX_cc[ot + s->mem_index]();
+        }
+        s->cc_op = CC_OP_SUBB + ot;
+        break;
+    case 0x1c7: /* cmpxchg8b */
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if ((mod == 3) || ((modrm & 0x38) != 0x8))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_op_cmpxchg8b();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+
+        /**************************/
+        /* push/pop */
+    case 0x50 ... 0x57: /* push */
+        gen_op_mov_TN_reg[OT_LONG][0][(b & 7) | REX_B(s)]();
+        gen_push_T0(s);
+        break;
+    case 0x58 ... 0x5f: /* pop */
+        if (CODE64(s)) {
+            ot = dflag ? OT_QUAD : OT_WORD;
+        } else {
+            ot = dflag + OT_WORD;
+        }
+        gen_pop_T0(s);
+        /* NOTE: order is important for pop %sp */
+        gen_pop_update(s);
+        gen_op_mov_reg_T0[ot][(b & 7) | REX_B(s)]();
+        break;
+    case 0x60: /* pusha */
+        if (CODE64(s))
+            goto illegal_op;
+        gen_pusha(s);
+        break;
+    case 0x61: /* popa */
+        if (CODE64(s))
+            goto illegal_op;
+        gen_popa(s);
+        break;
+    case 0x68: /* push Iv */
+    case 0x6a:
+        if (CODE64(s)) {
+            ot = dflag ? OT_QUAD : OT_WORD;
+        } else {
+            ot = dflag + OT_WORD;
+        }
+        if (b == 0x68)
+            val = insn_get(s, ot);
+        else
+            val = (int8_t)insn_get(s, OT_BYTE);
+        gen_op_movl_T0_im(val);
+        gen_push_T0(s);
+        break;
+    case 0x8f: /* pop Ev */
+        if (CODE64(s)) {
+            ot = dflag ? OT_QUAD : OT_WORD;
+        } else {
+            ot = dflag + OT_WORD;
+        }
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        gen_pop_T0(s);
+        if (mod == 3) {
+            /* NOTE: order is important for pop %sp */
+            gen_pop_update(s);
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_mov_reg_T0[ot][rm]();
+        } else {
+            /* NOTE: order is important too for MMU exceptions */
+            s->popl_esp_hack = 1 << ot;
+            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            s->popl_esp_hack = 0;
+            gen_pop_update(s);
+        }
+        break;
+    case 0xc8: /* enter */
+        {
+            int level;
+            val = lduw_code(s->pc);
+            s->pc += 2;
+            level = ldub_code(s->pc++);
+            gen_enter(s, val, level);
+        }
+        break;
+    case 0xc9: /* leave */
+        /* XXX: exception not precise (ESP is updated before potential exception) */
+        if (CODE64(s)) {
+            gen_op_mov_TN_reg[OT_QUAD][0][R_EBP]();
+            gen_op_mov_reg_T0[OT_QUAD][R_ESP]();
+        } else if (s->ss32) {
+            gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
+            gen_op_mov_reg_T0[OT_LONG][R_ESP]();
+        } else {
+            gen_op_mov_TN_reg[OT_WORD][0][R_EBP]();
+            gen_op_mov_reg_T0[OT_WORD][R_ESP]();
+        }
+        gen_pop_T0(s);
+        if (CODE64(s)) {
+            ot = dflag ? OT_QUAD : OT_WORD;
+        } else {
+            ot = dflag + OT_WORD;
+        }
+        gen_op_mov_reg_T0[ot][R_EBP]();
+        gen_pop_update(s);
+        break;
+    case 0x06: /* push es */
+    case 0x0e: /* push cs */
+    case 0x16: /* push ss */
+    case 0x1e: /* push ds */
+        if (CODE64(s))
+            goto illegal_op;
+        gen_op_movl_T0_seg(b >> 3);
+        gen_push_T0(s);
+        break;
+    case 0x1a0: /* push fs */
+    case 0x1a8: /* push gs */
+        gen_op_movl_T0_seg((b >> 3) & 7);
+        gen_push_T0(s);
+        break;
+    case 0x07: /* pop es */
+    case 0x17: /* pop ss */
+    case 0x1f: /* pop ds */
+        if (CODE64(s))
+            goto illegal_op;
+        reg = b >> 3;
+        gen_pop_T0(s);
+        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
+        gen_pop_update(s);
+        if (reg == R_SS) {
+            /* if reg == SS, inhibit interrupts/trace. */
+            /* If several instructions disable interrupts, only the
+               _first_ does it */
+            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+                gen_op_set_inhibit_irq();
+            s->tf = 0;
+        }
+        if (s->is_jmp) {
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+    case 0x1a1: /* pop fs */
+    case 0x1a9: /* pop gs */
+        gen_pop_T0(s);
+        gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base);
+        gen_pop_update(s);
+        if (s->is_jmp) {
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+
+        /**************************/
+        /* mov */
+    case 0x88:
+    case 0x89: /* mov Gv, Ev */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+
+        /* generate a generic store */
+        gen_ldst_modrm(s, modrm, ot, reg, 1);
+        break;
+    case 0xc6:
+    case 0xc7: /* mov Ev, Iv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if (mod != 3) {
+            s->rip_offset = insn_const_size(ot);
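+            /* rip_offset is the number of immediate bytes following the
+               modrm bytes, so RIP-relative addressing is computed from the
+               end of the instruction */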
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        }
+        val = insn_get(s, ot);
+        gen_op_movl_T0_im(val);
+        if (mod != 3)
+            gen_op_st_T0_A0[ot + s->mem_index]();
+        else
+            gen_op_mov_reg_T0[ot][(modrm & 7) | REX_B(s)]();
+        break;
+    case 0x8a:
+    case 0x8b: /* mov Ev, Gv */
+#ifdef VBOX /* dtrace hot fix */
+        if (prefixes & PREFIX_LOCK)
+            goto illegal_op;
+#endif
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = OT_WORD + dflag;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_op_mov_reg_T0[ot][reg]();
+        break;
+    case 0x8e: /* mov seg, Gv */
+        modrm = ldub_code(s->pc++);
+        reg = (modrm >> 3) & 7;
+        if (reg >= 6 || reg == R_CS)
+            goto illegal_op;
+        gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
+        if (reg == R_SS) {
+            /* a load of SS inhibits interrupts and traps for one
+               instruction; if several such instructions follow each
+               other, only the first one sets the inhibit flag */
+            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+                gen_op_set_inhibit_irq();
+            s->tf = 0;
+        }
+        if (s->is_jmp) {
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+    case 0x8c: /* mov Gv, seg */
+        modrm = ldub_code(s->pc++);
+        reg = (modrm >> 3) & 7;
+        mod = (modrm >> 6) & 3;
+        if (reg >= 6)
+            goto illegal_op;
+        gen_op_movl_T0_seg(reg);
+        if (mod == 3)
+            ot = OT_WORD + dflag;
+        else
+            ot = OT_WORD;
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+        break;
+
+    case 0x1b6: /* movzbS Gv, Eb */
+    case 0x1b7: /* movzwS Gv, Eb */
+    case 0x1be: /* movsbS Gv, Eb */
+    case 0x1bf: /* movswS Gv, Eb */
+        {
+            int d_ot;
+            /* d_ot is the size of the destination */
+            d_ot = dflag + OT_WORD;
+            /* ot is the size of the source */
+            ot = (b & 1) + OT_BYTE;
+            modrm = ldub_code(s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            mod = (modrm >> 6) & 3;
+            rm = (modrm & 7) | REX_B(s);
+
+            if (mod == 3) {
+                gen_op_mov_TN_reg[ot][0][rm]();
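+                /* bit 3 of the opcode is set for movsx and clear for movzx */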
+                switch(ot | (b & 8)) {
+                case OT_BYTE:
+                    gen_op_movzbl_T0_T0();
+                    break;
+                case OT_BYTE | 8:
+                    gen_op_movsbl_T0_T0();
+                    break;
+                case OT_WORD:
+                    gen_op_movzwl_T0_T0();
+                    break;
+                default:
+                case OT_WORD | 8:
+                    gen_op_movswl_T0_T0();
+                    break;
+                }
+                gen_op_mov_reg_T0[d_ot][reg]();
+            } else {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                if (b & 8) {
+                    gen_op_lds_T0_A0[ot + s->mem_index]();
+                } else {
+                    gen_op_ldu_T0_A0[ot + s->mem_index]();
+                }
+                gen_op_mov_reg_T0[d_ot][reg]();
+            }
+        }
+        break;
+
+    case 0x8d: /* lea */
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        reg = ((modrm >> 3) & 7) | rex_r;
+        /* make sure no segment base is added to the address */
+        s->override = -1;
+        val = s->addseg;
+        s->addseg = 0;
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        s->addseg = val;
+        gen_op_mov_reg_A0[ot - OT_WORD][reg]();
+        break;
+
+    case 0xa0: /* mov EAX, Ov */
+    case 0xa1:
+    case 0xa2: /* mov Ov, EAX */
+    case 0xa3:
+        {
+            target_ulong offset_addr;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag + OT_WORD;
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                offset_addr = ldq_code(s->pc);
+                s->pc += 8;
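+                /* use the shorter immediate form when the 64 bit offset
+                   fits in a sign-extended 32 bit value */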
+                if (offset_addr == (int32_t)offset_addr)
+                    gen_op_movq_A0_im(offset_addr);
+                else
+                    gen_op_movq_A0_im64(offset_addr >> 32, offset_addr);
+            } else
+#endif
+            {
+                if (s->aflag) {
+                    offset_addr = insn_get(s, OT_LONG);
+                } else {
+                    offset_addr = insn_get(s, OT_WORD);
+                }
+                gen_op_movl_A0_im(offset_addr);
+            }
+            gen_add_A0_ds_seg(s);
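+            /* bit 1 of the opcode selects the direction: a0/a1 load into
+               EAX, a2/a3 store from it */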
+            if ((b & 2) == 0) {
+                gen_op_ld_T0_A0[ot + s->mem_index]();
+                gen_op_mov_reg_T0[ot][R_EAX]();
+            } else {
+                gen_op_mov_TN_reg[ot][0][R_EAX]();
+                gen_op_st_T0_A0[ot + s->mem_index]();
+            }
+        }
+        break;
+    case 0xd7: /* xlat */
+#ifdef TARGET_X86_64
+        if (s->aflag == 2) {
+            gen_op_movq_A0_reg[R_EBX]();
+            gen_op_addq_A0_AL();
+        } else
+#endif
+        {
+            gen_op_movl_A0_reg[R_EBX]();
+            gen_op_addl_A0_AL();
+            if (s->aflag == 0)
+                gen_op_andl_A0_ffff();
+        }
+        gen_add_A0_ds_seg(s);
+        gen_op_ldu_T0_A0[OT_BYTE + s->mem_index]();
+        gen_op_mov_reg_T0[OT_BYTE][R_EAX]();
+        break;
+    case 0xb0 ... 0xb7: /* mov R, Ib */
+        val = insn_get(s, OT_BYTE);
+        gen_op_movl_T0_im(val);
+        gen_op_mov_reg_T0[OT_BYTE][(b & 7) | REX_B(s)]();
+        break;
+    case 0xb8 ... 0xbf: /* mov R, Iv */
+#ifdef TARGET_X86_64
+        if (dflag == 2) {
+            uint64_t tmp;
+            /* 64 bit case */
+            tmp = ldq_code(s->pc);
+            s->pc += 8;
+            reg = (b & 7) | REX_B(s);
+            gen_movtl_T0_im(tmp);
+            gen_op_mov_reg_T0[OT_QUAD][reg]();
+        } else
+#endif
+        {
+            ot = dflag ? OT_LONG : OT_WORD;
+            val = insn_get(s, ot);
+            reg = (b & 7) | REX_B(s);
+            gen_op_movl_T0_im(val);
+            gen_op_mov_reg_T0[ot][reg]();
+        }
+        break;
+
+    case 0x91 ... 0x97: /* xchg R, EAX */
+        ot = dflag + OT_WORD;
+        reg = (b & 7) | REX_B(s);
+        rm = R_EAX;
+        goto do_xchg_reg;
+    case 0x86:
+    case 0x87: /* xchg Ev, Gv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        if (mod == 3) {
+            rm = (modrm & 7) | REX_B(s);
+        do_xchg_reg:
+            gen_op_mov_TN_reg[ot][0][reg]();
+            gen_op_mov_TN_reg[ot][1][rm]();
+            gen_op_mov_reg_T0[ot][rm]();
+            gen_op_mov_reg_T1[ot][reg]();
+        } else {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_mov_TN_reg[ot][0][reg]();
+            /* xchg with a memory operand is implicitly locked; if a LOCK
+               prefix was present the lock has already been emitted, so
+               only emit it here when the prefix was absent */
+            if (!(prefixes & PREFIX_LOCK))
+                gen_op_lock();
+            gen_op_ld_T1_A0[ot + s->mem_index]();
+            gen_op_st_T0_A0[ot + s->mem_index]();
+            if (!(prefixes & PREFIX_LOCK))
+                gen_op_unlock();
+            gen_op_mov_reg_T1[ot][reg]();
+        }
+        break;
+    case 0xc4: /* les Gv */
+        if (CODE64(s))
+            goto illegal_op;
+        op = R_ES;
+        goto do_lxx;
+    case 0xc5: /* lds Gv */
+        if (CODE64(s))
+            goto illegal_op;
+        op = R_DS;
+        goto do_lxx;
+    case 0x1b2: /* lss Gv */
+        op = R_SS;
+        goto do_lxx;
+    case 0x1b4: /* lfs Gv */
+        op = R_FS;
+        goto do_lxx;
+    case 0x1b5: /* lgs Gv */
+        op = R_GS;
+    do_lxx:
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_op_ld_T1_A0[ot + s->mem_index]();
+        gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
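+        /* A0 now points past the 16/32 bit offset, at the selector word */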
+        /* load the segment first to handle exceptions properly */
+        gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
+        gen_movl_seg_T0(s, op, pc_start - s->cs_base);
+        /* then write the offset into the destination register */
+        gen_op_mov_reg_T1[ot][reg]();
+        if (s->is_jmp) {
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+
+        /************************/
+        /* shifts */
+    case 0xc0:
+    case 0xc1:
+        /* shift Ev,Ib */
+        shift = 2;
+    grp2:
+        {
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag + OT_WORD;
+
+            modrm = ldub_code(s->pc++);
+            mod = (modrm >> 6) & 3;
+            op = (modrm >> 3) & 7;
+
+            if (mod != 3) {
+                if (shift == 2) {
+                    s->rip_offset = 1;
+                }
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                opreg = OR_TMP0;
+            } else {
+                opreg = (modrm & 7) | REX_B(s);
+            }
+
+            /* shift == 0 means shift by CL, otherwise the count is an
+               immediate (1 for d0/d1, a byte fetched below for c0/c1) */
+            if (shift == 0) {
+                gen_shift(s, op, ot, opreg, OR_ECX);
+            } else {
+                if (shift == 2) {
+                    shift = ldub_code(s->pc++);
+                }
+                gen_shifti(s, op, ot, opreg, shift);
+            }
+        }
+        break;
+    case 0xd0:
+    case 0xd1:
+        /* shift Ev,1 */
+        shift = 1;
+        goto grp2;
+    case 0xd2:
+    case 0xd3:
+        /* shift Ev,cl */
+        shift = 0;
+        goto grp2;
+
+    case 0x1a4: /* shld imm */
+        op = 0;
+        shift = 1;
+        goto do_shiftd;
+    case 0x1a5: /* shld cl */
+        op = 0;
+        shift = 0;
+        goto do_shiftd;
+    case 0x1ac: /* shrd imm */
+        op = 1;
+        shift = 1;
+        goto do_shiftd;
+    case 0x1ad: /* shrd cl */
+        op = 1;
+        shift = 0;
+    do_shiftd:
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        reg = ((modrm >> 3) & 7) | rex_r;
+
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0[ot + s->mem_index]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][rm]();
+        }
+        gen_op_mov_TN_reg[ot][1][reg]();
+
+        if (shift) {
+            val = ldub_code(s->pc++);
+            if (ot == OT_QUAD)
+                val &= 0x3f;
+            else
+                val &= 0x1f;
+            if (val) {
+                if (mod == 3)
+                    gen_op_shiftd_T0_T1_im_cc[ot][op](val);
+                else
+                    gen_op_shiftd_mem_T0_T1_im_cc[ot + s->mem_index][op](val);
+                if (op == 0 && ot != OT_WORD)
+                    s->cc_op = CC_OP_SHLB + ot;
+                else
+                    s->cc_op = CC_OP_SARB + ot;
+            }
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            if (mod == 3)
+                gen_op_shiftd_T0_T1_ECX_cc[ot][op]();
+            else
+                gen_op_shiftd_mem_T0_T1_ECX_cc[ot + s->mem_index][op]();
+            s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+        }
+        if (mod == 3) {
+            gen_op_mov_reg_T0[ot][rm]();
+        }
+        break;
+
+        /************************/
+        /* floats */
+    case 0xd8 ... 0xdf:
+        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
+            /* if CR0.EM or CR0.TS is set, generate an FPU exception */
+            /* XXX: what should be done if the opcode is also illegal? */
+            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+            break;
+        }
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
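+        /* op combines the low 3 opcode bits (d8..df) and the modrm reg
+           field into a 6 bit index selecting the FPU operation */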
+        if (mod != 3) {
+            /* memory op */
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            switch(op) {
+            case 0x00 ... 0x07: /* fxxxs */
+            case 0x10 ... 0x17: /* fixxxl */
+            case 0x20 ... 0x27: /* fxxxl */
+            case 0x30 ... 0x37: /* fixxx */
+                {
+                    int op1;
+                    op1 = op & 7;
+
+                    switch(op >> 4) {
+                    case 0:
+                        gen_op_flds_FT0_A0();
+                        break;
+                    case 1:
+                        gen_op_fildl_FT0_A0();
+                        break;
+                    case 2:
+                        gen_op_fldl_FT0_A0();
+                        break;
+                    case 3:
+                    default:
+                        gen_op_fild_FT0_A0();
+                        break;
+                    }
+
+                    gen_op_fp_arith_ST0_FT0[op1]();
+                    if (op1 == 3) {
+                        /* fcomp needs pop */
+                        gen_op_fpop();
+                    }
+                }
+                break;
+            case 0x08: /* flds */
+            case 0x0a: /* fsts */
+            case 0x0b: /* fstps */
+            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
+            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
+            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
+                switch(op & 7) {
+                case 0:
+                    switch(op >> 4) {
+                    case 0:
+                        gen_op_flds_ST0_A0();
+                        break;
+                    case 1:
+                        gen_op_fildl_ST0_A0();
+                        break;
+                    case 2:
+                        gen_op_fldl_ST0_A0();
+                        break;
+                    case 3:
+                    default:
+                        gen_op_fild_ST0_A0();
+                        break;
+                    }
+                    break;
+                case 1:
+                    switch(op >> 4) {
+                    case 1:
+                        gen_op_fisttl_ST0_A0();
+                        break;
+                    case 2:
+                        gen_op_fisttll_ST0_A0();
+                        break;
+                    case 3:
+                    default:
+                        gen_op_fistt_ST0_A0();
+                    }
+                    gen_op_fpop();
+                    break;
+                default:
+                    switch(op >> 4) {
+                    case 0:
+                        gen_op_fsts_ST0_A0();
+                        break;
+                    case 1:
+                        gen_op_fistl_ST0_A0();
+                        break;
+                    case 2:
+                        gen_op_fstl_ST0_A0();
+                        break;
+                    case 3:
+                    default:
+                        gen_op_fist_ST0_A0();
+                        break;
+                    }
+                    if ((op & 7) == 3)
+                        gen_op_fpop();
+                    break;
+                }
+                break;
+            case 0x0c: /* fldenv mem */
+                gen_op_fldenv_A0(s->dflag);
+                break;
+            case 0x0d: /* fldcw mem */
+                gen_op_fldcw_A0();
+                break;
+            case 0x0e: /* fnstenv mem */
+                gen_op_fnstenv_A0(s->dflag);
+                break;
+            case 0x0f: /* fnstcw mem */
+                gen_op_fnstcw_A0();
+                break;
+            case 0x1d: /* fldt mem */
+                gen_op_fldt_ST0_A0();
+                break;
+            case 0x1f: /* fstpt mem */
+                gen_op_fstt_ST0_A0();
+                gen_op_fpop();
+                break;
+            case 0x2c: /* frstor mem */
+                gen_op_frstor_A0(s->dflag);
+                break;
+            case 0x2e: /* fnsave mem */
+                gen_op_fnsave_A0(s->dflag);
+                break;
+            case 0x2f: /* fnstsw mem */
+                gen_op_fnstsw_A0();
+                break;
+            case 0x3c: /* fbld */
+                gen_op_fbld_ST0_A0();
+                break;
+            case 0x3e: /* fbstp */
+                gen_op_fbst_ST0_A0();
+                gen_op_fpop();
+                break;
+            case 0x3d: /* fildll */
+                gen_op_fildll_ST0_A0();
+                break;
+            case 0x3f: /* fistpll */
+                gen_op_fistll_ST0_A0();
+                gen_op_fpop();
+                break;
+            default:
+                goto illegal_op;
+            }
+        } else {
+            /* register float ops */
+            opreg = rm;
+
+            switch(op) {
+            case 0x08: /* fld sti */
+                gen_op_fpush();
+                gen_op_fmov_ST0_STN((opreg + 1) & 7);
+                break;
+            case 0x09: /* fxchg sti */
+            case 0x29: /* fxchg4 sti, undocumented op */
+            case 0x39: /* fxchg7 sti, undocumented op */
+                gen_op_fxchg_ST0_STN(opreg);
+                break;
+            case 0x0a: /* grp d9/2 */
+                switch(rm) {
+                case 0: /* fnop */
+                    /* check exceptions (FreeBSD FPU probe) */
+                    if (s->cc_op != CC_OP_DYNAMIC)
+                        gen_op_set_cc_op(s->cc_op);
+                    gen_jmp_im(pc_start - s->cs_base);
+                    gen_op_fwait();
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x0c: /* grp d9/4 */
+                switch(rm) {
+                case 0: /* fchs */
+                    gen_op_fchs_ST0();
+                    break;
+                case 1: /* fabs */
+                    gen_op_fabs_ST0();
+                    break;
+                case 4: /* ftst */
+                    gen_op_fldz_FT0();
+                    gen_op_fcom_ST0_FT0();
+                    break;
+                case 5: /* fxam */
+                    gen_op_fxam_ST0();
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x0d: /* grp d9/5 */
+                {
+                    switch(rm) {
+                    case 0:
+                        gen_op_fpush();
+                        gen_op_fld1_ST0();
+                        break;
+                    case 1:
+                        gen_op_fpush();
+                        gen_op_fldl2t_ST0();
+                        break;
+                    case 2:
+                        gen_op_fpush();
+                        gen_op_fldl2e_ST0();
+                        break;
+                    case 3:
+                        gen_op_fpush();
+                        gen_op_fldpi_ST0();
+                        break;
+                    case 4:
+                        gen_op_fpush();
+                        gen_op_fldlg2_ST0();
+                        break;
+                    case 5:
+                        gen_op_fpush();
+                        gen_op_fldln2_ST0();
+                        break;
+                    case 6:
+                        gen_op_fpush();
+                        gen_op_fldz_ST0();
+                        break;
+                    default:
+                        goto illegal_op;
+                    }
+                }
+                break;
+            case 0x0e: /* grp d9/6 */
+                switch(rm) {
+                case 0: /* f2xm1 */
+                    gen_op_f2xm1();
+                    break;
+                case 1: /* fyl2x */
+                    gen_op_fyl2x();
+                    break;
+                case 2: /* fptan */
+                    gen_op_fptan();
+                    break;
+                case 3: /* fpatan */
+                    gen_op_fpatan();
+                    break;
+                case 4: /* fxtract */
+                    gen_op_fxtract();
+                    break;
+                case 5: /* fprem1 */
+                    gen_op_fprem1();
+                    break;
+                case 6: /* fdecstp */
+                    gen_op_fdecstp();
+                    break;
+                default:
+                case 7: /* fincstp */
+                    gen_op_fincstp();
+                    break;
+                }
+                break;
+            case 0x0f: /* grp d9/7 */
+                switch(rm) {
+                case 0: /* fprem */
+                    gen_op_fprem();
+                    break;
+                case 1: /* fyl2xp1 */
+                    gen_op_fyl2xp1();
+                    break;
+                case 2: /* fsqrt */
+                    gen_op_fsqrt();
+                    break;
+                case 3: /* fsincos */
+                    gen_op_fsincos();
+                    break;
+                case 5: /* fscale */
+                    gen_op_fscale();
+                    break;
+                case 4: /* frndint */
+                    gen_op_frndint();
+                    break;
+                case 6: /* fsin */
+                    gen_op_fsin();
+                    break;
+                default:
+                case 7: /* fcos */
+                    gen_op_fcos();
+                    break;
+                }
+                break;
+            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
+            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
+            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
+                {
+                    int op1;
+
+                    op1 = op & 7;
+                    if (op >= 0x20) {
+                        gen_op_fp_arith_STN_ST0[op1](opreg);
+                        if (op >= 0x30)
+                            gen_op_fpop();
+                    } else {
+                        gen_op_fmov_FT0_STN(opreg);
+                        gen_op_fp_arith_ST0_FT0[op1]();
+                    }
+                }
+                break;
+            case 0x02: /* fcom */
+            case 0x22: /* fcom2, undocumented op */
+                gen_op_fmov_FT0_STN(opreg);
+                gen_op_fcom_ST0_FT0();
+                break;
+            case 0x03: /* fcomp */
+            case 0x23: /* fcomp3, undocumented op */
+            case 0x32: /* fcomp5, undocumented op */
+                gen_op_fmov_FT0_STN(opreg);
+                gen_op_fcom_ST0_FT0();
+                gen_op_fpop();
+                break;
+            case 0x15: /* da/5 */
+                switch(rm) {
+                case 1: /* fucompp */
+                    gen_op_fmov_FT0_STN(1);
+                    gen_op_fucom_ST0_FT0();
+                    gen_op_fpop();
+                    gen_op_fpop();
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x1c:
+                switch(rm) {
+                case 0: /* feni (287 only, just do nop here) */
+                    break;
+                case 1: /* fdisi (287 only, just do nop here) */
+                    break;
+                case 2: /* fclex */
+                    gen_op_fclex();
+                    break;
+                case 3: /* fninit */
+                    gen_op_fninit();
+                    break;
+                case 4: /* fsetpm (287 only, just do nop here) */
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x1d: /* fucomi */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_op_fmov_FT0_STN(opreg);
+                gen_op_fucomi_ST0_FT0();
+                s->cc_op = CC_OP_EFLAGS;
+                break;
+            case 0x1e: /* fcomi */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_op_fmov_FT0_STN(opreg);
+                gen_op_fcomi_ST0_FT0();
+                s->cc_op = CC_OP_EFLAGS;
+                break;
+            case 0x28: /* ffree sti */
+                gen_op_ffree_STN(opreg);
+                break;
+            case 0x2a: /* fst sti */
+                gen_op_fmov_STN_ST0(opreg);
+                break;
+            case 0x2b: /* fstp sti */
+            case 0x0b: /* fstp1 sti, undocumented op */
+            case 0x3a: /* fstp8 sti, undocumented op */
+            case 0x3b: /* fstp9 sti, undocumented op */
+                gen_op_fmov_STN_ST0(opreg);
+                gen_op_fpop();
+                break;
+            case 0x2c: /* fucom st(i) */
+                gen_op_fmov_FT0_STN(opreg);
+                gen_op_fucom_ST0_FT0();
+                break;
+            case 0x2d: /* fucomp st(i) */
+                gen_op_fmov_FT0_STN(opreg);
+                gen_op_fucom_ST0_FT0();
+                gen_op_fpop();
+                break;
+            case 0x33: /* de/3 */
+                switch(rm) {
+                case 1: /* fcompp */
+                    gen_op_fmov_FT0_STN(1);
+                    gen_op_fcom_ST0_FT0();
+                    gen_op_fpop();
+                    gen_op_fpop();
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x38: /* ffreep sti, undocumented op */
+                gen_op_ffree_STN(opreg);
+                gen_op_fpop();
+                break;
+            case 0x3c: /* df/4 */
+                switch(rm) {
+                case 0:
+                    gen_op_fnstsw_EAX();
+                    break;
+                default:
+                    goto illegal_op;
+                }
+                break;
+            case 0x3d: /* fucomip */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_op_fmov_FT0_STN(opreg);
+                gen_op_fucomi_ST0_FT0();
+                gen_op_fpop();
+                s->cc_op = CC_OP_EFLAGS;
+                break;
+            case 0x3e: /* fcomip */
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_op_fmov_FT0_STN(opreg);
+                gen_op_fcomi_ST0_FT0();
+                gen_op_fpop();
+                s->cc_op = CC_OP_EFLAGS;
+                break;
+            case 0x10 ... 0x13: /* fcmovxx */
+            case 0x18 ... 0x1b:
+                {
+                    int op1;
+                    static const uint8_t fcmov_cc[8] = {
+                        (JCC_B << 1),
+                        (JCC_Z << 1),
+                        (JCC_BE << 1),
+                        (JCC_P << 1),
+                    };
+                    op1 = fcmov_cc[op & 3] | ((op >> 3) & 1);
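+                    /* the low bit of op1 selects the negated condition,
+                       i.e. the fcmovnxx forms encoded with opcode db */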
+                    gen_setcc(s, op1);
+                    gen_op_fcmov_ST0_STN_T0(opreg);
+                }
+                break;
+            default:
+                goto illegal_op;
+            }
+        }
+#ifdef USE_CODE_COPY
+        s->tb->cflags |= CF_TB_FP_USED;
+#endif
+        break;
+        /************************/
+        /* string ops */
+
+    case 0xa4: /* movsS */
+    case 0xa5:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+
+        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+        } else {
+            gen_movs(s, ot);
+        }
+        break;
+
+    case 0xaa: /* stosS */
+    case 0xab:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+
+        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+        } else {
+            gen_stos(s, ot);
+        }
+        break;
+    case 0xac: /* lodsS */
+    case 0xad:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+        } else {
+            gen_lods(s, ot);
+        }
+        break;
+    case 0xae: /* scasS */
+    case 0xaf:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        if (prefixes & PREFIX_REPNZ) {
+            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+        } else if (prefixes & PREFIX_REPZ) {
+            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+        } else {
+            gen_scas(s, ot);
+            s->cc_op = CC_OP_SUBB + ot;
+        }
+        break;
+
+    case 0xa6: /* cmpsS */
+    case 0xa7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        if (prefixes & PREFIX_REPNZ) {
+            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+        } else if (prefixes & PREFIX_REPZ) {
+            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+        } else {
+            gen_cmps(s, ot);
+            s->cc_op = CC_OP_SUBB + ot;
+        }
+        break;
+    case 0x6c: /* insS */
+    case 0x6d:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        gen_check_io(s, ot, 1, pc_start - s->cs_base);
+        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+        } else {
+            gen_ins(s, ot);
+        }
+        break;
+    case 0x6e: /* outsS */
+    case 0x6f:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        gen_check_io(s, ot, 1, pc_start - s->cs_base);
+        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+        } else {
+            gen_outs(s, ot);
+        }
+        break;
+
+        /************************/
+        /* port I/O */
+    case 0xe4:
+    case 0xe5:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        val = ldub_code(s->pc++);
+        gen_op_movl_T0_im(val);
+        gen_check_io(s, ot, 0, pc_start - s->cs_base);
+        gen_op_in[ot]();
+        gen_op_mov_reg_T1[ot][R_EAX]();
+        break;
+    case 0xe6:
+    case 0xe7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        val = ldub_code(s->pc++);
+        gen_op_movl_T0_im(val);
+        gen_check_io(s, ot, 0, pc_start - s->cs_base);
+#ifdef VBOX /* bird: linux is writing to this port for delaying I/O. */
+        if (val == 0x80)
+            break;
+#endif /* VBOX */
+        gen_op_mov_TN_reg[ot][1][R_EAX]();
+        gen_op_out[ot]();
+        break;
+    case 0xec:
+    case 0xed:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        gen_op_mov_TN_reg[OT_WORD][0][R_EDX]();
+        gen_op_andl_T0_ffff();
+        gen_check_io(s, ot, 0, pc_start - s->cs_base);
+        gen_op_in[ot]();
+        gen_op_mov_reg_T1[ot][R_EAX]();
+        break;
+    case 0xee:
+    case 0xef:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        gen_op_mov_TN_reg[OT_WORD][0][R_EDX]();
+        gen_op_andl_T0_ffff();
+        gen_check_io(s, ot, 0, pc_start - s->cs_base);
+        gen_op_mov_TN_reg[ot][1][R_EAX]();
+        gen_op_out[ot]();
+        break;
+
+        /************************/
+        /* control */
+    case 0xc2: /* ret im */
+        val = ldsw_code(s->pc);
+        s->pc += 2;
+        gen_pop_T0(s);
+        if (CODE64(s) && s->dflag)
+            s->dflag = 2;
+        gen_stack_update(s, val + (2 << s->dflag));
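+        /* release the return address (2 << dflag bytes) plus the imm16
+           count of extra stack bytes to pop */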
+        if (s->dflag == 0)
+            gen_op_andl_T0_ffff();
+        gen_op_jmp_T0();
+        gen_eob(s);
+        break;
+    case 0xc3: /* ret */
+        gen_pop_T0(s);
+        gen_pop_update(s);
+        if (s->dflag == 0)
+            gen_op_andl_T0_ffff();
+        gen_op_jmp_T0();
+        gen_eob(s);
+        break;
+    case 0xca: /* lret im */
+        val = ldsw_code(s->pc);
+        s->pc += 2;
+    do_lret:
+        if (s->pe && !s->vm86) {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_op_lret_protected(s->dflag, val);
+        } else {
+            gen_stack_A0(s);
+            /* pop offset */
+            gen_op_ld_T0_A0[1 + s->dflag + s->mem_index]();
+            if (s->dflag == 0)
+                gen_op_andl_T0_ffff();
+            /* NOTE: keeping EIP updated is not a problem in case of
+               exception */
+            gen_op_jmp_T0();
+            /* pop selector */
+            gen_op_addl_A0_im(2 << s->dflag);
+            gen_op_ld_T0_A0[1 + s->dflag + s->mem_index]();
+            gen_op_movl_seg_T0_vm(offsetof(CPUX86State,segs[R_CS]));
+            /* add stack offset */
+            gen_stack_update(s, val + (4 << s->dflag));
+        }
+        gen_eob(s);
+        break;
+    case 0xcb: /* lret */
+        val = 0;
+        goto do_lret;
+    case 0xcf: /* iret */
+        if (!s->pe) {
+            /* real mode */
+            gen_op_iret_real(s->dflag);
+            s->cc_op = CC_OP_EFLAGS;
+        } else if (s->vm86) {
+#ifdef VBOX
+            if (s->iopl != 3 && (!s->vme || s->dflag)) {
+#else
+            if (s->iopl != 3) {
+#endif
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                gen_op_iret_real(s->dflag);
+                s->cc_op = CC_OP_EFLAGS;
+            }
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_op_iret_protected(s->dflag, s->pc - s->cs_base);
+            s->cc_op = CC_OP_EFLAGS;
+        }
+        gen_eob(s);
+        break;
+    case 0xe8: /* call im */
+        {
+            if (dflag)
+                tval = (int32_t)insn_get(s, OT_LONG);
+            else
+                tval = (int16_t)insn_get(s, OT_WORD);
+            next_eip = s->pc - s->cs_base;
+            tval += next_eip;
+            if (s->dflag == 0)
+                tval &= 0xffff;
+            gen_movtl_T0_im(next_eip);
+            gen_push_T0(s);
+            gen_jmp(s, tval);
+        }
+        break;
+    case 0x9a: /* lcall im */
+        {
+            unsigned int selector, offset;
+
+            if (CODE64(s))
+                goto illegal_op;
+            ot = dflag ? OT_LONG : OT_WORD;
+            offset = insn_get(s, ot);
+            selector = insn_get(s, OT_WORD);
+
+            gen_op_movl_T0_im(selector);
+            gen_op_movl_T1_imu(offset);
+        }
+        goto do_lcall;
+    case 0xe9: /* jmp im */
+        if (dflag)
+            tval = (int32_t)insn_get(s, OT_LONG);
+        else
+            tval = (int16_t)insn_get(s, OT_WORD);
+        tval += s->pc - s->cs_base;
+        if (s->dflag == 0)
+            tval &= 0xffff;
+        gen_jmp(s, tval);
+        break;
+    case 0xea: /* ljmp im */
+        {
+            unsigned int selector, offset;
+
+            if (CODE64(s))
+                goto illegal_op;
+            ot = dflag ? OT_LONG : OT_WORD;
+            offset = insn_get(s, ot);
+            selector = insn_get(s, OT_WORD);
+
+            gen_op_movl_T0_im(selector);
+            gen_op_movl_T1_imu(offset);
+        }
+        goto do_ljmp;
+    case 0xeb: /* jmp Jb */
+        tval = (int8_t)insn_get(s, OT_BYTE);
+        tval += s->pc - s->cs_base;
+        if (s->dflag == 0)
+            tval &= 0xffff;
+        gen_jmp(s, tval);
+        break;
+    case 0x70 ... 0x7f: /* jcc Jb */
+        tval = (int8_t)insn_get(s, OT_BYTE);
+        goto do_jcc;
+    case 0x180 ... 0x18f: /* jcc Jv */
+        if (dflag) {
+            tval = (int32_t)insn_get(s, OT_LONG);
+        } else {
+            tval = (int16_t)insn_get(s, OT_WORD);
+        }
+    do_jcc:
+        next_eip = s->pc - s->cs_base;
+        tval += next_eip;
+        if (s->dflag == 0)
+            tval &= 0xffff;
+        gen_jcc(s, b, tval, next_eip);
+        break;
+
+    case 0x190 ... 0x19f: /* setcc Gv */
+        modrm = ldub_code(s->pc++);
+        gen_setcc(s, b);
+        gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1);
+        break;
+    case 0x140 ... 0x14f: /* cmov Gv, Ev */
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        gen_setcc(s, b);
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T1_A0[ot + s->mem_index]();
+        } else {
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_mov_TN_reg[ot][1][rm]();
+        }
+        gen_op_cmov_reg_T1_T0[ot - OT_WORD][reg]();
+        break;
+
+        /************************/
+        /* flags */
+    case 0x9c: /* pushf */
+#ifdef VBOX
+        if (s->vm86 && s->iopl != 3 && (!s->vme || s->dflag)) {
+#else
+        if (s->vm86 && s->iopl != 3) {
+#endif
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+#ifdef VBOX
+            if (s->vm86 && s->vme && s->iopl != 3)
+                gen_op_movl_T0_eflags_vme();
+            else
+#endif
+                gen_op_movl_T0_eflags();
+            gen_push_T0(s);
+        }
+        break;
+    case 0x9d: /* popf */
+#ifdef VBOX
+        if (s->vm86 && s->iopl != 3 && (!s->vme || s->dflag)) {
+#else
+        if (s->vm86 && s->iopl != 3) {
+#endif
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            gen_pop_T0(s);
+            if (s->cpl == 0) {
+                if (s->dflag) {
+                    gen_op_movl_eflags_T0_cpl0();
+                } else {
+                    gen_op_movw_eflags_T0_cpl0();
+                }
+            } else {
+                if (s->cpl <= s->iopl) {
+                    if (s->dflag) {
+                        gen_op_movl_eflags_T0_io();
+                    } else {
+                        gen_op_movw_eflags_T0_io();
+                    }
+                } else {
+                    if (s->dflag) {
+                        gen_op_movl_eflags_T0();
+                    } else {
+#ifdef VBOX
+                        if (s->vm86 && s->vme)
+                            gen_op_movw_eflags_T0_vme();
+                        else
+#endif
+                            gen_op_movw_eflags_T0();
+                    }
+                }
+            }
+            gen_pop_update(s);
+            s->cc_op = CC_OP_EFLAGS;
+            /* abort translation because TF flag may change */
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+    case 0x9e: /* sahf */
+        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
+            goto illegal_op;
+        gen_op_mov_TN_reg[OT_BYTE][0][R_AH]();
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_op_movb_eflags_T0();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x9f: /* lahf */
+        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_op_movl_T0_eflags();
+        gen_op_mov_reg_T0[OT_BYTE][R_AH]();
+        break;
+    case 0xf5: /* cmc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_op_cmc();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xf8: /* clc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_op_clc();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xf9: /* stc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_op_stc();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xfc: /* cld */
+        gen_op_cld();
+        break;
+    case 0xfd: /* std */
+        gen_op_std();
+        break;
+
+        /************************/
+        /* bit operations */
+    case 0x1ba: /* bt/bts/btr/btc Gv, im */
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        op = (modrm >> 3) & 7;
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        if (mod != 3) {
+            s->rip_offset = 1;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0[ot + s->mem_index]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][rm]();
+        }
+        /* load shift */
+        val = ldub_code(s->pc++);
+        gen_op_movl_T1_im(val);
+        if (op < 4)
+            goto illegal_op;
+        op -= 4;
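+        /* /4../7 of group 8 are bt, bts, btr and btc; rebase op to 0..3 */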
+        gen_op_btx_T0_T1_cc[ot - OT_WORD][op]();
+        s->cc_op = CC_OP_SARB + ot;
+        if (op != 0) {
+            if (mod != 3)
+                gen_op_st_T0_A0[ot + s->mem_index]();
+            else
+                gen_op_mov_reg_T0[ot][rm]();
+            gen_op_update_bt_cc();
+        }
+        break;
+    case 0x1a3: /* bt Gv, Ev */
+        op = 0;
+        goto do_btx;
+    case 0x1ab: /* bts */
+        op = 1;
+        goto do_btx;
+    case 0x1b3: /* btr */
+        op = 2;
+        goto do_btx;
+    case 0x1bb: /* btc */
+        op = 3;
+    do_btx:
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        gen_op_mov_TN_reg[OT_LONG][1][reg]();
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            /* the bit index may address past the operand, so add the
+               corresponding byte displacement to A0 */
+            gen_op_add_bit_A0_T1[ot - OT_WORD]();
+            gen_op_ld_T0_A0[ot + s->mem_index]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][rm]();
+        }
+        gen_op_btx_T0_T1_cc[ot - OT_WORD][op]();
+        s->cc_op = CC_OP_SARB + ot;
+        if (op != 0) {
+            if (mod != 3)
+                gen_op_st_T0_A0[ot + s->mem_index]();
+            else
+                gen_op_mov_reg_T0[ot][rm]();
+            gen_op_update_bt_cc();
+        }
+        break;
+    case 0x1bc: /* bsf */
+    case 0x1bd: /* bsr */
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        /* NOTE: to handle a zero source operand, the destination is
+           preloaded with its old value; this could be optimized with
+           a generated jump */
+        gen_op_mov_TN_reg[ot][1][reg]();
+        gen_op_bsx_T0_cc[ot - OT_WORD][b & 1]();
+        gen_op_mov_reg_T1[ot][reg]();
+        s->cc_op = CC_OP_LOGICB + ot;
+        break;
+        /************************/
+        /* bcd */
+    case 0x27: /* daa */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_op_daa();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x2f: /* das */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_op_das();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x37: /* aaa */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_op_aaa();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x3f: /* aas */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_op_aas();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xd4: /* aam */
+        if (CODE64(s))
+            goto illegal_op;
+        val = ldub_code(s->pc++);
+        if (val == 0) {
+            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
+        } else {
+            gen_op_aam(val);
+            s->cc_op = CC_OP_LOGICB;
+        }
+        break;
+    case 0xd5: /* aad */
+        if (CODE64(s))
+            goto illegal_op;
+        val = ldub_code(s->pc++);
+        gen_op_aad(val);
+        s->cc_op = CC_OP_LOGICB;
+        break;
+        /************************/
+        /* misc */
+    case 0x90: /* nop */
+        /* XXX: xchg + rex handling */
+        /* XXX: correct lock test for all insn */
+        if (prefixes & PREFIX_LOCK)
+            goto illegal_op;
+        break;
+    case 0x9b: /* fwait */
+        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
+            (HF_MP_MASK | HF_TS_MASK)) {
+            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_op_fwait();
+        }
+        break;
+    case 0xcc: /* int3 */
+#ifdef VBOX
+        if (s->vm86 && s->iopl != 3 && !s->vme) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else
+#endif
+            gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
+        break;
+    case 0xcd: /* int N */
+        val = ldub_code(s->pc++);
+#ifdef VBOX
+        if (s->vm86 && s->iopl != 3 && !s->vme) {
+#else
+        if (s->vm86 && s->iopl != 3) {
+#endif
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
+        }
+        break;
+    case 0xce: /* into */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_jmp_im(pc_start - s->cs_base);
+        gen_op_into(s->pc - pc_start);
+        break;
+    case 0xf1: /* icebp (undocumented, exits to external debugger) */
+#if 1
+        gen_debug(s, pc_start - s->cs_base);
+#else
+        /* start debug */
+        tb_flush(cpu_single_env);
+        cpu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
+#endif
+        break;
+    case 0xfa: /* cli */
+        if (!s->vm86) {
+            if (s->cpl <= s->iopl) {
+                gen_op_cli();
+            } else {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            }
+        } else {
+            if (s->iopl == 3) {
+                gen_op_cli();
+#ifdef VBOX
+            } else if (s->iopl != 3 && s->vme) {
+                gen_op_cli_vme();
+#endif
+            } else {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            }
+        }
+        break;
+    case 0xfb: /* sti */
+        if (!s->vm86) {
+            if (s->cpl <= s->iopl) {
+            gen_sti:
+                gen_op_sti();
+                /* interrupts are enabled only after the instruction that
+                   follows sti; if several such instructions follow each
+                   other, only the first one sets the inhibit flag */
+                if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+                    gen_op_set_inhibit_irq();
+                /* give a chance to handle pending irqs */
+                gen_jmp_im(s->pc - s->cs_base);
+                gen_eob(s);
+            } else {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            }
+        } else {
+            if (s->iopl == 3) {
+                goto gen_sti;
+#ifdef VBOX
+            } else if (s->iopl != 3 && s->vme) {
+                gen_op_sti_vme();
+                /* give a chance to handle pending irqs */
+                gen_jmp_im(s->pc - s->cs_base);
+                gen_eob(s);
+#endif
+            } else {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            }
+        }
+        break;
+    case 0x62: /* bound */
+        if (CODE64(s))
+            goto illegal_op;
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = (modrm >> 3) & 7;
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        gen_op_mov_TN_reg[ot][0][reg]();
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_jmp_im(pc_start - s->cs_base);
+        if (ot == OT_WORD)
+            gen_op_boundw();
+        else
+            gen_op_boundl();
+        break;
+    case 0x1c8 ... 0x1cf: /* bswap reg */
+        reg = (b & 7) | REX_B(s);
+#ifdef TARGET_X86_64
+        if (dflag == 2) {
+            gen_op_mov_TN_reg[OT_QUAD][0][reg]();
+            gen_op_bswapq_T0();
+            gen_op_mov_reg_T0[OT_QUAD][reg]();
+        } else
+#endif
+        {
+            gen_op_mov_TN_reg[OT_LONG][0][reg]();
+            gen_op_bswapl_T0();
+            gen_op_mov_reg_T0[OT_LONG][reg]();
+        }
+        break;
+    case 0xd6: /* salc */
+        if (CODE64(s))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_op_salc();
+        break;
+    case 0xe0: /* loopnz */
+    case 0xe1: /* loopz */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        /* FALL THRU */
+    case 0xe2: /* loop */
+    case 0xe3: /* jecxz */
+        {
+            int l1, l2;
+
+            tval = (int8_t)insn_get(s, OT_BYTE);
+            next_eip = s->pc - s->cs_base;
+            tval += next_eip;
+            if (s->dflag == 0)
+                tval &= 0xffff;
+
+            l1 = gen_new_label();
+            l2 = gen_new_label();
+            b &= 3;
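+            /* b: 0 = loopnz, 1 = loopz, 2 = loop, 3 = jecxz */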
+            if (b == 3) {
+                gen_op_jz_ecx[s->aflag](l1);
+            } else {
+                gen_op_dec_ECX[s->aflag]();
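+                /* loopnz/loopz also test ZF, so load the flags into T0 */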
+                if (b <= 1)
+                    gen_op_mov_T0_cc();
+                gen_op_loop[s->aflag][b](l1);
+            }
+
+            gen_jmp_im(next_eip);
+            gen_op_jmp_label(l2);
+            gen_set_label(l1);
+            gen_jmp_im(tval);
+            gen_set_label(l2);
+            gen_eob(s);
+        }
+        break;
+    case 0x130: /* wrmsr */
+    case 0x132: /* rdmsr */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (b & 2)
+                gen_op_rdmsr();
+            else
+                gen_op_wrmsr();
+        }
+        break;
+    case 0x131: /* rdtsc */
+        gen_jmp_im(pc_start - s->cs_base);
+        gen_op_rdtsc();
+        break;
+    case 0x134: /* sysenter */
+        if (CODE64(s))
+            goto illegal_op;
+        if (!s->pe) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC) {
+                gen_op_set_cc_op(s->cc_op);
+                s->cc_op = CC_OP_DYNAMIC;
+            }
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_op_sysenter();
+            gen_eob(s);
+        }
+        break;
+    case 0x135: /* sysexit */
+        if (CODE64(s))
+            goto illegal_op;
+        if (!s->pe) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC) {
+                gen_op_set_cc_op(s->cc_op);
+                s->cc_op = CC_OP_DYNAMIC;
+            }
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_op_sysexit();
+            gen_eob(s);
+        }
+        break;
+#ifdef TARGET_X86_64
+    case 0x105: /* syscall */
+        /* XXX: is it usable in real mode? */
+        if (s->cc_op != CC_OP_DYNAMIC) {
+            gen_op_set_cc_op(s->cc_op);
+            s->cc_op = CC_OP_DYNAMIC;
+        }
+        gen_jmp_im(pc_start - s->cs_base);
+        gen_op_syscall(s->pc - pc_start);
+        gen_eob(s);
+        break;
+    case 0x107: /* sysret */
+        if (!s->pe) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC) {
+                gen_op_set_cc_op(s->cc_op);
+                s->cc_op = CC_OP_DYNAMIC;
+            }
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_op_sysret(s->dflag);
+            /* condition codes are modified only in long mode */
+            if (s->lma)
+                s->cc_op = CC_OP_EFLAGS;
+            gen_eob(s);
+        }
+        break;
+#endif
+    case 0x1a2: /* cpuid */
+        gen_op_cpuid();
+        break;
+    case 0xf4: /* hlt */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_op_hlt();
+            s->is_jmp = 3;
+        }
+        break;
+    case 0x100:
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        op = (modrm >> 3) & 7;
+        switch(op) {
+        case 0: /* sldt */
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            gen_op_movl_T0_env(offsetof(CPUX86State,ldt.selector));
+            ot = OT_WORD;
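+            /* a register destination uses the full operand size, a memory
+               destination always stores 16 bits */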
+            if (mod == 3)
+                ot += s->dflag;
+            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            break;
+        case 2: /* lldt */
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_lldt_T0();
+            }
+            break;
+        case 1: /* str */
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            gen_op_movl_T0_env(offsetof(CPUX86State,tr.selector));
+            ot = OT_WORD;
+            if (mod == 3)
+                ot += s->dflag;
+            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            break;
+        case 3: /* ltr */
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_jmp_im(pc_start - s->cs_base);
+                gen_op_ltr_T0();
+            }
+            break;
+        case 4: /* verr */
+        case 5: /* verw */
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            if (op == 4)
+                gen_op_verr();
+            else
+                gen_op_verw();
+            s->cc_op = CC_OP_EFLAGS;
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+    case 0x101:
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        op = (modrm >> 3) & 7;
+        rm = modrm & 7;
+        switch(op) {
+        case 0: /* sgdt */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_movl_T0_env(offsetof(CPUX86State, gdt.limit));
+            gen_op_st_T0_A0[OT_WORD + s->mem_index]();
+            gen_add_A0_im(s, 2);
+            gen_op_movtl_T0_env(offsetof(CPUX86State, gdt.base));
+            if (!s->dflag)
+                gen_op_andl_T0_im(0xffffff);
+            gen_op_st_T0_A0[CODE64(s) + OT_LONG + s->mem_index]();
+            break;
+        case 1:
+            if (mod == 3) {
+                switch (rm) {
+                case 0: /* monitor */
+                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
+                        s->cpl != 0)
+                        goto illegal_op;
+                    gen_jmp_im(pc_start - s->cs_base);
+#ifdef TARGET_X86_64
+                    if (s->aflag == 2) {
+                        gen_op_movq_A0_reg[R_EBX]();
+                        gen_op_addq_A0_AL();
+                    } else
+#endif
+                    {
+                        gen_op_movl_A0_reg[R_EBX]();
+                        gen_op_addl_A0_AL();
+                        if (s->aflag == 0)
+                            gen_op_andl_A0_ffff();
+                    }
+                    gen_add_A0_ds_seg(s);
+                    gen_op_monitor();
+                    break;
+                case 1: /* mwait */
+                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
+                        s->cpl != 0)
+                        goto illegal_op;
+                    if (s->cc_op != CC_OP_DYNAMIC) {
+                        gen_op_set_cc_op(s->cc_op);
+                        s->cc_op = CC_OP_DYNAMIC;
+                    }
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_op_mwait();
+                    gen_eob(s);
+                    break;
+                default:
+                    goto illegal_op;
+                }
+            } else { /* sidt */
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_movl_T0_env(offsetof(CPUX86State, idt.limit));
+                gen_op_st_T0_A0[OT_WORD + s->mem_index]();
+                gen_add_A0_im(s, 2);
+                gen_op_movtl_T0_env(offsetof(CPUX86State, idt.base));
+                if (!s->dflag)
+                    gen_op_andl_T0_im(0xffffff);
+                gen_op_st_T0_A0[CODE64(s) + OT_LONG + s->mem_index]();
+            }
+            break;
+        case 2: /* lgdt */
+        case 3: /* lidt */
+            if (mod == 3)
+                goto illegal_op;
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_T1_A0[OT_WORD + s->mem_index]();
+                gen_add_A0_im(s, 2);
+                gen_op_ld_T0_A0[CODE64(s) + OT_LONG + s->mem_index]();
+                if (!s->dflag)
+                    gen_op_andl_T0_im(0xffffff);
+                if (op == 2) {
+                    gen_op_movtl_env_T0(offsetof(CPUX86State,gdt.base));
+                    gen_op_movl_env_T1(offsetof(CPUX86State,gdt.limit));
+                } else {
+                    gen_op_movtl_env_T0(offsetof(CPUX86State,idt.base));
+                    gen_op_movl_env_T1(offsetof(CPUX86State,idt.limit));
+                }
+            }
+            break;
+        case 4: /* smsw */
+            gen_op_movl_T0_env(offsetof(CPUX86State,cr[0]));
+            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 1);
+            break;
+        case 6: /* lmsw */
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_op_lmsw_T0();
+                gen_jmp_im(s->pc - s->cs_base);
+                gen_eob(s);
+            }
+            break;
+        case 7: /* invlpg */
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+            } else {
+                if (mod == 3) {
+#ifdef TARGET_X86_64
+                    if (CODE64(s) && rm == 0) {
+                        /* swapgs */
+                        gen_op_movtl_T0_env(offsetof(CPUX86State,segs[R_GS].base));
+                        gen_op_movtl_T1_env(offsetof(CPUX86State,kernelgsbase));
+                        gen_op_movtl_env_T1(offsetof(CPUX86State,segs[R_GS].base));
+                        gen_op_movtl_env_T0(offsetof(CPUX86State,kernelgsbase));
+                    } else
+#endif
+                    {
+                        goto illegal_op;
+                    }
+                } else {
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_op_invlpg_A0();
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_eob(s);
+                }
+            }
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+    case 0x108: /* invd */
+    case 0x109: /* wbinvd */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            /* nothing to do */
+        }
+        break;
+    case 0x63: /* arpl or movsxd (x86_64) */
+#ifdef TARGET_X86_64
+        if (CODE64(s)) {
+            int d_ot;
+            /* d_ot is the size of destination */
+            d_ot = dflag + OT_WORD;
+
+            modrm = ldub_code(s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            mod = (modrm >> 6) & 3;
+            rm = (modrm & 7) | REX_B(s);
+
+            if (mod == 3) {
+                gen_op_mov_TN_reg[OT_LONG][0][rm]();
+                /* sign extend */
+                if (d_ot == OT_QUAD)
+                    gen_op_movslq_T0_T0();
+                gen_op_mov_reg_T0[d_ot][reg]();
+            } else {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                if (d_ot == OT_QUAD) {
+                    gen_op_lds_T0_A0[OT_LONG + s->mem_index]();
+                } else {
+                    gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                }
+                gen_op_mov_reg_T0[d_ot][reg]();
+            }
+        } else
+#endif
+        {
+            if (!s->pe || s->vm86)
+                goto illegal_op;
+            ot = dflag ? OT_LONG : OT_WORD;
+            modrm = ldub_code(s->pc++);
+            reg = (modrm >> 3) & 7;
+            mod = (modrm >> 6) & 3;
+            rm = modrm & 7;
+#ifdef VBOX /* Fix for obvious bug - T1 needs to be loaded */
+            gen_op_mov_TN_reg[ot][1][reg]();
+#endif
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_T0_A0[ot + s->mem_index]();
+            } else {
+                gen_op_mov_TN_reg[ot][0][rm]();
+            }
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_op_arpl();
+            s->cc_op = CC_OP_EFLAGS;
+            if (mod != 3) {
+                gen_op_st_T0_A0[ot + s->mem_index]();
+            } else {
+                gen_op_mov_reg_T0[ot][rm]();
+            }
+            gen_op_arpl_update();
+        }
+        break;
+    case 0x102: /* lar */
+    case 0x103: /* lsl */
+        if (!s->pe || s->vm86)
+            goto illegal_op;
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_op_mov_TN_reg[ot][1][reg]();
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        if (b == 0x102)
+            gen_op_lar();
+        else
+            gen_op_lsl();
+        s->cc_op = CC_OP_EFLAGS;
+        gen_op_mov_reg_T1[ot][reg]();
+        break;
+    case 0x118:
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        op = (modrm >> 3) & 7;
+        switch(op) {
+        case 0: /* prefetchnta */
+        case 1: /* prefetcht0 */
+        case 2: /* prefetcht1 */
+        case 3: /* prefetcht2 */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            /* nothing more to do */
+            break;
+        default: /* nop (multi byte) */
+            gen_nop_modrm(s, modrm);
+            break;
+        }
+        break;
+    case 0x119 ... 0x11f: /* nop (multi byte) */
+        modrm = ldub_code(s->pc++);
+        gen_nop_modrm(s, modrm);
+        break;
+    case 0x120: /* mov reg, crN */
+    case 0x122: /* mov crN, reg */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            modrm = ldub_code(s->pc++);
+            if ((modrm & 0xc0) != 0xc0)
+                goto illegal_op;
+            rm = (modrm & 7) | REX_B(s);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            if (CODE64(s))
+                ot = OT_QUAD;
+            else
+                ot = OT_LONG;
+            switch(reg) {
+            case 0:
+            case 2:
+            case 3:
+            case 4:
+            case 8:
+                if (b & 2) {
+                    gen_op_mov_TN_reg[ot][0][rm]();
+                    gen_op_movl_crN_T0(reg);
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_eob(s);
+                } else {
+#if !defined(CONFIG_USER_ONLY)
+                    if (reg == 8)
+                        gen_op_movtl_T0_cr8();
+                    else
+#endif
+                        gen_op_movtl_T0_env(offsetof(CPUX86State,cr[reg]));
+                    gen_op_mov_reg_T0[ot][rm]();
+                }
+                break;
+            default:
+                goto illegal_op;
+            }
+        }
+        break;
+    case 0x121: /* mov reg, drN */
+    case 0x123: /* mov drN, reg */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            modrm = ldub_code(s->pc++);
+            if ((modrm & 0xc0) != 0xc0)
+                goto illegal_op;
+            rm = (modrm & 7) | REX_B(s);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            if (CODE64(s))
+                ot = OT_QUAD;
+            else
+                ot = OT_LONG;
+            /* XXX: do it dynamically with CR4.DE bit */
+            if (reg == 4 || reg == 5 || reg >= 8)
+                goto illegal_op;
+            if (b & 2) {
+                gen_op_mov_TN_reg[ot][0][rm]();
+                gen_op_movl_drN_T0(reg);
+                gen_jmp_im(s->pc - s->cs_base);
+                gen_eob(s);
+            } else {
+                gen_op_movtl_T0_env(offsetof(CPUX86State,dr[reg]));
+                gen_op_mov_reg_T0[ot][rm]();
+            }
+        }
+        break;
+    case 0x106: /* clts */
+        if (s->cpl != 0) {
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        } else {
+            gen_op_clts();
+            /* abort block because static cpu state changed */
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+    /* MMX/SSE/SSE2/PNI support */
+    case 0x1c3: /* MOVNTI reg, mem */
+        if (!(s->cpuid_features & CPUID_SSE2))
+            goto illegal_op;
+        ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        reg = ((modrm >> 3) & 7) | rex_r;
+        /* generate a generic store */
+        gen_ldst_modrm(s, modrm, ot, reg, 1);
+        break;
+    case 0x1ae:
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        op = (modrm >> 3) & 7;
+        switch(op) {
+        case 0: /* fxsave */
+            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
+                (s->flags & HF_EM_MASK))
+                goto illegal_op;
+            if (s->flags & HF_TS_MASK) {
+                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+                break;
+            }
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_fxsave_A0((s->dflag == 2));
+            break;
+        case 1: /* fxrstor */
+            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
+                (s->flags & HF_EM_MASK))
+                goto illegal_op;
+            if (s->flags & HF_TS_MASK) {
+                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+                break;
+            }
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_fxrstor_A0((s->dflag == 2));
+            break;
+        case 2: /* ldmxcsr */
+        case 3: /* stmxcsr */
+            if (s->flags & HF_TS_MASK) {
+                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+                break;
+            }
+            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
+                mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            if (op == 2) {
+                gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                gen_op_movl_env_T0(offsetof(CPUX86State, mxcsr));
+            } else {
+                gen_op_movl_T0_env(offsetof(CPUX86State, mxcsr));
+                gen_op_st_T0_A0[OT_LONG + s->mem_index]();
+            }
+            break;
+        case 5: /* lfence */
+        case 6: /* mfence */
+            if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE))
+                goto illegal_op;
+            break;
+        case 7: /* sfence / clflush */
+            if ((modrm & 0xc7) == 0xc0) {
+                /* sfence */
+                if (!(s->cpuid_features & CPUID_SSE))
+                    goto illegal_op;
+            } else {
+                /* clflush */
+                if (!(s->cpuid_features & CPUID_CLFLUSH))
+                    goto illegal_op;
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            }
+            break;
+        default:
+            goto illegal_op;
+        }
+        break;
+    case 0x10d: /* prefetch */
+        modrm = ldub_code(s->pc++);
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        /* ignore for now */
+        break;
+    case 0x1aa: /* rsm */
+        if (!(s->flags & HF_SMM_MASK))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC) {
+            gen_op_set_cc_op(s->cc_op);
+            s->cc_op = CC_OP_DYNAMIC;
+        }
+        gen_jmp_im(s->pc - s->cs_base);
+        gen_op_rsm();
+        gen_eob(s);
+        break;
+    case 0x110 ... 0x117:
+    case 0x128 ... 0x12f:
+    case 0x150 ... 0x177:
+    case 0x17c ... 0x17f:
+    case 0x1c2:
+    case 0x1c4 ... 0x1c6:
+    case 0x1d0 ... 0x1fe:
+        gen_sse(s, b, pc_start, rex_r);
+        break;
+    default:
+        goto illegal_op;
+    }
+    /* lock generation */
+    if (s->prefix & PREFIX_LOCK)
+        gen_op_unlock();
+    return s->pc;
+ illegal_op:
+    if (s->prefix & PREFIX_LOCK)
+        gen_op_unlock();
+    /* XXX: ensure that no lock was generated */
+    gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+    return s->pc;
+}
+
+#define CC_OSZAPC (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C)
+#define CC_OSZAP (CC_O | CC_S | CC_Z | CC_A | CC_P)
+
+/* flags read by an operation */
+static uint16_t opc_read_flags[NB_OPS] = {
+    [INDEX_op_aas] = CC_A,
+    [INDEX_op_aaa] = CC_A,
+    [INDEX_op_das] = CC_A | CC_C,
+    [INDEX_op_daa] = CC_A | CC_C,
+
+    /* subtle: due to the incl/decl implementation, C is used */
+    [INDEX_op_update_inc_cc] = CC_C,
+
+    [INDEX_op_into] = CC_O,
+
+    [INDEX_op_jb_subb] = CC_C,
+    [INDEX_op_jb_subw] = CC_C,
+    [INDEX_op_jb_subl] = CC_C,
+
+    [INDEX_op_jz_subb] = CC_Z,
+    [INDEX_op_jz_subw] = CC_Z,
+    [INDEX_op_jz_subl] = CC_Z,
+
+    [INDEX_op_jbe_subb] = CC_Z | CC_C,
+    [INDEX_op_jbe_subw] = CC_Z | CC_C,
+    [INDEX_op_jbe_subl] = CC_Z | CC_C,
+
+    [INDEX_op_js_subb] = CC_S,
+    [INDEX_op_js_subw] = CC_S,
+    [INDEX_op_js_subl] = CC_S,
+
+    [INDEX_op_jl_subb] = CC_O | CC_S,
+    [INDEX_op_jl_subw] = CC_O | CC_S,
+    [INDEX_op_jl_subl] = CC_O | CC_S,
+
+    [INDEX_op_jle_subb] = CC_O | CC_S | CC_Z,
+    [INDEX_op_jle_subw] = CC_O | CC_S | CC_Z,
+    [INDEX_op_jle_subl] = CC_O | CC_S | CC_Z,
+
+    [INDEX_op_loopnzw] = CC_Z,
+    [INDEX_op_loopnzl] = CC_Z,
+    [INDEX_op_loopzw] = CC_Z,
+    [INDEX_op_loopzl] = CC_Z,
+
+    [INDEX_op_seto_T0_cc] = CC_O,
+    [INDEX_op_setb_T0_cc] = CC_C,
+    [INDEX_op_setz_T0_cc] = CC_Z,
+    [INDEX_op_setbe_T0_cc] = CC_Z | CC_C,
+    [INDEX_op_sets_T0_cc] = CC_S,
+    [INDEX_op_setp_T0_cc] = CC_P,
+    [INDEX_op_setl_T0_cc] = CC_O | CC_S,
+    [INDEX_op_setle_T0_cc] = CC_O | CC_S | CC_Z,
+
+    [INDEX_op_setb_T0_subb] = CC_C,
+    [INDEX_op_setb_T0_subw] = CC_C,
+    [INDEX_op_setb_T0_subl] = CC_C,
+
+    [INDEX_op_setz_T0_subb] = CC_Z,
+    [INDEX_op_setz_T0_subw] = CC_Z,
+    [INDEX_op_setz_T0_subl] = CC_Z,
+
+    [INDEX_op_setbe_T0_subb] = CC_Z | CC_C,
+    [INDEX_op_setbe_T0_subw] = CC_Z | CC_C,
+    [INDEX_op_setbe_T0_subl] = CC_Z | CC_C,
+
+    [INDEX_op_sets_T0_subb] = CC_S,
+    [INDEX_op_sets_T0_subw] = CC_S,
+    [INDEX_op_sets_T0_subl] = CC_S,
+
+    [INDEX_op_setl_T0_subb] = CC_O | CC_S,
+    [INDEX_op_setl_T0_subw] = CC_O | CC_S,
+    [INDEX_op_setl_T0_subl] = CC_O | CC_S,
+
+    [INDEX_op_setle_T0_subb] = CC_O | CC_S | CC_Z,
+    [INDEX_op_setle_T0_subw] = CC_O | CC_S | CC_Z,
+    [INDEX_op_setle_T0_subl] = CC_O | CC_S | CC_Z,
+
+    [INDEX_op_movl_T0_eflags] = CC_OSZAPC,
+    [INDEX_op_cmc] = CC_C,
+    [INDEX_op_salc] = CC_C,
+
+    /* needed for correct flag optimisation before string ops */
+    [INDEX_op_jnz_ecxw] = CC_OSZAPC,
+    [INDEX_op_jnz_ecxl] = CC_OSZAPC,
+    [INDEX_op_jz_ecxw] = CC_OSZAPC,
+    [INDEX_op_jz_ecxl] = CC_OSZAPC,
+
+#ifdef TARGET_X86_64
+    [INDEX_op_jb_subq] = CC_C,
+    [INDEX_op_jz_subq] = CC_Z,
+    [INDEX_op_jbe_subq] = CC_Z | CC_C,
+    [INDEX_op_js_subq] = CC_S,
+    [INDEX_op_jl_subq] = CC_O | CC_S,
+    [INDEX_op_jle_subq] = CC_O | CC_S | CC_Z,
+
+    [INDEX_op_loopnzq] = CC_Z,
+    [INDEX_op_loopzq] = CC_Z,
+
+    [INDEX_op_setb_T0_subq] = CC_C,
+    [INDEX_op_setz_T0_subq] = CC_Z,
+    [INDEX_op_setbe_T0_subq] = CC_Z | CC_C,
+    [INDEX_op_sets_T0_subq] = CC_S,
+    [INDEX_op_setl_T0_subq] = CC_O | CC_S,
+    [INDEX_op_setle_T0_subq] = CC_O | CC_S | CC_Z,
+
+    [INDEX_op_jnz_ecxq] = CC_OSZAPC,
+    [INDEX_op_jz_ecxq] = CC_OSZAPC,
+#endif
+
+#define DEF_READF(SUFFIX)\
+    [INDEX_op_adcb ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    [INDEX_op_adcw ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    [INDEX_op_adcl ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    X86_64_DEF([INDEX_op_adcq ## SUFFIX ## _T0_T1_cc] = CC_C,)\
+    [INDEX_op_sbbb ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    [INDEX_op_sbbw ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    [INDEX_op_sbbl ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    X86_64_DEF([INDEX_op_sbbq ## SUFFIX ## _T0_T1_cc] = CC_C,)\
+\
+    [INDEX_op_rclb ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    [INDEX_op_rclw ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    [INDEX_op_rcll ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    X86_64_DEF([INDEX_op_rclq ## SUFFIX ## _T0_T1_cc] = CC_C,)\
+    [INDEX_op_rcrb ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    [INDEX_op_rcrw ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    [INDEX_op_rcrl ## SUFFIX ## _T0_T1_cc] = CC_C,\
+    X86_64_DEF([INDEX_op_rcrq ## SUFFIX ## _T0_T1_cc] = CC_C,)
+
+    DEF_READF( )
+    DEF_READF(_raw)
+#ifndef CONFIG_USER_ONLY
+    DEF_READF(_kernel)
+    DEF_READF(_user)
+#endif
+};
+
+/* flags written by an operation */
+static uint16_t opc_write_flags[NB_OPS] = {
+    [INDEX_op_update2_cc] = CC_OSZAPC,
+    [INDEX_op_update1_cc] = CC_OSZAPC,
+    [INDEX_op_cmpl_T0_T1_cc] = CC_OSZAPC,
+    [INDEX_op_update_neg_cc] = CC_OSZAPC,
+    /* subtle: due to the incl/decl implementation, C is used */
+    [INDEX_op_update_inc_cc] = CC_OSZAPC,
+    [INDEX_op_testl_T0_T1_cc] = CC_OSZAPC,
+
+    [INDEX_op_mulb_AL_T0] = CC_OSZAPC,
+    [INDEX_op_mulw_AX_T0] = CC_OSZAPC,
+    [INDEX_op_mull_EAX_T0] = CC_OSZAPC,
+    X86_64_DEF([INDEX_op_mulq_EAX_T0] = CC_OSZAPC,)
+    [INDEX_op_imulb_AL_T0] = CC_OSZAPC,
+    [INDEX_op_imulw_AX_T0] = CC_OSZAPC,
+    [INDEX_op_imull_EAX_T0] = CC_OSZAPC,
+    X86_64_DEF([INDEX_op_imulq_EAX_T0] = CC_OSZAPC,)
+    [INDEX_op_imulw_T0_T1] = CC_OSZAPC,
+    [INDEX_op_imull_T0_T1] = CC_OSZAPC,
+    X86_64_DEF([INDEX_op_imulq_T0_T1] = CC_OSZAPC,)
+
+    /* sse */
+    [INDEX_op_ucomiss] = CC_OSZAPC,
+    [INDEX_op_ucomisd] = CC_OSZAPC,
+    [INDEX_op_comiss] = CC_OSZAPC,
+    [INDEX_op_comisd] = CC_OSZAPC,
+
+    /* bcd */
+    [INDEX_op_aam] = CC_OSZAPC,
+    [INDEX_op_aad] = CC_OSZAPC,
+    [INDEX_op_aas] = CC_OSZAPC,
+    [INDEX_op_aaa] = CC_OSZAPC,
+    [INDEX_op_das] = CC_OSZAPC,
+    [INDEX_op_daa] = CC_OSZAPC,
+
+    [INDEX_op_movb_eflags_T0] = CC_S | CC_Z | CC_A | CC_P | CC_C,
+    [INDEX_op_movw_eflags_T0] = CC_OSZAPC,
+    [INDEX_op_movl_eflags_T0] = CC_OSZAPC,
+    [INDEX_op_movw_eflags_T0_io] = CC_OSZAPC,
+    [INDEX_op_movl_eflags_T0_io] = CC_OSZAPC,
+    [INDEX_op_movw_eflags_T0_cpl0] = CC_OSZAPC,
+    [INDEX_op_movl_eflags_T0_cpl0] = CC_OSZAPC,
+    [INDEX_op_clc] = CC_C,
+    [INDEX_op_stc] = CC_C,
+    [INDEX_op_cmc] = CC_C,
+
+    [INDEX_op_btw_T0_T1_cc] = CC_OSZAPC,
+    [INDEX_op_btl_T0_T1_cc] = CC_OSZAPC,
+    X86_64_DEF([INDEX_op_btq_T0_T1_cc] = CC_OSZAPC,)
+    [INDEX_op_btsw_T0_T1_cc] = CC_OSZAPC,
+    [INDEX_op_btsl_T0_T1_cc] = CC_OSZAPC,
+    X86_64_DEF([INDEX_op_btsq_T0_T1_cc] = CC_OSZAPC,)
+    [INDEX_op_btrw_T0_T1_cc] = CC_OSZAPC,
+    [INDEX_op_btrl_T0_T1_cc] = CC_OSZAPC,
+    X86_64_DEF([INDEX_op_btrq_T0_T1_cc] = CC_OSZAPC,)
+    [INDEX_op_btcw_T0_T1_cc] = CC_OSZAPC,
+    [INDEX_op_btcl_T0_T1_cc] = CC_OSZAPC,
+    X86_64_DEF([INDEX_op_btcq_T0_T1_cc] = CC_OSZAPC,)
+
+    [INDEX_op_bsfw_T0_cc] = CC_OSZAPC,
+    [INDEX_op_bsfl_T0_cc] = CC_OSZAPC,
+    X86_64_DEF([INDEX_op_bsfq_T0_cc] = CC_OSZAPC,)
+    [INDEX_op_bsrw_T0_cc] = CC_OSZAPC,
+    [INDEX_op_bsrl_T0_cc] = CC_OSZAPC,
+    X86_64_DEF([INDEX_op_bsrq_T0_cc] = CC_OSZAPC,)
+
+    [INDEX_op_cmpxchgb_T0_T1_EAX_cc] = CC_OSZAPC,
+    [INDEX_op_cmpxchgw_T0_T1_EAX_cc] = CC_OSZAPC,
+    [INDEX_op_cmpxchgl_T0_T1_EAX_cc] = CC_OSZAPC,
+    X86_64_DEF([INDEX_op_cmpxchgq_T0_T1_EAX_cc] = CC_OSZAPC,)
+
+    [INDEX_op_cmpxchg8b] = CC_Z,
+    [INDEX_op_lar] = CC_Z,
+    [INDEX_op_lsl] = CC_Z,
+    [INDEX_op_verr] = CC_Z,
+    [INDEX_op_verw] = CC_Z,
+    [INDEX_op_fcomi_ST0_FT0] = CC_Z | CC_P | CC_C,
+    [INDEX_op_fucomi_ST0_FT0] = CC_Z | CC_P | CC_C,
+
+#define DEF_WRITEF(SUFFIX)\
+    [INDEX_op_adcb ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    [INDEX_op_adcw ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    [INDEX_op_adcl ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    X86_64_DEF([INDEX_op_adcq ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,)\
+    [INDEX_op_sbbb ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    [INDEX_op_sbbw ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    [INDEX_op_sbbl ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    X86_64_DEF([INDEX_op_sbbq ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,)\
+\
+    [INDEX_op_rolb ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    [INDEX_op_rolw ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    [INDEX_op_roll ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    X86_64_DEF([INDEX_op_rolq ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,)\
+    [INDEX_op_rorb ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    [INDEX_op_rorw ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    [INDEX_op_rorl ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    X86_64_DEF([INDEX_op_rorq ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,)\
+\
+    [INDEX_op_rclb ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    [INDEX_op_rclw ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    [INDEX_op_rcll ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    X86_64_DEF([INDEX_op_rclq ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,)\
+    [INDEX_op_rcrb ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    [INDEX_op_rcrw ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    [INDEX_op_rcrl ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,\
+    X86_64_DEF([INDEX_op_rcrq ## SUFFIX ## _T0_T1_cc] = CC_O | CC_C,)\
+\
+    [INDEX_op_shlb ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    [INDEX_op_shlw ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    [INDEX_op_shll ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    X86_64_DEF([INDEX_op_shlq ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,)\
+\
+    [INDEX_op_shrb ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    [INDEX_op_shrw ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    [INDEX_op_shrl ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    X86_64_DEF([INDEX_op_shrq ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,)\
+\
+    [INDEX_op_sarb ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    [INDEX_op_sarw ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    [INDEX_op_sarl ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,\
+    X86_64_DEF([INDEX_op_sarq ## SUFFIX ## _T0_T1_cc] = CC_OSZAPC,)\
+\
+    [INDEX_op_shldw ## SUFFIX ## _T0_T1_ECX_cc] = CC_OSZAPC,\
+    [INDEX_op_shldl ## SUFFIX ## _T0_T1_ECX_cc] = CC_OSZAPC,\
+    X86_64_DEF([INDEX_op_shldq ## SUFFIX ## _T0_T1_ECX_cc] = CC_OSZAPC,)\
+    [INDEX_op_shldw ## SUFFIX ## _T0_T1_im_cc] = CC_OSZAPC,\
+    [INDEX_op_shldl ## SUFFIX ## _T0_T1_im_cc] = CC_OSZAPC,\
+    X86_64_DEF([INDEX_op_shldq ## SUFFIX ## _T0_T1_im_cc] = CC_OSZAPC,)\
+\
+    [INDEX_op_shrdw ## SUFFIX ## _T0_T1_ECX_cc] = CC_OSZAPC,\
+    [INDEX_op_shrdl ## SUFFIX ## _T0_T1_ECX_cc] = CC_OSZAPC,\
+    X86_64_DEF([INDEX_op_shrdq ## SUFFIX ## _T0_T1_ECX_cc] = CC_OSZAPC,)\
+    [INDEX_op_shrdw ## SUFFIX ## _T0_T1_im_cc] = CC_OSZAPC,\
+    [INDEX_op_shrdl ## SUFFIX ## _T0_T1_im_cc] = CC_OSZAPC,\
+    X86_64_DEF([INDEX_op_shrdq ## SUFFIX ## _T0_T1_im_cc] = CC_OSZAPC,)\
+\
+    [INDEX_op_cmpxchgb ## SUFFIX ## _T0_T1_EAX_cc] = CC_OSZAPC,\
+    [INDEX_op_cmpxchgw ## SUFFIX ## _T0_T1_EAX_cc] = CC_OSZAPC,\
+    [INDEX_op_cmpxchgl ## SUFFIX ## _T0_T1_EAX_cc] = CC_OSZAPC,\
+    X86_64_DEF([INDEX_op_cmpxchgq ## SUFFIX ## _T0_T1_EAX_cc] = CC_OSZAPC,)
+
+
+    DEF_WRITEF( )
+    DEF_WRITEF(_raw)
+#ifndef CONFIG_USER_ONLY
+    DEF_WRITEF(_kernel)
+    DEF_WRITEF(_user)
+#endif
+};
+
+/* simpler form of an operation if no flags need to be generated */
+static uint16_t opc_simpler[NB_OPS] = {
+    [INDEX_op_update2_cc] = INDEX_op_nop,
+    [INDEX_op_update1_cc] = INDEX_op_nop,
+    [INDEX_op_update_neg_cc] = INDEX_op_nop,
+#if 0
+    /* broken: CC_OP logic must be rewritten */
+    [INDEX_op_update_inc_cc] = INDEX_op_nop,
+#endif
+
+    [INDEX_op_shlb_T0_T1_cc] = INDEX_op_shlb_T0_T1,
+    [INDEX_op_shlw_T0_T1_cc] = INDEX_op_shlw_T0_T1,
+    [INDEX_op_shll_T0_T1_cc] = INDEX_op_shll_T0_T1,
+    X86_64_DEF([INDEX_op_shlq_T0_T1_cc] = INDEX_op_shlq_T0_T1,)
+
+    [INDEX_op_shrb_T0_T1_cc] = INDEX_op_shrb_T0_T1,
+    [INDEX_op_shrw_T0_T1_cc] = INDEX_op_shrw_T0_T1,
+    [INDEX_op_shrl_T0_T1_cc] = INDEX_op_shrl_T0_T1,
+    X86_64_DEF([INDEX_op_shrq_T0_T1_cc] = INDEX_op_shrq_T0_T1,)
+
+    [INDEX_op_sarb_T0_T1_cc] = INDEX_op_sarb_T0_T1,
+    [INDEX_op_sarw_T0_T1_cc] = INDEX_op_sarw_T0_T1,
+    [INDEX_op_sarl_T0_T1_cc] = INDEX_op_sarl_T0_T1,
+    X86_64_DEF([INDEX_op_sarq_T0_T1_cc] = INDEX_op_sarq_T0_T1,)
+
+#define DEF_SIMPLER(SUFFIX)\
+    [INDEX_op_rolb ## SUFFIX ## _T0_T1_cc] = INDEX_op_rolb ## SUFFIX ## _T0_T1,\
+    [INDEX_op_rolw ## SUFFIX ## _T0_T1_cc] = INDEX_op_rolw ## SUFFIX ## _T0_T1,\
+    [INDEX_op_roll ## SUFFIX ## _T0_T1_cc] = INDEX_op_roll ## SUFFIX ## _T0_T1,\
+    X86_64_DEF([INDEX_op_rolq ## SUFFIX ## _T0_T1_cc] = INDEX_op_rolq ## SUFFIX ## _T0_T1,)\
+\
+    [INDEX_op_rorb ## SUFFIX ## _T0_T1_cc] = INDEX_op_rorb ## SUFFIX ## _T0_T1,\
+    [INDEX_op_rorw ## SUFFIX ## _T0_T1_cc] = INDEX_op_rorw ## SUFFIX ## _T0_T1,\
+    [INDEX_op_rorl ## SUFFIX ## _T0_T1_cc] = INDEX_op_rorl ## SUFFIX ## _T0_T1,\
+    X86_64_DEF([INDEX_op_rorq ## SUFFIX ## _T0_T1_cc] = INDEX_op_rorq ## SUFFIX ## _T0_T1,)
+
+    DEF_SIMPLER( )
+    DEF_SIMPLER(_raw)
+#ifndef CONFIG_USER_ONLY
+    DEF_SIMPLER(_kernel)
+    DEF_SIMPLER(_user)
+#endif
+};
+
+void optimize_flags_init(void)
+{
+    int i;
+    /* put default values in arrays */
+    for(i = 0; i < NB_OPS; i++) {
+        if (opc_simpler[i] == 0)
+            opc_simpler[i] = i;
+    }
+}
+
+/* CPU flags computation optimization: we move backward through the
+   generated code to see which flags are needed. The operation is
+   modified if suitable */
+static void optimize_flags(uint16_t *opc_buf, int opc_buf_len)
+{
+    uint16_t *opc_ptr;
+    int live_flags, write_flags, op;
+
+    opc_ptr = opc_buf + opc_buf_len;
+    /* live_flags contains the flags needed by the next instructions
+       in the code. At the end of the block, we consider that all the
+       flags are live. */
+    live_flags = CC_OSZAPC;
+    while (opc_ptr > opc_buf) {
+        op = *--opc_ptr;
+        /* if none of the flags written by the instruction is used,
+           then we can try to find a simpler instruction */
+        write_flags = opc_write_flags[op];
+        if ((live_flags & write_flags) == 0) {
+            *opc_ptr = opc_simpler[op];
+        }
+        /* compute the live flags before the instruction */
+        live_flags &= ~write_flags;
+        live_flags |= opc_read_flags[op];
+    }
+}
+
+/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
+   basic block 'tb'. If search_pc is TRUE, also generate PC
+   information for each intermediate instruction. */
+static inline int gen_intermediate_code_internal(CPUState *env,
+                                                 TranslationBlock *tb,
+                                                 int search_pc)
+{
+    DisasContext dc1, *dc = &dc1;
+    target_ulong pc_ptr;
+    uint16_t *gen_opc_end;
+    int flags, j, lj, cflags;
+    target_ulong pc_start;
+    target_ulong cs_base;
+
+    /* generate intermediate code */
+    pc_start = tb->pc;
+    cs_base = tb->cs_base;
+    flags = tb->flags;
+    cflags = tb->cflags;
+
+    dc->pe = (flags >> HF_PE_SHIFT) & 1;
+    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
+    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
+    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
+    dc->f_st = 0;
+    dc->vm86 = (flags >> VM_SHIFT) & 1;
+#ifdef VBOX_WITH_CALL_RECORD
+    dc->vme = !!(env->cr[4] & CR4_VME_MASK);
+    if (    !(env->state & CPU_RAW_RING0)
+        &&  (env->cr[0] & CR0_PG_MASK)
+        &&  !(env->eflags & X86_EFL_IF)
+        &&  dc->code32)
+        dc->record_call = 1;
+    else
+        dc->record_call = 0;
+#endif
+    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
+    dc->iopl = (flags >> IOPL_SHIFT) & 3;
+    dc->tf = (flags >> TF_SHIFT) & 1;
+    dc->singlestep_enabled = env->singlestep_enabled;
+    dc->cc_op = CC_OP_DYNAMIC;
+    dc->cs_base = cs_base;
+    dc->tb = tb;
+    dc->popl_esp_hack = 0;
+    /* select memory access functions */
+    dc->mem_index = 0;
+    if (flags & HF_SOFTMMU_MASK) {
+        if (dc->cpl == 3)
+            dc->mem_index = 2 * 4;
+        else
+            dc->mem_index = 1 * 4;
+    }
+    dc->cpuid_features = env->cpuid_features;
+    dc->cpuid_ext_features = env->cpuid_ext_features;
+    dc->cpuid_ext2_features = env->cpuid_ext2_features;
+    dc->cpuid_ext3_features = env->cpuid_ext3_features;
+#ifdef TARGET_X86_64
+    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
+    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
+#endif
+    dc->flags = flags;
+    dc->jmp_opt = !(dc->tf || env->singlestep_enabled ||
+                    (flags & HF_INHIBIT_IRQ_MASK)
+#ifndef CONFIG_SOFTMMU
+                    || (flags & HF_SOFTMMU_MASK)
+#endif
+                    );
+#if 0
+    /* check addseg logic */
+    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
+        printf("ERROR addseg\n");
+#endif
+
+    gen_opc_ptr = gen_opc_buf;
+    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opparam_ptr = gen_opparam_buf;
+    nb_gen_labels = 0;
+
+    dc->is_jmp = DISAS_NEXT;
+    pc_ptr = pc_start;
+    lj = -1;
+
+    for(;;) {
+        if (env->nb_breakpoints > 0) {
+            for(j = 0; j < env->nb_breakpoints; j++) {
+                if (env->breakpoints[j] == pc_ptr) {
+                    gen_debug(dc, pc_ptr - dc->cs_base);
+                    break;
+                }
+            }
+        }
+        if (search_pc) {
+            j = gen_opc_ptr - gen_opc_buf;
+            if (lj < j) {
+                lj++;
+                while (lj < j)
+                    gen_opc_instr_start[lj++] = 0;
+            }
+            gen_opc_pc[lj] = pc_ptr;
+            gen_opc_cc_op[lj] = dc->cc_op;
+            gen_opc_instr_start[lj] = 1;
+        }
+        pc_ptr = disas_insn(dc, pc_ptr);
+        /* stop translation if indicated */
+        if (dc->is_jmp)
+            break;
+
+#ifdef VBOX
+#ifdef DEBUG
+/*
+        if(cpu_check_code_raw(env, pc_ptr, env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK))) == ERROR_SUCCESS)
+        {
+            //should never happen as the jump to the patch code terminates the translation block
+            dprintf(("QEmu is about to execute instructions in our patch block at %08X!!\n", pc_ptr));
+        }
+*/
+#endif
+        if (env->state & CPU_EMULATE_SINGLE_INSTR)
+        {
+            env->state &= ~CPU_EMULATE_SINGLE_INSTR;
+            gen_jmp_im(pc_ptr - dc->cs_base);
+            gen_eob(dc);
+            break;
+        }
+#endif /* VBOX */
+
+        /* in single-step mode, we generate only one instruction and
+           then generate an exception */
+        /* if irqs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
+           the flag and abort the translation to give the irqs a
+           chance to happen */
+        if (dc->tf || dc->singlestep_enabled ||
+            (flags & HF_INHIBIT_IRQ_MASK) ||
+            (cflags & CF_SINGLE_INSN)) {
+            gen_jmp_im(pc_ptr - dc->cs_base);
+            gen_eob(dc);
+            break;
+        }
+        /* if too long translation, stop generation too */
+        if (gen_opc_ptr >= gen_opc_end ||
+            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32)) {
+            gen_jmp_im(pc_ptr - dc->cs_base);
+            gen_eob(dc);
+            break;
+        }
+    }
+    *gen_opc_ptr = INDEX_op_end;
+    /* make sure to fill in the last values */
+    if (search_pc) {
+        j = gen_opc_ptr - gen_opc_buf;
+        lj++;
+        while (lj <= j)
+            gen_opc_instr_start[lj++] = 0;
+    }
+
+#ifdef DEBUG_DISAS
+    if (loglevel & CPU_LOG_TB_CPU) {
+        cpu_dump_state(env, logfile, fprintf, X86_DUMP_CCOP);
+    }
+    if (loglevel & CPU_LOG_TB_IN_ASM) {
+        int disas_flags;
+        fprintf(logfile, "----------------\n");
+        fprintf(logfile, "IN: %s\n", lookup_symbol(pc_start));
+#ifdef TARGET_X86_64
+        if (dc->code64)
+            disas_flags = 2;
+        else
+#endif
+            disas_flags = !dc->code32;
+        target_disas(logfile, pc_start, pc_ptr - pc_start, disas_flags);
+        fprintf(logfile, "\n");
+        if (loglevel & CPU_LOG_TB_OP) {
+            fprintf(logfile, "OP:\n");
+            dump_ops(gen_opc_buf, gen_opparam_buf);
+            fprintf(logfile, "\n");
+        }
+    }
+#endif
+
+    /* optimize flag computations */
+    optimize_flags(gen_opc_buf, gen_opc_ptr - gen_opc_buf);
+
+#ifdef DEBUG_DISAS
+    if (loglevel & CPU_LOG_TB_OP_OPT) {
+        fprintf(logfile, "AFTER FLAGS OPT:\n");
+        dump_ops(gen_opc_buf, gen_opparam_buf);
+        fprintf(logfile, "\n");
+    }
+#endif
+    if (!search_pc)
+        tb->size = pc_ptr - pc_start;
+    return 0;
+}
+
+int gen_intermediate_code(CPUState *env, TranslationBlock *tb)
+{
+    return gen_intermediate_code_internal(env, tb, 0);
+}
+
+int gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
+{
+    return gen_intermediate_code_internal(env, tb, 1);
+}
+
Index: /trunk/src/recompiler_new/tests/Makefile
===================================================================
--- /trunk/src/recompiler_new/tests/Makefile	(revision 13168)
+++ /trunk/src/recompiler_new/tests/Makefile	(revision 13168)
@@ -0,0 +1,100 @@
+-include ../config-host.mak
+
+CFLAGS=-Wall -O2 -g
+#CFLAGS+=-msse2
+LDFLAGS=
+
+ifeq ($(ARCH),i386)
+TESTS=linux-test testthread sha1-i386 test-i386 runcom
+endif
+ifeq ($(ARCH),x86_64)
+TESTS=test-x86_64
+endif
+TESTS+=sha1# test_path
+#TESTS+=test_path
+
+QEMU=../i386-user/qemu-i386
+
+all: $(TESTS)
+
+hello-i386: hello-i386.c
+	$(CC) -nostdlib $(CFLAGS) -static $(LDFLAGS) -o $@ $<
+	strip $@
+
+testthread: testthread.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< -lpthread
+
+test_path: test_path.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+	./$@ || { rm $@; exit 1; }
+
+# i386/x86_64 emulation test (test various opcodes)
+test-i386: test-i386.c test-i386-code16.S test-i386-vm86.S \
+           test-i386.h test-i386-shift.h test-i386-muldiv.h
+	$(CC) $(CFLAGS) $(LDFLAGS) -static -o $@ \
+              test-i386.c test-i386-code16.S test-i386-vm86.S -lm
+
+test-x86_64: test-i386.c \
+           test-i386.h test-i386-shift.h test-i386-muldiv.h
+	$(CC) $(CFLAGS) $(LDFLAGS) -static -o $@ test-i386.c -lm
+
+ifeq ($(ARCH),i386)
+test: test-i386
+	./test-i386 > test-i386.ref
+else
+test:
+endif
+	$(QEMU) test-i386 > test-i386.out
+	@if diff -u test-i386.ref test-i386.out ; then echo "Auto Test OK"; fi
+ifeq ($(ARCH),i386)
+	$(QEMU) -no-code-copy test-i386 > test-i386.out
+	@if diff -u test-i386.ref test-i386.out ; then echo "Auto Test OK (no code copy)"; fi
+endif
+
+# generic Linux and CPU test
+linux-test: linux-test.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< -lm
+
+# speed test
+sha1-i386: sha1.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+sha1: sha1.c
+	$(HOST_CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+speed: sha1 sha1-i386
+	time ./sha1
+	time $(QEMU) ./sha1-i386
+
+# vm86 test
+runcom: runcom.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+# NOTE: -fomit-frame-pointer is currently needed: this is a bug in libqemu
+qruncom: qruncom.c ../i386-user/libqemu.a
+	$(CC) $(CFLAGS) -fomit-frame-pointer $(LDFLAGS) -I../target-i386 -I.. -I../i386-user -I../fpu \
+              -o $@ $< -L../i386-user -lqemu -lm
+
+# arm test
+hello-arm: hello-arm.o
+	arm-linux-ld -o $@ $<
+
+hello-arm.o: hello-arm.c
+	arm-linux-gcc -Wall -g -O2 -c -o $@ $<
+
+# MIPS test
+hello-mips: hello-mips.c
+	mips-linux-gnu-gcc -nostdlib -static -mno-abicalls -fno-PIC -mabi=32 -Wall -Wextra -g -O2 -o $@ $<
+
+hello-mipsel: hello-mips.c
+	mipsel-linux-gnu-gcc -nostdlib -static -mno-abicalls -fno-PIC -mabi=32 -Wall -Wextra -g -O2 -o $@ $<
+
+# XXX: find a way to compile easily a test for each arch
+test2:
+	@for arch in i386 arm armeb sparc ppc mips mipsel; do \
+           ../$${arch}-user/qemu-$${arch} $${arch}/ls -l linux-test.c ; \
+        done
+
+clean:
+	rm -f *~ *.o test-i386.out test-i386.ref \
+           test-x86_64.log test-x86_64.ref qruncom $(TESTS)
Index: /trunk/src/recompiler_new/tests/hello-arm.c
===================================================================
--- /trunk/src/recompiler_new/tests/hello-arm.c	(revision 13168)
+++ /trunk/src/recompiler_new/tests/hello-arm.c	(revision 13168)
@@ -0,0 +1,113 @@
+#define __NR_SYSCALL_BASE	0x900000
+#define __NR_exit1			(__NR_SYSCALL_BASE+  1)
+#define __NR_write			(__NR_SYSCALL_BASE+  4)
+
+#define __sys2(x) #x
+#define __sys1(x) __sys2(x)
+
+#ifndef __syscall
+#define __syscall(name) "swi\t" __sys1(__NR_##name) "\n\t"
+#endif
+
+#define __syscall_return(type, res)					\
+do {									\
+	return (type) (res);						\
+} while (0)
+
+#define _syscall0(type,name)						\
+type name(void) {							\
+  long __res;								\
+  __asm__ __volatile__ (						\
+  __syscall(name)							\
+  "mov %0,r0"								\
+  :"=r" (__res) : : "r0","lr");						\
+  __syscall_return(type,__res);						\
+}
+
+#define _syscall1(type,name,type1,arg1)					\
+type name(type1 arg1) {							\
+  long __res;								\
+  __asm__ __volatile__ (						\
+  "mov\tr0,%1\n\t"							\
+  __syscall(name)							\
+  "mov %0,r0"								\
+        : "=r" (__res)							\
+        : "r" ((long)(arg1))						\
+	: "r0","lr");							\
+  __syscall_return(type,__res);						\
+}
+
+#define _syscall2(type,name,type1,arg1,type2,arg2)			\
+type name(type1 arg1,type2 arg2) {					\
+  long __res;								\
+  __asm__ __volatile__ (						\
+  "mov\tr0,%1\n\t"							\
+  "mov\tr1,%2\n\t"							\
+  __syscall(name)							\
+  "mov\t%0,r0"								\
+        : "=r" (__res)							\
+        : "r" ((long)(arg1)),"r" ((long)(arg2))				\
+	: "r0","r1","lr");						\
+  __syscall_return(type,__res);						\
+}
+
+
+#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)		\
+type name(type1 arg1,type2 arg2,type3 arg3) {				\
+  long __res;								\
+  __asm__ __volatile__ (						\
+  "mov\tr0,%1\n\t"							\
+  "mov\tr1,%2\n\t"							\
+  "mov\tr2,%3\n\t"							\
+  __syscall(name)							\
+  "mov\t%0,r0"								\
+        : "=r" (__res)							\
+        : "r" ((long)(arg1)),"r" ((long)(arg2)),"r" ((long)(arg3))	\
+        : "r0","r1","r2","lr");						\
+  __syscall_return(type,__res);						\
+}
+
+
+#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)		\
+type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {				\
+  long __res;										\
+  __asm__ __volatile__ (								\
+  "mov\tr0,%1\n\t"									\
+  "mov\tr1,%2\n\t"									\
+  "mov\tr2,%3\n\t"									\
+  "mov\tr3,%4\n\t"									\
+  __syscall(name)									\
+  "mov\t%0,r0"										\
+  	: "=r" (__res)									\
+  	: "r" ((long)(arg1)),"r" ((long)(arg2)),"r" ((long)(arg3)),"r" ((long)(arg4))	\
+  	: "r0","r1","r2","r3","lr");							\
+  __syscall_return(type,__res);								\
+}
+  
+
+#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5)	\
+type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) {			\
+  long __res;										\
+  __asm__ __volatile__ (								\
+  "mov\tr0,%1\n\t"									\
+  "mov\tr1,%2\n\t"									\
+  "mov\tr2,%3\n\t"									\
+  "mov\tr3,%4\n\t"									\
+  "mov\tr4,%5\n\t"									\
+  __syscall(name)									\
+  "mov\t%0,r0"										\
+  	: "=r" (__res)									\
+  	: "r" ((long)(arg1)),"r" ((long)(arg2)),"r" ((long)(arg3)),"r" ((long)(arg4)),	\
+	  "r" ((long)(arg5))								\
+	: "r0","r1","r2","r3","r4","lr");						\
+  __syscall_return(type,__res);								\
+}
+
+_syscall1(int,exit1,int,status);
+_syscall3(int,write,int,fd,const char *,buf, int, len);
+
+void _start(void)
+{
+    write(1, "Hello World\n", 12);
+    exit1(0);
+}
Index: /trunk/src/recompiler_new/tests/hello-i386.c
===================================================================
--- /trunk/src/recompiler_new/tests/hello-i386.c	(revision 13168)
+++ /trunk/src/recompiler_new/tests/hello-i386.c	(revision 13168)
@@ -0,0 +1,26 @@
+#include <asm/unistd.h>
+
+extern inline volatile void exit(int status)
+{
+  int __res;
+  __asm__ volatile ("movl %%ecx,%%ebx\n"\
+		    "int $0x80" \
+		    :  "=a" (__res) : "0" (__NR_exit),"c" ((long)(status)));
+}
+
+extern inline int write(int fd, const char * buf, int len)
+{
+  int status;
+  __asm__ volatile ("pushl %%ebx\n"\
+		    "movl %%esi,%%ebx\n"\
+		    "int $0x80\n" \
+		    "popl %%ebx\n"\
+		    : "=a" (status) \
+		    : "0" (__NR_write),"S" ((long)(fd)),"c" ((long)(buf)),"d" ((long)(len)));
+}
+
+void _start(void)
+{
+    write(1, "Hello World\n", 12);
+    exit(0);
+}
Index: /trunk/src/recompiler_new/tests/linux-test.c
===================================================================
--- /trunk/src/recompiler_new/tests/linux-test.c	(revision 13168)
+++ /trunk/src/recompiler_new/tests/linux-test.c	(revision 13168)
@@ -0,0 +1,545 @@
+/*
+ *  linux and CPU test
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * Sun GPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the General Public License version 2 (GPLv2) at this time for any software where
+ * a choice of GPL license versions is made available with the language indicating
+ * that GPLv2 or any later version may be used, or where a choice of which version
+ * of the GPL is applied is otherwise unspecified.
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <utime.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sched.h>
+#include <dirent.h>
+#include <setjmp.h>
+#include <sys/shm.h>
+
+#define TESTPATH "/tmp/linux-test.tmp"
+#define TESTPORT 7654
+#define STACK_SIZE 16384
+
+void error1(const char *filename, int line, const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    fprintf(stderr, "%s:%d: ", filename, line);
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, "\n");
+    va_end(ap);
+    exit(1);
+}
+
+int __chk_error(const char *filename, int line, int ret)
+{
+    if (ret < 0) {
+        error1(filename, line, "%m (ret=%d, errno=%d)", 
+               ret, errno);
+    }
+    return ret;
+}
+
+#define error(fmt, args...) error1(__FILE__, __LINE__, fmt, ##args)
+
+#define chk_error(ret) __chk_error(__FILE__, __LINE__, (ret))
+
+/*******************************************************/
+
+#define FILE_BUF_SIZE 300
+
+void test_file(void)
+{
+    int fd, i, len, ret;
+    uint8_t buf[FILE_BUF_SIZE];
+    uint8_t buf2[FILE_BUF_SIZE];
+    uint8_t buf3[FILE_BUF_SIZE];
+    char cur_dir[1024];
+    struct stat st;
+    struct utimbuf tbuf;
+    struct iovec vecs[2];
+    DIR *dir;
+    struct dirent *de;
+
+    /* clean up, just in case */
+    unlink(TESTPATH "/file1");
+    unlink(TESTPATH "/file2");
+    unlink(TESTPATH "/file3");
+    rmdir(TESTPATH);
+
+    if (getcwd(cur_dir, sizeof(cur_dir)) == NULL)
+        error("getcwd");
+    
+    chk_error(mkdir(TESTPATH, 0755));
+    
+    chk_error(chdir(TESTPATH));
+    
+    /* open/read/write/close/readv/writev/lseek */
+
+    fd = chk_error(open("file1", O_WRONLY | O_TRUNC | O_CREAT, 0644));
+    for(i=0;i < FILE_BUF_SIZE; i++)
+        buf[i] = i;
+    len = chk_error(write(fd, buf, FILE_BUF_SIZE / 2));
+    if (len != (FILE_BUF_SIZE / 2))
+        error("write");
+    vecs[0].iov_base = buf + (FILE_BUF_SIZE / 2);
+    vecs[0].iov_len = 16;
+    vecs[1].iov_base = buf + (FILE_BUF_SIZE / 2) + 16;
+    vecs[1].iov_len = (FILE_BUF_SIZE / 2) - 16;
+    len = chk_error(writev(fd, vecs, 2));
+    if (len != (FILE_BUF_SIZE / 2))
+        error("writev");
+    chk_error(close(fd));
+
+    chk_error(rename("file1", "file2"));
+
+    fd = chk_error(open("file2", O_RDONLY));
+
+    len = chk_error(read(fd, buf2, FILE_BUF_SIZE));
+    if (len != FILE_BUF_SIZE)
+        error("read");
+    if (memcmp(buf, buf2, FILE_BUF_SIZE) != 0)
+        error("memcmp");
+    
+#define FOFFSET 16
+    ret = chk_error(lseek(fd, FOFFSET, SEEK_SET));
+    if (ret != 16)
+        error("lseek");
+    vecs[0].iov_base = buf3;
+    vecs[0].iov_len = 32;
+    vecs[1].iov_base = buf3 + 32;
+    vecs[1].iov_len = FILE_BUF_SIZE - FOFFSET - 32;
+    len = chk_error(readv(fd, vecs, 2));
+    if (len != FILE_BUF_SIZE - FOFFSET)
+        error("readv");
+    if (memcmp(buf + FOFFSET, buf3, FILE_BUF_SIZE - FOFFSET) != 0)
+        error("memcmp");
+    
+    chk_error(close(fd));
+
+    /* access */
+    chk_error(access("file2", R_OK));
+
+    /* stat/chmod/utime/truncate */
+
+    chk_error(chmod("file2", 0600));
+    tbuf.actime = 1001;
+    tbuf.modtime = 1000;
+    chk_error(truncate("file2", 100));
+    chk_error(utime("file2", &tbuf));
+    chk_error(stat("file2", &st));
+    if (st.st_size != 100)
+        error("stat size");
+    if (!S_ISREG(st.st_mode))
+        error("stat mode");
+    if ((st.st_mode & 0777) != 0600)
+        error("stat mode2");
+    if (st.st_atime != 1001 ||
+        st.st_mtime != 1000)
+        error("stat time");
+
+    chk_error(stat(TESTPATH, &st));
+    if (!S_ISDIR(st.st_mode))
+        error("stat mode");
+
+    /* fstat */
+    fd = chk_error(open("file2", O_RDWR));
+    chk_error(ftruncate(fd, 50));
+    chk_error(fstat(fd, &st));
+    chk_error(close(fd));
+    
+    if (st.st_size != 50)
+        error("stat size");
+    if (!S_ISREG(st.st_mode))
+        error("stat mode");
+    
+    /* symlink/lstat */
+    chk_error(symlink("file2", "file3"));
+    chk_error(lstat("file3", &st));
+    if (!S_ISLNK(st.st_mode))
+        error("stat mode");
+    
+    /* getdents */
+    dir = opendir(TESTPATH);
+    if (!dir)
+        error("opendir");
+    len = 0;
+    for(;;) {
+        de = readdir(dir);
+        if (!de)
+            break;
+        if (strcmp(de->d_name, ".") != 0 &&
+            strcmp(de->d_name, "..") != 0 &&
+            strcmp(de->d_name, "file2") != 0 &&
+            strcmp(de->d_name, "file3") != 0)
+            error("readdir");
+        len++;
+    }
+    closedir(dir);
+    if (len != 4)
+        error("readdir");
+
+    chk_error(unlink("file3"));
+    chk_error(unlink("file2"));
+    chk_error(chdir(cur_dir));
+    chk_error(rmdir(TESTPATH));
+}
+
+void test_fork(void)
+{
+    int pid, status;
+
+    pid = chk_error(fork());
+    if (pid == 0) {
+        /* child */
+        exit(2);
+    }
+    chk_error(waitpid(pid, &status, 0));
+    if (!WIFEXITED(status) || WEXITSTATUS(status) != 2)
+        error("waitpid status=0x%x", status);
+}
+
+void test_time(void)
+{
+    struct timeval tv, tv2;
+    struct timespec ts, rem;
+    struct rusage rusg1, rusg2;
+    int ti, i;
+
+    chk_error(gettimeofday(&tv, NULL));
+    rem.tv_sec = 1;
+    ts.tv_sec = 0;
+    ts.tv_nsec = 20 * 1000000;
+    chk_error(nanosleep(&ts, &rem));
+    if (rem.tv_sec != 1)
+        error("nanosleep");
+    chk_error(gettimeofday(&tv2, NULL));
+    ti = tv2.tv_sec - tv.tv_sec;
+    if (ti >= 2)
+        error("gettimeofday");
+    
+    chk_error(getrusage(RUSAGE_SELF, &rusg1));
+    for(i = 0;i < 10000; i++);
+    chk_error(getrusage(RUSAGE_SELF, &rusg2));
+    if ((rusg2.ru_utime.tv_sec - rusg1.ru_utime.tv_sec) < 0 ||
+        (rusg2.ru_stime.tv_sec - rusg1.ru_stime.tv_sec) < 0)
+        error("getrusage");
+}
+
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+    int c;
+    char *q = buf;
+
+    if (buf_size <= 0)
+        return;
+
+    for(;;) {
+        c = *str++;
+        if (c == 0 || q >= buf + buf_size - 1)
+            break;
+        *q++ = c;
+    }
+    *q = '\0';
+}
+
+/* strcat and truncate. */
+char *pstrcat(char *buf, int buf_size, const char *s)
+{
+    int len;
+    len = strlen(buf);
+    if (len < buf_size) 
+        pstrcpy(buf + len, buf_size - len, s);
+    return buf;
+}
+
+int server_socket(void)
+{
+    int val, fd;
+    struct sockaddr_in sockaddr;
+
+    /* server socket */
+    fd = chk_error(socket(PF_INET, SOCK_STREAM, 0));
+
+    val = 1;
+    chk_error(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)));
+
+    sockaddr.sin_family = AF_INET;
+    sockaddr.sin_port = htons(TESTPORT);
+    sockaddr.sin_addr.s_addr = 0;
+    chk_error(bind(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)));
+    chk_error(listen(fd, 0));
+    return fd;
+
+}
+
+int client_socket(void)
+{
+    int fd;
+    struct sockaddr_in sockaddr;
+
+    /* server socket */
+    fd = chk_error(socket(PF_INET, SOCK_STREAM, 0));
+    sockaddr.sin_family = AF_INET;
+    sockaddr.sin_port = htons(TESTPORT);
+    inet_aton("127.0.0.1", &sockaddr.sin_addr);
+    chk_error(connect(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)));
+    return fd;
+}
+
+const char socket_msg[] = "hello socket\n";
+
+void test_socket(void)
+{
+    int server_fd, client_fd, fd, pid, ret, val;
+    struct sockaddr_in sockaddr;
+    socklen_t len;
+    char buf[512];
+
+    server_fd = server_socket();
+
+    /* test a few socket options */
+    len = sizeof(val);
+    chk_error(getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &val, &len));
+    if (val != SOCK_STREAM)
+        error("getsockopt");
+    
+    pid = chk_error(fork());
+    if (pid == 0) {
+        client_fd = client_socket();
+        send(client_fd, socket_msg, sizeof(socket_msg), 0);
+        close(client_fd);
+        exit(0);
+    }
+    len = sizeof(sockaddr);
+    fd = chk_error(accept(server_fd, (struct sockaddr *)&sockaddr, &len));
+
+    ret = chk_error(recv(fd, buf, sizeof(buf), 0));
+    if (ret != sizeof(socket_msg))
+        error("recv");
+    if (memcmp(buf, socket_msg, sizeof(socket_msg)) != 0)
+        error("socket_msg");
+    chk_error(close(fd));
+    chk_error(close(server_fd));
+}
+
+#define WCOUNT_MAX 512
+
+void test_pipe(void)
+{
+    fd_set rfds, wfds;
+    int fds[2], fd_max, ret;
+    uint8_t ch;
+    int wcount, rcount;
+
+    chk_error(pipe(fds));
+    chk_error(fcntl(fds[0], F_SETFL, O_NONBLOCK));
+    chk_error(fcntl(fds[1], F_SETFL, O_NONBLOCK));
+    wcount = 0;
+    rcount = 0;
+    for(;;) {
+        FD_ZERO(&rfds);
+        fd_max = fds[0];
+        FD_SET(fds[0], &rfds);
+
+        FD_ZERO(&wfds);
+        FD_SET(fds[1], &wfds);
+        if (fds[1] > fd_max)
+            fd_max = fds[1];
+
+        ret = chk_error(select(fd_max + 1, &rfds, &wfds, NULL, NULL));
+        if (ret > 0) {
+            if (FD_ISSET(fds[0], &rfds)) {
+                chk_error(read(fds[0], &ch, 1));
+                rcount++;
+                if (rcount >= WCOUNT_MAX)
+                    break;
+            }
+            if (FD_ISSET(fds[1], &wfds)) {
+                ch = 'a';
+                chk_error(write(fds[1], &ch, 1));
+                wcount++;
+            }
+        }
+    }
+    chk_error(close(fds[0]));
+    chk_error(close(fds[1]));
+}
+
+int thread1_res;
+int thread2_res;
+
+int thread1_func(void *arg)
+{
+    int i;
+    for(i=0;i<5;i++) {
+        thread1_res++;
+        usleep(10 * 1000);
+    }
+    return 0;
+}
+
+int thread2_func(void *arg)
+{
+    int i;
+    for(i=0;i<6;i++) {
+        thread2_res++;
+        usleep(10 * 1000);
+    }
+    return 0;
+}
+
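+/* start two clone()d children sharing the address space (CLONE_VM); each bumps
+   its counter, then the parent reaps both and checks the final counts */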
+void test_clone(void)
+{
+    uint8_t *stack1, *stack2;
+    int pid1, pid2, status1, status2;
+
+    stack1 = malloc(STACK_SIZE);
+    pid1 = chk_error(clone(thread1_func, stack1 + STACK_SIZE, 
+                           CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD, "hello1"));
+
+    stack2 = malloc(STACK_SIZE);
+    pid2 = chk_error(clone(thread2_func, stack2 + STACK_SIZE, 
+                           CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD, "hello2"));
+
+    while (waitpid(pid1, &status1, 0) != pid1);
+    while (waitpid(pid2, &status2, 0) != pid2);
+    if (thread1_res != 5 ||
+        thread2_res != 6)
+        error("clone");
+}
+
+/***********************************/
+
+volatile int alarm_count;
+jmp_buf jmp_env;
+
+void sig_alarm(int sig)
+{
+    if (sig != SIGALRM)
+        error("signal");
+    alarm_count++;
+}
+
+void sig_segv(int sig, siginfo_t *info, void *puc)
+{
+    if (sig != SIGSEGV)
+        error("signal");
+    longjmp(jmp_env, 1);
+}
+
+void test_signal(void)
+{
+    struct sigaction act;
+    struct itimerval it, oit;
+
+    /* timer test */
+
+    alarm_count = 0;
+
+    act.sa_handler = sig_alarm;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = 0;
+    chk_error(sigaction(SIGALRM, &act, NULL));
+    
+    it.it_interval.tv_sec = 0;
+    it.it_interval.tv_usec = 10 * 1000;
+    it.it_value.tv_sec = 0;
+    it.it_value.tv_usec = 10 * 1000;
+    chk_error(setitimer(ITIMER_REAL, &it, NULL));
+    chk_error(getitimer(ITIMER_REAL, &oit));
+    if (oit.it_value.tv_sec != it.it_value.tv_sec ||
+        oit.it_value.tv_usec != it.it_value.tv_usec)
+        error("itimer");
+    
+    while (alarm_count < 5) {
+        usleep(10 * 1000);
+    }
+
+    it.it_interval.tv_sec = 0;
+    it.it_interval.tv_usec = 0;
+    it.it_value.tv_sec = 0;
+    it.it_value.tv_usec = 0;
+    memset(&oit, 0xff, sizeof(oit));
+    chk_error(setitimer(ITIMER_REAL, &it, &oit));
+    if (oit.it_value.tv_sec != 0 ||
+        oit.it_value.tv_usec != 10 * 1000)
+        error("setitimer");
+
+    /* SIGSEGV test */
+    act.sa_sigaction = sig_segv;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = SA_SIGINFO;
+    chk_error(sigaction(SIGSEGV, &act, NULL));
+    if (setjmp(jmp_env) == 0) {
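+        /* intentional NULL-pointer write: the SIGSEGV handler longjmp()s back,
+           so execution resumes after this if block */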
+        *(uint8_t *)0 = 0;
+    }
+    
+    act.sa_handler = SIG_DFL;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = 0;
+    chk_error(sigaction(SIGSEGV, &act, NULL));
+}
+
+#define SHM_SIZE 32768
+
+void test_shm(void)
+{
+    void *ptr;
+    int shmid;
+
+    shmid = chk_error(shmget(IPC_PRIVATE, SHM_SIZE, IPC_CREAT | 0777));
+    ptr = shmat(shmid, NULL, 0);
+    if (ptr == (void *)-1)
+        error("shmat");
+
+    memset(ptr, 0, SHM_SIZE);
+
+    chk_error(shmctl(shmid, IPC_RMID, 0));
+    chk_error(shmdt(ptr));
+}
+
+int main(int argc, char **argv)
+{
+    test_file();
+    test_fork();
+    test_time();
+    test_socket();
+    test_pipe();
+    //    test_clone();
+    test_signal();
+    test_shm();
+    return 0;
+}
Index: /trunk/src/recompiler_new/tests/qruncom.c
===================================================================
--- /trunk/src/recompiler_new/tests/qruncom.c	(revision 13168)
+++ /trunk/src/recompiler_new/tests/qruncom.c	(revision 13168)
@@ -0,0 +1,327 @@
+/*
+ * Example of use of user mode libqemu: launch a basic .com DOS
+ * executable
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <malloc.h>
+
+#include "cpu.h"
+
+//#define SIGTEST
+
+void cpu_outb(CPUState *env, int addr, int val)
+{
+    fprintf(stderr, "outb: port=0x%04x, data=%02x\n", addr, val);
+}
+
+void cpu_outw(CPUState *env, int addr, int val)
+{
+    fprintf(stderr, "outw: port=0x%04x, data=%04x\n", addr, val);
+}
+
+void cpu_outl(CPUState *env, int addr, int val)
+{
+    fprintf(stderr, "outl: port=0x%04x, data=%08x\n", addr, val);
+}
+
+int cpu_inb(CPUState *env, int addr)
+{
+    fprintf(stderr, "inb: port=0x%04x\n", addr);
+    return 0;
+}
+
+int cpu_inw(CPUState *env, int addr)
+{
+    fprintf(stderr, "inw: port=0x%04x\n", addr);
+    return 0;
+}
+
+int cpu_inl(CPUState *env, int addr)
+{
+    fprintf(stderr, "inl: port=0x%04x\n", addr);
+    return 0;
+}
+
+int cpu_get_pic_interrupt(CPUState *env)
+{
+    return -1;
+}
+
+uint64_t cpu_get_tsc(CPUState *env)
+{
+    return 0;
+}
+
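+/* build an 8-byte gate descriptor: low dword = offset[15:0] | selector << 16,
+   high dword = offset[31:16] | present bit | DPL | gate type */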
+static void set_gate(void *ptr, unsigned int type, unsigned int dpl, 
+                     unsigned long addr, unsigned int sel)
+{
+    unsigned int e1, e2;
+    e1 = (addr & 0xffff) | (sel << 16);
+    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
+    stl((uint8_t *)ptr, e1);
+    stl((uint8_t *)ptr + 4, e2);
+}
+
+uint64_t idt_table[256];
+
+/* only dpl matters as we do only user space emulation */
+static void set_idt(int n, unsigned int dpl)
+{
+    set_gate(idt_table + n, 0, dpl, 0, 0);
+}
+
+void qemu_free(void *ptr)
+{
+    free(ptr);
+}
+
+void *qemu_malloc(size_t size)
+{
+    return malloc(size);
+}
+
+void *qemu_mallocz(size_t size)
+{
+    void *ptr;
+    ptr = qemu_malloc(size);
+    if (!ptr)
+        return NULL;
+    memset(ptr, 0, size);
+    return ptr;
+}
+
+void *qemu_vmalloc(size_t size)
+{
+    return memalign(4096, size);
+}
+
+void qemu_vfree(void *ptr)
+{
+    free(ptr);
+}
+
+void qemu_printf(const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    vprintf(fmt, ap);
+    va_end(ap);
+}
+
+/* XXX: this is a bug in helper2.c */
+int errno;
+
+/**********************************************/
+
+#define COM_BASE_ADDR    0x10100
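+/* a .com image is loaded at offset 0x100 of its segment; with the segment at
+   linear 0x10000 (segment value 0x1000) the image starts at linear 0x10100 */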
+
+void usage(void)
+{
+    printf("qruncom version 0.1 (c) 2003 Fabrice Bellard\n"
+           "usage: qruncom file.com\n"
+           "user mode libqemu demo: run simple .com DOS executables\n");
+    exit(1);
+}
+
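+/* real-mode address translation: linear = segment * 16 + (offset & 0xffff) */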
+static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg)
+{
+    return (uint8_t *)((seg << 4) + (reg & 0xffff));
+}
+
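+/* push a 16-bit value on the guest stack: SP is decremented by 2 with 16-bit
+   wrap-around and the word is stored at SS:SP */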
+static inline void pushw(CPUState *env, int val)
+{
+    env->regs[R_ESP] = (env->regs[R_ESP] & ~0xffff) | ((env->regs[R_ESP] - 2) & 0xffff);
+    *(uint16_t *)seg_to_linear(env->segs[R_SS].selector, env->regs[R_ESP]) = val;
+}
+
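+/* give the CPU emulator first chance at host SIGSEGV/SIGBUS (faults taken on
+   behalf of the guest); anything it does not handle aborts the process */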
+static void host_segv_handler(int host_signum, siginfo_t *info, 
+                              void *puc)
+{
+    if (cpu_signal_handler(host_signum, info, puc)) {
+        return;
+    }
+    abort();
+}
+
+int main(int argc, char **argv)
+{
+    uint8_t *vm86_mem;
+    const char *filename;
+    int fd, ret, seg;
+    CPUState *env;
+
+    if (argc != 2)
+        usage();
+    filename = argv[1];
+    
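+    /* map the guest real-mode address space (1 MB plus the 64 KB wrap area) at
+       linear address 0; mapping page 0 may require root or vm.mmap_min_addr=0 */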
+    vm86_mem = mmap((void *)0x00000000, 0x110000, 
+                    PROT_WRITE | PROT_READ | PROT_EXEC, 
+                    MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+    if (vm86_mem == MAP_FAILED) {
+        perror("mmap");
+        exit(1);
+    }
+
+    /* load the MSDOS .com executable */
+    fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        perror(filename);
+        exit(1);
+    }
+    ret = read(fd, vm86_mem + COM_BASE_ADDR, 65536 - 256);
+    if (ret < 0) {
+        perror("read");
+        exit(1);
+    }
+    close(fd);
+
+    /* install exception handler for CPU emulator */
+    {
+        struct sigaction act;
+        
+        sigfillset(&act.sa_mask);
+        act.sa_flags = SA_SIGINFO;
+        //        act.sa_flags |= SA_ONSTACK;
+
+        act.sa_sigaction = host_segv_handler;
+        sigaction(SIGSEGV, &act, NULL);
+        sigaction(SIGBUS, &act, NULL);
+#if defined (TARGET_I386) && defined(USE_CODE_COPY)
+        sigaction(SIGFPE, &act, NULL);
+#endif
+    }
+
+    //    cpu_set_log(CPU_LOG_TB_IN_ASM | CPU_LOG_TB_OUT_ASM | CPU_LOG_EXEC);
+
+    env = cpu_init();
+
+    /* disable code copy to simplify debugging */
+    code_copy_enabled = 0;
+
+    /* set user mode state (XXX: should be done automatically by
+       cpu_init ?) */
+    env->user_mode_only = 1;
+
+    cpu_x86_set_cpl(env, 3);
+
+    env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
+    /* NOTE: hflags duplicates some of the virtual CPU state */
+    env->hflags |= HF_PE_MASK | VM_MASK;
+
+    /* flags setup: enable IRQs by default, as in user mode, and set the
+       VM86 flag so the DOS code runs in vm86 mode */
+    env->eflags |= IF_MASK | VM_MASK;
+    
+    /* init basic registers */
+    env->eip = 0x100;
+    env->regs[R_ESP] = 0xfffe;
+    seg = (COM_BASE_ADDR - 0x100) >> 4;
+
+    cpu_x86_load_seg_cache(env, R_CS, seg, 
+                           (seg << 4), 0xffff, 0);
+    cpu_x86_load_seg_cache(env, R_SS, seg, 
+                           (seg << 4), 0xffff, 0);
+    cpu_x86_load_seg_cache(env, R_DS, seg, 
+                           (seg << 4), 0xffff, 0);
+    cpu_x86_load_seg_cache(env, R_ES, seg, 
+                           (seg << 4), 0xffff, 0);
+    cpu_x86_load_seg_cache(env, R_FS, seg, 
+                           (seg << 4), 0xffff, 0);
+    cpu_x86_load_seg_cache(env, R_GS, seg, 
+                           (seg << 4), 0xffff, 0);
+
+    /* exception support */
+    env->idt.base = (unsigned long)idt_table;
+    env->idt.limit = sizeof(idt_table) - 1;
+    set_idt(0, 0);
+    set_idt(1, 0);
+    set_idt(2, 0);
+    set_idt(3, 3);
+    set_idt(4, 3);
+    set_idt(5, 3);
+    set_idt(6, 0);
+    set_idt(7, 0);
+    set_idt(8, 0);
+    set_idt(9, 0);
+    set_idt(10, 0);
+    set_idt(11, 0);
+    set_idt(12, 0);
+    set_idt(13, 0);
+    set_idt(14, 0);
+    set_idt(15, 0);
+    set_idt(16, 0);
+    set_idt(17, 0);
+    set_idt(18, 0);
+    set_idt(19, 0);
+        
+    /* put return code */
+    *seg_to_linear(env->segs[R_CS].selector, 0) = 0xb4; /* mov ah, $0 */
+    *seg_to_linear(env->segs[R_CS].selector, 1) = 0x00;
+    *seg_to_linear(env->segs[R_CS].selector, 2) = 0xcd; /* int $0x21 */
+    *seg_to_linear(env->segs[R_CS].selector, 3) = 0x21;
+    pushw(env, 0x0000);
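+    /* a plain RET in the .com pops this 0 and jumps to CS:0000, where the stub
+       above terminates the program via int 0x21 / AH=0 */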
+
+    /* the values of these registers seem to be assumed by pi_10.com */
+    env->regs[R_ESI] = 0x100;
+    env->regs[R_ECX] = 0xff;
+    env->regs[R_EBP] = 0x0900;
+    env->regs[R_EDI] = 0xfffe;
+
+    /* inform the emulator of the mmaped memory */
+    page_set_flags(0x00000000, 0x110000, 
+                   PAGE_WRITE | PAGE_READ | PAGE_EXEC | PAGE_VALID);
+
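+    /* from vm86 mode with IOPL < 3, "int imm8" raises #GP(0): cpu_x86_exec()
+       then returns EXCP0D_GPF, the DOS service is emulated below, and EIP is
+       advanced past the 2-byte int instruction */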
+    for(;;) {
+        ret = cpu_x86_exec(env);
+        switch(ret) {
+        case EXCP0D_GPF:
+            {
+                int int_num, ah;
+                int_num = *(uint8_t *)(env->segs[R_CS].base + env->eip + 1);
+                if (int_num != 0x21)
+                    goto unknown_int;
+                ah = (env->regs[R_EAX] >> 8) & 0xff;
+                switch(ah) {
+                case 0x00: /* exit */
+                    exit(0);
+                case 0x02: /* write char */
+                    {
+                        uint8_t c = env->regs[R_EDX];
+                        write(1, &c, 1);
+                    }
+                    break;
+                case 0x09: /* write '$'-terminated string at DS:DX */
+                    {
+                        unsigned int offset = env->regs[R_EDX];
+                        uint8_t c;
+                        for(;;) {
+                            c = *seg_to_linear(env->segs[R_DS].selector, offset++);
+                            if (c == '$')
+                                break;
+                            write(1, &c, 1);
+                        }
+                        env->regs[R_EAX] = (env->regs[R_EAX] & ~0xff) | '$';
+                    }
+                    break;
+                default:
+                unknown_int:
+                    fprintf(stderr, "unsupported int 0x%02x\n", int_num);
+                    cpu_dump_state(env, stderr, fprintf, 0);
+                    //                    exit(1);
+                }
+                env->eip += 2;
+            }
+            break;
+        default:
+            fprintf(stderr, "unhandled cpu_exec return code (0x%x)\n", ret);
+            cpu_dump_state(env, stderr, fprintf, 0);
+            exit(1);
+        }
+    }
+}
Index: /trunk/src/recompiler_new/tests/runcom.c
===================================================================
--- /trunk/src/recompiler_new/tests/runcom.c	(revision 13168)
+++ /trunk/src/recompiler_new/tests/runcom.c	(revision 13168)
@@ -0,0 +1,195 @@
+/*
+ * Simple example of use of vm86: launch a basic .com DOS executable
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+#include <linux/unistd.h>
+#include <asm/vm86.h>
+
+//#define SIGTEST
+
+#undef __syscall_return
+#define __syscall_return(type, res) \
+do { \
+	return (type) (res); \
+} while (0)
+
+_syscall2(int, vm86, int, func, struct vm86plus_struct *, v86)
+
+#define COM_BASE_ADDR    0x10100
+
+void usage(void)
+{
+    printf("runcom version 0.1 (c) 2003 Fabrice Bellard\n"
+           "usage: runcom file.com\n"
+           "VM86 Run simple .com DOS executables (linux vm86 test mode)\n");
+    exit(1);
+}
+
+static inline void set_bit(uint8_t *a, unsigned int bit)
+{
+    a[bit / 8] |= (1 << (bit % 8));
+}
+
+static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg)
+{
+    return (uint8_t *)((seg << 4) + (reg & 0xffff));
+}
+
+static inline void pushw(struct vm86_regs *r, int val)
+{
+    r->esp = (r->esp & ~0xffff) | ((r->esp - 2) & 0xffff);
+    *(uint16_t *)seg_to_linear(r->ss, r->esp) = val;
+}
+
+void dump_regs(struct vm86_regs *r)
+{
+    fprintf(stderr, 
+            "EAX=%08lx EBX=%08lx ECX=%08lx EDX=%08lx\n"
+            "ESI=%08lx EDI=%08lx EBP=%08lx ESP=%08lx\n"
+            "EIP=%08lx EFL=%08lx\n"
+            "CS=%04x DS=%04x ES=%04x SS=%04x FS=%04x GS=%04x\n",
+            r->eax, r->ebx, r->ecx, r->edx, r->esi, r->edi, r->ebp, r->esp,
+            r->eip, r->eflags,
+            r->cs, r->ds, r->es, r->ss, r->fs, r->gs);
+}
+
+#ifdef SIGTEST
+void alarm_handler(int sig)
+{
+    fprintf(stderr, "alarm signal=%d\n", sig);
+    alarm(1);
+}
+#endif
+
+int main(int argc, char **argv)
+{
+    uint8_t *vm86_mem;
+    const char *filename;
+    int fd, ret, seg;
+    struct vm86plus_struct ctx;
+    struct vm86_regs *r;
+
+    if (argc != 2)
+        usage();
+    filename = argv[1];
+    
+    vm86_mem = mmap((void *)0x00000000, 0x110000, 
+                    PROT_WRITE | PROT_READ | PROT_EXEC, 
+                    MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+    if (vm86_mem == MAP_FAILED) {
+        perror("mmap");
+        exit(1);
+    }
+#ifdef SIGTEST
+    {
+        struct sigaction act;
+
+        act.sa_handler = alarm_handler;
+        sigemptyset(&act.sa_mask);
+        act.sa_flags = 0;
+        sigaction(SIGALRM, &act, NULL);
+        alarm(1);
+    }
+#endif
+
+    /* load the MSDOS .com executable */
+    fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        perror(filename);
+        exit(1);
+    }
+    ret = read(fd, vm86_mem + COM_BASE_ADDR, 65536 - 256);
+    if (ret < 0) {
+        perror("read");
+        exit(1);
+    }
+    close(fd);
+
+    memset(&ctx, 0, sizeof(ctx));
+    /* init basic registers */
+    r = &ctx.regs;
+    r->eip = 0x100;
+    r->esp = 0xfffe;
+    seg = (COM_BASE_ADDR - 0x100) >> 4;
+    r->cs = seg;
+    r->ss = seg;
+    r->ds = seg;
+    r->es = seg;
+    r->fs = seg;
+    r->gs = seg;
+    r->eflags = VIF_MASK;
+
+    /* put return code */
+    set_bit((uint8_t *)&ctx.int_revectored, 0x21);
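+    /* with int 0x21 revectored, vm86() returns VM86_INTx to us instead of
+       reflecting the interrupt through the real-mode IVT */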
+    *seg_to_linear(r->cs, 0) = 0xb4; /* mov ah, $0 */
+    *seg_to_linear(r->cs, 1) = 0x00;
+    *seg_to_linear(r->cs, 2) = 0xcd; /* int $0x21 */
+    *seg_to_linear(r->cs, 3) = 0x21;
+    pushw(&ctx.regs, 0x0000);
+
+    /* the values of these registers seem to be assumed by pi_10.com */
+    r->esi = 0x100;
+    r->ecx = 0xff;
+    r->ebp = 0x0900;
+    r->edi = 0xfffe;
+
+    for(;;) {
+        ret = vm86(VM86_ENTER, &ctx);
+        switch(VM86_TYPE(ret)) {
+        case VM86_INTx:
+            {
+                int int_num, ah;
+                
+                int_num = VM86_ARG(ret);
+                if (int_num != 0x21)
+                    goto unknown_int;
+                ah = (r->eax >> 8) & 0xff;
+                switch(ah) {
+                case 0x00: /* exit */
+                    exit(0);
+                case 0x02: /* write char */
+                    {
+                        uint8_t c = r->edx;
+                        write(1, &c, 1);
+                    }
+                    break;
+                case 0x09: /* write '$'-terminated string at DS:DX */
+                    {
+                        unsigned int offset = r->edx;
+                        uint8_t c;
+                        for(;;) {
+                            c = *seg_to_linear(r->ds, offset++);
+                            if (c == '$')
+                                break;
+                            write(1, &c, 1);
+                        }
+                        r->eax = (r->eax & ~0xff) | '$';
+                    }
+                    break;
+                default:
+                unknown_int:
+                    fprintf(stderr, "unsupported int 0x%02x\n", int_num);
+                    dump_regs(&ctx.regs);
+                    //                    exit(1);
+                }
+            }
+            break;
+        case VM86_SIGNAL:
+            /* a signal came; just ignore it */
+            break;
+        case VM86_STI:
+            break;
+        default:
+            fprintf(stderr, "unhandled vm86 return code (0x%x)\n", ret);
+            dump_regs(&ctx.regs);
+            exit(1);
+        }
+    }
+}
Index: /trunk/src/recompiler_new/tests/sha1.c
===================================================================
--- /trunk/src/recompiler_new/tests/sha1.c	(revision 13168)
+++ /trunk/src/recompiler_new/tests/sha1.c	(revision 13168)
@@ -0,0 +1,242 @@
+
+/* from valgrind tests */
+
+/* ================ sha1.c ================ */
+/*
+SHA-1 in C
+By Steve Reid <steve@edmweb.com>
+100% Public Domain
+
+Test Vectors (from FIPS PUB 180-1)
+"abc"
+  A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
+"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+  84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
+A million repetitions of "a"
+  34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
+*/
+
+/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */
+/* #define SHA1HANDSOFF * Copies data before messing with it. */
+
+#define SHA1HANDSOFF
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>	/* for u_int*_t */
+
+/* ================ sha1.h ================ */
+/*
+SHA-1 in C
+By Steve Reid <steve@edmweb.com>
+100% Public Domain
+*/
+
+typedef struct {
+    u_int32_t state[5];
+    u_int32_t count[2];
+    unsigned char buffer[64];
+} SHA1_CTX;
+
+void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64]);
+void SHA1Init(SHA1_CTX* context);
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len);
+void SHA1Final(unsigned char digest[20], SHA1_CTX* context);
+/* ================ end of sha1.h ================ */
+#include <endian.h>
+
+#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
+
+/* blk0() and blk() perform the initial expand. */
+/* I got the idea of expanding during the round function from SSLeay */
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
+    |(rol(block->l[i],8)&0x00FF00FF))
+#elif BYTE_ORDER == BIG_ENDIAN
+#define blk0(i) block->l[i]
+#else
+#error "Endianness not defined!"
+#endif
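+/* on little-endian hosts blk0() byte-swaps each input word, since SHA-1 is
+   defined over big-endian 32-bit words */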
+#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \
+    ^block->l[(i+2)&15]^block->l[i&15],1))
+
+/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
+#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
+#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
+#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
+
+
+/* Hash a single 512-bit block. This is the core of the algorithm. */
+
+void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64])
+{
+u_int32_t a, b, c, d, e;
+typedef union {
+    unsigned char c[64];
+    u_int32_t l[16];
+} CHAR64LONG16;
+#ifdef SHA1HANDSOFF
+CHAR64LONG16 block[1];  /* use array to appear as a pointer */
+    memcpy(block, buffer, 64);
+#else
+    /* The following had better never be used because it causes the
+     * pointer-to-const buffer to be cast into a pointer to non-const.
+     * And the result is written through.  I threw a "const" in, hoping
+     * this will cause a diagnostic.
+     */
+CHAR64LONG16* block = (const CHAR64LONG16*)buffer;
+#endif
+    /* Copy context->state[] to working vars */
+    a = state[0];
+    b = state[1];
+    c = state[2];
+    d = state[3];
+    e = state[4];
+    /* 4 rounds of 20 operations each. Loop unrolled. */
+    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
+    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
+    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
+    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
+    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
+    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
+    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
+    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
+    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
+    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
+    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
+    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
+    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
+    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
+    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
+    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
+    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
+    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
+    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
+    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
+    /* Add the working vars back into context.state[] */
+    state[0] += a;
+    state[1] += b;
+    state[2] += c;
+    state[3] += d;
+    state[4] += e;
+    /* Wipe variables */
+    a = b = c = d = e = 0;
+#ifdef SHA1HANDSOFF
+    memset(block, '\0', sizeof(block));
+#endif
+}
+
+
+/* SHA1Init - Initialize new context */
+
+void SHA1Init(SHA1_CTX* context)
+{
+    /* SHA1 initialization constants */
+    context->state[0] = 0x67452301;
+    context->state[1] = 0xEFCDAB89;
+    context->state[2] = 0x98BADCFE;
+    context->state[3] = 0x10325476;
+    context->state[4] = 0xC3D2E1F0;
+    context->count[0] = context->count[1] = 0;
+}
+
+
+/* Run your data through this. */
+
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len)
+{
+u_int32_t i;
+u_int32_t j;
+
+    j = context->count[0];
+    if ((context->count[0] += len << 3) < j)
+	context->count[1]++;
+    context->count[1] += (len>>29);
+    j = (j >> 3) & 63;
+    if ((j + len) > 63) {
+        memcpy(&context->buffer[j], data, (i = 64-j));
+        SHA1Transform(context->state, context->buffer);
+        for ( ; i + 63 < len; i += 64) {
+            SHA1Transform(context->state, &data[i]);
+        }
+        j = 0;
+    }
+    else i = 0;
+    memcpy(&context->buffer[j], &data[i], len - i);
+}
+
+
+/* Add padding and return the message digest. */
+
+void SHA1Final(unsigned char digest[20], SHA1_CTX* context)
+{
+unsigned i;
+unsigned char finalcount[8];
+unsigned char c;
+
+#if 0	/* untested "improvement" by DHR */
+    /* Convert context->count to a sequence of bytes
+     * in finalcount.  Second element first, but
+     * big-endian order within element.
+     * But we do it all backwards.
+     */
+    unsigned char *fcp = &finalcount[8];
+
+    for (i = 0; i < 2; i++)
+    {
+	u_int32_t t = context->count[i];
+	int j;
+
+	for (j = 0; j < 4; t >>= 8, j++)
+	    *--fcp = (unsigned char) t;
+    }
+#else
+    for (i = 0; i < 8; i++) {
+        finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)]
+         >> ((3-(i & 3)) * 8) ) & 255);  /* Endian independent */
+    }
+#endif
+    c = 0200;
+    SHA1Update(context, &c, 1);
+    while ((context->count[0] & 504) != 448) {
+	c = 0000;
+        SHA1Update(context, &c, 1);
+    }
+    SHA1Update(context, finalcount, 8);  /* Should cause a SHA1Transform() */
+    for (i = 0; i < 20; i++) {
+        digest[i] = (unsigned char)
+         ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
+    }
+    /* Wipe variables */
+    memset(context, '\0', sizeof(*context));
+    memset(&finalcount, '\0', sizeof(finalcount));
+}
+/* ================ end of sha1.c ================ */
+
+#define BUFSIZE 4096
+
+int
+main(int argc, char **argv)
+{
+    SHA1_CTX ctx;
+    unsigned char hash[20], buf[BUFSIZE];
+    int i;
+
+    for(i=0;i<BUFSIZE;i++)
+        buf[i] = i;
+
+    SHA1Init(&ctx);
+    for(i=0;i<1000;i++)
+        SHA1Update(&ctx, buf, BUFSIZE);
+    SHA1Final(hash, &ctx);
+
+    printf("SHA1=");
+    for(i=0;i<20;i++)
+        printf("%02x", hash[i]);
+    printf("\n");
+    return 0;
+}
+
+
Index: /trunk/src/recompiler_new/tests/test-i386-code16.S
===================================================================
--- /trunk/src/recompiler_new/tests/test-i386-code16.S	(revision 13168)
+++ /trunk/src/recompiler_new/tests/test-i386-code16.S	(revision 13168)
@@ -0,0 +1,79 @@
+        .code16
+        .globl code16_start
+        .globl code16_end
+
+CS_SEG = 0xf
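+/* far-call/jump selector used below; expected to be LDT entry 1 (RPL 3) as set
+   up by the caller in test-i386.c via modify_ldt() */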
+
+code16_start:
+
+        .globl code16_func1
+        
+        /* basic test */
+code16_func1 = . - code16_start
+        mov $1, %eax
+        data32 lret
+
+/* test push/pop in 16 bit mode */
+        .globl code16_func2
+code16_func2 = . - code16_start
+        xor %eax, %eax
+        mov $0x12345678, %ebx
+        movl %esp, %ecx
+        push %bx
+        subl %esp, %ecx
+        pop %ax
+        data32 lret
+
+/* test various jmp opcodes */        
+        .globl code16_func3
+code16_func3 = . - code16_start
+        jmp 1f
+        nop
+1:
+        mov $4, %eax
+        mov $0x12345678, %ebx
+        xor %bx, %bx
+        jz 2f
+        add $2, %ax
+2:
+        
+        call myfunc
+        
+        lcall $CS_SEG, $(myfunc2 - code16_start)
+
+        ljmp $CS_SEG, $(myjmp1 - code16_start)
+myjmp1_next:
+
+        cs lcall myfunc2_addr - code16_start
+
+        cs ljmp myjmp2_addr - code16_start
+myjmp2_next:
+
+        data32 lret
+        
+myfunc2_addr:
+        .short myfunc2 - code16_start
+        .short CS_SEG
+
+myjmp2_addr:
+        .short myjmp2 - code16_start
+        .short CS_SEG
+
+myjmp1:
+        add $8, %ax
+        jmp myjmp1_next
+
+myjmp2:
+        add $16, %ax
+        jmp myjmp2_next
+
+myfunc:
+        add $1, %ax
+        ret
+
+myfunc2:
+        add $4, %ax
+        lret
+
+
+code16_end:
Index: /trunk/src/recompiler_new/tests/test-i386-muldiv.h
===================================================================
--- /trunk/src/recompiler_new/tests/test-i386-muldiv.h	(revision 13168)
+++ /trunk/src/recompiler_new/tests/test-i386-muldiv.h	(revision 13168)
@@ -0,0 +1,76 @@
+
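+/* included several times from test-i386.c with OP defined to mul, imul, div or
+   idiv: each inclusion generates byte/word/long (and, on x86_64, quad) variants
+   that run the instruction under a known EFLAGS value and print the operands,
+   results and masked condition codes */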
+void glue(glue(test_, OP), b)(long op0, long op1) 
+{
+    long res, s1, s0, flags;
+    s0 = op0;
+    s1 = op1;
+    res = s0;
+    flags = 0;
+    asm ("push %4\n\t"
+         "popf\n\t"
+         stringify(OP)"b %b2\n\t" 
+         "pushf\n\t"
+         "pop %1\n\t"
+         : "=a" (res), "=g" (flags)
+         : "q" (s1), "0" (res), "1" (flags));
+    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
+           stringify(OP) "b", s0, s1, res, flags & CC_MASK);
+}
+
+void glue(glue(test_, OP), w)(long op0h, long op0, long op1) 
+{
+    long res, s1, flags, resh;
+    s1 = op1;
+    resh = op0h;
+    res = op0;
+    flags = 0;
+    asm ("push %5\n\t"
+         "popf\n\t"
+         stringify(OP) "w %w3\n\t" 
+         "pushf\n\t"
+         "pop %1\n\t"
+         : "=a" (res), "=g" (flags), "=d" (resh)
+         : "q" (s1), "0" (res), "1" (flags), "2" (resh));
+    printf("%-10s AH=" FMTLX " AL=" FMTLX " B=" FMTLX " RH=" FMTLX " RL=" FMTLX " CC=%04lx\n",
+           stringify(OP) "w", op0h, op0, s1, resh, res, flags & CC_MASK);
+}
+
+void glue(glue(test_, OP), l)(long op0h, long op0, long op1) 
+{
+    long res, s1, flags, resh;
+    s1 = op1;
+    resh = op0h;
+    res = op0;
+    flags = 0;
+    asm ("push %5\n\t"
+         "popf\n\t"
+         stringify(OP) "l %k3\n\t" 
+         "pushf\n\t"
+         "pop %1\n\t"
+         : "=a" (res), "=g" (flags), "=d" (resh)
+         : "q" (s1), "0" (res), "1" (flags), "2" (resh));
+    printf("%-10s AH=" FMTLX " AL=" FMTLX " B=" FMTLX " RH=" FMTLX " RL=" FMTLX " CC=%04lx\n",
+           stringify(OP) "l", op0h, op0, s1, resh, res, flags & CC_MASK);
+}
+
+#if defined(__x86_64__)
+void glue(glue(test_, OP), q)(long op0h, long op0, long op1) 
+{
+    long res, s1, flags, resh;
+    s1 = op1;
+    resh = op0h;
+    res = op0;
+    flags = 0;
+    asm ("push %5\n\t"
+         "popf\n\t"
+         stringify(OP) "q %3\n\t" 
+         "pushf\n\t"
+         "pop %1\n\t"
+         : "=a" (res), "=g" (flags), "=d" (resh)
+         : "q" (s1), "0" (res), "1" (flags), "2" (resh));
+    printf("%-10s AH=" FMTLX " AL=" FMTLX " B=" FMTLX " RH=" FMTLX " RL=" FMTLX " CC=%04lx\n",
+           stringify(OP) "q", op0h, op0, s1, resh, res, flags & CC_MASK);
+}
+#endif
+
+#undef OP
Index: /trunk/src/recompiler_new/tests/test-i386-shift.h
===================================================================
--- /trunk/src/recompiler_new/tests/test-i386-shift.h	(revision 13168)
+++ /trunk/src/recompiler_new/tests/test-i386-shift.h	(revision 13168)
@@ -0,0 +1,187 @@
+
+#define exec_op glue(exec_, OP)
+#define exec_opq glue(glue(exec_, OP), q)
+#define exec_opl glue(glue(exec_, OP), l)
+#define exec_opw glue(glue(exec_, OP), w)
+#define exec_opb glue(glue(exec_, OP), b)
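+/* included once per shift/rotate/bit-test OP from test-i386.c: EXECSHIFT seeds
+   EFLAGS with push/popf, executes the instruction (count in %cl except for the
+   bt family, which takes a register bit index), then captures the resulting
+   flags with pushf/pop */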
+
+#ifndef OP_SHIFTD
+
+#ifdef OP_NOBYTE
+#define EXECSHIFT(size, rsize, res, s1, s2, flags) \
+    asm ("push %4\n\t"\
+         "popf\n\t"\
+         stringify(OP) size " %" rsize "2, %" rsize "0\n\t" \
+         "pushf\n\t"\
+         "pop %1\n\t"\
+         : "=g" (res), "=g" (flags)\
+         : "r" (s1), "0" (res), "1" (flags));
+#else
+#define EXECSHIFT(size, rsize, res, s1, s2, flags) \
+    asm ("push %4\n\t"\
+         "popf\n\t"\
+         stringify(OP) size " %%cl, %" rsize "0\n\t" \
+         "pushf\n\t"\
+         "pop %1\n\t"\
+         : "=q" (res), "=g" (flags)\
+         : "c" (s1), "0" (res), "1" (flags));
+#endif
+
+#if defined(__x86_64__)
+void exec_opq(long s2, long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECSHIFT("q", "", res, s1, s2, flags);
+    /* overflow is undefined if count != 1 */
+    if (s1 != 1)
+      flags &= ~CC_O;
+    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+           stringify(OP) "q", s0, s1, res, iflags, flags & CC_MASK);
+}
+#endif
+
+void exec_opl(long s2, long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECSHIFT("l", "k", res, s1, s2, flags);
+    /* overflow is undefined if count != 1 */
+    if (s1 != 1)
+      flags &= ~CC_O;
+    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+           stringify(OP) "l", s0, s1, res, iflags, flags & CC_MASK);
+}
+
+void exec_opw(long s2, long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECSHIFT("w", "w", res, s1, s2, flags);
+    /* overflow is undefined if count != 1 */
+    if (s1 != 1)
+      flags &= ~CC_O;
+    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+           stringify(OP) "w", s0, s1, res, iflags, flags & CC_MASK);
+}
+
+#else
+#define EXECSHIFT(size, rsize, res, s1, s2, flags) \
+    asm ("push %4\n\t"\
+         "popf\n\t"\
+         stringify(OP) size " %%cl, %" rsize "5, %" rsize "0\n\t" \
+         "pushf\n\t"\
+         "pop %1\n\t"\
+         : "=g" (res), "=g" (flags)\
+         : "c" (s1), "0" (res), "1" (flags), "r" (s2));
+
+#if defined(__x86_64__)
+void exec_opq(long s2, long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECSHIFT("q", "", res, s1, s2, flags);
+    /* overflow is undefined if count != 1 */
+    if (s1 != 1)
+      flags &= ~CC_O;
+    printf("%-10s A=" FMTLX " B=" FMTLX " C=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+           stringify(OP) "q", s0, s2, s1, res, iflags, flags & CC_MASK);
+}
+#endif
+
+void exec_opl(long s2, long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECSHIFT("l", "k", res, s1, s2, flags);
+    /* overflow is undefined if count != 1 */
+    if (s1 != 1)
+      flags &= ~CC_O;
+    printf("%-10s A=" FMTLX " B=" FMTLX " C=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+           stringify(OP) "l", s0, s2, s1, res, iflags, flags & CC_MASK);
+}
+
+void exec_opw(long s2, long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECSHIFT("w", "w", res, s1, s2, flags);
+    /* overflow is undefined if count != 1 */
+    if (s1 != 1)
+      flags &= ~CC_O;
+    printf("%-10s A=" FMTLX " B=" FMTLX " C=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+           stringify(OP) "w", s0, s2, s1, res, iflags, flags & CC_MASK);
+}
+
+#endif
+
+#ifndef OP_NOBYTE
+void exec_opb(long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECSHIFT("b", "b", res, s1, 0, flags);
+    /* overflow is undefined if count != 1 */
+    if (s1 != 1)
+      flags &= ~CC_O;
+    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+           stringify(OP) "b", s0, s1, res, iflags, flags & CC_MASK);
+}
+#endif
+
+void exec_op(long s2, long s0, long s1)
+{
+    s2 = i2l(s2);
+    s0 = i2l(s0);
+#if defined(__x86_64__)
+    exec_opq(s2, s0, s1, 0);
+#endif
+    exec_opl(s2, s0, s1, 0);
+#ifdef OP_SHIFTD
+    if (s1 <= 15)
+        exec_opw(s2, s0, s1, 0);
+#else
+    exec_opw(s2, s0, s1, 0);
+#endif
+#ifndef OP_NOBYTE
+    exec_opb(s0, s1, 0);
+#endif
+#ifdef OP_CC
+#if defined(__x86_64__)
+    exec_opq(s2, s0, s1, CC_C);
+#endif
+    exec_opl(s2, s0, s1, CC_C);
+    exec_opw(s2, s0, s1, CC_C);
+    exec_opb(s0, s1, CC_C);
+#endif
+}
+
+void glue(test_, OP)(void)
+{
+    int i, n;
+#if defined(__x86_64__)
+    n = 64;
+#else
+    n = 32;
+#endif
+    for(i = 0; i < n; i++)
+        exec_op(0x21ad3d34, 0x12345678, i);
+    for(i = 0; i < n; i++)
+        exec_op(0x813f3421, 0x82345679, i);
+}
+
+void *glue(_test_, OP) __init_call = glue(test_, OP);
+
+#undef OP
+#undef OP_CC
+#undef OP_SHIFTD
+#undef OP_NOBYTE
+#undef EXECSHIFT
+
Index: /trunk/src/recompiler_new/tests/test-i386-vm86.S
===================================================================
--- /trunk/src/recompiler_new/tests/test-i386-vm86.S	(revision 13168)
+++ /trunk/src/recompiler_new/tests/test-i386-vm86.S	(revision 13168)
@@ -0,0 +1,104 @@
+        .code16
+        .globl vm86_code_start
+        .globl vm86_code_end
+
+#define GET_OFFSET(x) ((x) - vm86_code_start + 0x100)
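+/* this code is expected to be copied by test-i386.c so that it executes at
+   offset 0x100 of its vm86 segment (the same layout as a .com program);
+   GET_OFFSET rebases label addresses to that load offset */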
+
+vm86_code_start:
+        movw $GET_OFFSET(hello_world), %dx
+        movb $0x09, %ah
+        int $0x21
+
+        /* prepare int 0x90 vector */
+        xorw %ax, %ax
+        movw %ax, %es
+        es movw $GET_OFFSET(int90_test), 0x90 * 4
+        es movw %cs, 0x90 * 4 + 2
+        
+        /* launch int 0x90 */
+
+        int $0x90
+
+        /* test IF support */
+        movw $GET_OFFSET(IF_msg), %dx
+        movb $0x09, %ah
+        int $0x21
+
+        pushf 
+        popw %dx
+        movb $0xff, %ah
+        int $0x21
+
+        cli
+        pushf 
+        popw %dx
+        movb $0xff, %ah
+        int $0x21
+
+        sti        
+        pushfl 
+        popl %edx
+        movb $0xff, %ah
+        int $0x21
+        
+#if 0
+        movw $GET_OFFSET(IF_msg1), %dx
+        movb $0x09, %ah
+        int $0x21
+
+        pushf
+        movw %sp, %bx
+        andw $~0x200, (%bx)
+        popf
+#else
+        cli
+#endif
+
+        pushf 
+        popw %dx
+        movb $0xff, %ah
+        int $0x21
+        
+        pushfl
+        movw %sp, %bx
+        orw $0x200, (%bx)
+        popfl
+
+        pushfl
+        popl %edx
+        movb $0xff, %ah
+        int $0x21
+
+        movb $0x00, %ah
+        int $0x21
+
+int90_test:
+        pushf 
+        pop %dx
+        movb $0xff, %ah
+        int $0x21
+
+        movw %sp, %bx
+        movw 4(%bx), %dx
+        movb $0xff, %ah
+        int $0x21
+        
+        movw $GET_OFFSET(int90_msg), %dx
+        movb $0x09, %ah
+        int $0x21
+        iret
+                    
+int90_msg:
+        .string "INT90 started\n$"
+ 
+hello_world:
+        .string "Hello VM86 world\n$"
+
+IF_msg:
+        .string "VM86 IF test\n$"
+
+IF_msg1:
+        .string "If you see a diff here, your Linux kernel is buggy, please update to 2.4.20 kernel\n$"
+
+vm86_code_end:
+        
Index: /trunk/src/recompiler_new/tests/test-i386.c
===================================================================
--- /trunk/src/recompiler_new/tests/test-i386.c	(revision 13168)
+++ /trunk/src/recompiler_new/tests/test-i386.c	(revision 13168)
@@ -0,0 +1,2638 @@
+/*
+ *  x86 CPU test
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * Sun GPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the General Public License version 2 (GPLv2) at this time for any software where
+ * a choice of GPL license versions is made available with the language indicating
+ * that GPLv2 or any later version may be used, or where a choice of which version
+ * of the GPL is applied is otherwise unspecified.
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <math.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <errno.h>
+#include <sys/ucontext.h>
+#include <sys/mman.h>
+
+#if !defined(__x86_64__)
+#define TEST_VM86
+#define TEST_SEGS
+#endif
+//#define LINUX_VM86_IOPL_FIX
+//#define TEST_P4_FLAGS
+#if defined(__x86_64__)
+#define TEST_SSE
+#define TEST_CMOV  1
+#define TEST_FCOMI 1
+#else
+//#define TEST_SSE
+#define TEST_CMOV  0
+#define TEST_FCOMI 0
+#endif
+
+#if defined(__x86_64__)
+#define FMT64X "%016lx"
+#define FMTLX "%016lx"
+#define X86_64_ONLY(x) x
+#else
+#define FMT64X "%016" PRIx64
+#define FMTLX "%08lx"
+#define X86_64_ONLY(x)
+#endif
+
+#ifdef TEST_VM86
+#include <asm/vm86.h>
+#endif
+
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s)	tostring(s)
+#define tostring(s)	#s
+
+#define CC_C   	0x0001
+#define CC_P 	0x0004
+#define CC_A	0x0010
+#define CC_Z	0x0040
+#define CC_S    0x0080
+#define CC_O    0x0800
+
+#define __init_call	__attribute__ ((unused,__section__ ("initcall")))
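+/* test functions tagged with __init_call have their addresses collected in the
+   "initcall" section, so the test driver can run them by walking that section
+   instead of calling each one explicitly */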
+
+#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
+
+#if defined(__x86_64__)
+static inline long i2l(long v)
+{
+    return v | ((v ^ 0xabcd) << 32);
+}
+#else
+static inline long i2l(long v)
+{
+    return v;
+}
+#endif
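+/* i2l() widens a 32-bit test pattern to register width: on x86_64 it derives a
+   pattern for the upper 32 bits so the 64-bit operand forms are exercised; on
+   32-bit hosts it is the identity */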
+
+#define OP add
+#include "test-i386.h"
+
+#define OP sub
+#include "test-i386.h"
+
+#define OP xor
+#include "test-i386.h"
+
+#define OP and
+#include "test-i386.h"
+
+#define OP or
+#include "test-i386.h"
+
+#define OP cmp
+#include "test-i386.h"
+
+#define OP adc
+#define OP_CC
+#include "test-i386.h"
+
+#define OP sbb
+#define OP_CC
+#include "test-i386.h"
+
+#define OP inc
+#define OP_CC
+#define OP1
+#include "test-i386.h"
+
+#define OP dec
+#define OP_CC
+#define OP1
+#include "test-i386.h"
+
+#define OP neg
+#define OP_CC
+#define OP1
+#include "test-i386.h"
+
+#define OP not
+#define OP_CC
+#define OP1
+#include "test-i386.h"
+
+#undef CC_MASK
+#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O)
+
+#define OP shl
+#include "test-i386-shift.h"
+
+#define OP shr
+#include "test-i386-shift.h"
+
+#define OP sar
+#include "test-i386-shift.h"
+
+#define OP rol
+#include "test-i386-shift.h"
+
+#define OP ror
+#include "test-i386-shift.h"
+
+#define OP rcr
+#define OP_CC
+#include "test-i386-shift.h"
+
+#define OP rcl
+#define OP_CC
+#include "test-i386-shift.h"
+
+#define OP shld
+#define OP_SHIFTD
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+#define OP shrd
+#define OP_SHIFTD
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+/* XXX: should be more precise ? */
+#undef CC_MASK
+#define CC_MASK (CC_C)
+
+#define OP bt
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+#define OP bts
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+#define OP btr
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+#define OP btc
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+/* lea test (modrm support) */
+#define TEST_LEAQ(STR)\
+{\
+    asm("lea " STR ", %0"\
+        : "=r" (res)\
+        : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
+    printf("lea %s = " FMTLX "\n", STR, res);\
+}
+
+#define TEST_LEA(STR)\
+{\
+    asm("lea " STR ", %0"\
+        : "=r" (res)\
+        : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
+    printf("lea %s = " FMTLX "\n", STR, res);\
+}
+
+#define TEST_LEA16(STR)\
+{\
+    asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
+        : "=wq" (res)\
+        : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
+    printf("lea %s = %08lx\n", STR, res);\
+}
+
+
+void test_lea(void)
+{
+    long eax, ebx, ecx, edx, esi, edi, res;
+    eax = i2l(0x0001);
+    ebx = i2l(0x0002);
+    ecx = i2l(0x0004);
+    edx = i2l(0x0008);
+    esi = i2l(0x0010);
+    edi = i2l(0x0020);
+
+    TEST_LEA("0x4000");
+
+    TEST_LEA("(%%eax)");
+    TEST_LEA("(%%ebx)");
+    TEST_LEA("(%%ecx)");
+    TEST_LEA("(%%edx)");
+    TEST_LEA("(%%esi)");
+    TEST_LEA("(%%edi)");
+
+    TEST_LEA("0x40(%%eax)");
+    TEST_LEA("0x40(%%ebx)");
+    TEST_LEA("0x40(%%ecx)");
+    TEST_LEA("0x40(%%edx)");
+    TEST_LEA("0x40(%%esi)");
+    TEST_LEA("0x40(%%edi)");
+
+    TEST_LEA("0x4000(%%eax)");
+    TEST_LEA("0x4000(%%ebx)");
+    TEST_LEA("0x4000(%%ecx)");
+    TEST_LEA("0x4000(%%edx)");
+    TEST_LEA("0x4000(%%esi)");
+    TEST_LEA("0x4000(%%edi)");
+
+    TEST_LEA("(%%eax, %%ecx)");
+    TEST_LEA("(%%ebx, %%edx)");
+    TEST_LEA("(%%ecx, %%ecx)");
+    TEST_LEA("(%%edx, %%ecx)");
+    TEST_LEA("(%%esi, %%ecx)");
+    TEST_LEA("(%%edi, %%ecx)");
+
+    TEST_LEA("0x40(%%eax, %%ecx)");
+    TEST_LEA("0x4000(%%ebx, %%edx)");
+
+    TEST_LEA("(%%ecx, %%ecx, 2)");
+    TEST_LEA("(%%edx, %%ecx, 4)");
+    TEST_LEA("(%%esi, %%ecx, 8)");
+
+    TEST_LEA("(,%%eax, 2)");
+    TEST_LEA("(,%%ebx, 4)");
+    TEST_LEA("(,%%ecx, 8)");
+
+    TEST_LEA("0x40(,%%eax, 2)");
+    TEST_LEA("0x40(,%%ebx, 4)");
+    TEST_LEA("0x40(,%%ecx, 8)");
+
+
+    TEST_LEA("-10(%%ecx, %%ecx, 2)");
+    TEST_LEA("-10(%%edx, %%ecx, 4)");
+    TEST_LEA("-10(%%esi, %%ecx, 8)");
+
+    TEST_LEA("0x4000(%%ecx, %%ecx, 2)");
+    TEST_LEA("0x4000(%%edx, %%ecx, 4)");
+    TEST_LEA("0x4000(%%esi, %%ecx, 8)");
+
+#if defined(__x86_64__)
+    TEST_LEAQ("0x4000");
+    TEST_LEAQ("0x4000(%%rip)");
+
+    TEST_LEAQ("(%%rax)");
+    TEST_LEAQ("(%%rbx)");
+    TEST_LEAQ("(%%rcx)");
+    TEST_LEAQ("(%%rdx)");
+    TEST_LEAQ("(%%rsi)");
+    TEST_LEAQ("(%%rdi)");
+
+    TEST_LEAQ("0x40(%%rax)");
+    TEST_LEAQ("0x40(%%rbx)");
+    TEST_LEAQ("0x40(%%rcx)");
+    TEST_LEAQ("0x40(%%rdx)");
+    TEST_LEAQ("0x40(%%rsi)");
+    TEST_LEAQ("0x40(%%rdi)");
+
+    TEST_LEAQ("0x4000(%%rax)");
+    TEST_LEAQ("0x4000(%%rbx)");
+    TEST_LEAQ("0x4000(%%rcx)");
+    TEST_LEAQ("0x4000(%%rdx)");
+    TEST_LEAQ("0x4000(%%rsi)");
+    TEST_LEAQ("0x4000(%%rdi)");
+
+    TEST_LEAQ("(%%rax, %%rcx)");
+    TEST_LEAQ("(%%rbx, %%rdx)");
+    TEST_LEAQ("(%%rcx, %%rcx)");
+    TEST_LEAQ("(%%rdx, %%rcx)");
+    TEST_LEAQ("(%%rsi, %%rcx)");
+    TEST_LEAQ("(%%rdi, %%rcx)");
+
+    TEST_LEAQ("0x40(%%rax, %%rcx)");
+    TEST_LEAQ("0x4000(%%rbx, %%rdx)");
+
+    TEST_LEAQ("(%%rcx, %%rcx, 2)");
+    TEST_LEAQ("(%%rdx, %%rcx, 4)");
+    TEST_LEAQ("(%%rsi, %%rcx, 8)");
+
+    TEST_LEAQ("(,%%rax, 2)");
+    TEST_LEAQ("(,%%rbx, 4)");
+    TEST_LEAQ("(,%%rcx, 8)");
+
+    TEST_LEAQ("0x40(,%%rax, 2)");
+    TEST_LEAQ("0x40(,%%rbx, 4)");
+    TEST_LEAQ("0x40(,%%rcx, 8)");
+
+
+    TEST_LEAQ("-10(%%rcx, %%rcx, 2)");
+    TEST_LEAQ("-10(%%rdx, %%rcx, 4)");
+    TEST_LEAQ("-10(%%rsi, %%rcx, 8)");
+
+    TEST_LEAQ("0x4000(%%rcx, %%rcx, 2)");
+    TEST_LEAQ("0x4000(%%rdx, %%rcx, 4)");
+    TEST_LEAQ("0x4000(%%rsi, %%rcx, 8)");
+#else
+    /* limited 16 bit addressing test */
+    TEST_LEA16("0x4000");
+    TEST_LEA16("(%%bx)");
+    TEST_LEA16("(%%si)");
+    TEST_LEA16("(%%di)");
+    TEST_LEA16("0x40(%%bx)");
+    TEST_LEA16("0x40(%%si)");
+    TEST_LEA16("0x40(%%di)");
+    TEST_LEA16("0x4000(%%bx)");
+    TEST_LEA16("0x4000(%%si)");
+    TEST_LEA16("(%%bx,%%si)");
+    TEST_LEA16("(%%bx,%%di)");
+    TEST_LEA16("0x40(%%bx,%%si)");
+    TEST_LEA16("0x40(%%bx,%%di)");
+    TEST_LEA16("0x4000(%%bx,%%si)");
+    TEST_LEA16("0x4000(%%bx,%%di)");
+#endif
+}
+
+#define TEST_JCC(JCC, v1, v2)\
+{\
+    int res;\
+    asm("movl $1, %0\n\t"\
+        "cmpl %2, %1\n\t"\
+        "j" JCC " 1f\n\t"\
+        "movl $0, %0\n\t"\
+        "1:\n\t"\
+        : "=r" (res)\
+        : "r" (v1), "r" (v2));\
+    printf("%-10s %d\n", "j" JCC, res);\
+\
+    asm("movl $0, %0\n\t"\
+        "cmpl %2, %1\n\t"\
+        "set" JCC " %b0\n\t"\
+        : "=r" (res)\
+        : "r" (v1), "r" (v2));\
+    printf("%-10s %d\n", "set" JCC, res);\
+ if (TEST_CMOV) {\
+    long val = i2l(1);\
+    long res = i2l(0x12345678);\
+X86_64_ONLY(\
+    asm("cmpl %2, %1\n\t"\
+        "cmov" JCC "q %3, %0\n\t"\
+        : "=r" (res)\
+        : "r" (v1), "r" (v2), "m" (val), "0" (res));\
+        printf("%-10s R=" FMTLX "\n", "cmov" JCC "q", res);)\
+    asm("cmpl %2, %1\n\t"\
+        "cmov" JCC "l %k3, %k0\n\t"\
+        : "=r" (res)\
+        : "r" (v1), "r" (v2), "m" (val), "0" (res));\
+        printf("%-10s R=" FMTLX "\n", "cmov" JCC "l", res);\
+    asm("cmpl %2, %1\n\t"\
+        "cmov" JCC "w %w3, %w0\n\t"\
+        : "=r" (res)\
+        : "r" (v1), "r" (v2), "r" (1), "0" (res));\
+        printf("%-10s R=" FMTLX "\n", "cmov" JCC "w", res);\
+ } \
+}
+
+/* various jump tests */
+void test_jcc(void)
+{
+    TEST_JCC("ne", 1, 1);
+    TEST_JCC("ne", 1, 0);
+
+    TEST_JCC("e", 1, 1);
+    TEST_JCC("e", 1, 0);
+
+    TEST_JCC("l", 1, 1);
+    TEST_JCC("l", 1, 0);
+    TEST_JCC("l", 1, -1);
+
+    TEST_JCC("le", 1, 1);
+    TEST_JCC("le", 1, 0);
+    TEST_JCC("le", 1, -1);
+
+    TEST_JCC("ge", 1, 1);
+    TEST_JCC("ge", 1, 0);
+    TEST_JCC("ge", -1, 1);
+
+    TEST_JCC("g", 1, 1);
+    TEST_JCC("g", 1, 0);
+    TEST_JCC("g", 1, -1);
+
+    TEST_JCC("b", 1, 1);
+    TEST_JCC("b", 1, 0);
+    TEST_JCC("b", 1, -1);
+
+    TEST_JCC("be", 1, 1);
+    TEST_JCC("be", 1, 0);
+    TEST_JCC("be", 1, -1);
+
+    TEST_JCC("ae", 1, 1);
+    TEST_JCC("ae", 1, 0);
+    TEST_JCC("ae", 1, -1);
+
+    TEST_JCC("a", 1, 1);
+    TEST_JCC("a", 1, 0);
+    TEST_JCC("a", 1, -1);
+
+
+    TEST_JCC("p", 1, 1);
+    TEST_JCC("p", 1, 0);
+
+    TEST_JCC("np", 1, 1);
+    TEST_JCC("np", 1, 0);
+
+    TEST_JCC("o", 0x7fffffff, 0);
+    TEST_JCC("o", 0x7fffffff, -1);
+
+    TEST_JCC("no", 0x7fffffff, 0);
+    TEST_JCC("no", 0x7fffffff, -1);
+
+    TEST_JCC("s", 0, 1);
+    TEST_JCC("s", 0, -1);
+    TEST_JCC("s", 0, 0);
+
+    TEST_JCC("ns", 0, 1);
+    TEST_JCC("ns", 0, -1);
+    TEST_JCC("ns", 0, 0);
+}
+
+#undef CC_MASK
+#ifdef TEST_P4_FLAGS
+#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
+#else
+#define CC_MASK (CC_O | CC_C)
+#endif
+
+#define OP mul
+#include "test-i386-muldiv.h"
+
+#define OP imul
+#include "test-i386-muldiv.h"
+
+void test_imulw2(long op0, long op1) 
+{
+    long res, s1, s0, flags;
+    s0 = op0;
+    s1 = op1;
+    res = s0;
+    flags = 0;
+    asm volatile ("push %4\n\t"
+         "popf\n\t"
+         "imulw %w2, %w0\n\t" 
+         "pushf\n\t"
+         "pop %1\n\t"
+         : "=q" (res), "=g" (flags)
+         : "q" (s1), "0" (res), "1" (flags));
+    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
+           "imulw", s0, s1, res, flags & CC_MASK);
+}
+
+void test_imull2(long op0, long op1) 
+{
+    long res, s1, s0, flags;
+    s0 = op0;
+    s1 = op1;
+    res = s0;
+    flags = 0;
+    asm volatile ("push %4\n\t"
+         "popf\n\t"
+         "imull %k2, %k0\n\t" 
+         "pushf\n\t"
+         "pop %1\n\t"
+         : "=q" (res), "=g" (flags)
+         : "q" (s1), "0" (res), "1" (flags));
+    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
+           "imull", s0, s1, res, flags & CC_MASK);
+}
+
+#if defined(__x86_64__)
+void test_imulq2(long op0, long op1) 
+{
+    long res, s1, s0, flags;
+    s0 = op0;
+    s1 = op1;
+    res = s0;
+    flags = 0;
+    asm volatile ("push %4\n\t"
+         "popf\n\t"
+         "imulq %2, %0\n\t" 
+         "pushf\n\t"
+         "pop %1\n\t"
+         : "=q" (res), "=g" (flags)
+         : "q" (s1), "0" (res), "1" (flags));
+    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
+           "imulq", s0, s1, res, flags & CC_MASK);
+}
+#endif
+
+#define TEST_IMUL_IM(size, rsize, op0, op1)\
+{\
+    long res, flags, s1;\
+    flags = 0;\
+    res = 0;\
+    s1 = op1;\
+    asm volatile ("push %3\n\t"\
+         "popf\n\t"\
+         "imul" size " $" #op0 ", %" rsize "2, %" rsize "0\n\t" \
+         "pushf\n\t"\
+         "pop %1\n\t"\
+         : "=r" (res), "=g" (flags)\
+         : "r" (s1), "1" (flags), "0" (res));\
+    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",\
+           "imul" size " im", (long)op0, (long)op1, res, flags & CC_MASK);\
+}
+
+
+#undef CC_MASK
+#define CC_MASK (0)
+
+#define OP div
+#include "test-i386-muldiv.h"
+
+#define OP idiv
+#include "test-i386-muldiv.h"
+
+void test_mul(void)
+{
+    test_imulb(0x1234561d, 4);
+    test_imulb(3, -4);
+    test_imulb(0x80, 0x80);
+    test_imulb(0x10, 0x10);
+
+    test_imulw(0, 0x1234001d, 45);
+    test_imulw(0, 23, -45);
+    test_imulw(0, 0x8000, 0x8000);
+    test_imulw(0, 0x100, 0x100);
+
+    test_imull(0, 0x1234001d, 45);
+    test_imull(0, 23, -45);
+    test_imull(0, 0x80000000, 0x80000000);
+    test_imull(0, 0x10000, 0x10000);
+
+    test_mulb(0x1234561d, 4);
+    test_mulb(3, -4);
+    test_mulb(0x80, 0x80);
+    test_mulb(0x10, 0x10);
+
+    test_mulw(0, 0x1234001d, 45);
+    test_mulw(0, 23, -45);
+    test_mulw(0, 0x8000, 0x8000);
+    test_mulw(0, 0x100, 0x100);
+
+    test_mull(0, 0x1234001d, 45);
+    test_mull(0, 23, -45);
+    test_mull(0, 0x80000000, 0x80000000);
+    test_mull(0, 0x10000, 0x10000);
+
+    test_imulw2(0x1234001d, 45);
+    test_imulw2(23, -45);
+    test_imulw2(0x8000, 0x8000);
+    test_imulw2(0x100, 0x100);
+
+    test_imull2(0x1234001d, 45);
+    test_imull2(23, -45);
+    test_imull2(0x80000000, 0x80000000);
+    test_imull2(0x10000, 0x10000);
+
+    TEST_IMUL_IM("w", "w", 45, 0x1234);
+    TEST_IMUL_IM("w", "w", -45, 23);
+    TEST_IMUL_IM("w", "w", 0x8000, 0x80000000);
+    TEST_IMUL_IM("w", "w", 0x7fff, 0x1000);
+
+    TEST_IMUL_IM("l", "k", 45, 0x1234);
+    TEST_IMUL_IM("l", "k", -45, 23);
+    TEST_IMUL_IM("l", "k", 0x8000, 0x80000000);
+    TEST_IMUL_IM("l", "k", 0x7fff, 0x1000);
+
+    test_idivb(0x12341678, 0x127e);
+    test_idivb(0x43210123, -5);
+    test_idivb(0x12340004, -1);
+
+    test_idivw(0, 0x12345678, 12347);
+    test_idivw(0, -23223, -45);
+    test_idivw(0, 0x12348000, -1);
+    test_idivw(0x12343, 0x12345678, 0x81238567);
+
+    test_idivl(0, 0x12345678, 12347);
+    test_idivl(0, -233223, -45);
+    test_idivl(0, 0x80000000, -1);
+    test_idivl(0x12343, 0x12345678, 0x81234567);
+
+    test_divb(0x12341678, 0x127e);
+    test_divb(0x43210123, -5);
+    test_divb(0x12340004, -1);
+
+    test_divw(0, 0x12345678, 12347);
+    test_divw(0, -23223, -45);
+    test_divw(0, 0x12348000, -1);
+    test_divw(0x12343, 0x12345678, 0x81238567);
+
+    test_divl(0, 0x12345678, 12347);
+    test_divl(0, -233223, -45);
+    test_divl(0, 0x80000000, -1);
+    test_divl(0x12343, 0x12345678, 0x81234567);
+
+#if defined(__x86_64__)
+    test_imulq(0, 0x1234001d1234001d, 45);
+    test_imulq(0, 23, -45);
+    test_imulq(0, 0x8000000000000000, 0x8000000000000000);
+    test_imulq(0, 0x100000000, 0x100000000);
+
+    test_mulq(0, 0x1234001d1234001d, 45);
+    test_mulq(0, 23, -45);
+    test_mulq(0, 0x8000000000000000, 0x8000000000000000);
+    test_mulq(0, 0x100000000, 0x100000000);
+
+    test_imulq2(0x1234001d1234001d, 45);
+    test_imulq2(23, -45);
+    test_imulq2(0x8000000000000000, 0x8000000000000000);
+    test_imulq2(0x100000000, 0x100000000);
+
+    TEST_IMUL_IM("q", "", 45, 0x12341234);
+    TEST_IMUL_IM("q", "", -45, 23);
+    TEST_IMUL_IM("q", "", 0x8000, 0x8000000000000000);
+    TEST_IMUL_IM("q", "", 0x7fff, 0x10000000);
+
+    test_idivq(0, 0x12345678abcdef, 12347);
+    test_idivq(0, -233223, -45);
+    test_idivq(0, 0x8000000000000000, -1);
+    test_idivq(0x12343, 0x12345678, 0x81234567);
+
+    test_divq(0, 0x12345678abcdef, 12347);
+    test_divq(0, -233223, -45);
+    test_divq(0, 0x8000000000000000, -1);
+    test_divq(0x12343, 0x12345678, 0x81234567);
+#endif
+}
+
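+/* Test the bit scan instructions (bsf/bsr): the destination is preloaded
+   with a known value so the "source is zero" case prints deterministic
+   output, and setz records whether ZF was set */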
+#define TEST_BSX(op, size, op0)\
+{\
+    long res, val, resz;\
+    val = op0;\
+    asm("xor %1, %1\n"\
+        "mov $0x12345678, %0\n"\
+        #op " %" size "2, %" size "0 ; setz %b1" \
+        : "=r" (res), "=q" (resz)\
+        : "g" (val));\
+    printf("%-10s A=" FMTLX " R=" FMTLX " %ld\n", #op, val, res, resz);\
+}
+
+void test_bsx(void)
+{
+    TEST_BSX(bsrw, "w", 0);
+    TEST_BSX(bsrw, "w", 0x12340128);
+    TEST_BSX(bsfw, "w", 0);
+    TEST_BSX(bsfw, "w", 0x12340128);
+    TEST_BSX(bsrl, "k", 0);
+    TEST_BSX(bsrl, "k", 0x00340128);
+    TEST_BSX(bsfl, "k", 0);
+    TEST_BSX(bsfl, "k", 0x00340128);
+#if defined(__x86_64__)
+    TEST_BSX(bsrq, "", 0);
+    TEST_BSX(bsrq, "", 0x003401281234);
+    TEST_BSX(bsfq, "", 0);
+    TEST_BSX(bsfq, "", 0x003401281234);
+#endif
+}
+
+/**********************************************/
+
+union float64u {
+    double d;
+    uint64_t l;
+};
+
+union float64u q_nan = { .l = 0xFFF8000000000000 };
+union float64u s_nan = { .l = 0xFFF0000000000000 };
+
+void test_fops(double a, double b)
+{
+    printf("a=%f b=%f a+b=%f\n", a, b, a + b);
+    printf("a=%f b=%f a-b=%f\n", a, b, a - b);
+    printf("a=%f b=%f a*b=%f\n", a, b, a * b);
+    printf("a=%f b=%f a/b=%f\n", a, b, a / b);
+    printf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b));
+    printf("a=%f sqrt(a)=%f\n", a, sqrt(a));
+    printf("a=%f sin(a)=%f\n", a, sin(a));
+    printf("a=%f cos(a)=%f\n", a, cos(a));
+    printf("a=%f tan(a)=%f\n", a, tan(a));
+    printf("a=%f log(a)=%f\n", a, log(a));
+    printf("a=%f exp(a)=%f\n", a, exp(a));
+    printf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
+    /* just to test some op combining */
+    printf("a=%f asin(sin(a))=%f\n", a, asin(sin(a)));
+    printf("a=%f acos(cos(a))=%f\n", a, acos(cos(a)));
+    printf("a=%f atan(tan(a))=%f\n", a, atan(tan(a)));
+
+}
+
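+/* Clear the pending FPU exception flags: save the 32-bit protected-mode
+   FPU environment with fnstenv, mask the exception bits in the status
+   word and reload it with fldenv */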
+void fpu_clear_exceptions(void)
+{
+    struct __attribute__((packed)) {
+        uint16_t fpuc;
+        uint16_t dummy1;
+        uint16_t fpus;
+        uint16_t dummy2;
+        uint16_t fptag;
+        uint16_t dummy3;
+        uint32_t ignored[4];
+        long double fpregs[8];
+    } float_env32;
+    
+    asm volatile ("fnstenv %0\n" : : "m" (float_env32));
+    float_env32.fpus &= ~0x7f;
+    asm volatile ("fldenv %0\n" : : "m" (float_env32));
+}
+
+/* XXX: display exception bits when supported */
+#define FPUS_EMASK 0x0000
+//#define FPUS_EMASK 0x007f
+
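+/* Compare a and b with fcom/fucom (and f(u)comi when available) and print
+   the C0/C2/C3 condition bits of the FPU status word, plus ZF/PF/CF for
+   the comi forms; fxam classifies the first operand */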
+void test_fcmp(double a, double b)
+{
+    long eflags, fpus;
+
+    fpu_clear_exceptions();
+    asm("fcom %2\n"
+        "fstsw %%ax\n"
+        : "=a" (fpus)
+        : "t" (a), "u" (b));
+    printf("fcom(%f %f)=%04lx \n", 
+           a, b, fpus & (0x4500 | FPUS_EMASK));
+    fpu_clear_exceptions();
+    asm("fucom %2\n"
+        "fstsw %%ax\n"
+        : "=a" (fpus)
+        : "t" (a), "u" (b));
+    printf("fucom(%f %f)=%04lx\n", 
+           a, b, fpus & (0x4500 | FPUS_EMASK));
+    if (TEST_FCOMI) {
+        /* test f(u)comi instruction */
+        fpu_clear_exceptions();
+        asm("fcomi %3, %2\n"
+            "fstsw %%ax\n"
+            "pushf\n"
+            "pop %0\n"
+            : "=r" (eflags), "=a" (fpus)
+            : "t" (a), "u" (b));
+        printf("fcomi(%f %f)=%04lx %02lx\n", 
+               a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C));
+        fpu_clear_exceptions();
+        asm("fucomi %3, %2\n"
+            "fstsw %%ax\n"
+            "pushf\n"
+            "pop %0\n"
+            : "=r" (eflags), "=a" (fpus)
+            : "t" (a), "u" (b));
+        printf("fucomi(%f %f)=%04lx %02lx\n", 
+               a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C));
+    }
+    fpu_clear_exceptions();
+    asm volatile("fxam\n"
+                 "fstsw %%ax\n"
+                 : "=a" (fpus)
+                 : "t" (a));
+    printf("fxam(%f)=%04lx\n", a, fpus & 0x4700);
+    fpu_clear_exceptions();
+}
+
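+/* Conversion tests: print float/long double conversions of 'a', then run
+   fist/fistl/fistpll/frndint under each of the four rounding modes by
+   rewriting the RC field (bits 10-11) of the FPU control word */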
+void test_fcvt(double a)
+{
+    float fa;
+    long double la;
+    int16_t fpuc;
+    int i;
+    int64_t lla;
+    int ia;
+    int16_t wa;
+    double ra;
+
+    fa = a;
+    la = a;
+    printf("(float)%f = %f\n", a, fa);
+    printf("(long double)%f = %Lf\n", a, la);
+    printf("a=" FMT64X "\n", *(uint64_t *)&a);
+    printf("la=" FMT64X " %04x\n", *(uint64_t *)&la, 
+           *(unsigned short *)((char *)(&la) + 8));
+
+    /* test all rounding modes */
+    asm volatile ("fstcw %0" : "=m" (fpuc));
+    for(i=0;i<4;i++) {
+        asm volatile ("fldcw %0" : : "m" ((fpuc & ~0x0c00) | (i << 10)));
+        asm volatile ("fist %0" : "=m" (wa) : "t" (a));
+        asm volatile ("fistl %0" : "=m" (ia) : "t" (a));
+        asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st");
+        asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a));
+        asm volatile ("fldcw %0" : : "m" (fpuc));
+        printf("(short)a = %d\n", wa);
+        printf("(int)a = %d\n", ia);
+        printf("(int64_t)a = " FMT64X "\n", lla);
+        printf("rint(a) = %f\n", ra);
+    }
+}
+
+#define TEST(N) \
+    asm("fld" #N : "=t" (a)); \
+    printf("fld" #N "= %f\n", a);
+
+void test_fconst(void)
+{
+    double a;
+    TEST(1);
+    TEST(l2t);
+    TEST(l2e);
+    TEST(pi);
+    TEST(lg2);
+    TEST(ln2);
+    TEST(z);
+}
+
+void test_fbcd(double a)
+{
+    unsigned short bcd[5];
+    double b;
+
+    asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st");
+    asm("fbld %1" : "=t" (b) : "m" (bcd[0]));
+    printf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n", 
+           a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b);
+}
+
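+/* Load five values on the FPU stack, save and restore the FPU state with
+   the given instruction pair, then pop the registers back and print them
+   together with the control, status and tag words */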
+#define TEST_ENV(env, save, restore)\
+{\
+    memset((env), 0xaa, sizeof(*(env)));\
+    for(i=0;i<5;i++)\
+        asm volatile ("fldl %0" : : "m" (dtab[i]));\
+    asm volatile (save " %0\n" : : "m" (*(env)));\
+    asm volatile (restore " %0\n": : "m" (*(env)));\
+    for(i=0;i<5;i++)\
+        asm volatile ("fstpl %0" : "=m" (rtab[i]));\
+    for(i=0;i<5;i++)\
+        printf("res[%d]=%f\n", i, rtab[i]);\
+    printf("fpuc=%04x fpus=%04x fptag=%04x\n",\
+           (env)->fpuc,\
+           (env)->fpus & 0xff00,\
+           (env)->fptag);\
+}
+
+void test_fenv(void)
+{
+    struct __attribute__((packed)) {
+        uint16_t fpuc;
+        uint16_t dummy1;
+        uint16_t fpus;
+        uint16_t dummy2;
+        uint16_t fptag;
+        uint16_t dummy3;
+        uint32_t ignored[4];
+        long double fpregs[8];
+    } float_env32;
+    struct __attribute__((packed)) {
+        uint16_t fpuc;
+        uint16_t fpus;
+        uint16_t fptag;
+        uint16_t ignored[4];
+        long double fpregs[8];
+    } float_env16;
+    double dtab[8];
+    double rtab[8];
+    int i;
+
+    for(i=0;i<8;i++)
+        dtab[i] = i + 1;
+
+    TEST_ENV(&float_env16, "data16 fnstenv", "data16 fldenv");
+    TEST_ENV(&float_env16, "data16 fnsave", "data16 frstor");
+    TEST_ENV(&float_env32, "fnstenv", "fldenv");
+    TEST_ENV(&float_env32, "fnsave", "frstor");
+
+    /* test for ffree */
+    for(i=0;i<5;i++)
+        asm volatile ("fldl %0" : : "m" (dtab[i]));
+    asm volatile("ffree %st(2)");
+    asm volatile ("fnstenv %0\n" : : "m" (float_env32));
+    asm volatile ("fninit");
+    printf("fptag=%04x\n", float_env32.fptag);
+}
+
+
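+/* Load the given EFLAGS via push/popf and execute fcmovCC: the result is
+   st(1) (b) when the condition holds, otherwise st(0) (a) is unchanged */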
+#define TEST_FCMOV(a, b, eflags, CC)\
+{\
+    double res;\
+    asm("push %3\n"\
+        "popf\n"\
+        "fcmov" CC " %2, %0\n"\
+        : "=t" (res)\
+        : "0" (a), "u" (b), "g" (eflags));\
+    printf("fcmov%s eflags=0x%04lx-> %f\n", \
+           CC, (long)eflags, res);\
+}
+
+void test_fcmov(void)
+{
+    double a, b;
+    long eflags, i;
+
+    a = 1.0;
+    b = 2.0;
+    for(i = 0; i < 4; i++) {
+        eflags = 0;
+        if (i & 1)
+            eflags |= CC_C;
+        if (i & 2)
+            eflags |= CC_Z;
+        TEST_FCMOV(a, b, eflags, "b");
+        TEST_FCMOV(a, b, eflags, "e");
+        TEST_FCMOV(a, b, eflags, "be");
+        TEST_FCMOV(a, b, eflags, "nb");
+        TEST_FCMOV(a, b, eflags, "ne");
+        TEST_FCMOV(a, b, eflags, "nbe");
+    }
+    TEST_FCMOV(a, b, 0, "u");
+    TEST_FCMOV(a, b, CC_P, "u");
+    TEST_FCMOV(a, b, 0, "nu");
+    TEST_FCMOV(a, b, CC_P, "nu");
+}
+
+void test_floats(void)
+{
+    test_fops(2, 3);
+    test_fops(1.4, -5);
+    test_fcmp(2, -1);
+    test_fcmp(2, 2);
+    test_fcmp(2, 3);
+    test_fcmp(2, q_nan.d);
+    test_fcmp(q_nan.d, -1);
+    test_fcmp(-1.0/0.0, -1);
+    test_fcmp(1.0/0.0, -1);
+    test_fcvt(0.5);
+    test_fcvt(-0.5);
+    test_fcvt(1.0/7.0);
+    test_fcvt(-1.0/9.0);
+    test_fcvt(32768);
+    test_fcvt(-1e20);
+    test_fcvt(-1.0/0.0);
+    test_fcvt(1.0/0.0);
+    test_fcvt(q_nan.d);
+    test_fconst();
+    test_fbcd(1234567890123456);
+    test_fbcd(-123451234567890);
+    test_fenv();
+    if (TEST_CMOV) {
+        test_fcmov();
+    }
+}
+
+/**********************************************/
+#if !defined(__x86_64__)
+
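+/* Run a BCD adjust instruction on EAX with the given input flags and
+   print the result and the masked output flags */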
+#define TEST_BCD(op, op0, cc_in, cc_mask)\
+{\
+    int res, flags;\
+    res = op0;\
+    flags = cc_in;\
+    asm ("push %3\n\t"\
+         "popf\n\t"\
+         #op "\n\t"\
+         "pushf\n\t"\
+         "pop %1\n\t"\
+        : "=a" (res), "=g" (flags)\
+        : "0" (res), "1" (flags));\
+    printf("%-10s A=%08x R=%08x CCIN=%04x CC=%04x\n",\
+           #op, op0, res, cc_in, flags & cc_mask);\
+}
+
+void test_bcd(void)
+{
+    TEST_BCD(daa, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(daa, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+
+    TEST_BCD(das, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+    TEST_BCD(das, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+
+    TEST_BCD(aaa, 0x12340205, CC_A, (CC_C | CC_A));
+    TEST_BCD(aaa, 0x12340306, CC_A, (CC_C | CC_A));
+    TEST_BCD(aaa, 0x1234040a, CC_A, (CC_C | CC_A));
+    TEST_BCD(aaa, 0x123405fa, CC_A, (CC_C | CC_A));
+    TEST_BCD(aaa, 0x12340205, 0, (CC_C | CC_A));
+    TEST_BCD(aaa, 0x12340306, 0, (CC_C | CC_A));
+    TEST_BCD(aaa, 0x1234040a, 0, (CC_C | CC_A));
+    TEST_BCD(aaa, 0x123405fa, 0, (CC_C | CC_A));
+    
+    TEST_BCD(aas, 0x12340205, CC_A, (CC_C | CC_A));
+    TEST_BCD(aas, 0x12340306, CC_A, (CC_C | CC_A));
+    TEST_BCD(aas, 0x1234040a, CC_A, (CC_C | CC_A));
+    TEST_BCD(aas, 0x123405fa, CC_A, (CC_C | CC_A));
+    TEST_BCD(aas, 0x12340205, 0, (CC_C | CC_A));
+    TEST_BCD(aas, 0x12340306, 0, (CC_C | CC_A));
+    TEST_BCD(aas, 0x1234040a, 0, (CC_C | CC_A));
+    TEST_BCD(aas, 0x123405fa, 0, (CC_C | CC_A));
+
+    TEST_BCD(aam, 0x12340547, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));
+    TEST_BCD(aad, 0x12340407, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));
+}
+#endif
+
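+/* TEST_XCHG exercises xchg/xadd with two known operands; TEST_CMPXCHG
+   additionally preloads EAX so that both the equal and not-equal
+   cmpxchg paths are covered.  The 'opconst' constraint ("=q" or "=m")
+   selects the register or memory form of the second operand */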
+#define TEST_XCHG(op, size, opconst)\
+{\
+    long op0, op1;\
+    op0 = i2l(0x12345678);\
+    op1 = i2l(0xfbca7654);\
+    asm(#op " %" size "0, %" size "1" \
+        : "=q" (op0), opconst (op1) \
+        : "0" (op0), "1" (op1));\
+    printf("%-10s A=" FMTLX " B=" FMTLX "\n",\
+           #op, op0, op1);\
+}
+
+#define TEST_CMPXCHG(op, size, opconst, eax)\
+{\
+    long op0, op1, op2;\
+    op0 = i2l(0x12345678);\
+    op1 = i2l(0xfbca7654);\
+    op2 = i2l(eax);\
+    asm(#op " %" size "0, %" size "1" \
+        : "=q" (op0), opconst (op1) \
+        : "0" (op0), "1" (op1), "a" (op2));\
+    printf("%-10s EAX=" FMTLX " A=" FMTLX " C=" FMTLX "\n",\
+           #op, op2, op0, op1);\
+}
+
+void test_xchg(void)
+{
+#if defined(__x86_64__)
+    TEST_XCHG(xchgq, "", "=q");
+#endif
+    TEST_XCHG(xchgl, "k", "=q");
+    TEST_XCHG(xchgw, "w", "=q");
+    TEST_XCHG(xchgb, "b", "=q");
+
+#if defined(__x86_64__)
+    TEST_XCHG(xchgq, "", "=m");
+#endif
+    TEST_XCHG(xchgl, "k", "=m");
+    TEST_XCHG(xchgw, "w", "=m");
+    TEST_XCHG(xchgb, "b", "=m");
+
+#if defined(__x86_64__)
+    TEST_XCHG(xaddq, "", "=q");
+#endif
+    TEST_XCHG(xaddl, "k", "=q");
+    TEST_XCHG(xaddw, "w", "=q");
+    TEST_XCHG(xaddb, "b", "=q");
+
+    {
+        int res;
+        res = 0x12345678;
+        asm("xaddl %1, %0" : "=r" (res) : "0" (res));
+        printf("xaddl same res=%08x\n", res);
+    }
+
+#if defined(__x86_64__)
+    TEST_XCHG(xaddq, "", "=m");
+#endif
+    TEST_XCHG(xaddl, "k", "=m");
+    TEST_XCHG(xaddw, "w", "=m");
+    TEST_XCHG(xaddb, "b", "=m");
+
+#if defined(__x86_64__)
+    TEST_CMPXCHG(cmpxchgq, "", "=q", 0xfbca7654);
+#endif
+    TEST_CMPXCHG(cmpxchgl, "k", "=q", 0xfbca7654);
+    TEST_CMPXCHG(cmpxchgw, "w", "=q", 0xfbca7654);
+    TEST_CMPXCHG(cmpxchgb, "b", "=q", 0xfbca7654);
+
+#if defined(__x86_64__)
+    TEST_CMPXCHG(cmpxchgq, "", "=q", 0xfffefdfc);
+#endif
+    TEST_CMPXCHG(cmpxchgl, "k", "=q", 0xfffefdfc);
+    TEST_CMPXCHG(cmpxchgw, "w", "=q", 0xfffefdfc);
+    TEST_CMPXCHG(cmpxchgb, "b", "=q", 0xfffefdfc);
+
+#if defined(__x86_64__)
+    TEST_CMPXCHG(cmpxchgq, "", "=m", 0xfbca7654);
+#endif
+    TEST_CMPXCHG(cmpxchgl, "k", "=m", 0xfbca7654);
+    TEST_CMPXCHG(cmpxchgw, "w", "=m", 0xfbca7654);
+    TEST_CMPXCHG(cmpxchgb, "b", "=m", 0xfbca7654);
+
+#if defined(__x86_64__)
+    TEST_CMPXCHG(cmpxchgq, "", "=m", 0xfffefdfc);
+#endif
+    TEST_CMPXCHG(cmpxchgl, "k", "=m", 0xfffefdfc);
+    TEST_CMPXCHG(cmpxchgw, "w", "=m", 0xfffefdfc);
+    TEST_CMPXCHG(cmpxchgb, "b", "=m", 0xfffefdfc);
+
+    {
+        uint64_t op0, op1, op2;
+        long i, eflags;
+
+        for(i = 0; i < 2; i++) {
+            op0 = 0x123456789abcd;
+            if (i == 0)
+                op1 = 0xfbca765423456;
+            else
+                op1 = op0;
+            op2 = 0x6532432432434;
+            asm("cmpxchg8b %1\n" 
+                "pushf\n"
+                "pop %2\n"
+                : "=A" (op0), "=m" (op1), "=g" (eflags)
+                : "0" (op0), "m" (op1), "b" ((int)op2), "c" ((int)(op2 >> 32)));
+            printf("cmpxchg8b: op0=" FMT64X " op1=" FMT64X " CC=%02lx\n", 
+                    op0, op1, eflags & CC_Z);
+        }
+    }
+}
+
+#ifdef TEST_SEGS
+/**********************************************/
+/* segmentation tests */
+
+#include <asm/ldt.h>
+#include <linux/unistd.h>
+#include <linux/version.h>
+
+_syscall3(int, modify_ldt, int, func, void *, ptr, unsigned long, bytecount)
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 66)
+#define modify_ldt_ldt_s user_desc
+#endif
+
+#define MK_SEL(n) (((n) << 3) | 7)
+
+uint8_t seg_data1[4096];
+uint8_t seg_data2[4096];
+
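+/* Load the access rights (lar) or segment limit (lsl) for 'seg' and
+   record in res2 whether ZF was set, i.e. whether the selector was
+   accepted; 'mask' strips result bits that may vary (presumably the
+   segment's accessed bit for lar) */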
+#define TEST_LR(op, size, seg, mask)\
+{\
+    int res, res2;\
+    res = 0x12345678;\
+    asm (op " %" size "2, %" size "0\n" \
+         "movl $0, %1\n"\
+         "jnz 1f\n"\
+         "movl $1, %1\n"\
+         "1:\n"\
+         : "=r" (res), "=r" (res2) : "m" (seg), "0" (res));\
+    printf(op ": Z=%d %08x\n", res2, res & ~(mask));\
+}
+
+/* NOTE: we use the Linux modify_ldt syscall */
+void test_segs(void)
+{
+    struct modify_ldt_ldt_s ldt;
+    long long ldt_table[3];
+    int res, res2;
+    char tmp;
+    struct {
+        uint32_t offset;
+        uint16_t seg;
+    } __attribute__((packed)) segoff;
+
+    ldt.entry_number = 1;
+    ldt.base_addr = (unsigned long)&seg_data1;
+    ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12;
+    ldt.seg_32bit = 1;
+    ldt.contents = MODIFY_LDT_CONTENTS_DATA;
+    ldt.read_exec_only = 0;
+    ldt.limit_in_pages = 1;
+    ldt.seg_not_present = 0;
+    ldt.useable = 1;
+    modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
+
+    ldt.entry_number = 2;
+    ldt.base_addr = (unsigned long)&seg_data2;
+    ldt.limit = (sizeof(seg_data2) + 0xfff) >> 12;
+    ldt.seg_32bit = 1;
+    ldt.contents = MODIFY_LDT_CONTENTS_DATA;
+    ldt.read_exec_only = 0;
+    ldt.limit_in_pages = 1;
+    ldt.seg_not_present = 0;
+    ldt.useable = 1;
+    modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
+
+    modify_ldt(0, &ldt_table, sizeof(ldt_table)); /* read ldt entries */
+#if 0
+    {
+        int i;
+        for(i=0;i<3;i++)
+            printf("%d: %016Lx\n", i, ldt_table[i]);
+    }
+#endif
+    /* do some tests with fs or gs */
+    asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1)));
+
+    seg_data1[1] = 0xaa;
+    seg_data2[1] = 0x55;
+
+    asm volatile ("fs movzbl 0x1, %0" : "=r" (res));
+    printf("FS[1] = %02x\n", res);
+
+    asm volatile ("pushl %%gs\n"
+                  "movl %1, %%gs\n"
+                  "gs movzbl 0x1, %0\n"
+                  "popl %%gs\n"
+                  : "=r" (res)
+                  : "r" (MK_SEL(2)));
+    printf("GS[1] = %02x\n", res);
+
+    /* tests with ds/ss (implicit segment case) */
+    tmp = 0xa5;
+    asm volatile ("pushl %%ebp\n\t"
+                  "pushl %%ds\n\t"
+                  "movl %2, %%ds\n\t"
+                  "movl %3, %%ebp\n\t"
+                  "movzbl 0x1, %0\n\t"
+                  "movzbl (%%ebp), %1\n\t"
+                  "popl %%ds\n\t"
+                  "popl %%ebp\n\t"
+                  : "=r" (res), "=r" (res2)
+                  : "r" (MK_SEL(1)), "r" (&tmp));
+    printf("DS[1] = %02x\n", res);
+    printf("SS[tmp] = %02x\n", res2);
+
+    segoff.seg = MK_SEL(2);
+    segoff.offset = 0xabcdef12;
+    asm volatile("lfs %2, %0\n\t" 
+                 "movl %%fs, %1\n\t"
+                 : "=r" (res), "=g" (res2) 
+                 : "m" (segoff));
+    printf("FS:reg = %04x:%08x\n", res2, res);
+
+    TEST_LR("larw", "w", MK_SEL(2), 0x0100);
+    TEST_LR("larl", "", MK_SEL(2), 0x0100);
+    TEST_LR("lslw", "w", MK_SEL(2), 0);
+    TEST_LR("lsll", "", MK_SEL(2), 0);
+
+    TEST_LR("larw", "w", 0xfff8, 0);
+    TEST_LR("larl", "", 0xfff8, 0);
+    TEST_LR("lslw", "w", 0xfff8, 0);
+    TEST_LR("lsll", "", 0xfff8, 0);
+}
+
+/* 16 bit code test */
+extern char code16_start, code16_end;
+extern char code16_func1;
+extern char code16_func2;
+extern char code16_func3;
+
+void test_code16(void)
+{
+    struct modify_ldt_ldt_s ldt;
+    int res, res2;
+
+    /* build a code segment */
+    ldt.entry_number = 1;
+    ldt.base_addr = (unsigned long)&code16_start;
+    ldt.limit = &code16_end - &code16_start;
+    ldt.seg_32bit = 0;
+    ldt.contents = MODIFY_LDT_CONTENTS_CODE;
+    ldt.read_exec_only = 0;
+    ldt.limit_in_pages = 0;
+    ldt.seg_not_present = 0;
+    ldt.useable = 1;
+    modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
+
+    /* call the first function */
+    asm volatile ("lcall %1, %2" 
+                  : "=a" (res)
+                  : "i" (MK_SEL(1)), "i" (&code16_func1): "memory", "cc");
+    printf("func1() = 0x%08x\n", res);
+    asm volatile ("lcall %2, %3" 
+                  : "=a" (res), "=c" (res2)
+                  : "i" (MK_SEL(1)), "i" (&code16_func2): "memory", "cc");
+    printf("func2() = 0x%08x spdec=%d\n", res, res2);
+    asm volatile ("lcall %1, %2" 
+                  : "=a" (res)
+                  : "i" (MK_SEL(1)), "i" (&code16_func3): "memory", "cc");
+    printf("func3() = 0x%08x\n", res);
+}
+#endif
+
+#if defined(__x86_64__)
+asm(".globl func_lret\n"
+    "func_lret:\n"
+    "movl $0x87654641, %eax\n"
+    "lretq\n");
+#else
+asm(".globl func_lret\n"
+    "func_lret:\n"
+    "movl $0x87654321, %eax\n"
+    "lret\n"
+
+    ".globl func_iret\n"
+    "func_iret:\n"
+    "movl $0xabcd4321, %eax\n"
+    "iret\n");
+#endif
+
+extern char func_lret;
+extern char func_iret;
+
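+/* Miscellaneous tests: xlat, far returns through the func_lret/func_iret
+   stubs above, and pop-to-(%esp) corner cases */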
+void test_misc(void)
+{
+    char table[256];
+    long res, i;
+
+    for(i=0;i<256;i++) table[i] = 256 - i;
+    res = 0x12345678;
+    asm ("xlat" : "=a" (res) : "b" (table), "0" (res));
+    printf("xlat: EAX=" FMTLX "\n", res);
+
+#if defined(__x86_64__)
+    {
+        static struct __attribute__((packed)) {
+            uint32_t offset;
+            uint16_t seg;
+        } desc;
+        long cs_sel;
+
+        asm volatile ("mov %%cs, %0" : "=r" (cs_sel));
+
+        asm volatile ("push %1\n"
+                      "call func_lret\n" 
+                      : "=a" (res)
+                      : "r" (cs_sel) : "memory", "cc");
+        printf("func_lret=" FMTLX "\n", res);
+
+        /* NOTE: we assume that &func_lret < 4GB */
+        desc.offset = (long)&func_lret;
+        desc.seg = cs_sel;
+        
+        asm volatile ("xor %%rax, %%rax\n"
+                      "rex64 lcall %1\n"
+                      : "=a" (res)
+                      : "m" (desc) 
+                      : "memory", "cc");
+        printf("func_lret2=" FMTLX "\n", res);
+
+        asm volatile ("push %2\n"
+                      "mov $ 1f, %%rax\n"
+                      "push %%rax\n"
+                      "ljmp %1\n"
+                      "1:\n"
+                      : "=a" (res)
+                      : "m" (desc), "b" (cs_sel)
+                      : "memory", "cc");
+        printf("func_lret3=" FMTLX "\n", res);
+    }
+#else
+    asm volatile ("push %%cs ; call %1" 
+                  : "=a" (res)
+                  : "m" (func_lret): "memory", "cc");
+    printf("func_lret=" FMTLX "\n", res);
+
+    asm volatile ("pushf ; push %%cs ; call %1" 
+                  : "=a" (res)
+                  : "m" (func_iret): "memory", "cc");
+    printf("func_iret=" FMTLX "\n", res);
+#endif
+
+#if defined(__x86_64__)
+    /* specific popl test */
+    asm volatile ("push $12345432 ; push $0x9abcdef ; pop (%%rsp) ; pop %0"
+                  : "=g" (res));
+    printf("popl esp=" FMTLX "\n", res);
+#else
+    /* specific popl test */
+    asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popl (%%esp) ; popl %0"
+                  : "=g" (res));
+    printf("popl esp=" FMTLX "\n", res);
+
+    /* specific popw test */
+    asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popw (%%esp) ; addl $2, %%esp ; popl %0"
+                  : "=g" (res));
+    printf("popw esp=" FMTLX "\n", res);
+#endif
+}
+
+uint8_t str_buffer[4096];
+
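+/* Run one string instruction (optionally with a rep prefix and with the
+   direction flag set via "std") on str_buffer and print the resulting
+   ESI/EDI/EAX/ECX and status flags */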
+#define TEST_STRING1(OP, size, DF, REP)\
+{\
+    long esi, edi, eax, ecx, eflags;\
+\
+    esi = (long)(str_buffer + sizeof(str_buffer) / 2);\
+    edi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\
+    eax = i2l(0x12345678);\
+    ecx = 17;\
+\
+    asm volatile ("push $0\n\t"\
+                  "popf\n\t"\
+                  DF "\n\t"\
+                  REP #OP size "\n\t"\
+                  "cld\n\t"\
+                  "pushf\n\t"\
+                  "pop %4\n\t"\
+                  : "=S" (esi), "=D" (edi), "=a" (eax), "=c" (ecx), "=g" (eflags)\
+                  : "0" (esi), "1" (edi), "2" (eax), "3" (ecx));\
+    printf("%-10s ESI=" FMTLX " EDI=" FMTLX " EAX=" FMTLX " ECX=" FMTLX " EFL=%04x\n",\
+           REP #OP size, esi, edi, eax, ecx,\
+           (int)(eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)));\
+}
+
+#define TEST_STRING(OP, REP)\
+    TEST_STRING1(OP, "b", "", REP);\
+    TEST_STRING1(OP, "w", "", REP);\
+    TEST_STRING1(OP, "l", "", REP);\
+    X86_64_ONLY(TEST_STRING1(OP, "q", "", REP));\
+    TEST_STRING1(OP, "b", "std", REP);\
+    TEST_STRING1(OP, "w", "std", REP);\
+    TEST_STRING1(OP, "l", "std", REP);\
+    X86_64_ONLY(TEST_STRING1(OP, "q", "std", REP))
+
+void test_string(void)
+{
+    int i;
+    for(i = 0;i < sizeof(str_buffer); i++)
+        str_buffer[i] = i + 0x56;
+   TEST_STRING(stos, "");
+   TEST_STRING(stos, "rep ");
+   TEST_STRING(lods, ""); /* to verify stos */
+   TEST_STRING(lods, "rep "); 
+   TEST_STRING(movs, "");
+   TEST_STRING(movs, "rep ");
+   TEST_STRING(lods, ""); /* to verify stos */
+
+   /* XXX: better tests */
+   TEST_STRING(scas, "");
+   TEST_STRING(scas, "repz ");
+   TEST_STRING(scas, "repnz ");
+   TEST_STRING(cmps, "");
+   TEST_STRING(cmps, "repz ");
+   TEST_STRING(cmps, "repnz ");
+}
+
+#ifdef TEST_VM86
+/* VM86 test */
+
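+/* Helpers: set_bit marks an entry in the int_revectored bitmap,
+   seg_to_linear converts a real-mode seg:offset pair to a linear address,
+   and pushw pushes a 16-bit value on the vm86 guest stack */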
+static inline void set_bit(uint8_t *a, unsigned int bit)
+{
+    a[bit / 8] |= (1 << (bit % 8));
+}
+
+static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg)
+{
+    return (uint8_t *)((seg << 4) + (reg & 0xffff));
+}
+
+static inline void pushw(struct vm86_regs *r, int val)
+{
+    r->esp = (r->esp & ~0xffff) | ((r->esp - 2) & 0xffff);
+    *(uint16_t *)seg_to_linear(r->ss, r->esp) = val;
+}
+
+#undef __syscall_return
+#define __syscall_return(type, res) \
+do { \
+	return (type) (res); \
+} while (0)
+
+_syscall2(int, vm86, int, func, struct vm86plus_struct *, v86)
+
+extern char vm86_code_start;
+extern char vm86_code_end;
+
+#define VM86_CODE_CS 0x100
+#define VM86_CODE_IP 0x100
+
+void test_vm86(void)
+{
+    struct vm86plus_struct ctx;
+    struct vm86_regs *r;
+    uint8_t *vm86_mem;
+    int seg, ret;
+
+    vm86_mem = mmap((void *)0x00000000, 0x110000, 
+                    PROT_WRITE | PROT_READ | PROT_EXEC, 
+                    MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+    if (vm86_mem == MAP_FAILED) {
+        printf("ERROR: could not map vm86 memory");
+        return;
+    }
+    memset(&ctx, 0, sizeof(ctx));
+
+    /* init basic registers */
+    r = &ctx.regs;
+    r->eip = VM86_CODE_IP;
+    r->esp = 0xfffe;
+    seg = VM86_CODE_CS;
+    r->cs = seg;
+    r->ss = seg;
+    r->ds = seg;
+    r->es = seg;
+    r->fs = seg;
+    r->gs = seg;
+    r->eflags = VIF_MASK;
+
+    /* move the code to its proper address. We use the same layout as a
+       .com DOS program. */
+    memcpy(vm86_mem + (VM86_CODE_CS << 4) + VM86_CODE_IP, 
+           &vm86_code_start, &vm86_code_end - &vm86_code_start);
+
+    /* mark int 0x21 as being emulated */
+    set_bit((uint8_t *)&ctx.int_revectored, 0x21);
+
+    for(;;) {
+        ret = vm86(VM86_ENTER, &ctx);
+        switch(VM86_TYPE(ret)) {
+        case VM86_INTx:
+            {
+                int int_num, ah, v;
+                
+                int_num = VM86_ARG(ret);
+                if (int_num != 0x21)
+                    goto unknown_int;
+                ah = (r->eax >> 8) & 0xff;
+                switch(ah) {
+                case 0x00: /* exit */
+                    goto the_end;
+                case 0x02: /* write char */
+                    {
+                        uint8_t c = r->edx;
+                        putchar(c);
+                    }
+                    break;
+                case 0x09: /* write string */
+                    {
+                        uint8_t c, *ptr;
+                        ptr = seg_to_linear(r->ds, r->edx);
+                        for(;;) {
+                            c = *ptr++;
+                            if (c == '$')
+                                break;
+                            putchar(c);
+                        }
+                        r->eax = (r->eax & ~0xff) | '$';
+                    }
+                    break;
+                case 0xff: /* extension: write eflags number in edx */
+                    v = (int)r->edx;
+#ifndef LINUX_VM86_IOPL_FIX
+                    v &= ~0x3000;
+#endif
+                    printf("%08x\n", v);
+                    break;
+                default:
+                unknown_int:
+                    printf("unsupported int 0x%02x\n", int_num);
+                    goto the_end;
+                }
+            }
+            break;
+        case VM86_SIGNAL:
+            /* a signal arrived; we just ignore it */
+            break;
+        case VM86_STI:
+            break;
+        default:
+            printf("ERROR: unhandled vm86 return code (0x%x)\n", ret);
+            goto the_end;
+        }
+    }
+ the_end:
+    printf("VM86 end\n");
+    munmap(vm86_mem, 0x110000);
+}
+#endif
+
+/* exception tests */
+#if defined(__i386__) && !defined(REG_EAX)
+#define REG_EAX EAX
+#define REG_EBX EBX
+#define REG_ECX ECX
+#define REG_EDX EDX
+#define REG_ESI ESI
+#define REG_EDI EDI
+#define REG_EBP EBP
+#define REG_ESP ESP
+#define REG_EIP EIP
+#define REG_EFL EFL
+#define REG_TRAPNO TRAPNO
+#define REG_ERR ERR
+#endif
+
+#if defined(__x86_64__)
+#define REG_EIP REG_RIP
+#endif
+
+jmp_buf jmp_env;
+int v1;
+int tab[2];
+
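+/* Signal handler used by the exception tests: print the siginfo fields
+   and the trap number, error code and faulting EIP from the saved
+   context, then longjmp back so the next test can run */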
+void sig_handler(int sig, siginfo_t *info, void *puc)
+{
+    struct ucontext *uc = puc;
+
+    printf("si_signo=%d si_errno=%d si_code=%d",
+           info->si_signo, info->si_errno, info->si_code);
+    printf(" si_addr=0x%08lx",
+           (unsigned long)info->si_addr);
+    printf("\n");
+
+    printf("trapno=" FMTLX " err=" FMTLX,
+           (long)uc->uc_mcontext.gregs[REG_TRAPNO],
+           (long)uc->uc_mcontext.gregs[REG_ERR]);
+    printf(" EIP=" FMTLX, (long)uc->uc_mcontext.gregs[REG_EIP]);
+    printf("\n");
+    longjmp(jmp_env, 1);
+}
+
+void test_exceptions(void)
+{
+    struct sigaction act;
+    volatile int val;
+    
+    act.sa_sigaction = sig_handler;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = SA_SIGINFO | SA_NODEFER;
+    sigaction(SIGFPE, &act, NULL);
+    sigaction(SIGILL, &act, NULL);
+    sigaction(SIGSEGV, &act, NULL);
+    sigaction(SIGBUS, &act, NULL);
+    sigaction(SIGTRAP, &act, NULL);
+
+    /* test division by zero reporting */
+    printf("DIVZ exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        /* now divide by zero */
+        v1 = 0;
+        v1 = 2 / v1;
+    }
+
+#if !defined(__x86_64__)
+    printf("BOUND exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        /* bound exception */
+        tab[0] = 1;
+        tab[1] = 10;
+        asm volatile ("bound %0, %1" : : "r" (11), "m" (tab[0]));
+    }
+#endif
+
+#ifdef TEST_SEGS
+    printf("segment exceptions:\n");
+    if (setjmp(jmp_env) == 0) {
+        /* load an invalid segment */
+        asm volatile ("movl %0, %%fs" : : "r" ((0x1234 << 3) | 1));
+    }
+    if (setjmp(jmp_env) == 0) {
+        /* null data segment is valid */
+        asm volatile ("movl %0, %%fs" : : "r" (3));
+        /* null stack segment */
+        asm volatile ("movl %0, %%ss" : : "r" (3));
+    }
+
+    {
+        struct modify_ldt_ldt_s ldt;
+        ldt.entry_number = 1;
+        ldt.base_addr = (unsigned long)&seg_data1;
+        ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12;
+        ldt.seg_32bit = 1;
+        ldt.contents = MODIFY_LDT_CONTENTS_DATA;
+        ldt.read_exec_only = 0;
+        ldt.limit_in_pages = 1;
+        ldt.seg_not_present = 1;
+        ldt.useable = 1;
+        modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
+        
+        if (setjmp(jmp_env) == 0) {
+            /* segment not present */
+            asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1)));
+        }
+    }
+#endif
+
+    /* test SEGV reporting */
+    printf("PF exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        val = 1;
+        /* we add a nop to test a weird PC retrieval case */
+        asm volatile ("nop");
+        /* now store in an invalid address */
+        *(char *)0x1234 = 1;
+    }
+
+    /* test SEGV reporting */
+    printf("PF exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        val = 1;
+        /* read from an invalid address */
+        v1 = *(char *)0x1234;
+    }
+    
+    /* test illegal instruction reporting */
+    printf("UD2 exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        /* now execute an invalid instruction */
+        asm volatile("ud2");
+    }
+    printf("lock nop exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        /* now execute an invalid instruction */
+        asm volatile("lock nop");
+    }
+    
+    printf("INT exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("int $0xfd");
+    }
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("int $0x01");
+    }
+    if (setjmp(jmp_env) == 0) {
+        asm volatile (".byte 0xcd, 0x03");
+    }
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("int $0x04");
+    }
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("int $0x05");
+    }
+
+    printf("INT3 exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("int3");
+    }
+
+    printf("CLI exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("cli");
+    }
+
+    printf("STI exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("sti");
+    }
+
+#if !defined(__x86_64__)
+    printf("INTO exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        /* overflow exception */
+        asm volatile ("addl $1, %0 ; into" : : "r" (0x7fffffff));
+    }
+#endif
+
+    printf("OUTB exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("outb %%al, %%dx" : : "d" (0x4321), "a" (0));
+    }
+
+    printf("INB exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("inb %%dx, %%al" : "=a" (val) : "d" (0x4321));
+    }
+
+    printf("REP OUTSB exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("rep outsb" : : "d" (0x4321), "S" (tab), "c" (1));
+    }
+
+    printf("REP INSB exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("rep insb" : : "d" (0x4321), "D" (tab), "c" (1));
+    }
+
+    printf("HLT exception:\n");
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("hlt");
+    }
+
+    printf("single step exception:\n");
+    val = 0;
+    if (setjmp(jmp_env) == 0) {
+        asm volatile ("pushf\n"
+                      "orl $0x00100, (%%esp)\n"
+                      "popf\n"
+                      "movl $0xabcd, %0\n" 
+                      "movl $0x0, %0\n" : "=m" (val) : : "cc", "memory");
+    }
+    printf("val=0x%x\n", val);
+}
+
+#if !defined(__x86_64__)
+/* specific precise single step test */
+void sig_trap_handler(int sig, siginfo_t *info, void *puc)
+{
+    struct ucontext *uc = puc;
+    printf("EIP=" FMTLX "\n", (long)uc->uc_mcontext.gregs[REG_EIP]);
+}
+
+const uint8_t sstep_buf1[4] = { 1, 2, 3, 4};
+uint8_t sstep_buf2[4];
+
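+/* Set TF in EFLAGS and run a fixed instruction sequence; the SIGTRAP
+   handler prints EIP at every step, which shows where the trap fires for
+   rep iterations, the int $0x80 syscall and the mov-to-SS cases */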
+void test_single_step(void)
+{
+    struct sigaction act;
+    volatile int val;
+    int i;
+
+    val = 0;
+    act.sa_sigaction = sig_trap_handler;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = SA_SIGINFO;
+    sigaction(SIGTRAP, &act, NULL);
+    asm volatile ("pushf\n"
+                  "orl $0x00100, (%%esp)\n"
+                  "popf\n"
+                  "movl $0xabcd, %0\n" 
+
+                  /* jmp test */
+                  "movl $3, %%ecx\n"
+                  "1:\n"
+                  "addl $1, %0\n"
+                  "decl %%ecx\n"
+                  "jnz 1b\n"
+
+                  /* movsb: the single step should stop at each movsb iteration */
+                  "movl $sstep_buf1, %%esi\n"
+                  "movl $sstep_buf2, %%edi\n"
+                  "movl $0, %%ecx\n"
+                  "rep movsb\n"
+                  "movl $3, %%ecx\n"
+                  "rep movsb\n"
+                  "movl $1, %%ecx\n"
+                  "rep movsb\n"
+
+                  /* cmpsb: the single step should stop at each cmpsb iteration */
+                  "movl $sstep_buf1, %%esi\n"
+                  "movl $sstep_buf2, %%edi\n"
+                  "movl $0, %%ecx\n"
+                  "rep cmpsb\n"
+                  "movl $4, %%ecx\n"
+                  "rep cmpsb\n"
+                  
+                  /* getpid() syscall: single step should skip one
+                     instruction */
+                  "movl $20, %%eax\n"
+                  "int $0x80\n"
+                  "movl $0, %%eax\n"
+                  
+                  /* when modifying SS, trace is not done on the next
+                     instruction */
+                  "movl %%ss, %%ecx\n"
+                  "movl %%ecx, %%ss\n"
+                  "addl $1, %0\n"
+                  "movl $1, %%eax\n"
+                  "movl %%ecx, %%ss\n"
+                  "jmp 1f\n"
+                  "addl $1, %0\n"
+                  "1:\n"
+                  "movl $1, %%eax\n"
+                  "pushl %%ecx\n"
+                  "popl %%ss\n"
+                  "addl $1, %0\n"
+                  "movl $1, %%eax\n"
+                  
+                  "pushf\n"
+                  "andl $~0x00100, (%%esp)\n"
+                  "popf\n"
+                  : "=m" (val) 
+                  : 
+                  : "cc", "memory", "eax", "ecx", "esi", "edi");
+    printf("val=%d\n", val);
+    for(i = 0; i < 4; i++)
+        printf("sstep_buf2[%d] = %d\n", i, sstep_buf2[i]);
+}
+
+/* self modifying code test */
+uint8_t code[] = {
+    0xb8, 0x1, 0x00, 0x00, 0x00, /* movl $1, %eax */
+    0xc3, /* ret */
+};
+
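+/* smc_code2 patches the immediate of the "movl $1, %eax" located a few
+   bytes after the store, then falls through and executes it */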
+asm("smc_code2:\n"
+    "movl 4(%esp), %eax\n"
+    "movl %eax, smc_patch_addr2 + 1\n"
+    "nop\n"
+    "nop\n"
+    "nop\n"
+    "nop\n"
+    "nop\n"
+    "nop\n"
+    "nop\n"
+    "nop\n"
+    "smc_patch_addr2:\n"
+    "movl $1, %eax\n"
+    "ret\n");
+
+typedef int FuncType(void);
+extern int smc_code2(int);
+void test_self_modifying_code(void)
+{
+    int i;
+
+    printf("self modifying code:\n");
+    printf("func1 = 0x%x\n", ((FuncType *)code)());
+    for(i = 2; i <= 4; i++) {
+        code[1] = i;
+        printf("func%d = 0x%x\n", i, ((FuncType *)code)());
+    }
+
+    /* more difficult test: the modified code lies just after the
+       modifying instruction. This is forbidden by the Intel specs, but
+       it is used by old DOS programs */
+    for(i = 2; i <= 4; i++) {
+        printf("smc_code2(%d) = %d\n", i, smc_code2(i));
+    }
+}
+#endif
+
+long enter_stack[4096];
+
+#if defined(__x86_64__)
+#define RSP "%%rsp"
+#define RBP "%%rbp"
+#else
+#define RSP "%%esp"
+#define RBP "%%ebp"
+#endif
+
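+/* Fill a scratch stack with known values, switch ESP/EBP to it, execute
+   "enter $8, $level" and dump the frame that the instruction built */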
+#define TEST_ENTER(size, stack_type, level)\
+{\
+    long esp_save, esp_val, ebp_val, ebp_save, i;\
+    stack_type *ptr, *stack_end, *stack_ptr;\
+    memset(enter_stack, 0, sizeof(enter_stack));\
+    stack_end = stack_ptr = (stack_type *)(enter_stack + 4096);\
+    ebp_val = (long)stack_ptr;\
+    for(i=1;i<=32;i++)\
+       *--stack_ptr = i;\
+    esp_val = (long)stack_ptr;\
+    asm("mov " RSP ", %[esp_save]\n"\
+        "mov " RBP ", %[ebp_save]\n"\
+        "mov %[esp_val], " RSP "\n"\
+        "mov %[ebp_val], " RBP "\n"\
+        "enter" size " $8, $" #level "\n"\
+        "mov " RSP ", %[esp_val]\n"\
+        "mov " RBP ", %[ebp_val]\n"\
+        "mov %[esp_save], " RSP "\n"\
+        "mov %[ebp_save], " RBP "\n"\
+        : [esp_save] "=r" (esp_save),\
+        [ebp_save] "=r" (ebp_save),\
+        [esp_val] "=r" (esp_val),\
+        [ebp_val] "=r" (ebp_val)\
+        :  "[esp_val]" (esp_val),\
+        "[ebp_val]" (ebp_val));\
+    printf("level=%d:\n", level);\
+    printf("esp_val=" FMTLX "\n", esp_val - (long)stack_end);\
+    printf("ebp_val=" FMTLX "\n", ebp_val - (long)stack_end);\
+    for(ptr = (stack_type *)esp_val; ptr < stack_end; ptr++)\
+        printf(FMTLX "\n", (long)ptr[0]);\
+}
+
+static void test_enter(void)
+{
+#if defined(__x86_64__)
+    TEST_ENTER("q", uint64_t, 0);
+    TEST_ENTER("q", uint64_t, 1);
+    TEST_ENTER("q", uint64_t, 2);
+    TEST_ENTER("q", uint64_t, 31);
+#else
+    TEST_ENTER("l", uint32_t, 0);
+    TEST_ENTER("l", uint32_t, 1);
+    TEST_ENTER("l", uint32_t, 2);
+    TEST_ENTER("l", uint32_t, 31);
+#endif
+
+    TEST_ENTER("w", uint16_t, 0);
+    TEST_ENTER("w", uint16_t, 1);
+    TEST_ENTER("w", uint16_t, 2);
+    TEST_ENTER("w", uint16_t, 31);
+}
+
+#ifdef TEST_SSE
+
+typedef int __m64 __attribute__ ((__mode__ (__V2SI__)));
+typedef int __m128 __attribute__ ((__mode__(__V4SF__)));
+
+typedef union {
+    double d[2];
+    float s[4];
+    uint32_t l[4];
+    uint64_t q[2];
+    __m128 dq;
+} XMMReg;
+
+static uint64_t __attribute__((aligned(16))) test_values[4][2] = {
+    { 0x456723c698694873, 0xdc515cff944a58ec },
+    { 0x1f297ccd58bad7ab, 0x41f21efba9e3e146 },
+    { 0x007c62c2085427f8, 0x231be9e8cde7438d },
+    { 0x0f76255a085427f8, 0xc233e9e8c4c9439a },
+};
+
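+/* SSE_OP applies a packed op to the xmm operands a and b and prints the
+   128-bit result; SSE_OP2 runs it over the test_values table, and
+   MMX_OP2 first does the same for the 64-bit MMX form */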
+#define SSE_OP(op)\
+{\
+    asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
+    printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
+           #op,\
+           a.q[1], a.q[0],\
+           b.q[1], b.q[0],\
+           r.q[1], r.q[0]);\
+}
+
+#define SSE_OP2(op)\
+{\
+    int i;\
+    for(i=0;i<2;i++) {\
+    a.q[0] = test_values[2*i][0];\
+    a.q[1] = test_values[2*i][1];\
+    b.q[0] = test_values[2*i+1][0];\
+    b.q[1] = test_values[2*i+1][1];\
+    SSE_OP(op);\
+    }\
+}
+
+#define MMX_OP2(op)\
+{\
+    int i;\
+    for(i=0;i<2;i++) {\
+    a.q[0] = test_values[2*i][0];\
+    b.q[0] = test_values[2*i+1][0];\
+    asm volatile (#op " %2, %0" : "=y" (r.q[0]) : "0" (a.q[0]), "y" (b.q[0]));\
+    printf("%-9s: a=" FMT64X " b=" FMT64X " r=" FMT64X "\n",\
+           #op,\
+           a.q[0],\
+           b.q[0],\
+           r.q[0]);\
+    }\
+    SSE_OP2(op);\
+}
+
+#define SHUF_OP(op, ib)\
+{\
+    a.q[0] = test_values[0][0];\
+    a.q[1] = test_values[0][1];\
+    b.q[0] = test_values[1][0];\
+    b.q[1] = test_values[1][1];\
+    asm volatile (#op " $" #ib ", %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
+    printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
+           #op,\
+           a.q[1], a.q[0],\
+           b.q[1], b.q[0],\
+           ib,\
+           r.q[1], r.q[0]);\
+}
+
+#define PSHUF_OP(op, ib)\
+{\
+    int i;\
+    for(i=0;i<2;i++) {\
+    a.q[0] = test_values[2*i][0];\
+    a.q[1] = test_values[2*i][1];\
+    asm volatile (#op " $" #ib ", %1, %0" : "=x" (r.dq) : "x" (a.dq));\
+    printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
+           #op,\
+           a.q[1], a.q[0],\
+           ib,\
+           r.q[1], r.q[0]);\
+    }\
+}
+
+#define SHIFT_IM(op, ib)\
+{\
+    int i;\
+    for(i=0;i<2;i++) {\
+    a.q[0] = test_values[2*i][0];\
+    a.q[1] = test_values[2*i][1];\
+    asm volatile (#op " $" #ib ", %0" : "=x" (r.dq) : "0" (a.dq));\
+    printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
+           #op,\
+           a.q[1], a.q[0],\
+           ib,\
+           r.q[1], r.q[0]);\
+    }\
+}
+
+#define SHIFT_OP(op, ib)\
+{\
+    int i;\
+    SHIFT_IM(op, ib);\
+    for(i=0;i<2;i++) {\
+    a.q[0] = test_values[2*i][0];\
+    a.q[1] = test_values[2*i][1];\
+    b.q[0] = ib;\
+    b.q[1] = 0;\
+    asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
+    printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
+           #op,\
+           a.q[1], a.q[0],\
+           b.q[1], b.q[0],\
+           r.q[1], r.q[0]);\
+    }\
+}
+
+#define MOVMSK(op)\
+{\
+    int i, reg;\
+    for(i=0;i<2;i++) {\
+    a.q[0] = test_values[2*i][0];\
+    a.q[1] = test_values[2*i][1];\
+    asm volatile (#op " %1, %0" : "=r" (reg) : "x" (a.dq));\
+    printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\
+           #op,\
+           a.q[1], a.q[0],\
+           reg);\
+    }\
+}
+
+#define SSE_OPS(a) \
+SSE_OP(a ## ps);\
+SSE_OP(a ## ss);
+
+#define SSE_OPD(a) \
+SSE_OP(a ## pd);\
+SSE_OP(a ## sd);
+
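+/* Compare scalar operands with (u)comiss/(u)comisd and print the
+   resulting EFLAGS condition bits */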
+#define SSE_COMI(op, field)\
+{\
+    unsigned int eflags;\
+    XMMReg a, b;\
+    a.field[0] = a1;\
+    b.field[0] = b1;\
+    asm volatile (#op " %2, %1\n"\
+        "pushf\n"\
+        "pop %0\n"\
+        : "=m" (eflags)\
+        : "x" (a.dq), "x" (b.dq));\
+    printf("%-9s: a=%f b=%f cc=%04x\n",\
+           #op, a1, b1,\
+           eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
+}
+
+void test_sse_comi(double a1, double b1)
+{
+    SSE_COMI(ucomiss, s);
+    SSE_COMI(ucomisd, d);
+    SSE_COMI(comiss, s);
+    SSE_COMI(comisd, d);
+}
+
+#define CVT_OP_XMM(op)\
+{\
+    asm volatile (#op " %1, %0" : "=x" (r.dq) : "x" (a.dq));\
+    printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
+           #op,\
+           a.q[1], a.q[0],\
+           r.q[1], r.q[0]);\
+}
+
+/* Force %xmm0 usage to avoid the case where both register indexes are 0,
+   to test instruction decoding more extensively */
+#define CVT_OP_XMM2MMX(op)\
+{\
+    asm volatile (#op " %1, %0" : "=y" (r.q[0]) : "x" (a.dq) \
+                  : "%xmm0");\
+    printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "\n",\
+           #op,\
+           a.q[1], a.q[0],\
+           r.q[0]);\
+}
+
+#define CVT_OP_MMX2XMM(op)\
+{\
+    asm volatile (#op " %1, %0" : "=x" (r.dq) : "y" (a.q[0]));\
+    printf("%-9s: a=" FMT64X " r=" FMT64X "" FMT64X "\n",\
+           #op,\
+           a.q[0],\
+           r.q[1], r.q[0]);\
+}
+
+#define CVT_OP_REG2XMM(op)\
+{\
+    asm volatile (#op " %1, %0" : "=x" (r.dq) : "r" (a.l[0]));\
+    printf("%-9s: a=%08x r=" FMT64X "" FMT64X "\n",\
+           #op,\
+           a.l[0],\
+           r.q[1], r.q[0]);\
+}
+
+#define CVT_OP_XMM2REG(op)\
+{\
+    asm volatile (#op " %1, %0" : "=r" (r.l[0]) : "x" (a.dq));\
+    printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\
+           #op,\
+           a.q[1], a.q[0],\
+           r.l[0]);\
+}
+
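+/* fpxstate mirrors the 512-byte FXSAVE area: legacy x87 state, MXCSR and
+   its mask, the x87 data registers and the XMM registers */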
+struct fpxstate {
+    uint16_t fpuc;
+    uint16_t fpus;
+    uint16_t fptag;
+    uint16_t fop;
+    uint32_t fpuip;
+    uint16_t cs_sel;
+    uint16_t dummy0;
+    uint32_t fpudp;
+    uint16_t ds_sel;
+    uint16_t dummy1;
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+    uint8_t fpregs1[8 * 16];
+    uint8_t xmm_regs[8 * 16];
+    uint8_t dummy2[224];
+};
+
+static struct fpxstate fpx_state __attribute__((aligned(16)));
+static struct fpxstate fpx_state2 __attribute__((aligned(16)));
+
+void test_fxsave(void)
+{
+    struct fpxstate *fp = &fpx_state;
+    struct fpxstate *fp2 = &fpx_state2;
+    int i, nb_xmm;
+    XMMReg a, b;
+    a.q[0] = test_values[0][0];
+    a.q[1] = test_values[0][1];
+    b.q[0] = test_values[1][0];
+    b.q[1] = test_values[1][1];
+
+    asm("movdqa %2, %%xmm0\n"
+        "movdqa %3, %%xmm7\n"
+#if defined(__x86_64__)
+        "movdqa %2, %%xmm15\n"
+#endif
+        " fld1\n"
+        " fldpi\n"
+        " fldln2\n"
+        " fxsave %0\n"
+        " fxrstor %0\n"
+        " fxsave %1\n"
+        " fninit\n"
+        : "=m" (*(uint32_t *)fp2), "=m" (*(uint32_t *)fp) 
+        : "m" (a), "m" (b));
+    printf("fpuc=%04x\n", fp->fpuc);
+    printf("fpus=%04x\n", fp->fpus);
+    printf("fptag=%04x\n", fp->fptag);
+    for(i = 0; i < 3; i++) {
+        printf("ST%d: " FMT64X " %04x\n",
+               i, 
+               *(uint64_t *)&fp->fpregs1[i * 16],
+               *(uint16_t *)&fp->fpregs1[i * 16 + 8]);
+    }
+    printf("mxcsr=%08x\n", fp->mxcsr & 0x1f80);
+#if defined(__x86_64__)
+    nb_xmm = 16;
+#else
+    nb_xmm = 8;
+#endif
+    for(i = 0; i < nb_xmm; i++) {
+        printf("xmm%d: " FMT64X "" FMT64X "\n",
+               i, 
+               *(uint64_t *)&fp->xmm_regs[i * 16],
+               *(uint64_t *)&fp->xmm_regs[i * 16 + 8]);
+    }
+}
+
+void test_sse(void)
+{
+    XMMReg r, a, b;
+    int i;
+
+    MMX_OP2(punpcklbw);
+    MMX_OP2(punpcklwd);
+    MMX_OP2(punpckldq);
+    MMX_OP2(packsswb);
+    MMX_OP2(pcmpgtb);
+    MMX_OP2(pcmpgtw);
+    MMX_OP2(pcmpgtd);
+    MMX_OP2(packuswb);
+    MMX_OP2(punpckhbw);
+    MMX_OP2(punpckhwd);
+    MMX_OP2(punpckhdq);
+    MMX_OP2(packssdw);
+    MMX_OP2(pcmpeqb);
+    MMX_OP2(pcmpeqw);
+    MMX_OP2(pcmpeqd);
+
+    MMX_OP2(paddq);
+    MMX_OP2(pmullw);
+    MMX_OP2(psubusb);
+    MMX_OP2(psubusw);
+    MMX_OP2(pminub);
+    MMX_OP2(pand);
+    MMX_OP2(paddusb);
+    MMX_OP2(paddusw);
+    MMX_OP2(pmaxub);
+    MMX_OP2(pandn);
+
+    MMX_OP2(pmulhuw);
+    MMX_OP2(pmulhw);
+    
+    MMX_OP2(psubsb);
+    MMX_OP2(psubsw);
+    MMX_OP2(pminsw);
+    MMX_OP2(por);
+    MMX_OP2(paddsb);
+    MMX_OP2(paddsw);
+    MMX_OP2(pmaxsw);
+    MMX_OP2(pxor);
+    MMX_OP2(pmuludq);
+    MMX_OP2(pmaddwd);
+    MMX_OP2(psadbw);
+    MMX_OP2(psubb);
+    MMX_OP2(psubw);
+    MMX_OP2(psubd);
+    MMX_OP2(psubq);
+    MMX_OP2(paddb);
+    MMX_OP2(paddw);
+    MMX_OP2(paddd);
+
+    MMX_OP2(pavgb);
+    MMX_OP2(pavgw);
+
+    asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678));
+    printf("%-9s: r=" FMT64X "\n", "pinsrw", r.q[0]);
+
+    asm volatile ("pinsrw $5, %1, %0" : "=x" (r.dq) : "r" (0x12345678));
+    printf("%-9s: r=" FMT64X "" FMT64X "\n", "pinsrw", r.q[1], r.q[0]);
+
+    a.q[0] = test_values[0][0];
+    a.q[1] = test_values[0][1];
+    asm volatile ("pextrw $1, %1, %0" : "=r" (r.l[0]) : "y" (a.q[0]));
+    printf("%-9s: r=%08x\n", "pextrw", r.l[0]);
+
+    asm volatile ("pextrw $5, %1, %0" : "=r" (r.l[0]) : "x" (a.dq));
+    printf("%-9s: r=%08x\n", "pextrw", r.l[0]);
+
+    asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "y" (a.q[0]));
+    printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]);
+    
+    asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "x" (a.dq));
+    printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]);
+
+    {
+        r.q[0] = -1;
+        r.q[1] = -1;
+
+        a.q[0] = test_values[0][0];
+        a.q[1] = test_values[0][1];
+        b.q[0] = test_values[1][0];
+        b.q[1] = test_values[1][1];
+        asm volatile("maskmovq %1, %0" : 
+                     : "y" (a.q[0]), "y" (b.q[0]), "D" (&r)
+                     : "memory"); 
+        printf("%-9s: r=" FMT64X " a=" FMT64X " b=" FMT64X "\n", 
+               "maskmov", 
+               r.q[0], 
+               a.q[0], 
+               b.q[0]);
+        asm volatile("maskmovdqu %1, %0" : 
+                     : "x" (a.dq), "x" (b.dq), "D" (&r)
+                     : "memory"); 
+        printf("%-9s: r=" FMT64X "" FMT64X " a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X "\n", 
+               "maskmov", 
+               r.q[1], r.q[0], 
+               a.q[1], a.q[0], 
+               b.q[1], b.q[0]);
+    }
+
+    asm volatile ("emms");
+
+    SSE_OP2(punpcklqdq);
+    SSE_OP2(punpckhqdq);
+    SSE_OP2(andps);
+    SSE_OP2(andpd);
+    SSE_OP2(andnps);
+    SSE_OP2(andnpd);
+    SSE_OP2(orps);
+    SSE_OP2(orpd);
+    SSE_OP2(xorps);
+    SSE_OP2(xorpd);
+
+    SSE_OP2(unpcklps);
+    SSE_OP2(unpcklpd);
+    SSE_OP2(unpckhps);
+    SSE_OP2(unpckhpd);
+
+    SHUF_OP(shufps, 0x78);
+    SHUF_OP(shufpd, 0x02);
+
+    PSHUF_OP(pshufd, 0x78);
+    PSHUF_OP(pshuflw, 0x78);
+    PSHUF_OP(pshufhw, 0x78);
+
+    SHIFT_OP(psrlw, 7);
+    SHIFT_OP(psrlw, 16);
+    SHIFT_OP(psraw, 7);
+    SHIFT_OP(psraw, 16);
+    SHIFT_OP(psllw, 7);
+    SHIFT_OP(psllw, 16);
+
+    SHIFT_OP(psrld, 7);
+    SHIFT_OP(psrld, 32);
+    SHIFT_OP(psrad, 7);
+    SHIFT_OP(psrad, 32);
+    SHIFT_OP(pslld, 7);
+    SHIFT_OP(pslld, 32);
+
+    SHIFT_OP(psrlq, 7);
+    SHIFT_OP(psrlq, 32);
+    SHIFT_OP(psllq, 7);
+    SHIFT_OP(psllq, 32);
+
+    SHIFT_IM(psrldq, 16);
+    SHIFT_IM(psrldq, 7);
+    SHIFT_IM(pslldq, 16);
+    SHIFT_IM(pslldq, 7);
+
+    MOVMSK(movmskps);
+    MOVMSK(movmskpd);
+
+    /* FPU specific ops */
+
+    {
+        uint32_t mxcsr;
+        asm volatile("stmxcsr %0" : "=m" (mxcsr));
+        printf("mxcsr=%08x\n", mxcsr & 0x1f80);
+        asm volatile("ldmxcsr %0" : : "m" (mxcsr));
+    }
+
+    test_sse_comi(2, -1);
+    test_sse_comi(2, 2);
+    test_sse_comi(2, 3);
+    test_sse_comi(2, q_nan.d);
+    test_sse_comi(q_nan.d, -1);
+
+    for(i = 0; i < 2; i++) {
+        a.s[0] = 2.7;
+        a.s[1] = 3.4;
+        a.s[2] = 4;
+        a.s[3] = -6.3;
+        b.s[0] = 45.7;
+        b.s[1] = 353.4;
+        b.s[2] = 4;
+        b.s[3] = 56.3;
+        if (i == 1) {
+            a.s[0] = q_nan.d;
+            b.s[3] = q_nan.d;
+        }
+
+        SSE_OPS(add);
+        SSE_OPS(mul);
+        SSE_OPS(sub);
+        SSE_OPS(min);
+        SSE_OPS(div);
+        SSE_OPS(max);
+        SSE_OPS(sqrt);
+        SSE_OPS(cmpeq);
+        SSE_OPS(cmplt);
+        SSE_OPS(cmple);
+        SSE_OPS(cmpunord);
+        SSE_OPS(cmpneq);
+        SSE_OPS(cmpnlt);
+        SSE_OPS(cmpnle);
+        SSE_OPS(cmpord);
+        
+        
+        a.d[0] = 2.7;
+        a.d[1] = -3.4;
+        b.d[0] = 45.7;
+        b.d[1] = -53.4;
+        if (i == 1) {
+            a.d[0] = q_nan.d;
+            b.d[1] = q_nan.d;
+        }
+        SSE_OPD(add);
+        SSE_OPD(mul);
+        SSE_OPD(sub);
+        SSE_OPD(min);
+        SSE_OPD(div);
+        SSE_OPD(max);
+        SSE_OPD(sqrt);
+        SSE_OPD(cmpeq);
+        SSE_OPD(cmplt);
+        SSE_OPD(cmple);
+        SSE_OPD(cmpunord);
+        SSE_OPD(cmpneq);
+        SSE_OPD(cmpnlt);
+        SSE_OPD(cmpnle);
+        SSE_OPD(cmpord);
+    }
+
+    /* float to float/int */
+    a.s[0] = 2.7;
+    a.s[1] = 3.4;
+    a.s[2] = 4;
+    a.s[3] = -6.3;
+    CVT_OP_XMM(cvtps2pd);
+    CVT_OP_XMM(cvtss2sd);
+    CVT_OP_XMM2MMX(cvtps2pi);
+    CVT_OP_XMM2MMX(cvttps2pi);
+    CVT_OP_XMM2REG(cvtss2si);
+    CVT_OP_XMM2REG(cvttss2si);
+    CVT_OP_XMM(cvtps2dq);
+    CVT_OP_XMM(cvttps2dq);
+
+    a.d[0] = 2.6;
+    a.d[1] = -3.4;
+    CVT_OP_XMM(cvtpd2ps);
+    CVT_OP_XMM(cvtsd2ss);
+    CVT_OP_XMM2MMX(cvtpd2pi);
+    CVT_OP_XMM2MMX(cvttpd2pi);
+    CVT_OP_XMM2REG(cvtsd2si);
+    CVT_OP_XMM2REG(cvttsd2si);
+    CVT_OP_XMM(cvtpd2dq);
+    CVT_OP_XMM(cvttpd2dq);
+
+    /* sse/mmx moves */
+    CVT_OP_XMM2MMX(movdq2q);
+    CVT_OP_MMX2XMM(movq2dq);
+
+    /* int to float */
+    a.l[0] = -6;
+    a.l[1] = 2;
+    a.l[2] = 100;
+    a.l[3] = -60000;
+    CVT_OP_MMX2XMM(cvtpi2ps);
+    CVT_OP_MMX2XMM(cvtpi2pd);
+    CVT_OP_REG2XMM(cvtsi2ss);
+    CVT_OP_REG2XMM(cvtsi2sd);
+    CVT_OP_XMM(cvtdq2ps);
+    CVT_OP_XMM(cvtdq2pd);
+
+    /* XXX: test PNI insns */
+#if 0
+    SSE_OP2(movshdup);
+#endif
+    asm volatile ("emms");
+}
+
+#endif
+
+extern void *__start_initcall;
+extern void *__stop_initcall;
+
+
+int main(int argc, char **argv)
+{
+    void **ptr;
+    void (*func)(void);
+
+    ptr = &__start_initcall;
+    while (ptr != &__stop_initcall) {
+        func = *ptr++;
+        func();
+    }
+    test_bsx();
+    test_mul();
+    test_jcc();
+    test_floats();
+#if !defined(__x86_64__)
+    test_bcd();
+#endif
+    test_xchg();
+    test_string();
+    test_misc();
+    test_lea();
+#ifdef TEST_SEGS
+    test_segs();
+    test_code16();
+#endif
+#ifdef TEST_VM86
+    test_vm86();
+#endif
+    test_exceptions();
+#if !defined(__x86_64__)
+    test_self_modifying_code();
+    test_single_step();
+#endif
+    test_enter();
+#ifdef TEST_SSE
+    test_sse();
+    test_fxsave();
+#endif
+    return 0;
+}
Index: /trunk/src/recompiler_new/tests/test-i386.h
===================================================================
--- /trunk/src/recompiler_new/tests/test-i386.h	(revision 13168)
+++ /trunk/src/recompiler_new/tests/test-i386.h	(revision 13168)
@@ -0,0 +1,152 @@
+
+#define exec_op glue(exec_, OP)
+#define exec_opq glue(glue(exec_, OP), q)
+#define exec_opl glue(glue(exec_, OP), l)
+#define exec_opw glue(glue(exec_, OP), w)
+#define exec_opb glue(glue(exec_, OP), b)
+
+#define EXECOP2(size, rsize, res, s1, flags) \
+    asm ("push %4\n\t"\
+         "popf\n\t"\
+         stringify(OP) size " %" rsize "2, %" rsize "0\n\t" \
+         "pushf\n\t"\
+         "pop %1\n\t"\
+         : "=q" (res), "=g" (flags)\
+         : "q" (s1), "0" (res), "1" (flags)); \
+    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", \
+           stringify(OP) size, s0, s1, res, iflags, flags & CC_MASK);
+
+#define EXECOP1(size, rsize, res, flags) \
+    asm ("push %3\n\t"\
+         "popf\n\t"\
+         stringify(OP) size " %" rsize "0\n\t" \
+         "pushf\n\t"\
+         "pop %1\n\t"\
+         : "=q" (res), "=g" (flags)\
+         : "0" (res), "1" (flags)); \
+    printf("%-10s A=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", \
+           stringify(OP) size, s0, res, iflags, flags & CC_MASK);
+
+#ifdef OP1
+#if defined(__x86_64__)
+void exec_opq(long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECOP1("q", "", res, flags);
+}
+#endif
+
+void exec_opl(long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECOP1("l", "k", res, flags);
+}
+
+void exec_opw(long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECOP1("w", "w", res, flags);
+}
+
+void exec_opb(long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECOP1("b", "b", res, flags);
+}
+#else
+#if defined(__x86_64__)
+void exec_opq(long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECOP2("q", "", res, s1, flags);
+}
+#endif
+
+void exec_opl(long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECOP2("l", "k", res, s1, flags);
+}
+
+void exec_opw(long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECOP2("w", "w", res, s1, flags);
+}
+
+void exec_opb(long s0, long s1, long iflags)
+{
+    long res, flags;
+    res = s0;
+    flags = iflags;
+    EXECOP2("b", "b", res, s1, flags);
+}
+#endif
+
+void exec_op(long s0, long s1)
+{
+    s0 = i2l(s0);
+    s1 = i2l(s1);
+#if defined(__x86_64__)
+    exec_opq(s0, s1, 0);
+#endif
+    exec_opl(s0, s1, 0);
+    exec_opw(s0, s1, 0);
+    exec_opb(s0, s1, 0);
+#ifdef OP_CC
+#if defined(__x86_64__)
+    exec_opq(s0, s1, CC_C);
+#endif
+    exec_opl(s0, s1, CC_C);
+    exec_opw(s0, s1, CC_C);
+    exec_opb(s0, s1, CC_C);
+#endif
+}
+
+void glue(test_, OP)(void)
+{
+    exec_op(0x12345678, 0x812FADA);
+    exec_op(0x12341, 0x12341);
+    exec_op(0x12341, -0x12341);
+    exec_op(0xffffffff, 0);
+    exec_op(0xffffffff, -1);
+    exec_op(0xffffffff, 1);
+    exec_op(0xffffffff, 2);
+    exec_op(0x7fffffff, 0);
+    exec_op(0x7fffffff, 1);
+    exec_op(0x7fffffff, -1);
+    exec_op(0x80000000, -1);
+    exec_op(0x80000000, 1);
+    exec_op(0x80000000, -2);
+    exec_op(0x12347fff, 0);
+    exec_op(0x12347fff, 1);
+    exec_op(0x12347fff, -1);
+    exec_op(0x12348000, -1);
+    exec_op(0x12348000, 1);
+    exec_op(0x12348000, -2);
+    exec_op(0x12347f7f, 0);
+    exec_op(0x12347f7f, 1);
+    exec_op(0x12347f7f, -1);
+    exec_op(0x12348080, -1);
+    exec_op(0x12348080, 1);
+    exec_op(0x12348080, -2);
+}
+
+void *glue(_test_, OP) __init_call = glue(test_, OP);
+
+#undef OP
+#undef OP_CC
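+
+/* Illustrative usage sketch (not part of the original header): this file is
+   meant to be included once per instruction under test by the including test
+   program, which is assumed to provide glue(), stringify(), i2l(), FMTLX,
+   __init_call and the CC_* constants.  Defining OP selects the instruction,
+   OP1 switches to the one-operand form and OP_CC also runs the carry-in
+   variants; "add" and "adc" below are example names only.
+
+       #define OP add
+       #include "test-i386.h"
+
+       #define OP adc
+       #define OP_CC
+       #include "test-i386.h"
+*/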
Index: /trunk/src/recompiler_new/tests/test_path.c
===================================================================
--- /trunk/src/recompiler_new/tests/test_path.c	(revision 13168)
+++ /trunk/src/recompiler_new/tests/test_path.c	(revision 13168)
@@ -0,0 +1,152 @@
+/* Test path override code */
+#define _GNU_SOURCE
+#include "../path.c"
+#include <stdarg.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+/* Any log message kills the test. */
+void gemu_log(const char *fmt, ...)
+{
+    va_list ap;
+
+    fprintf(stderr, "FATAL: ");
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+    exit(1);
+}
+
+#define NO_CHANGE(_path)						\
+	do {								\
+	    if (strcmp(path(_path), _path) != 0) return __LINE__;	\
+	} while(0)
+
+#define CHANGE_TO(_path, _newpath)					\
+	do {								\
+	    if (strcmp(path(_path), _newpath) != 0) return __LINE__;	\
+	} while(0)
+
+static void cleanup(void)
+{
+    unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE");
+    unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE2");
+    unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE3");
+    unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE4");
+    unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE5");
+    rmdir("/tmp/qemu-test_path/DIR1/DIR2");
+    rmdir("/tmp/qemu-test_path/DIR1/DIR3");
+    rmdir("/tmp/qemu-test_path/DIR1");
+    rmdir("/tmp/qemu-test_path");
+}
+
+static unsigned int do_test(void)
+{
+    if (mkdir("/tmp/qemu-test_path", 0700) != 0)
+	return __LINE__;
+
+    if (mkdir("/tmp/qemu-test_path/DIR1", 0700) != 0)
+	return __LINE__;
+
+    if (mkdir("/tmp/qemu-test_path/DIR1/DIR2", 0700) != 0)
+	return __LINE__;
+
+    if (mkdir("/tmp/qemu-test_path/DIR1/DIR3", 0700) != 0)
+	return __LINE__;
+
+    if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE", 0600)) != 0)
+	return __LINE__;
+
+    if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE2", 0600)) != 0)
+	return __LINE__;
+
+    if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE3", 0600)) != 0)
+	return __LINE__;
+
+    if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE4", 0600)) != 0)
+	return __LINE__;
+
+    if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE5", 0600)) != 0)
+	return __LINE__;
+
+    init_paths("/tmp/qemu-test_path");
+
+    NO_CHANGE("/tmp");
+    NO_CHANGE("/tmp/");
+    NO_CHANGE("/tmp/qemu-test_path");
+    NO_CHANGE("/tmp/qemu-test_path/");
+    NO_CHANGE("/tmp/qemu-test_path/D");
+    NO_CHANGE("/tmp/qemu-test_path/DI");
+    NO_CHANGE("/tmp/qemu-test_path/DIR");
+    NO_CHANGE("/tmp/qemu-test_path/DIR1");
+    NO_CHANGE("/tmp/qemu-test_path/DIR1/");
+
+    NO_CHANGE("/D");
+    NO_CHANGE("/DI");
+    NO_CHANGE("/DIR");
+    NO_CHANGE("/DIR2");
+    NO_CHANGE("/DIR1.");
+
+    CHANGE_TO("/DIR1", "/tmp/qemu-test_path/DIR1");
+    CHANGE_TO("/DIR1/", "/tmp/qemu-test_path/DIR1");
+
+    NO_CHANGE("/DIR1/D");
+    NO_CHANGE("/DIR1/DI");
+    NO_CHANGE("/DIR1/DIR");
+    NO_CHANGE("/DIR1/DIR1");
+
+    CHANGE_TO("/DIR1/DIR2", "/tmp/qemu-test_path/DIR1/DIR2");
+    CHANGE_TO("/DIR1/DIR2/", "/tmp/qemu-test_path/DIR1/DIR2");
+
+    CHANGE_TO("/DIR1/DIR3", "/tmp/qemu-test_path/DIR1/DIR3");
+    CHANGE_TO("/DIR1/DIR3/", "/tmp/qemu-test_path/DIR1/DIR3");
+
+    NO_CHANGE("/DIR1/DIR2/F");
+    NO_CHANGE("/DIR1/DIR2/FI");
+    NO_CHANGE("/DIR1/DIR2/FIL");
+    NO_CHANGE("/DIR1/DIR2/FIL.");
+
+    CHANGE_TO("/DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+    CHANGE_TO("/DIR1/DIR2/FILE2", "/tmp/qemu-test_path/DIR1/DIR2/FILE2");
+    CHANGE_TO("/DIR1/DIR2/FILE3", "/tmp/qemu-test_path/DIR1/DIR2/FILE3");
+    CHANGE_TO("/DIR1/DIR2/FILE4", "/tmp/qemu-test_path/DIR1/DIR2/FILE4");
+    CHANGE_TO("/DIR1/DIR2/FILE5", "/tmp/qemu-test_path/DIR1/DIR2/FILE5");
+
+    NO_CHANGE("/DIR1/DIR2/FILE6");
+    NO_CHANGE("/DIR1/DIR2/FILE/X");
+
+    CHANGE_TO("/DIR1/../DIR1", "/tmp/qemu-test_path/DIR1");
+    CHANGE_TO("/DIR1/../DIR1/", "/tmp/qemu-test_path/DIR1");
+    CHANGE_TO("/../DIR1", "/tmp/qemu-test_path/DIR1");
+    CHANGE_TO("/../DIR1/", "/tmp/qemu-test_path/DIR1");
+    CHANGE_TO("/DIR1/DIR2/../DIR2", "/tmp/qemu-test_path/DIR1/DIR2");
+    CHANGE_TO("/DIR1/DIR2/../DIR2/../../DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+    CHANGE_TO("/DIR1/DIR2/../DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+
+    NO_CHANGE("/DIR1/DIR2/../DIR1");
+    NO_CHANGE("/DIR1/DIR2/../FILE");
+
+    CHANGE_TO("/./DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+    CHANGE_TO("/././DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+    CHANGE_TO("/DIR1/./DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+    CHANGE_TO("/DIR1/././DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+    CHANGE_TO("/DIR1/DIR2/./FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+    CHANGE_TO("/DIR1/DIR2/././FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+    CHANGE_TO("/./DIR1/./DIR2/./FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+
+    return 0;
+}
+
+int main(int argc, char *argv[])
+{
+    int ret;
+
+    ret = do_test();
+    cleanup();
+    if (ret) {
+	fprintf(stderr, "test_path: failed on line %i\n", ret);
+	return 1;
+    }
+    return 0;
+}
+	
Index: /trunk/src/recompiler_new/tests/testthread.c
===================================================================
--- /trunk/src/recompiler_new/tests/testthread.c	(revision 13168)
+++ /trunk/src/recompiler_new/tests/testthread.c	(revision 13168)
@@ -0,0 +1,51 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <sys/wait.h>
+#include <sched.h>
+
+void *thread1_func(void *arg)
+{
+    int i;
+    char buf[512];
+
+    for(i=0;i<10;i++) {
+        snprintf(buf, sizeof(buf), "thread1: %d %s\n", i, (char *)arg);
+        write(1, buf, strlen(buf));
+        usleep(100 * 1000);
+    }
+    return NULL;
+}
+
+void *thread2_func(void *arg)
+{
+    int i;
+    char buf[512];
+    for(i=0;i<20;i++) {
+        snprintf(buf, sizeof(buf), "thread2: %d %s\n", i, (char *)arg);
+        write(1, buf, strlen(buf));
+        usleep(150 * 1000);
+    }
+    return NULL;
+}
+
+void test_pthread(void)
+{
+    pthread_t tid1, tid2;
+
+    pthread_create(&tid1, NULL, thread1_func, "hello1");
+    pthread_create(&tid2, NULL, thread2_func, "hello2");
+    pthread_join(tid1, NULL);
+    pthread_join(tid2, NULL);
+    printf("End of pthread test.\n");
+}
+
+int main(int argc, char **argv)
+{
+    test_pthread();
+    return 0;
+}
Index: /trunk/src/recompiler_new/translate-all.c
===================================================================
--- /trunk/src/recompiler_new/translate-all.c	(revision 13168)
+++ /trunk/src/recompiler_new/translate-all.c	(revision 13168)
@@ -0,0 +1,336 @@
+/*
+ *  Host code generation
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "config.h"
+
+#define NO_CPU_IO_DEFS
+#include "cpu.h"
+#include "exec-all.h"
+#include "disas.h"
+
+extern int dyngen_code(uint8_t *gen_code_buf,
+                       uint16_t *label_offsets, uint16_t *jmp_offsets,
+                       const uint16_t *opc_buf, const uint32_t *opparam_buf, const long *gen_labels);
+
+enum {
+#define DEF(s, n, copy_size) INDEX_op_ ## s,
+#include "opc.h"
+#undef DEF
+    NB_OPS,
+};
+
+uint16_t gen_opc_buf[OPC_BUF_SIZE];
+uint32_t gen_opparam_buf[OPPARAM_BUF_SIZE];
+long gen_labels[OPC_BUF_SIZE];
+int nb_gen_labels;
+
+target_ulong gen_opc_pc[OPC_BUF_SIZE];
+uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+#if defined(TARGET_I386)
+uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
+#elif defined(TARGET_SPARC)
+target_ulong gen_opc_npc[OPC_BUF_SIZE];
+target_ulong gen_opc_jump_pc[2];
+#elif defined(TARGET_MIPS)
+uint32_t gen_opc_hflags[OPC_BUF_SIZE];
+#endif
+
+int code_copy_enabled = 1;
+
+#ifdef DEBUG_DISAS
+static const char *op_str[] = {
+#define DEF(s, n, copy_size) #s,
+#include "opc.h"
+#undef DEF
+};
+
+static uint8_t op_nb_args[] = {
+#define DEF(s, n, copy_size) n,
+#include "opc.h"
+#undef DEF
+};
+#endif /* bird: opc_copy_size is used outside DEBUG_DISAS and VBOX isn't necessarily defining DEBUG_DISAS presently. */
+
+static const unsigned short opc_copy_size[] = {
+#define DEF(s, n, copy_size) copy_size,
+#include "opc.h"
+#undef DEF
+};
+
+#ifdef DEBUG_DISAS /* bird: see previous bird comment. */
+void dump_ops(const uint16_t *opc_buf, const uint32_t *opparam_buf)
+{
+    const uint16_t *opc_ptr;
+    const uint32_t *opparam_ptr;
+    int c, n, i;
+
+    opc_ptr = opc_buf;
+    opparam_ptr = opparam_buf;
+    for(;;) {
+        c = *opc_ptr++;
+        n = op_nb_args[c];
+        fprintf(logfile, "0x%04x: %s", 
+                (int)(opc_ptr - opc_buf - 1), op_str[c]);
+        for(i = 0; i < n; i++) {
+            fprintf(logfile, " 0x%x", opparam_ptr[i]);
+        }
+        fprintf(logfile, "\n");
+        if (c == INDEX_op_end)
+            break;
+        opparam_ptr += n;
+    }
+}
+
+#endif
+
+/* compute label info */
+static void dyngen_labels(long *gen_labels, int nb_gen_labels,
+                          uint8_t *gen_code_buf, const uint16_t *opc_buf)
+{
+    uint8_t *gen_code_ptr;
+    int c, i;
+    unsigned long gen_code_addr[OPC_BUF_SIZE];
+    
+    if (nb_gen_labels == 0)
+        return;
+    /* compute the address of each op code */
+    
+    gen_code_ptr = gen_code_buf;
+    i = 0;
+    for(;;) {
+        c = opc_buf[i];
+        gen_code_addr[i] =(unsigned long)gen_code_ptr;
+        if (c == INDEX_op_end)
+            break;
+        gen_code_ptr += opc_copy_size[c];
+        i++;
+    }
+    
+    /* compute the address of each label */
+    for(i = 0; i < nb_gen_labels; i++) {
+        gen_labels[i] = gen_code_addr[gen_labels[i]];
+    }
+}
+
+/* Return non-zero if the very first instruction is invalid, so that
+   the virtual CPU can trigger an exception.
+
+   '*gen_code_size_ptr' receives the size of the generated (host)
+   code.
+*/
+int cpu_gen_code(CPUState *env, TranslationBlock *tb,
+                 int max_code_size, int *gen_code_size_ptr)
+{
+    uint8_t *gen_code_buf;
+    int gen_code_size;
+
+#ifdef USE_CODE_COPY
+    if (code_copy_enabled &&
+        cpu_gen_code_copy(env, tb, max_code_size, &gen_code_size) == 0) {
+        /* nothing more to do */
+    } else
+#endif
+    {
+#ifdef VBOX
+        RAWEx_ProfileStart(env, STATS_QEMU_COMPILATION);
+        if (gen_intermediate_code(env, tb) < 0)
+        {
+            RAWEx_ProfileStop(env, STATS_QEMU_COMPILATION);
+            return -1;
+        }
+#else /* !VBOX */
+        if (gen_intermediate_code(env, tb) < 0)
+            return -1;
+#endif /* !VBOX */
+
+        /* generate machine code */
+        tb->tb_next_offset[0] = 0xffff;
+        tb->tb_next_offset[1] = 0xffff;
+        gen_code_buf = tb->tc_ptr;
+#ifdef USE_DIRECT_JUMP
+        /* the following two entries are optional (only used for string ops) */
+        tb->tb_jmp_offset[2] = 0xffff;
+        tb->tb_jmp_offset[3] = 0xffff;
+#endif
+        dyngen_labels(gen_labels, nb_gen_labels, gen_code_buf, gen_opc_buf);
+
+        gen_code_size = dyngen_code(gen_code_buf, tb->tb_next_offset,
+#ifdef USE_DIRECT_JUMP
+                                    tb->tb_jmp_offset,
+#else
+                                    NULL,
+#endif
+                                    gen_opc_buf, gen_opparam_buf, gen_labels);
+#ifdef VBOX
+        RAWEx_ProfileStop(env, STATS_QEMU_COMPILATION);
+#endif
+    }
+    *gen_code_size_ptr = gen_code_size;
+#ifdef DEBUG_DISAS
+    if (loglevel & CPU_LOG_TB_OUT_ASM) {
+        fprintf(logfile, "OUT: [size=%d]\n", *gen_code_size_ptr);
+        disas(logfile, tb->tc_ptr, *gen_code_size_ptr);
+        fprintf(logfile, "\n");
+        fflush(logfile);
+    }
+#endif
+    return 0;
+}
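+
+/* Illustrative caller sketch (an assumption, not code from this file): the
+   translation front end is expected to fill in the TranslationBlock, point
+   tc_ptr at a host code buffer and then call cpu_gen_code(), treating a
+   non-zero return as "first guest instruction invalid".  Buffer and size
+   names below are hypothetical.
+
+       int code_size;
+       tb->tc_ptr = code_gen_ptr;                  // assumed host buffer
+       if (cpu_gen_code(env, tb, MAX_CODE_GEN_SIZE, &code_size) != 0) {
+           // let the virtual CPU raise the exception for the bad insn
+       }
+       code_gen_ptr += code_size;
+*/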
+
+/* The cpu state corresponding to 'searched_pc' is restored. 
+ */
+int cpu_restore_state(TranslationBlock *tb, 
+                      CPUState *env, unsigned long searched_pc,
+                      void *puc)
+{
+    int j, c;
+    unsigned long tc_ptr;
+    uint16_t *opc_ptr;
+
+#ifdef USE_CODE_COPY
+    if (tb->cflags & CF_CODE_COPY) {
+        return cpu_restore_state_copy(tb, env, searched_pc, puc);
+    }
+#endif
+    if (gen_intermediate_code_pc(env, tb) < 0)
+        return -1;
+    
+    /* find opc index corresponding to search_pc */
+    tc_ptr = (unsigned long)tb->tc_ptr;
+    if (searched_pc < tc_ptr)
+        return -1;
+    j = 0;
+    opc_ptr = gen_opc_buf;
+    for(;;) {
+        c = *opc_ptr;
+        if (c == INDEX_op_end)
+            return -1;
+        tc_ptr += opc_copy_size[c];
+        if (searched_pc < tc_ptr)
+            break;
+        opc_ptr++;
+    }
+    j = opc_ptr - gen_opc_buf;
+    /* now find the start of the guest instruction containing this op */
+    while (gen_opc_instr_start[j] == 0)
+        j--;
+#if defined(TARGET_I386)
+    {
+        int cc_op;
+#ifdef DEBUG_DISAS
+        if (loglevel & CPU_LOG_TB_OP) {
+            int i;
+            fprintf(logfile, "RESTORE:\n");
+            for(i=0;i<=j; i++) {
+                if (gen_opc_instr_start[i]) {
+                    fprintf(logfile, "0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]);
+                }
+            }
+            fprintf(logfile, "spc=0x%08lx j=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n", 
+                    searched_pc, j, gen_opc_pc[j] - tb->cs_base, 
+                    (uint32_t)tb->cs_base);
+        }
+#endif
+        env->eip = gen_opc_pc[j] - tb->cs_base;
+        cc_op = gen_opc_cc_op[j];
+        if (cc_op != CC_OP_DYNAMIC)
+            env->cc_op = cc_op;
+    }
+#elif defined(TARGET_ARM)
+    env->regs[15] = gen_opc_pc[j];
+#elif defined(TARGET_SPARC)
+    {
+        target_ulong npc;
+        env->pc = gen_opc_pc[j];
+        npc = gen_opc_npc[j];
+        if (npc == 1) {
+            /* dynamic NPC: already stored */
+        } else if (npc == 2) {
+            target_ulong t2 = (target_ulong)puc;
+            /* jump PC: use T2 and the jump targets of the translation */
+            if (t2) 
+                env->npc = gen_opc_jump_pc[0];
+            else
+                env->npc = gen_opc_jump_pc[1];
+        } else {
+            env->npc = npc;
+        }
+    }
+#elif defined(TARGET_PPC)
+    {
+        int type;
+        /* for PPC, we need to look at the micro operation to get the
+           access type */
+        env->nip = gen_opc_pc[j];
+        switch(c) {
+#if defined(CONFIG_USER_ONLY)
+#define CASE3(op)\
+        case INDEX_op_ ## op ## _raw
+#else
+#define CASE3(op)\
+        case INDEX_op_ ## op ## _user:\
+        case INDEX_op_ ## op ## _kernel
+#endif
+            
+        CASE3(stfd):
+        CASE3(stfs):
+        CASE3(lfd):
+        CASE3(lfs):
+            type = ACCESS_FLOAT;
+            break;
+        CASE3(lwarx):
+            type = ACCESS_RES;
+            break;
+        CASE3(stwcx):
+            type = ACCESS_RES;
+            break;
+        CASE3(eciwx):
+        CASE3(ecowx):
+            type = ACCESS_EXT;
+            break;
+        default:
+            type = ACCESS_INT;
+            break;
+        }
+        env->access_type = type;
+    }
+#elif defined(TARGET_M68K)
+    env->pc = gen_opc_pc[j];
+#elif defined(TARGET_MIPS)
+    env->PC = gen_opc_pc[j];
+    env->hflags &= ~MIPS_HFLAG_BMASK;
+    env->hflags |= gen_opc_hflags[j];
+#endif
+    return 0;
+}
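+
+/* Illustrative use (hedged; tb_find_pc() is assumed to be provided elsewhere
+   in the recompiler): when a fault is taken inside generated host code, the
+   signal handler can map the faulting host PC back to precise guest state
+   before the exception is delivered.
+
+       TranslationBlock *tb = tb_find_pc(host_pc);   // assumed lookup helper
+       if (tb)
+           cpu_restore_state(tb, env, host_pc, puc);
+*/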
Index: /trunk/src/recompiler_new/translate-op.c
===================================================================
--- /trunk/src/recompiler_new/translate-op.c	(revision 13168)
+++ /trunk/src/recompiler_new/translate-op.c	(revision 13168)
@@ -0,0 +1,80 @@
+/*
+ *  Host code generation
+ * 
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Sun elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "config.h"         
+
+#if defined(VBOX)
+void     remR3PhysRead(RTGCPHYS SrcGCPhys, void *pvDst, unsigned cb);
+uint8_t  remR3PhysReadU8(RTGCPHYS SrcGCPhys);
+int8_t   remR3PhysReadS8(RTGCPHYS SrcGCPhys);
+uint16_t remR3PhysReadU16(RTGCPHYS SrcGCPhys);
+int16_t  remR3PhysReadS16(RTGCPHYS SrcGCPhys);
+uint32_t remR3PhysReadU32(RTGCPHYS SrcGCPhys);
+int32_t  remR3PhysReadS32(RTGCPHYS SrcGCPhys);
+uint64_t remR3PhysReadU64(RTGCPHYS SrcGCPhys);
+int64_t  remR3PhysReadS64(RTGCPHYS SrcGCPhys);
+void     remR3PhysWrite(RTGCPHYS DstGCPhys, const void *pvSrc, unsigned cb);
+void     remR3PhysWriteU8(RTGCPHYS DstGCPhys, uint8_t val);
+void     remR3PhysWriteU16(RTGCPHYS DstGCPhys, uint16_t val);
+void     remR3PhysWriteU32(RTGCPHYS DstGCPhys, uint32_t val);
+void     remR3PhysWriteU64(RTGCPHYS DstGCPhys, uint64_t val);
+
+# ifndef REM_PHYS_ADDR_IN_TLB
+void     remR3PhysReadHCPtr(uint8_t *pbSrcPhys, void *pvDst, unsigned cb);
+uint8_t  remR3PhysReadHCPtrU8(uint8_t *pbSrcPhys);
+int8_t   remR3PhysReadHCPtrS8(uint8_t *pbSrcPhys);
+uint16_t remR3PhysReadHCPtrU16(uint8_t *pbSrcPhys);
+int16_t  remR3PhysReadHCPtrS16(uint8_t *pbSrcPhys);
+uint32_t remR3PhysReadHCPtrU32(uint8_t *pbSrcPhys);
+int32_t  remR3PhysReadHCPtrS32(uint8_t *pbSrcPhys);
+uint64_t remR3PhysReadHCPtrU64(uint8_t *pbSrcPhys);
+int64_t  remR3PhysReadHCPtrS64(uint8_t *pbSrcPhys);
+void     remR3PhysWriteHCPtr(uint8_t *pbDstPhys, const void *pvSrc, unsigned cb);
+void     remR3PhysWriteHCPtrU8(uint8_t *pbDstPhys, uint8_t val);
+void     remR3PhysWriteHCPtrU16(uint8_t *pbDstPhys, uint16_t val);
+void     remR3PhysWriteHCPtrU32(uint8_t *pbDstPhys, uint32_t val);
+void     remR3PhysWriteHCPtrU64(uint8_t *pbDstPhys, uint64_t val);
+# endif
+#endif /* VBOX */
+
+enum {
+#define DEF(s, n, copy_size) INDEX_op_ ## s,
+#include "opc.h"
+#undef DEF
+    NB_OPS,
+};
+
+#include "dyngen.h"
+#include "op.h"
+
Index: /trunk/src/recompiler_new/vl.h
===================================================================
--- /trunk/src/recompiler_new/vl.h	(revision 13168)
+++ /trunk/src/recompiler_new/vl.h	(revision 13168)
@@ -0,0 +1,1414 @@
+/*
+ * QEMU System Emulator header
+ * 
+ * Copyright (c) 2003 Fabrice Bellard
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef VL_H
+#define VL_H
+
+/* we put basic includes here to avoid repeating them in device drivers */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+#ifndef VBOX
+#include <limits.h>
+#include <time.h>
+#include <ctype.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include "audio/audio.h"
+#endif /* !VBOX */
+
+#ifndef O_LARGEFILE
+#define O_LARGEFILE 0
+#endif
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#ifndef ENOMEDIUM
+#define ENOMEDIUM ENODEV
+#endif
+
+#ifdef _WIN32
+#ifndef VBOX 
+#include <windows.h>
+#define fsync _commit
+#define lseek _lseeki64
+#define ENOTSUP 4096
+extern int qemu_ftruncate64(int, int64_t);
+#define ftruncate qemu_ftruncate64
+
+
+static inline char *realpath(const char *path, char *resolved_path)
+{
+    _fullpath(resolved_path, path, _MAX_PATH);
+    return resolved_path;
+}
+
+#define PRId64 "I64d"
+#define PRIx64 "I64x"
+#define PRIu64 "I64u"
+#define PRIo64 "I64o"
+#endif /* !VBOX */
+#endif
+
+#ifdef QEMU_TOOL
+
+/* we use QEMU_TOOL in the command line tools which do not depend on
+   the target CPU type */
+#include "config-host.h"
+#include <setjmp.h>
+#include "osdep.h"
+#include "bswap.h"
+
+#else
+
+#ifndef VBOX
+#include "audio/audio.h"
+#endif /* !VBOX */
+#include "cpu.h"
+
+#endif /* !defined(QEMU_TOOL) */
+
+#ifdef VBOX
+# include <VBox/types.h>
+# include "REMInternal.h"
+#endif /* VBOX */
+
+#ifndef glue
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s)	tostring(s)
+#define tostring(s)	#s
+#endif
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+/* cutils.c */
+void pstrcpy(char *buf, int buf_size, const char *str);
+char *pstrcat(char *buf, int buf_size, const char *s);
+int strstart(const char *str, const char *val, const char **ptr);
+int stristart(const char *str, const char *val, const char **ptr);
+
+/* vl.c */
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c);
+
+void hw_error(const char *fmt, ...);
+
+extern const char *bios_dir;
+
+extern int vm_running;
+
+typedef struct vm_change_state_entry VMChangeStateEntry;
+typedef void VMChangeStateHandler(void *opaque, int running);
+typedef void VMStopHandler(void *opaque, int reason);
+
+VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
+                                                     void *opaque);
+void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
+
+int qemu_add_vm_stop_handler(VMStopHandler *cb, void *opaque);
+void qemu_del_vm_stop_handler(VMStopHandler *cb, void *opaque);
+
+void vm_start(void);
+void vm_stop(int reason);
+
+typedef void QEMUResetHandler(void *opaque);
+
+void qemu_register_reset(QEMUResetHandler *func, void *opaque);
+void qemu_system_reset_request(void);
+void qemu_system_shutdown_request(void);
+void qemu_system_powerdown_request(void);
+#if !defined(TARGET_SPARC)
+// Please implement a power failure function to signal the OS
+#define qemu_system_powerdown() do{}while(0)
+#else
+void qemu_system_powerdown(void);
+#endif
+
+void main_loop_wait(int timeout);
+
+extern int ram_size;
+extern int bios_size;
+extern int rtc_utc;
+extern int cirrus_vga_enabled;
+extern int graphic_width;
+extern int graphic_height;
+extern int graphic_depth;
+extern const char *keyboard_layout;
+extern int kqemu_allowed;
+extern int win2k_install_hack;
+extern int usb_enabled;
+extern int smp_cpus;
+extern int no_quit;
+extern int semihosting_enabled;
+extern int autostart;
+
+#ifndef VBOX
+#define MAX_OPTION_ROMS 16
+extern const char *option_rom[MAX_OPTION_ROMS];
+extern int nb_option_roms;
+
+/* XXX: make it dynamic */
+#if defined (TARGET_PPC) || defined (TARGET_SPARC64)
+#define BIOS_SIZE ((512 + 32) * 1024)
+#elif defined(TARGET_MIPS)
+#define BIOS_SIZE (4 * 1024 * 1024)
+#else
+#define BIOS_SIZE ((256 + 64) * 1024)
+#endif
+
+/* keyboard/mouse support */
+
+#define MOUSE_EVENT_LBUTTON 0x01
+#define MOUSE_EVENT_RBUTTON 0x02
+#define MOUSE_EVENT_MBUTTON 0x04
+
+typedef void QEMUPutKBDEvent(void *opaque, int keycode);
+typedef void QEMUPutMouseEvent(void *opaque, int dx, int dy, int dz, int buttons_state);
+
+typedef struct QEMUPutMouseEntry {
+    QEMUPutMouseEvent *qemu_put_mouse_event;
+    void *qemu_put_mouse_event_opaque;
+    int qemu_put_mouse_event_absolute;
+    char *qemu_put_mouse_event_name;
+
+    /* used internally by qemu for handling mice */
+    struct QEMUPutMouseEntry *next;
+} QEMUPutMouseEntry;
+
+void qemu_add_kbd_event_handler(QEMUPutKBDEvent *func, void *opaque);
+QEMUPutMouseEntry *qemu_add_mouse_event_handler(QEMUPutMouseEvent *func,
+                                                void *opaque, int absolute,
+                                                const char *name);
+void qemu_remove_mouse_event_handler(QEMUPutMouseEntry *entry);
+
+void kbd_put_keycode(int keycode);
+void kbd_mouse_event(int dx, int dy, int dz, int buttons_state);
+int kbd_mouse_is_absolute(void);
+
+void do_info_mice(void);
+void do_mouse_set(int index);
+
+/* keysym is a Unicode code point, except for special keys (see the
+   QEMU_KEY_xxx constants) */
+#define QEMU_KEY_ESC1(c) ((c) | 0xe100)
+#define QEMU_KEY_BACKSPACE  0x007f
+#define QEMU_KEY_UP         QEMU_KEY_ESC1('A')
+#define QEMU_KEY_DOWN       QEMU_KEY_ESC1('B')
+#define QEMU_KEY_RIGHT      QEMU_KEY_ESC1('C')
+#define QEMU_KEY_LEFT       QEMU_KEY_ESC1('D')
+#define QEMU_KEY_HOME       QEMU_KEY_ESC1(1)
+#define QEMU_KEY_END        QEMU_KEY_ESC1(4)
+#define QEMU_KEY_PAGEUP     QEMU_KEY_ESC1(5)
+#define QEMU_KEY_PAGEDOWN   QEMU_KEY_ESC1(6)
+#define QEMU_KEY_DELETE     QEMU_KEY_ESC1(3)
+
+#define QEMU_KEY_CTRL_UP         0xe400
+#define QEMU_KEY_CTRL_DOWN       0xe401
+#define QEMU_KEY_CTRL_LEFT       0xe402
+#define QEMU_KEY_CTRL_RIGHT      0xe403
+#define QEMU_KEY_CTRL_HOME       0xe404
+#define QEMU_KEY_CTRL_END        0xe405
+#define QEMU_KEY_CTRL_PAGEUP     0xe406
+#define QEMU_KEY_CTRL_PAGEDOWN   0xe407
+
+void kbd_put_keysym(int keysym);
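+
+/* Example (illustrative only): a text console front end decodes its own key
+   events into keysyms, passing plain characters as their code point and
+   cursor keys through the QEMU_KEY_* constants above.
+
+       kbd_put_keysym('a');                  // ordinary character
+       kbd_put_keysym(QEMU_KEY_UP);          // escape-encoded cursor key
+       kbd_put_keysym(QEMU_KEY_CTRL_PAGEUP);
+*/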
+
+/* async I/O support */
+
+typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
+typedef int IOCanRWHandler(void *opaque);
+typedef void IOHandler(void *opaque);
+
+int qemu_set_fd_handler2(int fd, 
+                         IOCanRWHandler *fd_read_poll, 
+                         IOHandler *fd_read, 
+                         IOHandler *fd_write, 
+                         void *opaque);
+int qemu_set_fd_handler(int fd,
+                        IOHandler *fd_read, 
+                        IOHandler *fd_write,
+                        void *opaque);
+
+/* Polling handling */
+
+/* return TRUE if no sleep should be done afterwards */
+typedef int PollingFunc(void *opaque);
+
+int qemu_add_polling_cb(PollingFunc *func, void *opaque);
+void qemu_del_polling_cb(PollingFunc *func, void *opaque);
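+
+/* Example (illustrative; the callback and helper are made up): a back end
+   without a pollable file descriptor registers a polling callback, returning
+   non-zero when the main loop should skip its sleep.
+
+       static int my_poll(void *opaque)
+       {
+           return my_backend_has_pending_work(opaque);   // assumed helper
+       }
+
+       qemu_add_polling_cb(my_poll, s);
+*/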
+
+#ifdef _WIN32
+/* Wait objects handling */
+typedef void WaitObjectFunc(void *opaque);
+
+int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
+void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
+#endif
+
+typedef struct QEMUBH QEMUBH;
+
+/* character device */
+
+#define CHR_EVENT_BREAK 0 /* serial break char */
+#define CHR_EVENT_FOCUS 1 /* focus to this terminal (modal input needed) */
+#define CHR_EVENT_RESET 2 /* new connection established */
+
+
+#define CHR_IOCTL_SERIAL_SET_PARAMS   1
+typedef struct {
+    int speed;
+    int parity;
+    int data_bits;
+    int stop_bits;
+} QEMUSerialSetParams;
+
+#define CHR_IOCTL_SERIAL_SET_BREAK    2
+
+#define CHR_IOCTL_PP_READ_DATA        3
+#define CHR_IOCTL_PP_WRITE_DATA       4
+#define CHR_IOCTL_PP_READ_CONTROL     5
+#define CHR_IOCTL_PP_WRITE_CONTROL    6
+#define CHR_IOCTL_PP_READ_STATUS      7
+
+typedef void IOEventHandler(void *opaque, int event);
+
+typedef struct CharDriverState {
+    int (*chr_write)(struct CharDriverState *s, const uint8_t *buf, int len);
+    void (*chr_update_read_handler)(struct CharDriverState *s);
+    int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg);
+    IOEventHandler *chr_event;
+    IOCanRWHandler *chr_can_read;
+    IOReadHandler *chr_read;
+    void *handler_opaque;
+    void (*chr_send_event)(struct CharDriverState *chr, int event);
+    void (*chr_close)(struct CharDriverState *chr);
+    void *opaque;
+    QEMUBH *bh;
+} CharDriverState;
+
+CharDriverState *qemu_chr_open(const char *filename);
+void qemu_chr_printf(CharDriverState *s, const char *fmt, ...);
+int qemu_chr_write(CharDriverState *s, const uint8_t *buf, int len);
+void qemu_chr_send_event(CharDriverState *s, int event);
+void qemu_chr_add_handlers(CharDriverState *s, 
+                           IOCanRWHandler *fd_can_read, 
+                           IOReadHandler *fd_read,
+                           IOEventHandler *fd_event,
+                           void *opaque);
+int qemu_chr_ioctl(CharDriverState *s, int cmd, void *arg);
+void qemu_chr_reset(CharDriverState *s);
+int qemu_chr_can_read(CharDriverState *s);
+void qemu_chr_read(CharDriverState *s, uint8_t *buf, int len);
+
+/* consoles */
+
+typedef struct DisplayState DisplayState;
+typedef struct TextConsole TextConsole;
+
+typedef void (*vga_hw_update_ptr)(void *);
+typedef void (*vga_hw_invalidate_ptr)(void *);
+typedef void (*vga_hw_screen_dump_ptr)(void *, const char *);
+
+TextConsole *graphic_console_init(DisplayState *ds, vga_hw_update_ptr update,
+                                  vga_hw_invalidate_ptr invalidate,
+                                  vga_hw_screen_dump_ptr screen_dump,
+                                  void *opaque);
+void vga_hw_update(void);
+void vga_hw_invalidate(void);
+void vga_hw_screen_dump(const char *filename);
+
+int is_graphic_console(void);
+CharDriverState *text_console_init(DisplayState *ds);
+void console_select(unsigned int index);
+
+/* serial ports */
+
+#define MAX_SERIAL_PORTS 4
+
+extern CharDriverState *serial_hds[MAX_SERIAL_PORTS];
+
+/* parallel ports */
+
+#define MAX_PARALLEL_PORTS 3
+
+extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
+
+/* VLANs support */
+
+typedef struct VLANClientState VLANClientState;
+
+struct VLANClientState {
+    IOReadHandler *fd_read;
+    /* Packets may still be sent if this returns zero.  It's used to
+       rate-limit the slirp code.  */
+    IOCanRWHandler *fd_can_read;
+    void *opaque;
+    struct VLANClientState *next;
+    struct VLANState *vlan;
+    char info_str[256];
+};
+
+typedef struct VLANState {
+    int id;
+    VLANClientState *first_client;
+    struct VLANState *next;
+} VLANState;
+
+VLANState *qemu_find_vlan(int id);
+VLANClientState *qemu_new_vlan_client(VLANState *vlan,
+                                      IOReadHandler *fd_read,
+                                      IOCanRWHandler *fd_can_read,
+                                      void *opaque);
+int qemu_can_send_packet(VLANClientState *vc);
+void qemu_send_packet(VLANClientState *vc, const uint8_t *buf, int size);
+void qemu_handler_true(void *opaque);
+
+void do_info_network(void);
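+
+/* Sketch of a NIC back end attaching to a VLAN (names are illustrative):
+   fd_can_read lets the client throttle senders (they may still deliver),
+   fd_read receives frames, and qemu_send_packet() injects frames back onto
+   the VLAN.
+
+       static int my_can_receive(void *opaque)
+       {
+           return 1;     // return 0 to ask senders to hold off
+       }
+
+       static void my_receive(void *opaque, const uint8_t *buf, int size)
+       {
+           // ... hand the frame to the emulated NIC ...
+       }
+
+       VLANState *vlan = qemu_find_vlan(0);
+       VLANClientState *vc =
+           qemu_new_vlan_client(vlan, my_receive, my_can_receive, s);
+       if (qemu_can_send_packet(vc))
+           qemu_send_packet(vc, frame, frame_len);
+*/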
+
+/* TAP win32 */
+int tap_win32_init(VLANState *vlan, const char *ifname);
+
+/* NIC info */
+
+#define MAX_NICS 8
+
+typedef struct NICInfo {
+    uint8_t macaddr[6];
+    const char *model;
+    VLANState *vlan;
+} NICInfo;
+
+extern int nb_nics;
+extern NICInfo nd_table[MAX_NICS];
+
+/* timers */
+
+typedef struct QEMUClock QEMUClock;
+typedef struct QEMUTimer QEMUTimer;
+typedef void QEMUTimerCB(void *opaque);
+
+/* The real time clock should be used only for stuff which does not
+   change the virtual machine state, as it is run even if the virtual
+   machine is stopped. The real time clock has a frequency of 1000
+   Hz. */
+extern QEMUClock *rt_clock;
+
+/* The virtual clock is only run during the emulation. It is stopped
+   when the virtual machine is stopped. Virtual timers use a high
+   precision clock, usually cpu cycles (use ticks_per_sec). */
+extern QEMUClock *vm_clock;
+
+int64_t qemu_get_clock(QEMUClock *clock);
+
+QEMUTimer *qemu_new_timer(QEMUClock *clock, QEMUTimerCB *cb, void *opaque);
+void qemu_free_timer(QEMUTimer *ts);
+void qemu_del_timer(QEMUTimer *ts);
+void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time);
+int qemu_timer_pending(QEMUTimer *ts);
+
+extern int64_t ticks_per_sec;
+extern int pit_min_timer_count;
+
+int64_t cpu_get_ticks(void);
+void cpu_enable_ticks(void);
+void cpu_disable_ticks(void);
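+
+/* Minimal timer sketch (illustrative only; callback and period are made up):
+   arm a periodic callback on the virtual clock with the API above.
+
+       static QEMUTimer *tick_timer;
+
+       static void tick_cb(void *opaque)
+       {
+           // ... periodic work ...
+           qemu_mod_timer(tick_timer,
+                          qemu_get_clock(vm_clock) + ticks_per_sec / 100);
+       }
+
+       tick_timer = qemu_new_timer(vm_clock, tick_cb, NULL);
+       qemu_mod_timer(tick_timer, qemu_get_clock(vm_clock));
+*/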
+
+/* VM Load/Save */
+
+typedef struct QEMUFile QEMUFile;
+
+QEMUFile *qemu_fopen(const char *filename, const char *mode);
+void qemu_fflush(QEMUFile *f);
+void qemu_fclose(QEMUFile *f);
+void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
+void qemu_put_byte(QEMUFile *f, int v);
+void qemu_put_be16(QEMUFile *f, unsigned int v);
+void qemu_put_be32(QEMUFile *f, unsigned int v);
+void qemu_put_be64(QEMUFile *f, uint64_t v);
+int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
+int qemu_get_byte(QEMUFile *f);
+unsigned int qemu_get_be16(QEMUFile *f);
+unsigned int qemu_get_be32(QEMUFile *f);
+uint64_t qemu_get_be64(QEMUFile *f);
+
+static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
+{
+    qemu_put_be64(f, *pv);
+}
+
+static inline void qemu_put_be32s(QEMUFile *f, const uint32_t *pv)
+{
+    qemu_put_be32(f, *pv);
+}
+
+static inline void qemu_put_be16s(QEMUFile *f, const uint16_t *pv)
+{
+    qemu_put_be16(f, *pv);
+}
+
+static inline void qemu_put_8s(QEMUFile *f, const uint8_t *pv)
+{
+    qemu_put_byte(f, *pv);
+}
+
+static inline void qemu_get_be64s(QEMUFile *f, uint64_t *pv)
+{
+    *pv = qemu_get_be64(f);
+}
+
+static inline void qemu_get_be32s(QEMUFile *f, uint32_t *pv)
+{
+    *pv = qemu_get_be32(f);
+}
+
+static inline void qemu_get_be16s(QEMUFile *f, uint16_t *pv)
+{
+    *pv = qemu_get_be16(f);
+}
+
+static inline void qemu_get_8s(QEMUFile *f, uint8_t *pv)
+{
+    *pv = qemu_get_byte(f);
+}
+
+#if TARGET_LONG_BITS == 64
+#define qemu_put_betl qemu_put_be64
+#define qemu_get_betl qemu_get_be64
+#define qemu_put_betls qemu_put_be64s
+#define qemu_get_betls qemu_get_be64s
+#else
+#define qemu_put_betl qemu_put_be32
+#define qemu_get_betl qemu_get_be32
+#define qemu_put_betls qemu_put_be32s
+#define qemu_get_betls qemu_get_be32s
+#endif
+
+int64_t qemu_ftell(QEMUFile *f);
+int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);
+
+typedef void SaveStateHandler(QEMUFile *f, void *opaque);
+typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
+
+int register_savevm(const char *idstr, 
+                    int instance_id, 
+                    int version_id,
+                    SaveStateHandler *save_state,
+                    LoadStateHandler *load_state,
+                    void *opaque);
+void qemu_get_timer(QEMUFile *f, QEMUTimer *ts);
+void qemu_put_timer(QEMUFile *f, QEMUTimer *ts);
+
+void cpu_save(QEMUFile *f, void *opaque);
+int cpu_load(QEMUFile *f, void *opaque, int version_id);
+
+void do_savevm(const char *name);
+void do_loadvm(const char *name);
+void do_delvm(const char *name);
+void do_info_snapshots(void);
+
+/* bottom halves */
+typedef void QEMUBHFunc(void *opaque);
+
+QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
+void qemu_bh_schedule(QEMUBH *bh);
+void qemu_bh_cancel(QEMUBH *bh);
+void qemu_bh_delete(QEMUBH *bh);
+int qemu_bh_poll(void);
+
+/* block.c */
+typedef struct BlockDriverState BlockDriverState;
+typedef struct BlockDriver BlockDriver;
+
+extern BlockDriver bdrv_raw;
+extern BlockDriver bdrv_host_device;
+extern BlockDriver bdrv_cow;
+extern BlockDriver bdrv_qcow;
+extern BlockDriver bdrv_vmdk;
+extern BlockDriver bdrv_cloop;
+extern BlockDriver bdrv_dmg;
+extern BlockDriver bdrv_bochs;
+extern BlockDriver bdrv_vpc;
+extern BlockDriver bdrv_vvfat;
+extern BlockDriver bdrv_qcow2;
+
+typedef struct BlockDriverInfo {
+    /* in bytes, 0 if irrelevant */
+    int cluster_size; 
+    /* offset at which the VM state can be saved (0 if not possible) */
+    int64_t vm_state_offset; 
+} BlockDriverInfo;
+
+typedef struct QEMUSnapshotInfo {
+    char id_str[128]; /* unique snapshot id */
+    /* the following fields are informative. They are not needed for
+       the consistency of the snapshot */
+    char name[256]; /* user chosen name */
+    uint32_t vm_state_size; /* VM state info size */
+    uint32_t date_sec; /* UTC date of the snapshot */
+    uint32_t date_nsec;
+    uint64_t vm_clock_nsec; /* VM clock relative to boot */
+} QEMUSnapshotInfo;
+
+#define BDRV_O_RDONLY      0x0000
+#define BDRV_O_RDWR        0x0002
+#define BDRV_O_ACCESS      0x0003
+#define BDRV_O_CREAT       0x0004 /* create an empty file */
+#define BDRV_O_SNAPSHOT    0x0008 /* open the file read only and save writes in a snapshot */
+#define BDRV_O_FILE        0x0010 /* open as a raw file (do not try to
+                                     use a disk image format on top of
+                                     it); default for
+                                     bdrv_file_open() */
+
+void bdrv_init(void);
+BlockDriver *bdrv_find_format(const char *format_name);
+int bdrv_create(BlockDriver *drv, 
+                const char *filename, int64_t size_in_sectors,
+                const char *backing_file, int flags);
+BlockDriverState *bdrv_new(const char *device_name);
+void bdrv_delete(BlockDriverState *bs);
+int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags);
+int bdrv_open(BlockDriverState *bs, const char *filename, int flags);
+int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
+               BlockDriver *drv);
+void bdrv_close(BlockDriverState *bs);
+int bdrv_read(BlockDriverState *bs, int64_t sector_num, 
+              uint8_t *buf, int nb_sectors);
+int bdrv_write(BlockDriverState *bs, int64_t sector_num, 
+               const uint8_t *buf, int nb_sectors);
+int bdrv_pread(BlockDriverState *bs, int64_t offset, 
+               void *buf, int count);
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset, 
+                const void *buf, int count);
+int bdrv_truncate(BlockDriverState *bs, int64_t offset);
+int64_t bdrv_getlength(BlockDriverState *bs);
+void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr);
+int bdrv_commit(BlockDriverState *bs);
+void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size);
+/* async block I/O */
+typedef struct BlockDriverAIOCB BlockDriverAIOCB;
+typedef void BlockDriverCompletionFunc(void *opaque, int ret);
+
+BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
+                                uint8_t *buf, int nb_sectors,
+                                BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
+                                 const uint8_t *buf, int nb_sectors,
+                                 BlockDriverCompletionFunc *cb, void *opaque);
+void bdrv_aio_cancel(BlockDriverAIOCB *acb);
+
+void qemu_aio_init(void);
+void qemu_aio_poll(void);
+void qemu_aio_flush(void);
+void qemu_aio_wait_start(void);
+void qemu_aio_wait(void);
+void qemu_aio_wait_end(void);
+
+/* Ensure contents are flushed to disk.  */
+void bdrv_flush(BlockDriverState *bs);
+
+#define BDRV_TYPE_HD     0
+#define BDRV_TYPE_CDROM  1
+#define BDRV_TYPE_FLOPPY 2
+#define BIOS_ATA_TRANSLATION_AUTO   0
+#define BIOS_ATA_TRANSLATION_NONE   1
+#define BIOS_ATA_TRANSLATION_LBA    2
+#define BIOS_ATA_TRANSLATION_LARGE  3
+#define BIOS_ATA_TRANSLATION_RECHS  4
+
+void bdrv_set_geometry_hint(BlockDriverState *bs, 
+                            int cyls, int heads, int secs);
+void bdrv_set_type_hint(BlockDriverState *bs, int type);
+void bdrv_set_translation_hint(BlockDriverState *bs, int translation);
+void bdrv_get_geometry_hint(BlockDriverState *bs, 
+                            int *pcyls, int *pheads, int *psecs);
+int bdrv_get_type_hint(BlockDriverState *bs);
+int bdrv_get_translation_hint(BlockDriverState *bs);
+int bdrv_is_removable(BlockDriverState *bs);
+int bdrv_is_read_only(BlockDriverState *bs);
+int bdrv_is_inserted(BlockDriverState *bs);
+int bdrv_media_changed(BlockDriverState *bs);
+int bdrv_is_locked(BlockDriverState *bs);
+void bdrv_set_locked(BlockDriverState *bs, int locked);
+void bdrv_eject(BlockDriverState *bs, int eject_flag);
+void bdrv_set_change_cb(BlockDriverState *bs, 
+                        void (*change_cb)(void *opaque), void *opaque);
+void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size);
+void bdrv_info(void);
+BlockDriverState *bdrv_find(const char *name);
+void bdrv_iterate(void (*it)(void *opaque, const char *name), void *opaque);
+int bdrv_is_encrypted(BlockDriverState *bs);
+int bdrv_set_key(BlockDriverState *bs, const char *key);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 
+                         void *opaque);
+const char *bdrv_get_device_name(BlockDriverState *bs);
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, 
+                          const uint8_t *buf, int nb_sectors);
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+void bdrv_get_backing_filename(BlockDriverState *bs, 
+                               char *filename, int filename_size);
+int bdrv_snapshot_create(BlockDriverState *bs, 
+                         QEMUSnapshotInfo *sn_info);
+int bdrv_snapshot_goto(BlockDriverState *bs, 
+                       const char *snapshot_id);
+int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
+int bdrv_snapshot_list(BlockDriverState *bs, 
+                       QEMUSnapshotInfo **psn_info);
+char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn);
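+
+/* Illustrative sketch (values are made up): a monitor command fills in the
+   informative fields and lets the block driver take care of id_str and the
+   on-disk bookkeeping.
+
+       QEMUSnapshotInfo sn;
+       memset(&sn, 0, sizeof(sn));
+       pstrcpy(sn.name, sizeof(sn.name), "before-upgrade");
+       if (bdrv_snapshot_create(bs, &sn) < 0)
+           return;       // creation failed
+*/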
+
+char *get_human_readable_size(char *buf, int buf_size, int64_t size);
+int path_is_absolute(const char *path);
+void path_combine(char *dest, int dest_size,
+                  const char *base_path,
+                  const char *filename);
+
+#ifndef QEMU_TOOL
+
+typedef void QEMUMachineInitFunc(int ram_size, int vga_ram_size,
+                                 int boot_device, DisplayState *ds,
+                                 const char **fd_filename, int snapshot,
+                                 const char *kernel_filename,
+                                 const char *kernel_cmdline,
+                                 const char *initrd_filename);
+
+typedef struct QEMUMachine {
+    const char *name;
+    const char *desc;
+    QEMUMachineInitFunc *init;
+    struct QEMUMachine *next;
+} QEMUMachine;
+
+int qemu_register_machine(QEMUMachine *m);
+
+typedef void SetIRQFunc(void *opaque, int irq_num, int level);
+typedef void IRQRequestFunc(void *opaque, int level);
+
+/* ISA bus */
+
+extern target_phys_addr_t isa_mem_base;
+
+typedef void (IOPortWriteFunc)(void *opaque, uint32_t address, uint32_t data);
+typedef uint32_t (IOPortReadFunc)(void *opaque, uint32_t address);
+
+int register_ioport_read(int start, int length, int size, 
+                         IOPortReadFunc *func, void *opaque);
+int register_ioport_write(int start, int length, int size, 
+                          IOPortWriteFunc *func, void *opaque);
+void isa_unassign_ioport(int start, int length);
+
+void isa_mmio_init(target_phys_addr_t base, target_phys_addr_t size);
+
+/* PCI bus */
+
+extern target_phys_addr_t pci_mem_base;
+
+typedef struct PCIBus PCIBus;
+typedef struct PCIDevice PCIDevice;
+
+typedef void PCIConfigWriteFunc(PCIDevice *pci_dev, 
+                                uint32_t address, uint32_t data, int len);
+typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev, 
+                                   uint32_t address, int len);
+typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, 
+                                uint32_t addr, uint32_t size, int type);
+
+#define PCI_ADDRESS_SPACE_MEM		0x00
+#define PCI_ADDRESS_SPACE_IO		0x01
+#define PCI_ADDRESS_SPACE_MEM_PREFETCH	0x08
+
+typedef struct PCIIORegion {
+    uint32_t addr; /* current PCI mapping address. -1 means not mapped */
+    uint32_t size;
+    uint8_t type;
+    PCIMapIORegionFunc *map_func;
+} PCIIORegion;
+
+#define PCI_ROM_SLOT 6
+#define PCI_NUM_REGIONS 7
+
+#define PCI_DEVICES_MAX 64
+
+#define PCI_VENDOR_ID		0x00	/* 16 bits */
+#define PCI_DEVICE_ID		0x02	/* 16 bits */
+#define PCI_COMMAND		0x04	/* 16 bits */
+#define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
+#define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define PCI_CLASS_DEVICE        0x0a    /* Device class */
+#define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
+#define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
+#define PCI_MIN_GNT		0x3e	/* 8 bits */
+#define PCI_MAX_LAT		0x3f	/* 8 bits */
+
+struct PCIDevice {
+    /* PCI config space */
+    uint8_t config[256];
+
+    /* the following fields are read only */
+    PCIBus *bus;
+    int devfn;
+    char name[64];
+    PCIIORegion io_regions[PCI_NUM_REGIONS];
+    
+    /* do not access the following fields */
+    PCIConfigReadFunc *config_read;
+    PCIConfigWriteFunc *config_write;
+    /* ??? This is a PC-specific hack, and should be removed.  */
+    int irq_index;
+
+    /* Current IRQ levels.  Used internally by the generic PCI code.  */
+    int irq_state[4];
+};
+
+PCIDevice *pci_register_device(PCIBus *bus, const char *name,
+                               int instance_size, int devfn,
+                               PCIConfigReadFunc *config_read, 
+                               PCIConfigWriteFunc *config_write);
+
+void pci_register_io_region(PCIDevice *pci_dev, int region_num, 
+                            uint32_t size, int type, 
+                            PCIMapIORegionFunc *map_func);
+
+void pci_set_irq(PCIDevice *pci_dev, int irq_num, int level);
+
+uint32_t pci_default_read_config(PCIDevice *d, 
+                                 uint32_t address, int len);
+void pci_default_write_config(PCIDevice *d, 
+                              uint32_t address, uint32_t val, int len);
+void pci_device_save(PCIDevice *s, QEMUFile *f);
+int pci_device_load(PCIDevice *s, QEMUFile *f);
+
+typedef void (*pci_set_irq_fn)(void *pic, int irq_num, int level);
+typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
+PCIBus *pci_register_bus(pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
+                         void *pic, int devfn_min, int nirq);
+
+void pci_nic_init(PCIBus *bus, NICInfo *nd, int devfn);
+void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len);
+uint32_t pci_data_read(void *opaque, uint32_t addr, int len);
+int pci_bus_num(PCIBus *s);
+void pci_for_each_device(int bus_num, void (*fn)(PCIDevice *d));
+
+void pci_info(void);
+PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint32_t id,
+                        pci_map_irq_fn map_irq, const char *name);
+
+/* prep_pci.c */
+PCIBus *pci_prep_init(void);
+
+/* grackle_pci.c */
+PCIBus *pci_grackle_init(uint32_t base, void *pic);
+
+/* unin_pci.c */
+PCIBus *pci_pmac_init(void *pic);
+
+/* apb_pci.c */
+PCIBus *pci_apb_init(target_ulong special_base, target_ulong mem_base,
+                     void *pic);
+
+PCIBus *pci_vpb_init(void *pic, int irq, int realview);
+
+/* piix_pci.c */
+PCIBus *i440fx_init(PCIDevice **pi440fx_state);
+void i440fx_set_smm(PCIDevice *d, int val);
+int piix3_init(PCIBus *bus, int devfn);
+void i440fx_init_memory_mappings(PCIDevice *d);
+
+int piix4_init(PCIBus *bus, int devfn);
+
+/* openpic.c */
+typedef struct openpic_t openpic_t;
+void openpic_set_irq(void *opaque, int n_IRQ, int level);
+openpic_t *openpic_init (PCIBus *bus, int *pmem_index, int nb_cpus,
+                         CPUState **envp);
+
+/* heathrow_pic.c */
+typedef struct HeathrowPICS HeathrowPICS;
+void heathrow_pic_set_irq(void *opaque, int num, int level);
+HeathrowPICS *heathrow_pic_init(int *pmem_index);
+
+/* gt64xxx.c */
+PCIBus *pci_gt64120_init(void *pic);
+
+#ifdef HAS_AUDIO
+struct soundhw {
+    const char *name;
+    const char *descr;
+    int enabled;
+    int isa;
+    union {
+        int (*init_isa) (AudioState *s);
+        int (*init_pci) (PCIBus *bus, AudioState *s);
+    } init;
+};
+
+extern struct soundhw soundhw[];
+#endif
+
+/* vga.c */
+
+#define VGA_RAM_SIZE (8192 * 1024)
+
+struct DisplayState {
+    uint8_t *data;
+    int linesize;
+    int depth;
+    int bgr; /* BGR color order instead of RGB. Only valid for depth == 32 */
+    int width;
+    int height;
+    void *opaque;
+
+    void (*dpy_update)(struct DisplayState *s, int x, int y, int w, int h);
+    void (*dpy_resize)(struct DisplayState *s, int w, int h);
+    void (*dpy_refresh)(struct DisplayState *s);
+    void (*dpy_copy)(struct DisplayState *s, int src_x, int src_y, int dst_x, int dst_y, int w, int h);
+};
+
+static inline void dpy_update(DisplayState *s, int x, int y, int w, int h)
+{
+    s->dpy_update(s, x, y, w, h);
+}
+
+static inline void dpy_resize(DisplayState *s, int w, int h)
+{
+    s->dpy_resize(s, w, h);
+}
+
+int isa_vga_init(DisplayState *ds, uint8_t *vga_ram_base, 
+                 unsigned long vga_ram_offset, int vga_ram_size);
+int pci_vga_init(PCIBus *bus, DisplayState *ds, uint8_t *vga_ram_base, 
+                 unsigned long vga_ram_offset, int vga_ram_size,
+                 unsigned long vga_bios_offset, int vga_bios_size);
+
+/* cirrus_vga.c */
+void pci_cirrus_vga_init(PCIBus *bus, DisplayState *ds, uint8_t *vga_ram_base, 
+                         unsigned long vga_ram_offset, int vga_ram_size);
+void isa_cirrus_vga_init(DisplayState *ds, uint8_t *vga_ram_base, 
+                         unsigned long vga_ram_offset, int vga_ram_size);
+
+/* sdl.c */
+void sdl_display_init(DisplayState *ds, int full_screen);
+
+/* cocoa.m */
+void cocoa_display_init(DisplayState *ds, int full_screen);
+
+/* vnc.c */
+void vnc_display_init(DisplayState *ds, const char *display);
+
+/* x_keymap.c */
+extern uint8_t _translate_keycode(const int key);
+
+/* ide.c */
+#define MAX_DISKS 4
+
+extern BlockDriverState *bs_table[MAX_DISKS + 1];
+
+void isa_ide_init(int iobase, int iobase2, int irq,
+                  BlockDriverState *hd0, BlockDriverState *hd1);
+void pci_cmd646_ide_init(PCIBus *bus, BlockDriverState **hd_table,
+                         int secondary_ide_enabled);
+void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn);
+int pmac_ide_init (BlockDriverState **hd_table,
+                   SetIRQFunc *set_irq, void *irq_opaque, int irq);
+
+/* cdrom.c */
+int cdrom_read_toc(int nb_sectors, uint8_t *buf, int msf, int start_track);
+int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num);
+
+/* es1370.c */
+int es1370_init (PCIBus *bus, AudioState *s);
+
+/* sb16.c */
+int SB16_init (AudioState *s);
+
+/* adlib.c */
+int Adlib_init (AudioState *s);
+
+/* gus.c */
+int GUS_init (AudioState *s);
+
+/* dma.c */
+typedef int (*DMA_transfer_handler) (void *opaque, int nchan, int pos, int size);
+int DMA_get_channel_mode (int nchan);
+int DMA_read_memory (int nchan, void *buf, int pos, int size);
+int DMA_write_memory (int nchan, void *buf, int pos, int size);
+void DMA_hold_DREQ (int nchan);
+void DMA_release_DREQ (int nchan);
+void DMA_schedule(int nchan);
+void DMA_run (void);
+void DMA_init (int high_page_enable);
+void DMA_register_channel (int nchan,
+                           DMA_transfer_handler transfer_handler,
+                           void *opaque);
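+
+/* A minimal sketch of the ISA DMA protocol declared above, assuming only
+   these prototypes: a device registers a handler for its channel, raises
+   DREQ when it wants service, and the handler moves bytes with
+   DMA_read_memory()/DMA_write_memory().  The my_* names are hypothetical,
+   and the exact return-value convention is defined by dma.c. */
+#if 0
+static int my_dma_transfer(void *opaque, int nchan, int pos, int size)
+{
+    uint8_t buf[64];
+    int n = size - pos;
+
+    if (n > (int)sizeof(buf))
+        n = sizeof(buf);
+    DMA_read_memory(nchan, buf, pos, n);  /* fetch the next chunk of guest data */
+    /* ... hand 'buf' to the device ... */
+    return pos + n;                       /* report how far the transfer got */
+}
+
+/* DMA_register_channel(chan, my_dma_transfer, my_state);
+   DMA_hold_DREQ(chan);  ...  DMA_release_DREQ(chan); */
+#endif
+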
+/* fdc.c */
+#define MAX_FD 2
+extern BlockDriverState *fd_table[MAX_FD];
+
+typedef struct fdctrl_t fdctrl_t;
+
+fdctrl_t *fdctrl_init (int irq_lvl, int dma_chann, int mem_mapped, 
+                       uint32_t io_base,
+                       BlockDriverState **fds);
+int fdctrl_get_drive_type(fdctrl_t *fdctrl, int drive_num);
+
+/* ne2000.c */
+
+void isa_ne2000_init(int base, int irq, NICInfo *nd);
+void pci_ne2000_init(PCIBus *bus, NICInfo *nd, int devfn);
+
+/* rtl8139.c */
+
+void pci_rtl8139_init(PCIBus *bus, NICInfo *nd, int devfn);
+
+/* pcnet.c */
+
+void pci_pcnet_init(PCIBus *bus, NICInfo *nd, int devfn);
+void pcnet_h_reset(void *opaque);
+void *lance_init(NICInfo *nd, uint32_t leaddr, void *dma_opaque);
+
+
+/* pckbd.c */
+
+void kbd_init(void);
+
+/* mc146818rtc.c */
+
+typedef struct RTCState RTCState;
+
+RTCState *rtc_init(int base, int irq);
+void rtc_set_memory(RTCState *s, int addr, int val);
+void rtc_set_date(RTCState *s, const struct tm *tm);
+
+/* serial.c */
+
+typedef struct SerialState SerialState;
+SerialState *serial_init(SetIRQFunc *set_irq, void *opaque,
+                         int base, int irq, CharDriverState *chr);
+SerialState *serial_mm_init (SetIRQFunc *set_irq, void *opaque,
+                             target_ulong base, int it_shift,
+                             int irq, CharDriverState *chr);
+
+/* parallel.c */
+
+typedef struct ParallelState ParallelState;
+ParallelState *parallel_init(int base, int irq, CharDriverState *chr);
+
+/* i8259.c */
+
+typedef struct PicState2 PicState2;
+extern PicState2 *isa_pic;
+void pic_set_irq(int irq, int level);
+void pic_set_irq_new(void *opaque, int irq, int level);
+PicState2 *pic_init(IRQRequestFunc *irq_request, void *irq_request_opaque);
+void pic_set_alt_irq_func(PicState2 *s, SetIRQFunc *alt_irq_func,
+                          void *alt_irq_opaque);
+int pic_read_irq(PicState2 *s);
+void pic_update_irq(PicState2 *s);
+uint32_t pic_intack_read(PicState2 *s);
+void pic_info(void);
+void irq_info(void);
+
+/* APIC */
+typedef struct IOAPICState IOAPICState;
+
+int apic_init(CPUState *env);
+int apic_get_interrupt(CPUState *env);
+IOAPICState *ioapic_init(void);
+void ioapic_set_irq(void *opaque, int vector, int level);
+
+/* i8254.c */
+
+#define PIT_FREQ 1193182
+
+typedef struct PITState PITState;
+
+PITState *pit_init(int base, int irq);
+void pit_set_gate(PITState *pit, int channel, int val);
+int pit_get_gate(PITState *pit, int channel);
+int pit_get_initial_count(PITState *pit, int channel);
+int pit_get_mode(PITState *pit, int channel);
+int pit_get_out(PITState *pit, int channel, int64_t current_time);
+
+/* pcspk.c */
+void pcspk_init(PITState *);
+int pcspk_audio_init(AudioState *);
+
+#include "hw/smbus.h"
+
+/* acpi.c */
+extern int acpi_enabled;
+void piix4_pm_init(PCIBus *bus, int devfn);
+void piix4_smbus_register_device(SMBusDevice *dev, uint8_t addr);
+void acpi_bios_init(void);
+
+/* smbus_eeprom.c */
+SMBusDevice *smbus_eeprom_device_init(uint8_t addr, uint8_t *buf);
+
+/* pc.c */
+extern QEMUMachine pc_machine;
+extern QEMUMachine isapc_machine;
+extern int fd_bootchk;
+
+void ioport_set_a20(int enable);
+int ioport_get_a20(void);
+
+/* ppc.c */
+extern QEMUMachine prep_machine;
+extern QEMUMachine core99_machine;
+extern QEMUMachine heathrow_machine;
+
+/* mips_r4k.c */
+extern QEMUMachine mips_machine;
+
+/* mips_malta.c */
+extern QEMUMachine mips_malta_machine;
+
+/* mips_int.c */
+extern void cpu_mips_irq_request(void *opaque, int irq, int level);
+
+/* mips_timer.c */
+extern void cpu_mips_clock_init(CPUState *);
+extern void cpu_mips_irqctrl_init (void);
+
+/* shix.c */
+extern QEMUMachine shix_machine;
+
+#ifdef TARGET_PPC
+ppc_tb_t *cpu_ppc_tb_init (CPUState *env, uint32_t freq);
+#endif
+void PREP_debug_write (void *opaque, uint32_t addr, uint32_t val);
+
+extern CPUWriteMemoryFunc *PPC_io_write[];
+extern CPUReadMemoryFunc *PPC_io_read[];
+void PPC_debug_write (void *opaque, uint32_t addr, uint32_t val);
+
+/* sun4m.c */
+extern QEMUMachine sun4m_machine;
+void pic_set_irq_cpu(int irq, int level, unsigned int cpu);
+
+/* iommu.c */
+void *iommu_init(uint32_t addr);
+void sparc_iommu_memory_rw(void *opaque, target_phys_addr_t addr,
+                                 uint8_t *buf, int len, int is_write);
+static inline void sparc_iommu_memory_read(void *opaque,
+                                           target_phys_addr_t addr,
+                                           uint8_t *buf, int len)
+{
+    sparc_iommu_memory_rw(opaque, addr, buf, len, 0);
+}
+
+static inline void sparc_iommu_memory_write(void *opaque,
+                                            target_phys_addr_t addr,
+                                            uint8_t *buf, int len)
+{
+    sparc_iommu_memory_rw(opaque, addr, buf, len, 1);
+}
+
+/* tcx.c */
+void tcx_init(DisplayState *ds, uint32_t addr, uint8_t *vram_base,
+	       unsigned long vram_offset, int vram_size, int width, int height);
+
+/* slavio_intctl.c */
+void *slavio_intctl_init();
+void slavio_intctl_set_cpu(void *opaque, unsigned int cpu, CPUState *env);
+void slavio_pic_info(void *opaque);
+void slavio_irq_info(void *opaque);
+void slavio_pic_set_irq(void *opaque, int irq, int level);
+void slavio_pic_set_irq_cpu(void *opaque, int irq, int level, unsigned int cpu);
+
+/* loader.c */
+int get_image_size(const char *filename);
+int load_image(const char *filename, uint8_t *addr);
+int load_elf(const char *filename, int64_t virt_to_phys_addend, uint64_t *pentry);
+int load_aout(const char *filename, uint8_t *addr);
+
+/* slavio_timer.c */
+void slavio_timer_init(uint32_t addr, int irq, int mode, unsigned int cpu);
+
+/* slavio_serial.c */
+SerialState *slavio_serial_init(int base, int irq, CharDriverState *chr1, CharDriverState *chr2);
+void slavio_serial_ms_kbd_init(int base, int irq);
+
+/* slavio_misc.c */
+void *slavio_misc_init(uint32_t base, int irq);
+void slavio_set_power_fail(void *opaque, int power_failing);
+
+/* esp.c */
+void esp_scsi_attach(void *opaque, BlockDriverState *bd, int id);
+void *esp_init(BlockDriverState **bd, uint32_t espaddr, void *dma_opaque);
+void esp_reset(void *opaque);
+
+/* sparc32_dma.c */
+void *sparc32_dma_init(uint32_t daddr, int espirq, int leirq, void *iommu,
+                       void *intctl);
+void ledma_set_irq(void *opaque, int isr);
+void ledma_memory_read(void *opaque, target_phys_addr_t addr, 
+                       uint8_t *buf, int len, int do_bswap);
+void ledma_memory_write(void *opaque, target_phys_addr_t addr, 
+                        uint8_t *buf, int len, int do_bswap);
+void espdma_raise_irq(void *opaque);
+void espdma_clear_irq(void *opaque);
+void espdma_memory_read(void *opaque, uint8_t *buf, int len);
+void espdma_memory_write(void *opaque, uint8_t *buf, int len);
+void sparc32_dma_set_reset_data(void *opaque, void *esp_opaque,
+                                void *lance_opaque);
+
+/* cs4231.c */
+void cs_init(target_phys_addr_t base, int irq, void *intctl);
+
+/* sun4u.c */
+extern QEMUMachine sun4u_machine;
+
+/* NVRAM helpers */
+#include "hw/m48t59.h"
+
+void NVRAM_set_byte (m48t59_t *nvram, uint32_t addr, uint8_t value);
+uint8_t NVRAM_get_byte (m48t59_t *nvram, uint32_t addr);
+void NVRAM_set_word (m48t59_t *nvram, uint32_t addr, uint16_t value);
+uint16_t NVRAM_get_word (m48t59_t *nvram, uint32_t addr);
+void NVRAM_set_lword (m48t59_t *nvram, uint32_t addr, uint32_t value);
+uint32_t NVRAM_get_lword (m48t59_t *nvram, uint32_t addr);
+void NVRAM_set_string (m48t59_t *nvram, uint32_t addr,
+                       const unsigned char *str, uint32_t max);
+int NVRAM_get_string (m48t59_t *nvram, uint8_t *dst, uint16_t addr, int max);
+void NVRAM_set_crc (m48t59_t *nvram, uint32_t addr,
+                    uint32_t start, uint32_t count);
+int PPC_NVRAM_set_params (m48t59_t *nvram, uint16_t NVRAM_size,
+                          const unsigned char *arch,
+                          uint32_t RAM_size, int boot_device,
+                          uint32_t kernel_image, uint32_t kernel_size,
+                          const char *cmdline,
+                          uint32_t initrd_image, uint32_t initrd_size,
+                          uint32_t NVRAM_image,
+                          int width, int height, int depth);
+
+/* adb.c */
+
+#define MAX_ADB_DEVICES 16
+
+#define ADB_MAX_OUT_LEN 16
+
+typedef struct ADBDevice ADBDevice;
+
+/* buf = NULL means polling */
+typedef int ADBDeviceRequest(ADBDevice *d, uint8_t *buf_out,
+                              const uint8_t *buf, int len);
+typedef int ADBDeviceReset(ADBDevice *d);
+
+struct ADBDevice {
+    struct ADBBusState *bus;
+    int devaddr;
+    int handler;
+    ADBDeviceRequest *devreq;
+    ADBDeviceReset *devreset;
+    void *opaque;
+};
+
+typedef struct ADBBusState {
+    ADBDevice devices[MAX_ADB_DEVICES];
+    int nb_devices;
+    int poll_index;
+} ADBBusState;
+
+int adb_request(ADBBusState *s, uint8_t *buf_out,
+                const uint8_t *buf, int len);
+int adb_poll(ADBBusState *s, uint8_t *buf_out);
+
+ADBDevice *adb_register_device(ADBBusState *s, int devaddr, 
+                               ADBDeviceRequest *devreq, 
+                               ADBDeviceReset *devreset, 
+                               void *opaque);
+void adb_kbd_init(ADBBusState *bus);
+void adb_mouse_init(ADBBusState *bus);
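+
+/* A minimal sketch of an ADB device model built only on the declarations
+   above.  Per the comment on ADBDeviceRequest, a NULL 'buf' means the bus is
+   polling the device rather than sending it a command.  The my_* names are
+   hypothetical. */
+#if 0
+static int my_adb_request(ADBDevice *d, uint8_t *buf_out,
+                          const uint8_t *buf, int len)
+{
+    if (buf == NULL) {
+        /* polling: copy any queued event into buf_out and return its length,
+           or 0 when nothing is pending */
+        return 0;
+    }
+    /* otherwise decode the command in buf[0..len-1] and reply via buf_out */
+    return 0;
+}
+
+static int my_adb_reset(ADBDevice *d)
+{
+    /* restore the device's power-on state */
+    return 0;
+}
+
+/* dev = adb_register_device(bus, devaddr, my_adb_request, my_adb_reset, state); */
+#endif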
+
+/* cuda.c */
+
+extern ADBBusState adb_bus;
+int cuda_init(SetIRQFunc *set_irq, void *irq_opaque, int irq);
+
+#include "hw/usb.h"
+
+/* usb ports of the VM */
+
+void qemu_register_usb_port(USBPort *port, void *opaque, int index,
+                            usb_attachfn attach);
+
+#define VM_USB_HUB_SIZE 8
+
+void do_usb_add(const char *devname);
+void do_usb_del(const char *devname);
+void usb_info(void);
+
+/* scsi-disk.c */
+enum scsi_reason {
+    SCSI_REASON_DONE, /* Command complete.  */
+    SCSI_REASON_DATA  /* Transfer complete, more data required.  */
+};
+
+typedef struct SCSIDevice SCSIDevice;
+typedef void (*scsi_completionfn)(void *opaque, int reason, uint32_t tag,
+                                  uint32_t arg);
+
+SCSIDevice *scsi_disk_init(BlockDriverState *bdrv,
+                           int tcq,
+                           scsi_completionfn completion,
+                           void *opaque);
+void scsi_disk_destroy(SCSIDevice *s);
+
+int32_t scsi_send_command(SCSIDevice *s, uint32_t tag, uint8_t *buf, int lun);
+/* SCSI data transfers are asynchronous.  However, unlike in the block IO
+   layer, the completion routine may be called directly by
+   scsi_{read,write}_data.  */
+void scsi_read_data(SCSIDevice *s, uint32_t tag);
+int scsi_write_data(SCSIDevice *s, uint32_t tag);
+void scsi_cancel_io(SCSIDevice *s, uint32_t tag);
+uint8_t *scsi_get_buf(SCSIDevice *s, uint32_t tag);
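+
+/* A minimal sketch of the completion protocol described above, as a SCSI
+   controller model might implement it; the my_* names and 'ctrl' are
+   hypothetical. */
+#if 0
+static void my_scsi_complete(void *opaque, int reason, uint32_t tag, uint32_t arg)
+{
+    if (reason == SCSI_REASON_DATA) {
+        /* 'arg' bytes are available via scsi_get_buf(); consume them, then
+           call scsi_read_data()/scsi_write_data() again for the next chunk.
+           Note this callback may be invoked directly from those calls. */
+    } else {
+        /* SCSI_REASON_DONE: the command identified by 'tag' has finished */
+    }
+}
+
+static void my_start_read(void *ctrl, BlockDriverState *bdrv,
+                          uint32_t tag, uint8_t *cdb, int lun)
+{
+    SCSIDevice *d = scsi_disk_init(bdrv, 0, my_scsi_complete, ctrl);
+
+    if (scsi_send_command(d, tag, cdb, lun) > 0) {
+        scsi_read_data(d, tag);   /* completion fires when data is ready */
+    }
+}
+#endif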
+
+/* lsi53c895a.c */
+void lsi_scsi_attach(void *opaque, BlockDriverState *bd, int id);
+void *lsi_scsi_init(PCIBus *bus, int devfn);
+
+/* integratorcp.c */
+extern QEMUMachine integratorcp926_machine;
+extern QEMUMachine integratorcp1026_machine;
+
+/* versatilepb.c */
+extern QEMUMachine versatilepb_machine;
+extern QEMUMachine versatileab_machine;
+
+/* realview.c */
+extern QEMUMachine realview_machine;
+
+/* ps2.c */
+void *ps2_kbd_init(void (*update_irq)(void *, int), void *update_arg);
+void *ps2_mouse_init(void (*update_irq)(void *, int), void *update_arg);
+void ps2_write_mouse(void *, int val);
+void ps2_write_keyboard(void *, int val);
+uint32_t ps2_read_data(void *);
+void ps2_queue(void *, int b);
+void ps2_keyboard_set_translation(void *opaque, int mode);
+
+/* smc91c111.c */
+void smc91c111_init(NICInfo *, uint32_t, void *, int);
+
+/* pl110.c */
+void *pl110_init(DisplayState *ds, uint32_t base, void *pic, int irq, int);
+
+/* pl011.c */
+void pl011_init(uint32_t base, void *pic, int irq, CharDriverState *chr);
+
+/* pl050.c */
+void pl050_init(uint32_t base, void *pic, int irq, int is_mouse);
+
+/* pl080.c */
+void *pl080_init(uint32_t base, void *pic, int irq, int nchannels);
+
+/* pl190.c */
+void *pl190_init(uint32_t base, void *parent, int irq, int fiq);
+
+/* arm-timer.c */
+void sp804_init(uint32_t base, void *pic, int irq);
+void icp_pit_init(uint32_t base, void *pic, int irq);
+
+/* arm_sysctl.c */
+void arm_sysctl_init(uint32_t base, uint32_t sys_id);
+
+/* arm_gic.c */
+void *arm_gic_init(uint32_t base, void *parent, int parent_irq);
+
+/* arm_boot.c */
+
+void arm_load_kernel(CPUState *env, int ram_size, const char *kernel_filename,
+                     const char *kernel_cmdline, const char *initrd_filename,
+                     int board_id);
+
+/* sh7750.c */
+struct SH7750State;
+
+struct SH7750State *sh7750_init(CPUState * cpu);
+
+typedef struct {
+    /* The callback will be triggered if any of the designated lines change */
+    uint16_t portamask_trigger;
+    uint16_t portbmask_trigger;
+    /* Return 0 if no action was taken */
+    int (*port_change_cb) (uint16_t porta, uint16_t portb,
+			   uint16_t * periph_pdtra,
+			   uint16_t * periph_portdira,
+			   uint16_t * periph_pdtrb,
+			   uint16_t * periph_portdirb);
+} sh7750_io_device;
+
+int sh7750_register_io_device(struct SH7750State *s,
+			      sh7750_io_device * device);
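+
+/* A minimal sketch of hooking a board peripheral onto the port interface
+   above: the two mask fields select which PORTA/PORTB lines the peripheral
+   watches, and port_change_cb runs whenever one of them changes.  The my_*
+   names are hypothetical. */
+#if 0
+static int my_port_change(uint16_t porta, uint16_t portb,
+                          uint16_t *periph_pdtra, uint16_t *periph_portdira,
+                          uint16_t *periph_pdtrb, uint16_t *periph_portdirb)
+{
+    if (porta & 0x0001) {
+        /* react to PORTA bit 0, optionally driving the periph_* outputs */
+        return 1;   /* action taken */
+    }
+    return 0;       /* per the comment above: no action taken */
+}
+
+static sh7750_io_device my_io_device = {
+    0x0001,           /* portamask_trigger: watch PORTA bit 0 */
+    0x0000,           /* portbmask_trigger: ignore PORTB */
+    my_port_change,
+};
+
+/* sh7750_register_io_device(s, &my_io_device); */
+#endif
+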
+/* tc58128.c */
+int tc58128_init(struct SH7750State *s, char *zone1, char *zone2);
+
+/* NOR flash devices */
+typedef struct pflash_t pflash_t;
+
+pflash_t *pflash_register (target_ulong base, ram_addr_t off,
+                           BlockDriverState *bs,
+                           target_ulong sector_len, int nb_blocs, int width,
+                           uint16_t id0, uint16_t id1, 
+                           uint16_t id2, uint16_t id3);
+
+#include "gdbstub.h"
+
+#endif /* defined(QEMU_TOOL) */
+
+/* monitor.c */
+void monitor_init(CharDriverState *hd, int show_banner);
+void term_puts(const char *str);
+void term_vprintf(const char *fmt, va_list ap);
+void term_printf(const char *fmt, ...) __attribute__ ((__format__ (__printf__, 1, 2)));
+void term_print_filename(const char *filename);
+void term_flush(void);
+void term_print_help(void);
+void monitor_readline(const char *prompt, int is_password,
+                      char *buf, int buf_size);
+
+/* readline.c */
+typedef void ReadLineFunc(void *opaque, const char *str);
+
+extern int completion_index;
+void add_completion(const char *str);
+void readline_handle_byte(int ch);
+void readline_find_completion(const char *cmdline);
+const char *readline_get_history(unsigned int index);
+void readline_start(const char *prompt, int is_password,
+                    ReadLineFunc *readline_func, void *opaque);
+
+void kqemu_record_dump(void);
+
+#endif /* !VBOX */
+
+#endif /* VL_H */
