Index: /trunk/src/bldprogs/VBoxDef2LazyLoad.cpp
===================================================================
--- /trunk/src/bldprogs/VBoxDef2LazyLoad.cpp	(revision 87281)
+++ /trunk/src/bldprogs/VBoxDef2LazyLoad.cpp	(revision 87282)
@@ -27,4 +27,5 @@
 #include <stdlib.h>
 #include <iprt/types.h>
+#include <iprt/ldr.h> /* For RTLDRARCH. */
 
 
@@ -46,5 +47,4 @@
 
 
-
 /*********************************************************************************************************************************
 *   Global Variables                                                                                                             *
@@ -59,4 +59,13 @@
 static bool         g_fWithExplictLoadFunction = false;
 static bool         g_fSystemLibrary = false;
+#if   defined(RT_ARCH_AMD64) /* The initial target follows whichever RT_ARCH_* macro this build defines. */
+static RTLDRARCH    g_enmTarget = RTLDRARCH_AMD64;
+#elif defined(RT_ARCH_X86)
+static RTLDRARCH    g_enmTarget = RTLDRARCH_X86_32; /* Must be the constant the output dispatch switch tests. */
+#elif defined(RT_ARCH_ARM64)
+static RTLDRARCH    g_enmTarget = RTLDRARCH_ARM64;
+#else
+# error "Port me!"
+#endif
 /** @} */
 
@@ -321,5 +330,6 @@
 
 /**
- * Generates the assembly source code, writing it to @a pOutput.
+ * Generates the assembly source code for AMD64 and x86, writing it
+ * to @a pOutput.
  *
  * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE, in the latter case full
@@ -328,5 +338,5 @@
  *                               when closing).
  */
-static RTEXITCODE generateOutputInner(FILE *pOutput)
+static RTEXITCODE generateOutputInnerX86AndAMD64(FILE *pOutput)
 {
     fprintf(pOutput, ";;\n");
@@ -983,4 +993,460 @@
 
 /**
+ * Generates the assembly source code for ARM64 (Mach-O flavoured: '_' symbol
+ * prefix, __TEXT/__DATA sections, ';' comments), writing it to @a pOutput.
+ *
+ * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE after printing an error to
+ *          stderr when --system is combined with --explicit-load-function.
+ * @param   pOutput              The output stream (caller checks it for errors
+ *                               when closing).
+ */
+static RTEXITCODE generateOutputInnerArm64(FILE *pOutput)
+{
+    /* Only Mach-O style output is produced so far, so the prefix that C level
+       symbols carry in the generated assembly is fixed. */
+    const char *pszNmPfx = "_";
+
+    fprintf(pOutput, ";;\n");
+    for (unsigned i = 0; i < g_cInputs; i++)
+        fprintf(pOutput, ";; Autogenerated from '%s'.\n", g_apszInputs[i]);
+
+    fprintf(pOutput,
+            ";; DO NOT EDIT!\n"
+            ";;\n"
+            "\n"
+            "\n"
+            /*"%%include \"iprt/asmdefs.mac\"\n"*/
+            "\n"
+            "\n");
+
+    /* Put the thunks first for alignment and other reasons; it's the hot part of the code.
+     * Each thunk loads the current value of its import pointer and tail-jumps (br) through
+     * it, so the link register keeps holding the original caller's return address. */
+    fprintf(pOutput,
+            ";\n"
+            "; Thunks.\n"
+            ";\n"
+            ".section __TEXT,__text,regular,pure_instructions\n"
+            ".p2align 2\n");
+    for (PMYEXPORT pExp = g_pExpHead; pExp; pExp = pExp->pNext)
+        fprintf(pOutput,
+                ".globl %s%s\n"
+                "%s%s:\n"
+                "    ldr     x9, =%sg_pfn%s\n"
+                "    ldr     x9, [x9]\n"
+                "    br      x9\n",
+                pszNmPfx, pExp->szName, pszNmPfx, pExp->szName, pszNmPfx, pExp->szName);
+    fprintf(pOutput,
+            "\n"
+            "\n");
+
+    /*
+     * Import pointers
+     */
+    fprintf(pOutput,
+            ";\n"
+            "; Import pointers. Initialized to point a lazy loading stubs.\n"
+            ";\n"
+            ".section __DATA,__data\n"
+            ".p2align 2\n"
+            "g_apfnImports:\n");
+    for (PMYEXPORT pExp = g_pExpHead; pExp; pExp = pExp->pNext)
+        fprintf(pOutput,
+                ".globl __imp_%s\n"
+                "__imp_%s:\n"
+                ".globl %sg_pfn%s\n"
+                "%sg_pfn%s:\n"
+                "    .quad ___LazyLoad___%s\n"
+                "\n",
+                pExp->szName, pExp->szName,
+                pszNmPfx, pExp->szName, pszNmPfx, pExp->szName,
+                pExp->pszExportedNm);
+    fprintf(pOutput,
+            "    .quad 0 ; Terminator entry for traversal.\n"
+            "\n"
+            "\n");
+
+    /*
+     * Now for the less important stuff, starting with the names.
+     *
+     * We keep the names separate so we can traverse them in parallel to
+     * g_apfnImports in the load-everything routine further down.
+     */
+    fprintf(pOutput,
+            ";\n"
+            "; Imported names.\n"
+            ";\n"
+            ".section __TEXT,__cstring,cstring_literals\n"
+            "g_szLibrary:\n"
+            "    .asciz \"%s\"\n"
+            "\n"
+            "g_szzNames:\n",
+            g_pszLibrary);
+    for (PMYEXPORT pExp = g_pExpHead; pExp; pExp = pExp->pNext)
+        if (!pExp->fNoName)
+            fprintf(pOutput, "  g_sz%s:\n    .asciz \"%s\"\n", pExp->pszExportedNm, pExp->pszExportedNm);
+        else
+            fprintf(pOutput, "  g_sz%s:\n    .asciz \"#%u\"\n", pExp->pszExportedNm, pExp->uOrdinal);
+    fprintf(pOutput,
+            "g_EndOfNames: .byte 0\n"
+            "\n"
+            "g_szFailLoadFmt:    .asciz \"Lazy loader failed to load \\\"%%s\\\": %%Rrc\\n\"\n"
+            "g_szFailResolveFmt: .asciz \"Lazy loader failed to resolve symbol \\\"%%s\\\" in \\\"%%s\\\": %%Rrc\\n\"\n"
+            "\n"
+            "\n");
+
+    /* The per import lazy load code.  The stub saves x29/x30 around the resolver call as
+     * 'bl' overwrites the link register and the imported function must still return to
+     * the original caller; it finishes by re-entering the thunk, which re-reads g_pfnXXXX. */
+    fprintf(pOutput,
+            ";\n"
+            "; Lazy load+resolve stubs.\n"
+            ";\n"
+            ".section __TEXT,__text,regular,pure_instructions\n"
+            ".p2align 2\n");
+    for (PMYEXPORT pExp = g_pExpHead; pExp; pExp = pExp->pNext)
+    {
+        fprintf(pOutput,
+                "___LazyLoad___%s:\n"
+                "    stp     x29, x30, [sp, #-16]!\n"
+                , pExp->pszExportedNm);
+        if (!pExp->fNoName)
+            fprintf(pOutput, "    ldr     x9, =g_sz%s\n", pExp->pszExportedNm);
+        else /* By ordinal: a plain mov, since movk would keep the stale bits 16..31 of w9. */
+            fprintf(pOutput, "    mov     w9, #%u\n", pExp->uOrdinal);
+        fprintf(pOutput,
+                "    ldr     x10, =%sg_pfn%s\n"
+                "    bl      LazyLoadResolver\n"
+                "    ldp     x29, x30, [sp], #16\n"
+                "    b       %s%s\n"
+                "\n"
+                , pszNmPfx, pExp->szName, pszNmPfx, pExp->szName);
+    }
+    fprintf(pOutput,
+            "\n"
+            "\n"
+            "\n");
+
+    /*
+     * The code that does the loading and resolving.
+     */
+    fprintf(pOutput,
+            ";\n"
+            "; The module handle.\n"
+            ";\n"
+            ".section __DATA,__data\n"
+            "g_hMod:\n"
+            "    .quad 0\n"
+            "\n"
+            "\n"
+            "\n");
+
+    /*
+     * Common lazy loader and resolver.
+     */
+    fprintf(pOutput,
+            ";\n"
+            "; The resolver code.\n"
+            ";\n"
+            ".section __TEXT,__text,regular,pure_instructions\n"
+            ".p2align 2\n"
+            "LazyLoadResolver:\n"
+            "    .cfi_startproc\n"
+            "    ; Create frame.\n"
+            "    sub     sp, sp, #(16 + 192)\n"
+            "    stp     x29, x30, [sp, #192]\n"
+            "    add     x29, sp, #192\n"
+            "    .cfi_def_cfa x29, 16\n"
+            "    .cfi_offset x30, -8\n"
+            "    .cfi_offset x29, -16\n"
+            "    ; Save all argument registers and a handful of preserved ones.\n"
+            "    stp     x0,   x1, [sp, #(192 - 16)]\n"
+            "    .cfi_offset  x0, -32\n"
+            "    .cfi_offset  x1, -24\n"
+            "    stp     x2,   x3, [sp, #(192 - 32)]\n"
+            "    .cfi_offset  x3, -40\n"
+            "    .cfi_offset  x2, -48\n"
+            "    stp     x4,   x5, [sp, #(192 - 48)]\n"
+            "    .cfi_offset  x5, -56\n"
+            "    .cfi_offset  x4, -64\n"
+            "    stp     x6,   x7, [sp, #(192 - 64)]\n"
+            "    .cfi_offset  x7, -72\n"
+            "    .cfi_offset  x6, -80\n"
+            "    stp     x16, x17, [sp, #(192 - 80)]\n"
+            "    .cfi_offset x17, -88\n"
+            "    .cfi_offset x16, -96\n"
+            "    stp     x18, x19, [sp, #(192 - 96)]\n"
+            "    .cfi_offset x19, -104\n"
+            "    .cfi_offset x18, -112\n"
+            "    stp     x20, x21, [sp, #(192 - 112)]\n"
+            "    .cfi_offset x21, -120\n"
+            "    .cfi_offset x20, -128\n"
+            "    stp     x22, x23, [sp, #(192 - 128)]\n"
+            "    .cfi_offset x23, -136\n"
+            "    .cfi_offset x22, -144\n"
+            "    str     x8,       [sp, #(192 - 144)]\n"
+            "\n"
+            "    ; Shift the symbol name to x19 and g_pfnXXXX pointer to x20 as these are preserved registers\n"
+            "    ; (in case we need to call LazyLoadModule/RTLdrLoad)\n"
+            "    mov     x19, x9\n"
+            "    mov     x20, x10\n"
+            "\n"
+            "    ; Get the module handle and call RTLdrGetSymbol(RTLDRMOD hLdrMod, const char *pszSymbol, void **ppvValue)\n"
+            "    ldr     x0, =g_hMod\n"
+            "    ldr     x0, [x0]\n"
+            "    cbnz    x0, Lloaded\n"
+            "    bl      LazyLoading             ; Returns the module handle in x0.\n"
+            "Lloaded:\n"
+            "    mov     x1, x19\n"
+            "    mov     x2, x20\n"
+            "    bl      %sRTLdrGetSymbol\n"
+            "\n"
+            "    cmp     w0, #0\n"
+            "    b.eq    Lreturn\n"
+            "\n"
+            "Lbadsym: ; Call sRTAssertMsg2Weak. Variadic (...) arguments are passed on the stack it seems.\n"
+            "    mov     x3, x0\n"
+            "    ldr     x2, =g_szLibrary\n"
+            "    mov     x1, x19\n"
+            "    ldr     x0, =g_szFailResolveFmt\n"
+            "    stp     x1, x2, [sp]\n"
+            "    str     x3,     [sp, #16]\n"
+            "    bl      %sRTAssertMsg2Weak\n"
+            "Lbadsymloop:\n"
+            "    brk     #0x1\n"
+            "    b       Lbadsymloop\n"
+            "Lreturn:\n"
+            "    ; Restore saved register\n"
+            "    ldr     x8,       [sp, #(192 - 144)]\n"
+            "    .cfi_restore x8\n"
+            "    ldp     x22, x23, [sp, #(192 - 128)]\n"
+            "    .cfi_restore x23\n"
+            "    .cfi_restore x22\n"
+            "    ldp     x20, x21, [sp, #(192 - 112)]\n"
+            "    .cfi_restore x21\n"
+            "    .cfi_restore x20\n"
+            "    ldp     x18, x19, [sp, #(192 - 96)]\n"
+            "    .cfi_restore x19\n"
+            "    .cfi_restore x18\n"
+            "    ldp     x16, x17, [sp, #(192 - 80)]\n"
+            "    .cfi_restore x17\n"
+            "    .cfi_restore x16\n"
+            "    ldp     x6,   x7, [sp, #(192 - 64)]\n"
+            "    .cfi_restore x7\n"
+            "    .cfi_restore x6\n"
+            "    ldp     x4,   x5, [sp, #(192 - 48)]\n"
+            "    .cfi_restore x5\n"
+            "    .cfi_restore x4\n"
+            "    ldp     x2,   x3, [sp, #(192 - 32)]\n"
+            "    .cfi_restore x3\n"
+            "    .cfi_restore x2\n"
+            "    ldp     x0,   x1, [sp, #(192 - 16)]\n"
+            "    .cfi_restore x1\n"
+            "    .cfi_restore x0\n"
+            "\n"
+            "    ldp     x29, x30, [sp, #192]\n"
+            "    .cfi_restore x29\n"
+            "    .cfi_restore x30\n"
+            "    add     sp, sp, #(16 + 192)\n"
+            "    ret\n"
+            "    .cfi_endproc\n"
+            "\n"
+            "\n"
+            , pszNmPfx, pszNmPfx);
+
+    fprintf(pOutput,
+            ";\n"
+            "; Loads the module.\n"
+            "; ASSUMES called from LazyLoadResolver where all relevant registers are already saved.\n"
+            ";\n"
+            "LazyLoading:\n"
+            "    .cfi_startproc\n"
+            "    ; Create frame.\n"
+            "    sub     sp, sp, #(16 + 48)\n"
+            "    stp     x29, x30, [sp, #48]\n"
+            "    add     x29, sp, #48\n"
+            "    .cfi_def_cfa x29, 16\n"
+            "    .cfi_offset x30, -8\n"
+            "    .cfi_offset x29, -16\n"
+            "\n");
+
+    if (!g_fSystemLibrary)
+        fprintf(pOutput,
+                "    ; Call SUPR3HardenedLdrLoadAppPriv(const char *pszFilename, PRTLDRMOD phLdrMod, uint32_t fFlags, PRTERRINFO pErrInfo);\n"
+                "    mov     x3, #0\n"
+                "    mov     x2, #0\n"
+                "    ldr     x1, =g_hMod\n"
+                "    ldr     x0, =g_szLibrary\n"
+                "    bl      %sSUPR3HardenedLdrLoadAppPriv\n"
+                , pszNmPfx);
+    else
+        fprintf(pOutput,
+                "    ; Call RTLdrLoadSystem(const char *pszFilename, bool fNoUnload, PRTLDRMOD phLdrMod);\n"
+                "    ldr     x2, =g_hMod\n"
+                "    mov     x1, #1\n"
+                "    ldr     x0, =g_szLibrary\n"
+                "    bl      %sRTLdrLoadSystem\n"
+                , pszNmPfx);
+
+    fprintf(pOutput,
+            "    cmp     w0, #0\n"
+            "    b.eq    Lload_return\n"
+            "\n"
+            "Lbadload: ; Call sRTAssertMsg2Weak. Variadic (...) arguments are passed on the stack it seems.\n"
+            "    mov     x2, x0\n"
+            "    ldr     x1, =g_szLibrary\n"
+            "    ldr     x0, =g_szFailLoadFmt\n"
+            "    stp     x1, x2, [sp]\n"
+            "    bl      %sRTAssertMsg2Weak\n"
+            "Lbadloadloop:\n"
+            "    brk     #0x1\n"
+            "    b       Lbadloadloop\n"
+            "Lload_return:\n"
+            "    ldr     x0, =g_hMod\n"
+            "    ldr     x0, [x0]\n"
+            "    ldp     x29, x30, [sp, #48]\n"
+            "    .cfi_restore x29\n"
+            "    .cfi_restore x30\n"
+            "    add     sp, sp, #(16 + 48)\n"
+            "    ret\n"
+            "    .cfi_endproc\n"
+            "\n"
+            "\n"
+            , pszNmPfx);
+
+    /*
+     * C callable method for explicitly loading the library and optionally
+     * resolving all the imports.
+     */
+    if (g_fWithExplictLoadFunction)
+    {
+        if (g_fSystemLibrary) /* Lazy bird. */
+        {
+            fprintf(stderr, "error: cannot use --system with --explicit-load-function, sorry\n");
+            return RTEXITCODE_FAILURE;
+        }
+
+        int cchLibBaseName = (int)(strchr(g_pszLibrary, '.') ? strchr(g_pszLibrary, '.') - g_pszLibrary : strlen(g_pszLibrary));
+        fprintf(pOutput,
+                ";;\n"
+                "; ExplicitlyLoad%.*s(bool fResolveAllImports, pErrInfo);\n"
+                ";\n"
+                ".section __TEXT,__text,regular,pure_instructions\n"
+                ".p2align 2\n"
+                ".globl %sExplicitlyLoad%.*s\n"
+                "%sExplicitlyLoad%.*s:\n"
+                "    .cfi_startproc\n"
+                "    ; Create frame.\n"
+                "    sub     sp, sp, #(16 + 96)\n"
+                "    stp     x29, x30, [sp, #96]\n"
+                "    add     x29, sp, #96\n"
+                "    .cfi_def_cfa x29, 16\n"
+                "    .cfi_offset x30, -8\n"
+                "    .cfi_offset x29, -16\n"
+                "\n"
+                "    stp     x20, x21, [sp, #(96 - 16)]\n"
+                "    .cfi_offset x21, -24\n"
+                "    .cfi_offset x20, -32\n"
+                "    stp     x22, x23, [sp, #(96 - 32)]\n"
+                "    .cfi_offset x23, -40\n"
+                "    .cfi_offset x22, -48\n"
+
+                "    ; Save the input parameters.\n"
+                "    mov     x20, x0\n"
+                "    mov     x21, x1\n"
+                "\n"
+                "    ;\n"
+                "    ; Is the module already loaded?\n"
+                "    ;\n"
+                "    ldr     x0, =g_hMod\n"
+                "    ldr     x0, [x0]\n"
+                "    cmp     x0, #0\n"
+                "    b.ne    Lexplicit_loaded_module\n"
+                "\n"
+                ,
+                cchLibBaseName, g_pszLibrary,
+                pszNmPfx, cchLibBaseName, g_pszLibrary,
+                pszNmPfx, cchLibBaseName, g_pszLibrary);
+        fprintf(pOutput,
+                "Lexplicit_load_module:\n"
+                "    ; Call SUPR3HardenedLdrLoadAppPriv(const char *pszFilename, PRTLDRMOD phLdrMod, uint32_t fFlags, PRTERRINFO pErrInfo);\n"
+                "    mov     x3, #0\n"
+                "    mov     x2, #0\n"
+                "    ldr     x1, =g_hMod\n"
+                "    ldr     x0, =g_szLibrary\n"
+                "    bl      %sSUPR3HardenedLdrLoadAppPriv\n"
+                "    cmp     w0, #0\n"
+                "    b.ne    Lexplicit_load_return\n"
+                "\n"
+                , pszNmPfx);
+
+        fprintf(pOutput,
+                "    ;\n"
+                "    ; Resolve the imports too if requested to do so.\n"
+                "    ;\n"
+                "Lexplicit_loaded_module:\n"
+                "    cmp     w20, #0\n"
+                "    b.eq    Lexplicit_load_return\n"
+                "\n"
+                "    ldr     x22, =g_szzNames\n"
+                "    ldr     x23, =g_apfnImports\n"
+                "Lexplicit_load_next_import:\n"
+                "    ldr     x0, [x23]\n"
+                "    cmp     x0, #0\n"
+                "    b.eq    Lexplicit_load_return\n"
+                "\n"
+                "    ; Get the module handle and call RTLdrGetSymbol(RTLDRMOD hLdrMod, const char *pszSymbol, void **ppvValue)\n"
+                "    ldr     x0, =g_hMod\n"
+                "    ldr     x0, [x0]\n"
+                "    mov     x1, x22\n"
+                "    mov     x2, x23\n"
+                "    bl      %sRTLdrGetSymbol\n"
+                "    cmp     w0, #0\n"
+                "    b.ne    Lexplicit_load_symbol_error\n"
+                "\n"
+                "    ; Advance.\n"
+                "    add     x23, x23, #8\n"
+                "Lexplict_load_advance_string:\n"
+                "    ldrb    w0, [x22]\n"
+                "    add     x22, x22, #1\n"
+                "    cmp     w0, #0\n"
+                "    b.ne    Lexplict_load_advance_string\n"
+                "    b       Lexplicit_load_next_import\n"
+                "\n"
+                "    ;\n"
+                "    ; Error loading a symbol. Call RTErrInfoSet(PRTERRINFO pErrInfo, int rc, const char *pszMsg) on pErrInfo (preserves x0).\n"
+                "    ;\n"
+                "Lexplicit_load_symbol_error:\n"
+                "    mov     x2, x22\n"
+                "    mov     x1, x0\n"
+                "    mov     x0, x21\n"
+                "    bl      %sRTErrInfoSet\n"
+                "    b       Lexplicit_load_return"
+                "    "
+                "\n"
+                "Lexplicit_load_return:\n"
+                "    ldp     x22,   x23, [sp, #(96 - 32)]\n"
+                "    .cfi_restore x23\n"
+                "    .cfi_restore x22\n"
+                "    ldp     x20,   x21, [sp, #(96 - 16)]\n"
+                "    .cfi_restore x21\n"
+                "    .cfi_restore x20\n"
+                "\n"
+                "    ldp     x29, x30, [sp, #96]\n"
+                "    .cfi_restore x29\n"
+                "    .cfi_restore x30\n"
+                "    add     sp, sp, #(16 + 96)\n"
+                "    ret\n"
+                "    .cfi_endproc\n"
+                "\n"
+                "\n"
+                , pszNmPfx, pszNmPfx);
+    }
+
+    return RTEXITCODE_SUCCESS;
+}
+
+
+/**
  * Generates the assembly source code, writing it to g_pszOutput.
  *
@@ -994,5 +1460,17 @@
     if (pOutput)
     {
-        rcExit = generateOutputInner(pOutput);
+        switch (g_enmTarget)
+        {
+            case RTLDRARCH_AMD64:
+            case RTLDRARCH_X86_32:
+                rcExit = generateOutputInnerX86AndAMD64(pOutput);
+                break;
+            case RTLDRARCH_ARM64:
+                rcExit = generateOutputInnerArm64(pOutput);
+                break;
+            default:
+                rcExit = RTEXITCODE_FAILURE;
+                break;
+        }
         if (fclose(pOutput))
         {
