[pkg-lua-devel] Bug#1067890: luajit: diff for NMU version 2.1.0+openresty20240815-1.1
Gianfranco Costamagna
locutusofborg at debian.org
Sat Nov 2 09:29:46 GMT 2024
Il 02/11/24 10:12, Gianfranco Costamagna ha scritto:
> Package: luajit
> Version: 2.1.0+openresty20240815-1
> Severity: normal
> Tags: patch pending
>
> Dear maintainer,
>
> I've prepared an NMU for luajit (versioned as 2.1.0+openresty20240815-1.1) and
> uploaded it to DELAYED/15. Please feel free to tell me if I
> should delay it longer.
>
> Regards.
Updated diff without the extra .orig files
(a pull request has also been submitted upstream).
G.
-------------- next part --------------
diff -Nru luajit-2.1.0+openresty20240815/debian/changelog luajit-2.1.0+openresty20240815/debian/changelog
--- luajit-2.1.0+openresty20240815/debian/changelog 2024-08-18 23:22:02.000000000 +0200
+++ luajit-2.1.0+openresty20240815/debian/changelog 2024-11-02 10:07:38.000000000 +0100
@@ -1,3 +1,17 @@
+luajit (2.1.0+openresty20240815-1.1) unstable; urgency=medium
+
+ [ Bo YU <tsu.yubo at gmail.com> ]
+ * luajit: Add support for riscv64 (Closes: #1034484)
+
+ [ Xiaolin Zhao <zhaoxiaolin at loongson.cn> ]
+ * Add support for LoongArch64 (Closes: #1067890)
+
+ [ Gianfranco Costamagna ]
+ * Non-maintainer upload
+ * Rebase loongarch64 patch on top of riscv64 one
+
+ -- Gianfranco Costamagna <locutusofborg at debian.org> Sat, 02 Nov 2024 10:07:38 +0100
+
luajit (2.1.0+openresty20240815-1) unstable; urgency=medium
* New upstream version 2.1.0+openresty20240815
diff -Nru luajit-2.1.0+openresty20240815/debian/control luajit-2.1.0+openresty20240815/debian/control
--- luajit-2.1.0+openresty20240815/debian/control 2024-08-18 23:22:02.000000000 +0200
+++ luajit-2.1.0+openresty20240815/debian/control 2024-11-02 10:07:38.000000000 +0100
@@ -11,7 +11,7 @@
Build-Depends: debhelper-compat (= 13)
Package: luajit
-Architecture: amd64 arm64 armel armhf i386 mips64el mipsel s390x powerpc
+Architecture: amd64 arm64 armel armhf i386 mips64el mipsel s390x powerpc riscv64 loong64
Multi-Arch: foreign
Pre-Depends: ${misc:Pre-Depends}
Depends: libluajit-5.1-2 (= ${binary:Version}),
@@ -39,7 +39,7 @@
by its embeddable (i.e. library) version.
Package: libluajit-5.1-2
-Architecture: amd64 arm64 armel armhf i386 mips64el mipsel s390x powerpc
+Architecture: amd64 arm64 armel armhf i386 mips64el mipsel s390x powerpc riscv64 loong64
Multi-Arch: same
Pre-Depends: ${misc:Pre-Depends}
Depends: libluajit-5.1-common (= ${source:Version}),
@@ -60,7 +60,7 @@
Section: libdevel
Multi-Arch: same
Pre-Depends: ${misc:Pre-Depends}
-Architecture: amd64 arm64 armel armhf i386 mips64el mipsel s390x powerpc
+Architecture: amd64 arm64 armel armhf i386 mips64el mipsel s390x powerpc riscv64 loong64
Depends: libluajit-5.1-2 (= ${binary:Version}),
${misc:Depends},
Conflicts: libluajit2-5.1-dev
diff -Nru luajit-2.1.0+openresty20240815/debian/patches/0003_support_riscv64.patch luajit-2.1.0+openresty20240815/debian/patches/0003_support_riscv64.patch
--- luajit-2.1.0+openresty20240815/debian/patches/0003_support_riscv64.patch 1970-01-01 01:00:00.000000000 +0100
+++ luajit-2.1.0+openresty20240815/debian/patches/0003_support_riscv64.patch 2024-11-02 09:42:56.000000000 +0100
@@ -0,0 +1,10952 @@
+Description: add support for riscv64
+ Based on https://github.com/plctlab/LuaJIT/commits/riscv64-v2.1-branch/ without 9cd0040 and fix conflict by hand
+Author: infiwang at pm.me
+Origin: https://github.com/plctlab/LuaJIT/commits/riscv64-v2.1-branch/
+Bug: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1034484
+Last-Update: 2024-06-25
+---
+This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
+--- a/Makefile
++++ b/Makefile
+@@ -100,6 +100,7 @@
+ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
+ dis_mips64.lua dis_mips64el.lua \
+ dis_mips64r6.lua dis_mips64r6el.lua \
++ dis_riscv.lua dis_riscv64.lua \
+ vmdef.lua
+
+ ifeq (,$(findstring Windows,$(OS)))
+--- /dev/null
++++ b/dynasm/dasm_riscv.h
+@@ -0,0 +1,433 @@
++/*
++** DynASM RISC-V encoding engine.
++** Copyright (C) 2005-2022 Mike Pall. All rights reserved.
++** Released under the MIT license. See dynasm.lua for full copyright notice.
++*/
++
++#include <stddef.h>
++#include <stdarg.h>
++#include <string.h>
++#include <stdlib.h>
++
++#define DASM_ARCH "riscv"
++
++#ifndef DASM_EXTERN
++#define DASM_EXTERN(a,b,c,d) 0
++#endif
++
++/* Action definitions. Keep in sync with action_names in dasm_riscv.lua. */
++enum {
++ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
++ /* The following actions need a buffer position. */
++ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
++ /* The following actions also have an argument. */
++ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
++ DASM__MAX /* Number of actions; dasm_put() treats larger codes as plain words. */
++};
++
++/* Maximum number of section buffer positions for a single dasm_put() call. */
++#define DASM_MAXSECPOS 25
++
++/* DynASM encoder status codes. Action list offset or number are or'ed in. */
++#define DASM_S_OK 0x00000000
++#define DASM_S_NOMEM 0x01000000
++#define DASM_S_PHASE 0x02000000
++#define DASM_S_MATCH_SEC 0x03000000
++#define DASM_S_RANGE_I 0x11000000
++#define DASM_S_RANGE_SEC 0x12000000
++#define DASM_S_RANGE_LG 0x13000000
++#define DASM_S_RANGE_PC 0x14000000
++#define DASM_S_RANGE_REL 0x15000000
++#define DASM_S_UNDEF_LG 0x21000000
++#define DASM_S_UNDEF_PC 0x22000000
++
++/* Macros to convert positions (8 bit section + 24 bit index). */
++#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
++#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
++#define DASM_SEC2POS(sec) ((sec)<<24)
++#define DASM_POS2SEC(pos) ((pos)>>24)
++#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
++
++/* Action list type. */
++typedef const unsigned int *dasm_ActList;
++
++/* Per-section structure: pass-1 buffer plus position/offset bookkeeping. */
++typedef struct dasm_Section {
++ int *rbuf; /* Biased buffer pointer: buf minus the section bias, indexed by pos. */
++ int *buf; /* True buffer pointer. */
++ size_t bsize; /* Buffer size in bytes. */
++ int pos; /* Biased buffer position (section number in the top 8 bits). */
++ int epos; /* End of biased buffer position - max single put. */
++ int ofs; /* Byte offset into section (advanced by 4 per emitted word). */
++} dasm_Section;
++
++/* Core structure holding the DynASM encoding state. */
++struct dasm_State {
++ size_t psize; /* Allocated size of this structure. */
++ dasm_ActList actionlist; /* Current actionlist pointer. */
++ int *lglabels; /* Local/global chain/pos ptrs. */
++ size_t lgsize; /* Size of lglabels in bytes. */
++ int *pclabels; /* PC label chains/pos ptrs. */
++ size_t pcsize; /* Size of pclabels in bytes. */
++ void **globals; /* Array of globals. */
++ dasm_Section *section; /* Pointer to active section. */
++ size_t codesize; /* Total size of all code sections. */
++ int maxsection; /* 0 <= sectionidx < maxsection. */
++ int status; /* Status code. */
++ dasm_Section sections[1]; /* All sections. Alloc-extended. */
++};
++
++/* The size of the core structure depends on the max. number of sections. */
++#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
++
++
++/* Initialize DynASM state: allocate it for maxsection sections, clear tables. */
++void dasm_init(Dst_DECL, int maxsection)
++{
++ dasm_State *D;
++ size_t psz = 0;
++ Dst_REF = NULL; /* Start with no state; DASM_M_GROW below is expected to allocate it. */
++ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
++ D = Dst_REF;
++ D->psize = psz; /* Remember the allocated size for dasm_free(). */
++ D->lglabels = NULL;
++ D->lgsize = 0;
++ D->pclabels = NULL;
++ D->pcsize = 0;
++ D->globals = NULL;
++ D->maxsection = maxsection;
++ memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); /* status/section/actionlist are set by dasm_setup(). */
++}
++
++/* Free DynASM state: section buffers, both label arrays, then the state. */
++void dasm_free(Dst_DECL)
++{
++ dasm_State *D = Dst_REF;
++ int i;
++ for (i = 0; i < D->maxsection; i++)
++ if (D->sections[i].buf) /* Sections never written to have no buffer. */
++ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
++ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
++ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
++ DASM_M_FREE(Dst, D, D->psize); /* Must be last: D itself is released here. */
++}
++
++/* Setup global label array. Must be called before dasm_setup(). */
++void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
++{
++ dasm_State *D = Dst_REF;
++ D->globals = gl; /* Caller-owned array; dasm_encode() stores global label addresses in it. */
++ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); /* 10 slots precede the globals (dasm_link() scans globals from index 10). */
++}
++
++/* Grow PC label array to hold maxpc labels. Can be called after dasm_setup(), too. */
++void dasm_growpc(Dst_DECL, unsigned int maxpc)
++{
++ dasm_State *D = Dst_REF;
++ size_t osz = D->pcsize; /* Old size in bytes, to find the newly added tail. */
++ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
++ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); /* Zero only the bytes from osz to the new pcsize. */
++}
++
++/* Setup encoder: bind the action list and reset status, labels and sections. */
++void dasm_setup(Dst_DECL, const void *actionlist)
++{
++ dasm_State *D = Dst_REF;
++ int i;
++ D->actionlist = (dasm_ActList)actionlist;
++ D->status = DASM_S_OK;
++ D->section = &D->sections[0]; /* Section 0 is the initially active section. */
++ memset((void *)D->lglabels, 0, D->lgsize); /* Not NULL-checked: dasm_setupglobal() must have run. */
++ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
++ for (i = 0; i < D->maxsection; i++) {
++ D->sections[i].pos = DASM_SEC2POS(i); /* Positions carry the section number in the top 8 bits. */
++ D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; /* Bias so that rbuf[pos] addresses buf[index]. */
++ D->sections[i].ofs = 0;
++ }
++}
++
++
++#ifdef DASM_CHECKS
++#define CK(x, st) \
++ do { if (!(x)) { \
++ D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
++#define CKPL(kind, st) \
++ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
++ D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
++#else
++#define CK(x, st) ((void)0)
++#define CKPL(kind, st) ((void)0)
++#endif
++
++static int dasm_imms(int n) /* Range check for a signed 12-bit immediate. */
++{
++ return (n >= -2048 && n < 2048) ? n : 4096; /* n if in -2048..2047, else the sentinel 4096. */
++}
++/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
++void dasm_put(Dst_DECL, int start, ...)
++{
++ va_list ap;
++ dasm_State *D = Dst_REF;
++ dasm_ActList p = D->actionlist + start; /* start is an index into the action list. */
++ dasm_Section *sec = D->section;
++ int pos = sec->pos, ofs = sec->ofs;
++ int *b;
++
++ if (pos >= sec->epos) { /* Buffer may not hold one more put: grow it first. */
++ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
++ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
++ sec->rbuf = sec->buf - DASM_POS2BIAS(pos); /* Re-bias: buf may have changed. */
++ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
++ }
++
++ b = sec->rbuf;
++ b[pos++] = start; /* Each put begins with its action list offset. */
++
++ va_start(ap, start);
++ while (1) {
++ unsigned int ins = *p++;
++ unsigned int action = (ins >> 20);
++ if (action >= DASM__MAX || (ins & 0xf)) { /* Not an action word: a plain 4-byte word. */
++ ofs += 4;
++ } else {
++ ins >>= 4; /* Drop the zero low nibble; the action value field follows. */
++ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; /* Only REL_PC and later take a vararg. */
++ switch (action) {
++ case DASM_STOP: goto stop;
++ case DASM_SECTION:
++ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
++ D->section = &D->sections[n]; goto stop; /* Switch active section for the next put. */
++ case DASM_ESC: p++; ofs += 4; break; /* Next word is literal data, not an action. */
++ case DASM_REL_EXT: break;
++ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; /* Assume maximum padding for now. */
++ case DASM_REL_LG:
++ n = (ins & 2047) - 10; pl = D->lglabels + n;
++ /* Bkwd rel or global. */
++ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
++ pl += 10; n = *pl; /* n < 0 here: forward local reference, slot is 10 higher. */
++ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
++ goto linkrel;
++ case DASM_REL_PC:
++ pl = D->pclabels + n; CKPL(pc, PC);
++ putrel:
++ n = *pl;
++ if (n < 0) { /* Label exists. Get label pos and store it. */
++ b[pos] = -n;
++ } else {
++ linkrel:
++ b[pos] = n; /* Else link to rel chain, anchored at label. */
++ *pl = pos;
++ }
++ pos++;
++ break;
++ case DASM_LABEL_LG:
++ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
++ case DASM_LABEL_PC:
++ pl = D->pclabels + n; CKPL(pc, PC);
++ putlabel:
++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
++ }
++ *pl = -pos; /* Label exists now. */
++ b[pos++] = ofs; /* Store pass1 offset estimate. */
++ break;
++ case DASM_IMM:
++#ifdef DASM_CHECKS
++ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
++#endif
++ n >>= ((ins>>10)&31); /* Scale by the shift count held in bits 10..14 of the value field. */
++#ifdef DASM_CHECKS
++ if (ins & 0x8000)
++ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
++ else
++ CK((n>>((ins>>5)&31)) == 0, RANGE_I);
++#endif
++ b[pos++] = n;
++ break;
++ case DASM_IMMS:
++#ifdef DASM_CHECKS
++ CK(dasm_imms(n) != 4096, RANGE_I);
++#endif
++ b[pos++] = n; /* Stored unmodified; dasm_encode() splits it into the two S-type fields. */
++ break;
++ }
++ }
++ }
++stop:
++ va_end(ap);
++ sec->pos = pos; /* Commit position and offset estimate back to the section. */
++ sec->ofs = ofs;
++}
++#undef CK
++
++/* Pass 2: Link sections, shrink aligns, fix label offsets. Total size goes to *szp. */
++int dasm_link(Dst_DECL, size_t *szp)
++{
++ dasm_State *D = Dst_REF;
++ int secnum;
++ int ofs = 0; /* Accumulated correction; ends up as the total code size. */
++
++#ifdef DASM_CHECKS
++ *szp = 0;
++ if (D->status != DASM_S_OK) return D->status; /* Report an error recorded during pass 1. */
++ {
++ int pc;
++ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
++ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; /* Pending chain: referenced but never defined. */
++ }
++#endif
++
++ { /* Handle globals not defined in this translation unit. */
++ int idx;
++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
++ int n = D->lglabels[idx];
++ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
++ }
++ }
++
++ /* Combine all code sections. No support for data sections (yet). */
++ for (secnum = 0; secnum < D->maxsection; secnum++) {
++ dasm_Section *sec = D->sections + secnum;
++ int *b = sec->rbuf;
++ int pos = DASM_SEC2POS(secnum);
++ int lastpos = sec->pos;
++
++ while (pos != lastpos) {
++ dasm_ActList p = D->actionlist + b[pos++]; /* Each put starts with its action list offset. */
++ while (1) {
++ unsigned int ins = *p++;
++ unsigned int action = (ins >> 20);
++ if (ins & 0xf) continue; else ins >>= 4; /* Skip plain words; only actions matter here. */
++ switch (action) {
++ case DASM_STOP: case DASM_SECTION: goto stop;
++ case DASM_ESC: p++; break;
++ case DASM_REL_EXT: break;
++ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; /* Take back unneeded padding. */
++ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
++ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; /* Turn estimate into final offset. */
++ case DASM_IMM: case DASM_IMMS: pos++; break;
++ }
++ }
++ stop: (void)0;
++ }
++ ofs += sec->ofs; /* Next section starts right after current section. */
++ }
++
++ D->codesize = ofs; /* Total size of all code sections */
++ *szp = ofs;
++ return DASM_S_OK;
++}
++
++#ifdef DASM_CHECKS
++#define CK(x, st) \
++ do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
++#else
++#define CK(x, st) ((void)0)
++#endif
++
++/* Pass 3: Encode sections. */
++int dasm_encode(Dst_DECL, void *buffer)
++{
++ dasm_State *D = Dst_REF;
++ char *base = (char *)buffer;
++ unsigned int *cp = (unsigned int *)buffer;
++ int secnum;
++
++ /* Encode all code sections. No support for data sections (yet). */
++ for (secnum = 0; secnum < D->maxsection; secnum++) {
++ dasm_Section *sec = D->sections + secnum;
++ int *b = sec->buf;
++ int *endb = sec->rbuf + sec->pos;
++
++ while (b != endb) {
++ dasm_ActList p = D->actionlist + *b++;
++ while (1) {
++ unsigned int ins = *p++;
++ if (ins & 0xf) { *cp++ = ins; continue; }
++ unsigned int action = (ins >> 20);
++ unsigned int val = (ins >> 4);
++ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
++ switch (action) {
++ case DASM_STOP: case DASM_SECTION: goto stop;
++ case DASM_ESC: *cp++ = *p++; break;
++ case DASM_REL_EXT:
++ n = DASM_EXTERN(Dst, (unsigned char *)cp, (val & 2047), 1);
++ goto patchrel;
++ case DASM_ALIGN:
++ val &= 255; while ((((char *)cp - base) & val)) *cp++ = 0x60000000;
++ break;
++ case DASM_REL_LG:
++ if (n < 0) {
++ n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4);
++ goto patchrel;
++ }
++ /* fallthrough */
++ case DASM_REL_PC:
++ CK(n >= 0, UNDEF_PC);
++ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
++ patchrel:
++ if (val & 2048) { /* B */
++ CK((n & 1) == 0 && ((n + 0x1000) >> 13) == 0, RANGE_REL);
++ cp[-1] |= ((n << 19) & 0x80000000) | ((n << 20) & 0x7e000000)
++ | ((n << 7) & 0x00000f00) | ((n >> 4) & 0x00000080);
++ } else { /* J */
++ CK((n & 1) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
++ cp[-1] |= ((n << 11) & 0x80000000) | ((n << 20) & 0x7fe00000)
++ | ((n << 9) & 0x00100000) | (n & 0x000ff000);
++ }
++ break;
++ case DASM_LABEL_LG:
++ val &= 2047; if (val >= 20) D->globals[val-20] = (void *)(base + n);
++ break;
++ case DASM_LABEL_PC: break;
++ case DASM_IMM:
++ cp[-1] |= (n & ((1<<((val>>5)&31))-1)) << (val&31);
++ break;
++ case DASM_IMMS:
++ cp[-1] |= (((n << 20) & 0xfe000000) | ((n << 7) & 0x00000f80));
++ break;
++ default: *cp++ = ins; break;
++ }
++ }
++ stop: (void)0;
++ }
++ }
++
++ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
++ return DASM_S_PHASE;
++ return DASM_S_OK;
++}
++#undef CK
++
++/* Get PC label offset, or -1 if undefined, or -2 if unused or out of range. */
++int dasm_getpclabel(Dst_DECL, unsigned int pc)
++{
++ dasm_State *D = Dst_REF;
++ if (pc*sizeof(int) < D->pcsize) { /* pcsize is in bytes. */
++ int pos = D->pclabels[pc];
++ if (pos < 0) return *DASM_POS2PTR(D, -pos); /* Defined: return the offset stored at its position. */
++ if (pos > 0) return -1; /* Undefined. */
++ }
++ return -2; /* Unused or out of range. */
++}
++
++#ifdef DASM_CHECKS
++/* Optional sanity checker to call between isolated encoding steps. Returns status. */
++int dasm_checkstep(Dst_DECL, int secmatch)
++{
++ dasm_State *D = Dst_REF;
++ if (D->status == DASM_S_OK) {
++ int i;
++ for (i = 1; i <= 9; i++) { /* Slots 1..9 of lglabels. */
++ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } /* Pending reference chain. */
++ D->lglabels[i] = 0; /* Reset the slot for the next step. */
++ }
++ }
++ if (D->status == DASM_S_OK && secmatch >= 0 &&
++ D->section != &D->sections[secmatch]) /* A negative secmatch skips the section check. */
++ D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
++ return D->status;
++}
++#endif
++
+--- /dev/null
++++ b/dynasm/dasm_riscv.lua
+@@ -0,0 +1,981 @@
++------------------------------------------------------------------------------
++-- DynASM RISC-V module.
++--
++-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
++-- See dynasm.lua for full copyright notice.
++------------------------------------------------------------------------------
++
++local riscv32 = riscv32
++local riscv64 = riscv64
++
++-- Module information:
++local _info = {
++ arch = riscv32 and "riscv32" or riscv64 and "riscv64",
++ description = "DynASM RISC-V module",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2022-07-12",
++ author = "Mike Pall",
++ license = "MIT",
++}
++
++-- Exported glue functions for the arch-specific module.
++local _M = { _info = _info }
++
++-- Cache library functions.
++local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
++local assert, setmetatable = assert, setmetatable
++local _s = string
++local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
++local match, gmatch = _s.match, _s.gmatch
++local concat, sort = table.concat, table.sort
++local bit = bit or require("bit")
++local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
++local tohex = bit.tohex
++
++local function __orderedIndexGen(t) -- Return a sorted array of all keys of t.
++ local orderedIndex = {}
++ for key in pairs(t) do
++ table.insert(orderedIndex, key)
++ end
++ table.sort( orderedIndex ) -- Default comparison, so keys must be mutually comparable.
++ return orderedIndex
++end
++
++local function __orderedNext(t, state) -- Iterator step: the key after `state` in sorted order.
++ local key = nil
++ if state == nil then -- First call: build the sorted key list and cache it on t.
++ t.__orderedIndex = __orderedIndexGen(t)
++ key = t.__orderedIndex[1]
++ else
++ local j = 0
++ for _,_ in pairs(t.__orderedIndex) do j = j + 1 end -- Count the cached keys.
++ for i = 1, j do
++ if t.__orderedIndex[i] == state then -- Linear search for the previous key.
++ key = t.__orderedIndex[i+1]
++ end
++ end
++ end
++
++ if key then
++ return key, t[key]
++ end
++
++ t.__orderedIndex = nil -- Iteration finished: remove the cached key list from t.
++ return
++end
++
++local function opairs(t) -- Like pairs(t), but visits keys in sorted order.
++ return __orderedNext, t, nil
++end
++
++-- Inherited tables and callbacks.
++local g_opt, g_arch
++local wline, werror, wfatal, wwarn
++
++-- Action name list.
++-- CHECK: Keep this in sync with the C code!
++local action_names = {
++ "STOP", "SECTION", "ESC", "REL_EXT",
++ "ALIGN", "REL_LG", "LABEL_LG",
++ "REL_PC", "LABEL_PC", "IMM", "IMMS",
++}
++
++-- Maximum number of section buffer positions for dasm_put().
++-- CHECK: Keep this in sync with the C code!
++local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
++
++-- Action name -> action number.
++local map_action = {}
++for n,name in ipairs(action_names) do
++ map_action[name] = n-1
++end
++
++-- Action list buffer.
++local actlist = {}
++
++-- Argument list for next dasm_put(). Start with offset 0 into action list.
++local actargs = { 0 }
++
++-- Current number of section buffer positions for dasm_put().
++local secpos = 1
++
++------------------------------------------------------------------------------
++
++-- Dump action names and numbers (hex and decimal) to the stream `out`.
++local function dumpactions(out)
++ out:write("DynASM encoding engine action codes:\n")
++ for n,name in ipairs(action_names) do
++ local num = map_action[name]
++ out:write(format(" %-10s %02X %d\n", name, num, num))
++ end
++ out:write("\n")
++end
++
++-- Write action list buffer as a huge static C array called `name` to `out`.
++local function writeactions(out, name)
++ local nn = #actlist
++ if nn == 0 then nn = 1; actlist[1] = map_action.STOP end -- Empty list: emit a lone STOP (index 1, as read below).
++ out:write("static const unsigned int ", name, "[", nn, "] = {\n")
++ for i = 1,nn-1 do -- All but the last element get a trailing comma.
++ assert(out:write("0x", tohex(actlist[i]), ",\n"))
++ end
++ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
++end
++
++------------------------------------------------------------------------------
++
++-- Add word to action list. The word must be an integer in 0..0xffffffff.
++local function wputxw(n)
++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
++ actlist[#actlist+1] = n
++end
++
++-- Add action to list with optional arg. Advance buffer pos, too.
++local function waction(action, val, a, num)
++ local w = assert(map_action[action], "bad action name `"..action.."'")
++ wputxw(w * 0x100000 + (val or 0) * 16) -- Action number from bit 20 up, value from bit 4 up, low nibble zero.
++ if a then actargs[#actargs+1] = a end -- Queue the argument for the next dasm_put() call.
++ if a or num then secpos = secpos + (num or 1) end
++end
++
++-- Flush action list (intervening C code or buffer pos overflow).
++local function wflush(term)
++ if #actlist == actargs[1] then return end -- Nothing to flush.
++ if not term then waction("STOP") end -- Terminate action list.
++ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) -- Emit the C call with the queued arguments.
++ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
++ secpos = 1 -- The actionlist offset occupies a buffer position, too.
++end
++
++-- Put escaped word: a word with a zero low nibble is preceded by an ESC action.
++local function wputw(n)
++ if band(n, 0xf) == 0 then waction("ESC") end -- The C engine reads a zero low nibble as an action word.
++ wputxw(n)
++end
++
++-- Reserve position for word. Returns the reserved index in the action list.
++local function wpos()
++ local pos = #actlist+1
++ actlist[pos] = "" -- Placeholder until wputpos() stores the real word.
++ return pos
++end
++
++-- Store word to reserved position. Accepts integers in -0x80000000..0xffffffff.
++local function wputpos(pos, n)
++ assert(n >= -0x80000000 and n <= 0xffffffff and n % 1 == 0, "word out of range")
++ actlist[pos] = n
++end
++
++------------------------------------------------------------------------------
++
++-- Global label name -> global label number. With auto assignment on 1st use.
++local next_global = 20 -- Global label numbers start at 20.
++local map_global = setmetatable({}, { __index = function(t, name)
++ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end -- Must be an identifier.
++ local n = next_global
++ if n > 2047 then werror("too many global labels") end -- The C engine masks label numbers with 2047.
++ next_global = n + 1
++ t[name] = n -- Cache, so later lookups bypass __index.
++ return n
++end})
++
++-- Dump global labels to `out`, in order of their label number. `lvl` is unused.
++local function dumpglobals(out, lvl)
++ local t = {}
++ for name, n in pairs(map_global) do t[n] = name end -- Invert name -> number.
++ out:write("Global labels:\n")
++ for i=20,next_global-1 do
++ out:write(format(" %s\n", t[i]))
++ end
++ out:write("\n")
++end
++
++-- Write global label enum: one `prefix`-ed enumerator per label, then prefix.."_MAX".
++local function writeglobals(out, prefix)
++ local t = {}
++ for name, n in pairs(map_global) do t[n] = name end -- Invert name -> number.
++ out:write("enum {\n")
++ for i=20,next_global-1 do -- Emit in label number order.
++ out:write(" ", prefix, t[i], ",\n")
++ end
++ out:write(" ", prefix, "_MAX\n};\n")
++end
++
++-- Write global label names as a NULL-terminated C string array called `name`.
++local function writeglobalnames(out, name)
++ local t = {}
++ for name, n in pairs(map_global) do t[n] = name end -- Loop variable `name` shadows the parameter here only.
++ out:write("static const char *const ", name, "[] = {\n")
++ for i=20,next_global-1 do
++ out:write(" \"", t[i], "\",\n")
++ end
++ out:write(" (const char *)0\n};\n")
++end
++
++------------------------------------------------------------------------------
++
++-- Extern label name -> extern label number. With auto assignment on 1st use.
++local next_extern = 0 -- Extern label numbers start at 0.
++local map_extern_ = {} -- Reverse map: extern label number -> name.
++local map_extern = setmetatable({}, { __index = function(t, name)
++ -- No restrictions on the name for now.
++ local n = next_extern
++ if n > 2047 then werror("too many extern labels") end
++ next_extern = n + 1
++ t[name] = n -- Cache, so later lookups bypass __index.
++ map_extern_[n] = name
++ return n
++end})
++
++-- Dump extern labels to `out`, in order of their label number. `lvl` is unused.
++local function dumpexterns(out, lvl)
++ out:write("Extern labels:\n")
++ for i=0,next_extern-1 do
++ out:write(format(" %s\n", map_extern_[i]))
++ end
++ out:write("\n")
++end
++
++-- Write extern label names as a NULL-terminated C string array called `name`.
++local function writeexternnames(out, name)
++ out:write("static const char *const ", name, "[] = {\n")
++ for i=0,next_extern-1 do -- Emit in label number order.
++ out:write(" \"", map_extern_[i], "\",\n")
++ end
++ out:write(" (const char *)0\n};\n")
++end
++
++------------------------------------------------------------------------------
++
++-- Arch-specific maps.
++local map_archdef = {
++ ra = "x1", sp = "x2",
++} -- Ext. register name -> int. name.
++
++local map_type = {} -- Type name -> { ctype, reg }
++local ctypenum = 0 -- Type number (for Dt... macros).
++
++-- Reverse defines for registers: the inverse of map_archdef (x1 -> ra, x2 -> sp).
++function _M.revdef(s)
++ if s == "x1" then return "ra"
++ elseif s == "x2" then return "sp" end
++ return s -- Any other name is returned unchanged.
++end
++
++------------------------------------------------------------------------------
++
++-- Template strings for RISC-V instructions.
++local map_op = {}
++
++local map_op_rv32imafd = {
++
++ -- DASM pseudo-instrs
++ empty_0 = "ffffffff",
++ call_1 = "7fffffffJ",
++
++ -- RV32I
++ lui_2 = "00000037DU",
++ auipc_2 = "00000017DA",
++
++ jal_2 = "0000006fDJ",
++ jalr_3 = "00000067DRJ",
++ -- pseudo-instrs
++ j_1 = "0000006fJ",
++ jal_1 = "000000efJ",
++ jr_1 = "00000067R",
++ jalr_1 = "000000e7R",
++ jalr_2 = "000000e7RJ",
++
++ beq_3 = "00000063RrB",
++ bne_3 = "00001063RrB",
++ blt_3 = "00004063RrB",
++ bge_3 = "00005063RrB",
++ bltu_3 = "00006063RrB",
++ bgeu_3 = "00007063RrB",
++ -- pseudo-instrs
++ bnez_2 = "00001063RB",
++ beqz_2 = "00000063RB",
++ blez_2 = "00005063rB",
++ bgez_2 = "00005063RB",
++ bltz_2 = "00004063RB",
++ bgtz_2 = "00004063rB",
++ bgt_3 = "00004063rRB",
++ ble_3 = "00005063rRB",
++ bgtu_3 = "00006063rRB",
++ bleu_3 = "00007063rRB",
++
++ lb_2 = "00000003DL",
++ lh_2 = "00001003DL",
++ lw_2 = "00002003DL",
++ lbu_2 = "00004003DL",
++ lhu_2 = "00005003DL",
++
++ sb_2 = "00000023rS",
++ sh_2 = "00001023rS",
++ sw_2 = "00002023rS",
++
++ addi_3 = "00000013DRI",
++ slti_3 = "00002013DRI",
++ sltiu_3 = "00003013DRI",
++ xori_3 = "00004013DRI",
++ ori_3 = "00006013DRI",
++ andi_3 = "00007013DRI",
++ slli_3 = "00001013DRi",
++ srli_3 = "00005013DRi",
++ srai_3 = "40005013DRi",
++ -- pseudo-instrs
++ seqz_2 = "00103013DR",
++ ["zext.b_2"] = "0ff07013DR",
++
++ add_3 = "00000033DRr",
++ sub_3 = "40000033DRr",
++ sll_3 = "00001033DRr",
++ slt_3 = "00002033DRr",
++ sltu_3 = "00003033DRr",
++ xor_3 = "00004033DRr",
++ srl_3 = "00005033DRr",
++ sra_3 = "40005033DRr",
++ or_3 = "00006033DRr",
++ and_3 = "00007033DRr",
++ -- pseudo-instrs
++ snez_2 = "00003033Dr",
++ sltz_2 = "00002033DR",
++ sgtz_2 = "00002033Dr",
++
++ ecall_0 = "00000073",
++ ebreak_0 = "00100073",
++
++ nop_0 = "00000013",
++ li_2 = "00000013DI",
++ mv_2 = "00000013DR",
++ not_2 = "fff04013DR",
++ neg_2 = "40000033Dr",
++ ret_0 = "00008067",
++
++ -- RV32M
++ mul_3 = "02000033DRr",
++ mulh_3 = "02001033DRr",
++ mulhsu_3 = "02002033DRr",
++ mulhu_3 = "02003033DRr",
++ div_3 = "02004033DRr",
++ divu_3 = "02005033DRr",
++ rem_3 = "02006033DRr",
++ remu_3 = "02007033DRr",
++
++ -- RV32A
++ ["lr.w_2"] = "c0000053FR",
++ ["sc.w_2"] = "c0001053FRr",
++ ["amoswap.w_3"] = "c0002053FRr",
++ ["amoadd.w_3"] = "c0003053FRr",
++ ["amoxor.w_3"] = "c0004053FRr",
++ ["amoor.w_3"] = "c0005053FRr",
++ ["amoand.w_3"] = "c0006053FRr",
++ ["amomin.w_3"] = "c0007053FRr",
++ ["amomax.w_3"] = "c0008053FRr",
++ ["amominu.w_3"] = "c0009053FRr",
++ ["amomaxu.w_3"] = "c000a053FRr",
++
++ -- RV32F
++ ["flw_2"] = "00002007FL",
++ ["fsw_2"] = "00002027gS",
++
++ ["fmadd.s_4"] = "00000043FGgH",
++ ["fmsub.s_4"] = "00000047FGgH",
++ ["fnmsub.s_4"] = "0000004bFGgH",
++ ["fnmadd.s_4"] = "0000004fFGgH",
++ ["fmadd.s_5"] = "00000043FGgHM",
++ ["fmsub.s_5"] = "00000047FGgHM",
++ ["fnmsub.s_5"] = "0000004bFGgHM",
++ ["fnmadd.s_5"] = "0000004fFGgHM",
++
++ ["fadd.s_3"] = "00000053FGg",
++ ["fsub.s_3"] = "08000053FGg",
++ ["fmul.s_3"] = "10000053FGg",
++ ["fdiv.s_3"] = "18000053FGg",
++ ["fsqrt.s_2"] = "58000053FG",
++ ["fadd.s_4"] = "00000053FGgM",
++ ["fsub.s_4"] = "08000053FGgM",
++ ["fmul.s_4"] = "10000053FGgM",
++ ["fdiv.s_4"] = "18000053FGgM",
++ ["fsqrt.s_3"] = "58000053FGM",
++
++ ["fsgnj.s_3"] = "20000053FGg",
++ ["fsgnjn.s_3"] = "20001053FGg",
++ ["fsgnjx.s_3"] = "20002053FGg",
++
++ ["fmin.s_3"] = "28000053FGg",
++ ["fmax.s_3"] = "28001053FGg",
++
++ ["fcvt.w.s_2"] = "c0000053DG",
++ ["fcvt.wu.s_2"] = "c0100053DG",
++ ["fcvt.w.s_3"] = "c0000053DGM",
++ ["fcvt.wu.s_3"] = "c0100053DGM",
++ ["fmv.x.w_2"] = "e0000053DG",
++
++ ["feq.s_3"] = "a0002053DGg",
++ ["flt.s_3"] = "a0001053DGg",
++ ["fle.s_3"] = "a0000053DGg",
++
++ ["fclass.s_2"] = "e0001053DG",
++
++ ["fcvt.s.w_2"] = "d0000053FR",
++ ["fcvt.s.wu_2"] = "d0100053FR",
++ ["fcvt.s.w_3"] = "d0000053FRM",
++ ["fcvt.s.wu_3"] = "d0100053FRM",
++ ["fmv.w.x_2"] = "f0000053FR",
++
++ -- RV32D
++ ["fld_2"] = "00003007FL",
++ ["fsd_2"] = "00003027gS",
++
++ ["fmadd.d_4"] = "02000043FGgH",
++ ["fmsub.d_4"] = "02000047FGgH",
++ ["fnmsub.d_4"] = "0200004bFGgH",
++ ["fnmadd.d_4"] = "0200004fFGgH",
++ ["fmadd.d_5"] = "02000043FGgHM",
++ ["fmsub.d_5"] = "02000047FGgHM",
++ ["fnmsub.d_5"] = "0200004bFGgHM",
++ ["fnmadd.d_5"] = "0200004fFGgHM",
++
++ ["fadd.d_3"] = "02000053FGg",
++ ["fsub.d_3"] = "0a000053FGg",
++ ["fmul.d_3"] = "12000053FGg",
++ ["fdiv.d_3"] = "1a000053FGg",
++ ["fsqrt.d_2"] = "5a000053FG",
++ ["fadd.d_4"] = "02000053FGgM",
++ ["fsub.d_4"] = "0a000053FGgM",
++ ["fmul.d_4"] = "12000053FGgM",
++ ["fdiv.d_4"] = "1a000053FGgM",
++ ["fsqrt.d_3"] = "5a000053FGM",
++
++ ["fsgnj.d_3"] = "22000053FGg",
++ ["fsgnjn.d_3"] = "22001053FGg",
++ ["fsgnjx.d_3"] = "22002053FGg",
++ ["fmin.d_3"] = "2a000053FGg",
++ ["fmax.d_3"] = "2a001053FGg",
++ ["fcvt.s.d_2"] = "40100053FG",
++ ["fcvt.d.s_2"] = "42000053FG",
++ ["feq.d_3"] = "a2002053DGg",
++ ["flt.d_3"] = "a2001053DGg",
++ ["fle.d_3"] = "a2000053DGg",
++ ["fclass.d_2"] = "e2001053DG",
++ ["fcvt.w.d_2"] = "c2000053DG",
++ ["fcvt.wu.d_2"] = "c2100053DG",
++ ["fcvt.d.w_2"] = "d2000053FR",
++ ["fcvt.d.wu_2"] = "d2100053FR",
++ ["fcvt.w.d_3"] = "c2000053DGM",
++ ["fcvt.wu.d_3"] = "c2100053DGM",
++ ["fcvt.d.w_3"] = "d2000053FRM",
++ ["fcvt.d.wu_3"] = "d2100053FRM",
++
++ ["fmv.d_2"] = "22000053FY",
++ ["fneg.d_2"] = "22001053FY",
++ ["fabs.d_2"] = "22002053FY",
++
++}
++
++local map_op_rv64imafd = {
++  -- Templates: 8 hex digits of base encoding, then one letter per operand.
++  -- RV64I
++  lwu_2 = "00006003DL",
++  ld_2 = "00003003DL",
++
++  sd_2 = "00003023rS",
++
++  slli_3 = "00001013DRj",
++  srli_3 = "00005013DRj",
++  srai_3 = "40005013DRj",
++
++  addiw_3 = "0000001bDRI",
++  slliw_3 = "0000101bDRi",
++  srliw_3 = "0000501bDRi",
++  sraiw_3 = "4000501bDRi",
++
++  addw_3 = "0000003bDRr",
++  subw_3 = "4000003bDRr",
++  sllw_3 = "0000103bDRr",
++  srlw_3 = "0000503bDRr",
++  sraw_3 = "4000503bDRr",
++
++  negw_2 = "4000003bDr",
++  ["sext.w_2"] = "0000001bDR",
++
++  -- RV64M
++  mulw_3 = "0200003bDRr",
++  divw_3 = "0200403bDRr",
++  divuw_3 = "0200503bDRr",
++  remw_3 = "0200603bDRr",
++  remuw_3 = "0200703bDRr",
++  -- RV64A: AMO opcode 0x2f, funct3 = 3, aq/rl clear. Operands are GPRs in the
++  -- order rd, [rs2,] rs1; the address register rs1 is written bare.
++  ["lr.d_2"] = "1000302fDR",
++  ["sc.d_3"] = "1800302fDrR",
++  ["amoswap.d_3"] = "0800302fDrR",
++  ["amoadd.d_3"] = "0000302fDrR",
++  ["amoxor.d_3"] = "2000302fDrR",
++  ["amoor.d_3"] = "4000302fDrR",
++  ["amoand.d_3"] = "6000302fDrR",
++  ["amomin.d_3"] = "8000302fDrR",
++  ["amomax.d_3"] = "a000302fDrR",
++  ["amominu.d_3"] = "c000302fDrR",
++  ["amomaxu.d_3"] = "e000302fDrR",
++
++  -- RV64F
++  ["fcvt.l.s_2"] = "c0200053DG",
++  ["fcvt.lu.s_2"] = "c0300053DG",
++  ["fcvt.l.s_3"] = "c0200053DGM",
++  ["fcvt.lu.s_3"] = "c0300053DGM",
++  ["fcvt.s.l_2"] = "d0200053FR",
++  ["fcvt.s.lu_2"] = "d0300053FR",
++  ["fcvt.s.l_3"] = "d0200053FRM",
++  ["fcvt.s.lu_3"] = "d0300053FRM",
++
++  -- RV64D
++  ["fcvt.l.d_2"] = "c2200053DG",
++  ["fcvt.lu.d_2"] = "c2300053DG",
++  ["fcvt.l.d_3"] = "c2200053DGM",
++  ["fcvt.lu.d_3"] = "c2300053DGM",
++  ["fmv.x.d_2"] = "e2000053DG",
++  ["fcvt.d.l_2"] = "d2200053FR",
++  ["fcvt.d.lu_2"] = "d2300053FR",
++  ["fcvt.d.l_3"] = "d2200053FRM",
++  ["fcvt.d.lu_3"] = "d2300053FRM",
++  ["fmv.d.x_2"] = "f2000053FR",
++
++}
++
++local map_op_zicsr = { -- Zicsr templates: 8 hex digits of base encoding, then operand letters (D = gpr rd, C = csr number, R = gpr rs1, u = 5 bit uimm at bit 15).
++ csrrw_3 = "00001073DCR",
++ csrrs_3 = "00002073DCR",
++ csrrc_3 = "00003073DCR",
++ csrrwi_3 = "00005073DCu",
++ csrrsi_3 = "00006073DCu",
++ csrrci_3 = "00007073DCu",
++
++ -- pseudo-ops
++ csrrw_2 = "00001073DC", -- NOTE(review): takes (rd, csr) while csrrs_2/csrrc_2 below take (csr, rs1); confirm this asymmetry is intended.
++ csrrs_2 = "00002073CR",
++ csrrc_2 = "00003073CR",
++ csrrwi_2 = "00005073Cu",
++ csrrsi_2 = "00006073Cu",
++ csrrci_2 = "00007073Cu",
++
++ rdinstret_1 = "C0202073D", -- The rd* forms carry the CSR number in the top 12 bits of the base encoding.
++ rdcycle_1 = "C0002073D",
++ rdtime_1 = "C0102073D",
++ rdinstreth_1 = "C8202073D",
++ rdcycleh_1 = "C8002073D",
++ rdtimeh_1 = "C8102073D",
++
++ frcsr_1 = "00302073D", -- Floating-point CSR accessors: CSR numbers 1..3 are baked into the encoding.
++ fscsr_2 = "00301073DR",
++ fscsr_1 = "00301073R",
++ frrm_1 = "00202073D",
++ fsrm_2 = "00201073DR",
++ fsrm_1 = "00201073R",
++ fsrmi_2 = "00205073Du",
++ fsrmi_1 = "00205073u",
++ frflags_1 = "00102073D",
++ fsflags_2 = "00101073DR", -- NOTE(review): unlike fscsr/fsrm there is no one-operand fsflags_1 entry; confirm whether that is deliberate.
++ fsflagsi_2 = "00105073Du",
++ fsflagsi_1 = "00105073u",
++}
++
++local map_op_zifencei = { -- Zifencei extension: a single opcode template.
++ ["fence.i_3"] = "0000100fDRI", -- NOTE(review): the template requires three operands (gpr rd, gpr rs1, imm12); confirm callers really pass them.
++}
++
++-- Extension maps per XLEN. NOTE(review): opairs is defined outside this chunk; presumably it visits the keys in order.
++local list_map_op_rv32 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_zifencei, ['c'] = map_op_zicsr }
++local list_map_op_rv64 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_rv64imafd, ['c'] = map_op_zifencei, ['d'] = map_op_zicsr }
++-- Copy every opcode template of one extension list into map_op.
++local function merge_op_list(list)
++  for _, map in opairs(list) do
++    for k, v in pairs(map) do map_op[k] = v end
++  end
++end
++if riscv32 then merge_op_list(list_map_op_rv32) end
++if riscv64 then merge_op_list(list_map_op_rv64) end
++
++------------------------------------------------------------------------------
++
++-- Parse a GPR operand: "xN", a .type name, or "type:xN" (register override).
++-- Returns the register number (0..31) plus the type entry, if one was named.
++local function parse_gpr(expr)
++  local tname, ovreg = match(expr, "^([%w_]+):(x[1-3]?[0-9])$")
++  local key = tname or expr
++  local tp = map_type[key]
++  -- Resolve a type name to its overriding or default register first.
++  if tp then
++    expr = ovreg or tp.reg
++    if not expr then werror("type `"..key.."' needs a register override") end
++  end
++  -- The pattern alone would also admit x32..x39, so range-check the number.
++  local digits = match(expr, "^x([1-3]?[0-9])$")
++  local num = digits and tonumber(digits)
++  if num and num <= 31 then return num, tp end
++  werror("bad register name `"..expr.."'")
++end
++
++-- Parse an FPR operand "fN" and return its number (0..31).
++local function parse_fpr(expr)
++  local digits = match(expr, "^f([1-3]?[0-9])$")
++  local num = digits and tonumber(digits)
++  -- The pattern also admits f32..f39, so range-check explicitly.
++  if num and num <= 31 then return num end
++  werror("bad register name `"..expr.."'")
++end
++
++-- Encode an immediate operand into its instruction field.
++-- A numeric literal is range-checked against `bits` (after dropping `scale`
++-- low bits) and returned shifted to bit `shift`; any other expression is
++-- deferred to an IMM action (or `action`) and contributes 0 here.
++local function parse_imm(imm, bits, shift, scale, signed, action)
++  local n = tonumber(imm)
++  if not n then
++    if match(imm, "^[xf]([1-3]?[0-9])$") or
++       match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then
++      return werror("expected immediate operand, got register")
++    end
++    local flags = (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift
++    waction(action or "IMM", flags, imm)
++    return 0
++  end
++  local m = sar(n, scale)
++  if shl(m, scale) == n then -- No low bits were lost by scaling.
++    local hi = sar(m, signed and bits-1 or bits)
++    if hi == 0 then return shl(m, shift) end
++    if signed and hi == -1 then return shl(m + shl(1, bits), shift) end
++  end
++  werror("out of range immediate `"..imm.."'")
++end
++
++-- Parse a CSR operand given as a decimal number and return it (0..4095).
++local function parse_csr(expr)
++  local digits = match(expr, "^([1-4]?[0-9]?[0-9]?[0-9])$")
++  local num = digits and tonumber(digits)
++  -- The pattern admits values up to 4999; only 0..4095 are accepted.
++  if num and num <= 4095 then return num end
++  werror("bad register name `"..expr.."'")
++end
++
++-- Encode a signed 12 bit store offset: the low 5 bits go to bit 7, the high
++-- 7 bits to bit 25. Non-numeric expressions are deferred to an IMMS action.
++local function parse_imms(imm)
++  local n = tonumber(imm)
++  if not n then
++    if match(imm, "^[xf]([1-3]?[0-9])$") or
++       match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then
++      return werror("expected immediate operand, got register")
++    end
++    waction("IMMS", 0, imm)
++    return 0
++  end
++  if n < -2048 or n >= 2048 then return werror("out of range immediate `"..imm.."'") end
++  return shl(band(n, 0x1f), 7) + shl(shr(band(n, 0xfe0), 5), 25)
++end
++
++-- Translate a rounding mode mnemonic into its numeric field value.
++local function parse_rm(mode)
++  local encoding = ({
++    rne = 0, rtz = 1, rdn = 2, rup = 3, rmm = 4, dyn = 7
++  })[mode]
++  if not encoding then return werror("bad rounding mode `"..mode.."'") end
++  return encoding
++end
++
++-- Parse a memory operand of a load or store and return its encoded fields:
++-- the base register at bit 15 plus any offset bits known at assembly time.
++-- Accepted forms are "imm(reg)", "extern name(reg)" and a .type name (with
++-- optional ":reg" override) followed by a field expression. `mode` selects
++-- the "load" or "store" offset layout.
++local function parse_disp(disp, mode)
++  local is_load, is_store = mode == "load", mode == "store"
++  -- Explicit form: an offset (or extern symbol) in front of "(reg)".
++  local imm, base = match(disp, "^(.*)%(([%w_:]+)%)$")
++  if imm then
++    local rs1 = shl(parse_gpr(base), 15)
++    local extname = match(imm, "^extern%s+(%S+)$")
++    if extname then
++      waction("REL_EXT", map_extern[extname], nil, 1)
++      return rs1
++    end
++    if is_load then return rs1 + parse_imm(imm, 12, 20, 0, true) end
++    if is_store then return rs1 + parse_imms(imm) end
++    werror("bad displacement mode '"..mode.."'")
++  end
++  -- Typed form: the offset is computed by the DtN() macro of the type.
++  local treg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
++  if treg and tailr ~= "" then
++    local r, tp = parse_gpr(treg)
++    if tp then
++      local expr = format(tp.ctypefmt, tailr)
++      if is_load then waction("IMM", 32768+12*32+20, expr)
++      elseif is_store then waction("IMMS", 0, expr)
++      else werror("bad displacement mode '"..mode.."'") end
++      return shl(r, 15)
++    end
++  end
++  werror("bad displacement `"..disp.."'")
++end
++
++-- Classify a label operand. Returns the action suffix ("PC", "LG" or "EXT"),
++-- its numeric argument and, for "PC" labels, the expression text.
++-- `def` is true for a label definition and false for a label reference.
++local function parse_label(label, def)
++  local prefix = sub(label, 1, 2)
++  if prefix == "=>" then -- =>label (pc label reference)
++    return "PC", 0, sub(label, 3)
++  elseif prefix == "->" then -- ->name (global label reference)
++    return "LG", map_global[sub(label, 3)]
++  end
++  if def then
++    -- [1-9] (local label definition)
++    local lnum = match(label, "^[1-9]$")
++    if lnum then
++      return "LG", 10+tonumber(lnum)
++    end
++  else
++    -- [<>][1-9] (local label reference). Fwd: 1-9, Bkwd: 11-19.
++    local dir, lnum = match(label, "^([<>])([1-9])$")
++    if dir == ">" then return "LG", lnum + 0 end
++    if dir == "<" then return "LG", lnum + 10 end
++    -- extern label (extern label reference)
++    local extname = match(label, "^extern%s+(%S+)$")
++    if extname then
++      return "EXT", map_extern[extname]
++    end
++  end
++  werror("bad label `"..label.."'")
++end
++
++------------------------------------------------------------------------------
++
++-- Handle opcodes defined with template strings (8 hex digits of base encoding followed by one letter per operand).
++map_op[".template__"] = function(params, template, nparams)
++ if not params then return sub(template, 9) end -- No operands given: return just the operand letters.
++ local op = tonumber(sub(template, 1, 8), 16) -- Base instruction word; operand fields are added below.
++ local n = 1 -- Index of the next operand in params.
++
++ -- Limit number of section buffer positions used by a single dasm_put().
++ -- A single opcode needs a maximum of 2 positions (ins/ext).
++ if secpos+2 > maxsecpos then wflush() end
++ local pos = wpos() -- Reserve the slot now; the finished word is stored at the end.
++
++ -- Process each character.
++ for p in gmatch(sub(template, 9), ".") do
++ if p == "D" then -- gpr rd
++ op = op + shl(parse_gpr(params[n]), 7); n = n + 1
++ elseif p == "R" then -- gpr rs1
++ op = op + shl(parse_gpr(params[n]), 15); n = n + 1
++ elseif p == "r" then -- gpr rs2
++ op = op + shl(parse_gpr(params[n]), 20); n = n + 1
++ elseif p == "F" then -- fpr rd
++ op = op + shl(parse_fpr(params[n]), 7); n = n + 1
++ elseif p == "G" then -- fpr rs1
++ op = op + shl(parse_fpr(params[n]), 15); n = n + 1
++ elseif p == "g" then -- fpr rs2
++ op = op + shl(parse_fpr(params[n]), 20); n = n + 1
++ elseif p == "H" then -- fpr rs3
++ op = op + shl(parse_fpr(params[n]), 27); n = n + 1
++ elseif p == "C" then -- csr
++ op = op + shl(parse_csr(params[n]), 20); n = n + 1
++ elseif p == "M" then -- fpr rounding mode
++ op = op + shl(parse_rm(params[n]), 12); n = n + 1
++ elseif p == "Y" then -- fpr pseudo-op: one source register fills both rs1 and rs2
++ local r = parse_fpr(params[n])
++ op = op + shl(r, 15) + shl(r, 20); n = n + 1
++ elseif p == "I" then -- I-type imm12
++ op = op + parse_imm(params[n], 12, 20, 0, true); n = n + 1
++ elseif p == "i" then -- I-type shamt5
++ op = op + parse_imm(params[n], 5, 20, 0, false); n = n + 1
++ elseif p == "j" then -- I-type shamt6
++ op = op + parse_imm(params[n], 6, 20, 0, false); n = n + 1
++ elseif p == "u" then -- I-type uimm
++ op = op + parse_imm(params[n], 5, 15, 0, false); n = n + 1
++ elseif p == "U" then -- U-type imm20
++ op = op + parse_imm(params[n], 20, 12, 0, false); n = n + 1
++ elseif p == "L" then -- load
++ op = op + parse_disp(params[n], "load"); n = n + 1
++ elseif p == "S" then -- store
++ op = op + parse_disp(params[n], "store"); n = n + 1
++ elseif p == "B" or p == "J" then -- control flow
++ local mode, m, s = parse_label(params[n], false)
++ if p == "B" then m = m + 2048 end -- NOTE(review): +2048 appears to mark B-type (vs. J-type) relocations for the encoder; confirm against dasm_riscv.h.
++ waction("REL_"..mode, m, s, 1); n = n + 1
++ elseif p == "A" then -- AUIPC
++ local mode, m, s = parse_label(params[n], false)
++ waction("REL_"..mode, m, s, 1); n = n + 1
++ else
++ assert(false) -- Unknown operand letter: an opcode table entry is malformed.
++ end
++ end
++ wputpos(pos, op) -- Store the assembled word at the position reserved above.
++end
++
++------------------------------------------------------------------------------
++
++-- Pseudo-opcode: the action list is emitted at this position of the output.
++map_op[".actionlist_1"] = function(params)
++  if not params then return "cvar" end
++  local cvar = params[1] -- Used verbatim; the name is not syntax-checked.
++  wline(function(out) writeactions(out, cvar) end)
++end
++
++-- Pseudo-opcode: the global enum is emitted at this position of the output.
++map_op[".globals_1"] = function(params)
++  if not params then return "prefix" end
++  local enum_prefix = params[1] -- Used verbatim; not syntax-checked.
++  wline(function(out) writeglobals(out, enum_prefix) end)
++end
++
++-- Pseudo-opcode: the global names are emitted at this position of the output.
++map_op[".globalnames_1"] = function(params)
++  if not params then return "cvar" end
++  local cvar = params[1] -- Used verbatim; the name is not syntax-checked.
++  wline(function(out) writeglobalnames(out, cvar) end)
++end
++
++-- Pseudo-opcode: the extern names are emitted at this position of the output.
++map_op[".externnames_1"] = function(params)
++  if not params then return "cvar" end
++  local cvar = params[1] -- Used verbatim; the name is not syntax-checked.
++  wline(function(out) writeexternnames(out, cvar) end)
++end
++
++------------------------------------------------------------------------------
++
++-- Label definition pseudo-opcode (the trailing colon form is converted to this).
++map_op[".label_1"] = function(params)
++  if not params then return "[1-9] | ->global | =>pcexpr" end
++  if secpos+1 > maxsecpos then wflush() end
++  local kind, num, expr = parse_label(params[1], true)
++  if kind == "EXT" then werror("bad label definition") end
++  waction("LABEL_"..kind, num, expr, 1)
++end
++
++------------------------------------------------------------------------------
++
++-- Data storage pseudo-opcode: every operand is written out as one word.
++map_op[".long_*"] = function(params)
++  if not params then return "imm..." end
++  for _, word in ipairs(params) do
++    local value = tonumber(word)
++    if not value then werror("bad immediate `"..word.."'") end
++    -- Negative literals are stored as value + 2^32.
++    wputw(value < 0 and value + 2^32 or value)
++    if secpos+2 > maxsecpos then wflush() end
++  end
++end
++
++-- Alignment pseudo-opcode: the operand must be a power of 2 from 2 to 256.
++map_op[".align_1"] = function(params)
++  if not params then return "numpow2" end
++  if secpos+1 > maxsecpos then wflush() end
++  local align = tonumber(params[1])
++  if align then
++    -- Halve up to 8 times; hitting exactly 1 proves a power of 2 (2 ... 256).
++    local rest, steps = align, 0
++    repeat
++      rest, steps = rest / 2, steps + 1
++      if rest == 1 then
++        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
++        return
++      end
++    until steps == 8
++  end
++  werror("bad alignment")
++end
++
++------------------------------------------------------------------------------
++
++-- Pseudo-opcode for (primitive) type definitions: binds a name to a C type
++-- and, optionally, to a default register. Called without params it returns
++-- the usage string for the two- or three-operand form.
++map_op[".type_3"] = function(params, nparams)
++  if not params then
++    if nparams == 2 then return "name, ctype" end
++    return "name, ctype, reg"
++  end
++  local name, ctype, reg = params[1], params[2], params[3]
++  if not match(name, "^[%a_][%w_]*$") then
++    werror("bad type name `"..name.."'")
++  end
++  if map_type[name] then werror("duplicate type `"..name.."'") end
++  -- Expose sizeof(ctype) as #name. A bit unclean to put it in map_archdef.
++  map_archdef["#"..name] = "sizeof("..ctype..")"
++  -- Take the next type number and register the new type under it.
++  local num = ctypenum + 1
++  local entry = { ctype = ctype, reg = reg }
++  entry.ctypefmt = format("Dt%X(%%s)", num)
++  map_type[name] = entry
++  -- Emit the DtN() shortcut define used by typed operands.
++  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
++  ctypenum = num
++end
++-- The register operand is optional: the two-operand form shares the handler.
++map_op[".type_2"] = map_op[".type_3"]
++
++-- Write a name-sorted listing of all type definitions to `out`.
++local function dumptypes(out, lvl)
++  local names = {}
++  for name in pairs(map_type) do names[#names+1] = name end
++  sort(names)
++  out:write("Type definitions:\n")
++  for i = 1, #names do
++    local name = names[i]
++    local tp = map_type[name]
++    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
++  end
++  out:write("\n")
++end
++
++------------------------------------------------------------------------------
++
++-- Set the current section: emits a SECTION action for `num`, then flushes.
++function _M.section(num)
++ waction("SECTION", num)
++ wflush(true) -- SECTION is a terminal action.
++end
++
++------------------------------------------------------------------------------
++
++-- Dump architecture description: a banner built from _info (arch, version, release), followed by the action dump.
++function _M.dumparch(out)
++ out:write(format("DynASM %s version %s, released %s\n\n",
++ _info.arch, _info.version, _info.release))
++ dumpactions(out)
++end
++
++-- Dump all user defined elements to `out`: types first, then globals, then externs.
++function _M.dumpdef(out, lvl)
++ dumptypes(out, lvl)
++ dumpglobals(out, lvl)
++ dumpexterns(out, lvl)
++end
++
++------------------------------------------------------------------------------
++
++-- Pass callbacks from/to the DynASM core: stores the four callbacks it is given and hands back wflush.
++function _M.passcb(wl, we, wf, ww)
++ wline, werror, wfatal, wwarn = wl, we, wf, ww -- Used throughout this module for output and error reporting.
++ return wflush
++end
++
++-- Setup the arch-specific module: records the architecture name and the options in g_arch and g_opt.
++function _M.setup(arch, opt)
++ g_arch, g_opt = arch, opt
++end
++
++-- Merge the core maps and the arch-specific maps by chaining them through __index metatables.
++function _M.mergemaps(map_coreop, map_def)
++ setmetatable(map_op, { __index = map_coreop }) -- Opcodes missing from map_op fall back to the core opcodes.
++ setmetatable(map_def, { __index = map_archdef }) -- Defines missing from map_def fall back to map_archdef.
++ return map_op, map_def
++end
++
++return _M
++
++------------------------------------------------------------------------------
++
+--- /dev/null
++++ b/dynasm/dasm_riscv32.lua
+@@ -0,0 +1,12 @@
++------------------------------------------------------------------------------
++-- DynASM RISC-V 32 module.
++--
++-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
++-- See dynasm.lua for full copyright notice.
++------------------------------------------------------------------------------
++-- This module just sets 32 bit mode for the combined RISC-V module.
++-- All the interesting stuff is there.
++------------------------------------------------------------------------------
++
++riscv32 = true -- Using a global is an ugly, but effective solution.
++return require("dasm_riscv")
+--- /dev/null
++++ b/dynasm/dasm_riscv64.lua
+@@ -0,0 +1,12 @@
++------------------------------------------------------------------------------
++-- DynASM RISC-V 64 module.
++--
++-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
++-- See dynasm.lua for full copyright notice.
++------------------------------------------------------------------------------
++-- This module just sets 64 bit mode for the combined RISC-V module.
++-- All the interesting stuff is there.
++------------------------------------------------------------------------------
++
++riscv64 = true -- Using a global is an ugly, but effective solution.
++return require("dasm_riscv")
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -52,6 +52,7 @@
+ CCOPT_arm64=
+ CCOPT_ppc=
+ CCOPT_mips=
++CCOPT_riscv64=
+ #
+ #CCDEBUG=
+ # Uncomment the next line to generate debug information:
+@@ -270,6 +271,9 @@
+ TARGET_LJARCH= mips
+ endif
+ else
++ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH)))
++ TARGET_LJARCH= riscv64
++else
+ $(error Unsupported target architecture)
+ endif
+ endif
+@@ -278,6 +282,7 @@
+ endif
+ endif
+ endif
++endif
+
+ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
+ TARGET_SYS= PS3
+@@ -484,6 +489,9 @@
+ DASM_AFLAGS+= -D ELFV2
+ endif
+ endif
++ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH)))
++ DASM_AFLAGS+= -D RISCV64
++endif
+ endif
+ endif
+
+--- a/src/host/buildvm.c
++++ b/src/host/buildvm.c
+@@ -67,6 +67,8 @@
+ #include "../dynasm/dasm_ppc.h"
+ #elif LJ_TARGET_MIPS
+ #include "../dynasm/dasm_mips.h"
++#elif LJ_TARGET_RISCV64
++#include "../dynasm/dasm_riscv.h"
+ #elif LJ_TARGET_S390X
+ #include "../dynasm/dasm_s390x.h"
+ #else
+--- a/src/host/buildvm_asm.c
++++ b/src/host/buildvm_asm.c
+@@ -145,9 +145,15 @@
+ #if LJ_TARGET_ARM64 && LJ_BE
+ ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */
+ #endif
+- if ((i & 15) == 0)
++ if ((i & 15) == 0) {
++#if LJ_TARGET_RISCV64
++ while (ins == 0xffffffffu) { i += 4; ins = *(uint32_t *)(p+i); }
++#endif
+ fprintf(ctx->fp, "\t.long 0x%08x", ins);
+- else
++ } else
++#if LJ_TARGET_RISCV64
++ if (ins != 0xffffffffu)
++#endif
+ fprintf(ctx->fp, ",0x%08x", ins);
+ if ((i & 15) == 12) putc('\n', ctx->fp);
+ }
+@@ -208,6 +214,21 @@
+ "Error: unsupported opcode %08x for %s symbol relocation.\n",
+ ins, sym);
+ exit(1);
++#elif LJ_TARGET_RISCV64
++ if (ins == 0x7fffffffu) {
++ fprintf(ctx->fp, "\tcall %s\n", sym);
++ } else if ((ins & 0x7f) == 0x17u) {
++ fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym);
++ } else if ((ins & 0x7f) == 0x67u) {
++ fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym);
++ } else if ((ins & 0x7f) == 0x6fu) {
++ fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym);
++ } else {
++ fprintf(stderr,
++ "Error: unsupported opcode %08x for %s symbol relocation.\n",
++ ins, sym);
++ exit(1);
++ }
+ #else
+ #error "missing relocation support for this architecture"
+ #endif
+@@ -304,6 +325,9 @@
+ #if LJ_TARGET_MIPS
+ fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n");
+ #endif
++#if LJ_TARGET_RISCV64
++ fprintf(ctx->fp, ".option arch, -c\n.option norelax\n");
++#endif
+ emit_asm_align(ctx, 4);
+
+ #if LJ_TARGET_PS3
+--- /dev/null
++++ b/src/jit/dis_riscv.lua
+@@ -0,0 +1,793 @@
++------------------------------------------------------------------------------
++-- LuaJIT RISC-V disassembler module.
++--
++-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
++-- Released under the MIT license. See Copyright Notice in luajit.h
++--
++-- Contributed by Milos Poletanovic from Syrmia.com.
++------------------------------------------------------------------------------
++-- This is a helper module used by the LuaJIT machine code dumper module.
++--
++-- It disassembles most standard RISC-V instructions.
++-- Mode is little-endian
++------------------------------------------------------------------------------
++
++local type = type
++local byte, format = string.byte, string.format
++local match, gmatch = string.match, string.gmatch
++local concat = table.concat
++local bit = require("bit")
++local band, bor, tohex = bit.band, bit.bor, bit.tohex
++local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
++
++------------------------------------------------------------------------------
++-- Opcode maps
++------------------------------------------------------------------------------
++
++--RVC32 extension
++
++local map_quad0 = {
++ shift = 13, mask = 7,
++ [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn",
++ false, "c.fsdNMh", "c.swZMn", "c.fswNMn"
++}
++
++local map_sub2quad1 = {
++ shift = 5, mask = 3,
++ [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ"
++}
++
++local map_sub1quad1 = {
++ shift = 10, mask = 3,
++ [0] = "c.srliM1", "c.sraiM1", "c.andiMx", map_sub2quad1
++}
++
++local map_quad1 = {
++ shift = 13, mask = 7,
++ [0] = {
++ shift = 7, mask = 31,
++ [0] = "c.nop", _ = "c.addiDx"
++ },
++ [1] = "c.jalT", [2] = "c.liDx",
++ [3] = {
++ shift = 7, mask = 31,
++ [0] = "c.luiDK", [1] = "c.luiDK", [2] = "c.addi16spX",
++ _ = "c.luiDK"
++ },
++ [4] = map_sub1quad1, [5] = "c.jT", [6] = "c.beqzMq", [7] = "c.bnezMq"
++}
++
++local map_sub1quad2 = {
++ shift = 12, mask = 1,
++ [0] = {
++ shift = 2, mask = 31,
++ [0] = "c.jrD", _ = "c.mvDE"
++ },
++ [1] = {
++ shift = 2, mask = 31,
++ [0] = {
++ shift = 7, mask = 31,
++ [0] = "c.ebreak", _ = "c.jalrD"
++ },
++ _ = "c.addDE"
++ }
++}
++
++local map_quad2 = {
++ shift = 13, mask = 7,
++ [0] = "c.slliD1", [1] = "c.fldspFQ",[2] = "c.lwspDY", [3] = "c.flwspFY",
++ [4] = map_sub1quad2, [5] = "c.fsdspVt", [6] = "c.swspEu", [7] = "c.fswspVu"
++}
++
++local map_compr = {
++ [0] = map_quad0, map_quad1, map_quad2
++}
++
++--RV32M
++local map_mext = {
++ shift = 12, mask = 7,
++ [0] = "mulDRr", "mulhDRr", "mulhsuDRr", "mulhuDRr",
++ "divDRr", "divuDRr", "remDRr", "remuDRr"
++}
++
++--RV64M
++local map_mext64 = {
++ shift = 12, mask = 7,
++ [0] = "mulwDRr", [4] = "divwDRr", [5] = "divuwDRr", [6] = "remwDRr",
++ [7] = "remuwDRr"
++}
++
++--RV32F, RV64F, RV32D, RV64D
++local map_fload = {
++ shift = 12, mask = 7,
++ [2] = "flwFL", [3] = "fldFL"
++}
++
++local map_fstore = {
++ shift = 12, mask = 7,
++ [2] = "fswSg", [3] = "fsdSg"
++}
++
++local map_fmadd = {
++ shift = 25, mask = 3,
++ [0] = "fmadd.sFGgH", "fmadd.dFGgH"
++}
++
++local map_fmsub = {
++ shift = 25, mask = 3,
++ [0] = "fmsub.sFGgH", "fmsub.dFGgH"
++}
++
++local map_fnmsub = {
++ shift = 25, mask = 3,
++ [0] = "fnmsub.sFGgH", "fnmsub.dFGgH"
++}
++
++local map_fnmadd = {
++ shift = 25, mask = 3,
++ [0] = "fnmadd.sFGgH", "fnmadd.dFGgH"
++}
++
++local map_fsgnjs = {
++ shift = 12, mask = 7,
++ [0] = "fsgnj.s|fmv.sFGg6", "fsgnjn.s|fneg.sFGg6", "fsgnjx.s|fabs.sFGg6"
++}
++
++local map_fsgnjd = {
++ shift = 12, mask = 7,
++ [0] = "fsgnj.d|fmv.dFGg6", "fsgnjn.d|fneg.dFGg6", "fsgnjx.d|fabs.dFGg6"
++}
++
++local map_fms = {
++ shift = 12, mask = 7,
++ [0] = "fmin.sFGg", "fmax.sFGg"
++}
++
++local map_fmd = {
++ shift = 12, mask = 7,
++ [0] = "fmin.dFGg", "fmax.dFGg"
++}
++
++local map_fcomps = {
++ shift = 12, mask = 7,
++ [0] = "fle.sDGg", "flt.sDGg", "feq.sDGg"
++}
++
++local map_fcompd = {
++ shift = 12, mask = 7,
++ [0] = "fle.dDGg", "flt.dDGg", "feq.dDGg"
++}
++
++local map_fcvtwls = {
++ shift = 20, mask = 31,
++ [0] = "fcvt.w.sDG", "fcvt.wu.sDG", "fcvt.l.sDG", "fcvt.lu.sDG"
++}
++
++local map_fcvtwld = {
++ shift = 20, mask = 31,
++ [0] = "fcvt.w.dDG", "fcvt.wu.dDG", "fcvt.l.dDG", "fcvt.lu.dDG"
++}
++
++local map_fcvts = {
++ shift = 20, mask = 31,
++ [0] = "fcvt.s.wFR", "fcvt.s.wuFR", "fcvt.s.lFR", "fcvt.s.luFR"
++}
++
++local map_fcvtd = {
++ shift = 20, mask = 31,
++ [0] = "fcvt.d.wFR", "fcvt.d.wuFR", "fcvt.d.lFR", "fcvt.d.luFR"
++}
++
++local map_fext = {
++ shift = 25, mask = 127,
++ [0] = "fadd.sFGg", [1] = "fadd.dFGg", [4] = "fsub.sFGg", [5] = "fsub.dFGg",
++ [8] = "fmul.sFGg", [9] = "fmul.dFGg", [12] = "fdiv.sFGg", [13] = "fdiv.dFGg",
++ [16] = map_fsgnjs, [17] = map_fsgnjd, [20] = map_fms, [21] = map_fmd,
++ [32] = "fcvt.s.dFG", [33] = "fcvt.d.sFG",[44] = "fsqrt.sFG", [45] = "fsqrt.dFG",
++ [80] = map_fcomps, [81] = map_fcompd, [96] = map_fcvtwls, [97] = map_fcvtwld,
++ [104] = map_fcvts, [105] = map_fcvtd,
++ [112] = {
++ shift = 12, mask = 7,
++ [0] = "fmv.x.wDG", "fclass.sDG"
++ },
++ [113] = {
++ shift = 12, mask = 7,
++ [0] = "fmv.x.dDG", "fclass.dDG"
++ },
++ [120] = "fmv.w.xFR", [121] = "fmv.d.xFR"
++}
++
++--RV32A, RV64A
++local map_aext = {
++ shift = 27, mask = 31,
++ [0] = {
++ shift = 12, mask = 7,
++ [2] = "amoadd.wDrO", [3] = "amoadd.dDrO"
++ },
++ {
++ shift = 12, mask = 7,
++ [2] = "amoswap.wDrO", [3] = "amoswap.dDrO"
++ },
++ {
++ shift = 12, mask = 7,
++ [2] = "lr.wDO", [3] = "lr.dDO"
++ },
++ {
++ shift = 12, mask = 7,
++ [2] = "sc.wDrO", [3] = "sc.dDrO"
++ },
++ {
++ shift = 12, mask = 7,
++ [2] = "amoxor.wDrO", [3] = "amoxor.dDrO"
++ },
++ [8] = {
++ shift = 12, mask = 7,
++ [2] = "amoor.wDrO", [3] = "amoor.dDrO"
++ },
++ [12] = {
++ shift = 12, mask = 7,
++ [2] = "amoand.wDrO", [3] = "amoand.dDrO"
++ },
++ [16] = {
++ shift = 12, mask = 7,
++ [2] = "amomin.wDrO", [3] = "amomin.dDrO"
++ },
++ [20] = {
++ shift = 12, mask = 7,
++ [2] = "amomax.wDrO", [3] = "amomax.dDrO"
++ },
++ [24] = {
++ shift = 12, mask = 7,
++ [2] = "amominu.wDrO", [3] = "amominu.dDrO"
++ },
++ [28] = {
++ shift = 12, mask = 7,
++ [2] = "amomaxu.wDrO", [3] = "amomaxu.dDrO"
++ },
++}
++
++-- RV32I, RV64I
++local map_load = {
++ shift = 12, mask = 7,
++ [0] = "lbDL", "lhDL", "lwDL", "ldDL",
++ "lbuDL", "lhuDL", "lwuDL"
++}
++
++local map_ali = { -- Sub-map indexed by (op >> shift) & mask, here bits 14:12.
++ shift = 12, mask = 7,
++ [0] = {
++ shift = 7, mask = 0x1ffffff, -- Index is all of bits 31:7.
++ [0] = "nop", _ = "addi|li|mvDR0I2" -- Index 0 means bits 31:7 are all zero; `_` is the fallback entry.
++ }
++ ,"slliDRi", "sltiDRI", "sltiu|seqzDRI5",
++ "xori|notDRI4",
++ {
++ shift = 26, mask = 63, -- Bits 31:26 tell srli (0) from srai (16).
++ [0] = "srliDRi", [16] = "sraiDRi"
++ },
++ "oriDRI", "andiDRI"
++}
++
++local map_branch = {
++ shift = 12, mask = 7,
++ [0] = "beq|beqzRr0B", "bne|bnezRr0B" , false, false,
++ "blt|bgtz|bltzR0r2B", "bge|blez|bgezR0r2B", "bltuRrB", "bgeuRrB"
++}
++
++local map_store = {
++ shift = 12, mask = 7,
++ [0] = "sbSr", "shSr", "swSr", "sdSr"
++}
++
++local map_al = {
++ shift = 25, mask = 127,
++ [0] = {
++ shift = 12, mask = 7,
++ [0] = "addDRr", "sllDRr", "slt|sgtz|sltzDR0r2", "sltu|snezDR0r",
++ "xorDRr", "srlDRr", "orDRr", "andDRr"
++ },
++ map_mext,
++ [32] = {
++ shift = 12, mask = 7,
++ [0] = "sub|negDR0r", [5] = "sraDRr"
++ }
++}
++
++--64I
++local map_addi_shift = {
++ shift = 12, mask = 7,
++ [0] = "addiw|sext.wDRI0", "slliwDRi",
++ [5] = {
++ shift = 25, mask = 127,
++ [0] = "srliwDRi", [32] = "sraiwDRi"
++ }
++}
++
++local map_arithw_shiftw = {
++ shift = 25, mask = 127,
++ [0] = {
++ shift = 12, mask = 7,
++ [0] = "addwDRr", [1] = "sllwDRr", [5] = "srlwDRr"
++ },
++ [1] = map_mext64,
++ [32] = {
++ shift = 12, mask = 7,
++ [0] = "subw|negwDR0r", [5] = "srawDRr"
++ }
++}
++
++local map_ecabre = {
++ shift = 12, mask = 7,
++ [0] = {
++ shift = 20, mask = 4095,
++ [0] = "ecall", "ebreak"
++ }
++}
++
++local map_fence = {
++ shift = 12, mask = 1,
++ [0] = "fence", --"fence.i" ZIFENCEI EXTENSION
++}
++
++local map_jalr = { -- Indexed by bits 31:7 of the instruction word.
++ shift = 7, mask = 0x1ffffff,
++ _ = "jalr|jrDRI7", [256] = "ret" -- 256 means only bit 15 is set: rs1 field = 1 with rd and the immediate zero; `_` is the fallback entry.
++}
++
++local map_pri = { -- Keyed by the low 7 bits of the word (disass_ins indexes with band(op, 127)).
++  [3] = map_load, [7] = map_fload, [15] = map_fence, [19] = map_ali,
++  [23] = "auipcDA", [27] = map_addi_shift, [35] = map_store,
++  [39] = map_fstore, [47] = map_aext, [51] = map_al, [55] = "luiDU",
++  [59] = map_arithw_shiftw, [67] = map_fmadd, [71] = map_fmsub,
++  [75] = map_fnmsub, [79] = map_fnmadd, [83] = map_fext, [99] = map_branch,
++  [103] = map_jalr, [111] = "jal|j|D0J", [115] = map_ecabre
++}
++
++------------------------------------------------------------------------------
++
++local map_gpr = {
++ [0] = "zero", "ra", "sp", "gp", "tp", "x5", "x6", "x7",
++ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
++ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
++ "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31",
++}
++
++local map_fgpr = {
++ [0] = "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
++}
++
++------------------------------------------------------------------------------
++
++-- Output a nicely formatted line with an opcode and operands, then advance ctx.pos past the instruction.
++local function putop(ctx, text, operands)
++ local pos = ctx.pos
++ local extra = ""
++ if ctx.rel then -- A branch/jump target was recorded: append its symbol, if known.
++ local sym = ctx.symtab[ctx.rel]
++ if sym then extra = "\t->"..sym end
++ end
++ if ctx.hexdump > 0 then -- With hexdump enabled the raw instruction word is printed too.
++ ctx.out:write((format("%08x %s %-7s %s%s\n", -- NOTE(review): method call here, plain call in the else branch; confirm ctx.out supports both.
++ ctx.addr+pos, tohex(ctx.op), text, concat(operands, ","), extra)))
++ else
++ ctx.out(format("%08x %-7s %s%s\n",
++ ctx.addr+pos, text, concat(operands, ", "), extra))
++ end
++ local pos = ctx.pos -- Redeclared; same value as the local above.
++ local first_byte = byte(ctx.code, ctx.pos+1) -- First byte of the instruction just printed.
++ --Step over the instruction just printed: 2 bytes if its low two bits are not both set, else 4
++ if(band(first_byte, 3) < 3) then
++ ctx.pos = pos + 2
++ else
++ ctx.pos = pos + 4
++ end
++end
++
++-- Fallback for unknown opcodes: print the raw instruction word as ".long".
++local function unknown(ctx)
++  local raw = "0x"..tohex(ctx.op); return putop(ctx, ".long", { raw })
++end
++
++-- Fetch the instruction at ctx.pos as a little-endian value: 32 bits when
++-- the two low bits of its first byte are both set, 16 bits otherwise.
++local function get_le(ctx)
++  local code, first = ctx.code, ctx.pos + 1
++  -- band(x, 3) is at most 3, so "== 3" is the complement of "< 3".
++  if band(byte(code, first), 3) == 3 then
++    local b0, b1, b2, b3 = byte(code, first, first+3)
++    return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
++  end
++  local b0, b1 = byte(code, first, first+1)
++  return bor(lshift(b1, 8), b0)
++end
++
++-- Assemble an unsigned immediate: imm[9:6] <- opcode bits 10:7, imm[5:4] <- bits 12:11, imm[3] <- bit 5, imm[2] <- bit 6.
++local function parse_W(opcode)
++  local imm9_6 = lshift(band(rshift(opcode, 7), 15), 6)
++  local imm5_4 = lshift(band(rshift(opcode, 11), 3), 4)
++  local imm3 = lshift(band(rshift(opcode, 5), 1), 3)
++  local imm2 = lshift(band(rshift(opcode, 6), 1), 2)
++  return bor(imm9_6, imm5_4, imm3, imm2)
++end
++
++-- Assemble a 6 bit signed immediate, sign-extended to 32 bits:
++-- imm[5] <- opcode bit 12, imm[4:0] <- opcode bits 6:2.
++local function parse_x(opcode)
++  local imm = band(rshift(opcode, 2), 31)
++  if band(rshift(opcode, 12), 1) == 1 then
++    imm = bor(imm, -32) -- Bit 5 is the sign: set it and every bit above.
++  end
++  return imm
++end
++
++-- Assemble a signed immediate covering bits 9:4, sign-extended to 32 bits:
++--   imm[9] <- opcode bit 12, imm[8:7] <- bits 4:3, imm[6] <- bit 5,
++--   imm[5] <- bit 2, imm[4] <- bit 6.
++local function parse_X(opcode)
++  local imm = bor(lshift(band(rshift(opcode, 3), 3), 7),
++                  lshift(band(rshift(opcode, 5), 1), 6),
++                  lshift(band(rshift(opcode, 2), 1), 5),
++                  lshift(band(rshift(opcode, 6), 1), 4))
++  if band(rshift(opcode, 12), 1) == 1 then
++    -- Bit 9 is the sign: set it and every bit above it.
++    imm = bor(imm, -512)
++  end
++  return imm
++end
++
++-- Assemble a 12 bit signed immediate, sign-extended to 32 bits:
++-- imm[11:5] <- opcode bits 31:25, imm[4:0] <- opcode bits 11:7.
++local function parse_S(opcode)
++  local imm = bor(lshift(band(rshift(opcode, 25), 127), 5),
++                  band(rshift(opcode, 7), 31))
++  if band(imm, 0x800) ~= 0 then
++    imm = bor(imm, -4096) -- Bit 11 is the sign: fill bits 31:12.
++  end
++  return imm
++end
++
++-- Decode the B-type (branch) offset, sign-extended to 32 bits:
++--   imm[12] <- opcode bit 31, imm[11] <- bit 7, imm[10:5] <- bits 30:25,
++--   imm[4:1] <- bits 11:8 (imm[0] is always zero).
++local function parse_B(opcode)
++  local imm = bor(lshift(band(rshift(opcode, 7), 1), 11),
++                  lshift(band(rshift(opcode, 25), 63), 5),
++                  lshift(band(rshift(opcode, 8), 15), 1))
++  -- The sign is imm[12] (opcode bit 31), not imm[11]: testing imm[11] gave
++  -- wrong results for every offset outside -2048..2046.
++  if rshift(opcode, 31) == 1 then imm = bor(imm, -4096) end
++  return imm
++end
++
++-- Assemble a signed immediate covering bits 8:1, sign-extended to 32 bits:
++--   imm[8] <- opcode bit 12, imm[7:6] <- bits 6:5, imm[5] <- bit 2,
++--   imm[4:3] <- bits 11:10, imm[2:1] <- bits 4:3.
++local function parse_q(opcode)
++  local imm = bor(lshift(band(rshift(opcode, 5), 3), 6),
++                  lshift(band(rshift(opcode, 2), 1), 5),
++                  lshift(band(rshift(opcode, 10), 3), 3),
++                  lshift(band(rshift(opcode, 3), 3), 1))
++  if band(rshift(opcode, 12), 1) == 1 then
++    -- Bit 8 is the sign: set it and every bit above it.
++    imm = bor(imm, -256)
++  end
++  return imm
++end
++
++-- Assemble a signed immediate covering bits 20:1, sign-extended to 32 bits:
++--   imm[20] <- opcode bit 31, imm[19:12] <- bits 19:12, imm[11] <- bit 20,
++--   imm[10:1] <- bits 30:21.
++local function parse_J(opcode)
++  local imm = bor(lshift(band(rshift(opcode, 12), 255), 12),
++                  lshift(band(rshift(opcode, 20), 1), 11),
++                  lshift(band(rshift(opcode, 21), 1023), 1))
++  if band(rshift(opcode, 31), 1) == 1 then
++    -- Bit 20 is the sign: fill bits 31:20.
++    imm = bor(imm, -0x100000)
++  end
++  return imm
++end
++
++-- Assemble a signed immediate covering bits 11:1, sign-extended to 32 bits:
++--   imm[11] <- opcode bit 12, imm[10] <- bit 8, imm[9:8] <- bits 10:9,
++--   imm[7] <- bit 6, imm[6] <- bit 7, imm[5] <- bit 2, imm[4] <- bit 11,
++--   imm[3:1] <- bits 5:3.
++local function parse_T(opcode)
++  -- Move the field at opcode bit `from` (width given by `mask`) to bit `to`.
++  local function field(from, mask, to)
++    return lshift(band(rshift(opcode, from), mask), to)
++  end
++  local imm = bor(
++    field(8, 1, 10), field(9, 3, 8),
++    field(6, 1, 7), field(7, 1, 6),
++    field(2, 1, 5), field(11, 1, 4),
++    field(3, 7, 1))
++  if band(rshift(opcode, 12), 1) == 1 then
++    -- Bit 11 is the sign: set it and every bit above it.
++    imm = bor(imm, -2048)
++  end
++  return imm
++end
++
++-- Assemble a 20 bit immediate: bits 4:0 come from opcode bits 6:2; when
++-- opcode bit 12 is set, bits 19:5 are filled with ones.
++local function parse_K(opcode)
++  local imm = band(rshift(opcode, 2), 31)
++  if band(rshift(opcode, 12), 1) == 1 then
++    imm = bor(imm, 0xfffe0) -- Extend within the 20 bit field only.
++  end
++  return imm
++end
++
++-- Disassemble a single instruction: fetch via ctx:get(), look up its pattern, decode operands, emit via putop().
++local function disass_ins(ctx)
++  local op = ctx:get()
++  local operands = {}
++  local last = nil
++  ctx.op = op
++  ctx.rel = nil
++
++  local opat
++  -- Low two bits < 3 select the compressed-instruction map.
++  if band(op, 3) < 3 then
++    opat = ctx.map_compr[band(op, 3)]
++    while type(opat) ~= "string" do
++      if not opat then return unknown(ctx) end
++      local idx = band(rshift(op, opat.shift), opat.mask)
++      opat = opat[idx] or opat._
++    end
++  else
++    opat = ctx.map_pri[band(op,127)]
++    while type(opat) ~= "string" do
++      if not opat then return unknown(ctx) end
++      opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
++    end
++  end
++  local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
++  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
++  -- altname: "|"-separated alias name(s), if any; pat2: the operand pattern after them.
++  if altname then
++    pat = pat2
++  end
++
++  local alias_done = false -- set once pattern "0" has applied an alias, so that "2" is skipped
++
++  for p in gmatch(pat, ".") do
++    local x = nil
++    if p == "D" then -- GPR from bits 11:7
++      x = map_gpr[band(rshift(op, 7), 31)]
++    elseif p == "F" then -- FPR from bits 11:7
++      x = map_fgpr[band(rshift(op, 7), 31)]
++    elseif p == "R" then -- GPR from bits 19:15
++      x = map_gpr[band(rshift(op, 15), 31)]
++    elseif p == "G" then -- FPR from bits 19:15
++      x = map_fgpr[band(rshift(op, 15), 31)]
++    elseif p == "r" then -- GPR from bits 24:20
++      x = map_gpr[band(rshift(op, 20), 31)]
++      if(name == "sb" or name == "sh" or name == "sw" or name == "sd") then
++        local temp = last -- because of the different order of the characters
++        operands[#operands] = x
++        x = temp
++      end
++    elseif p == "g" then -- FPR from bits 24:20
++      x = map_fgpr[band(rshift(op, 20), 31)]
++      if(name == "fsw" or name == "fsd") then
++        local temp = last -- same operand swap as for the integer stores
++        operands[#operands] = x
++        x = temp
++      end
++    elseif p == "Z" then -- 3-bit register fields index registers 8..15
++      x = map_gpr[8 + band(rshift(op, 2), 7)]
++    elseif p == "N" then
++      x = map_fgpr[8 + band(rshift(op, 2), 7)]
++    elseif p == "M" then
++      x = map_gpr[8 + band(rshift(op, 7), 7)]
++    elseif p == "E" then -- GPR from bits 6:2
++      x = map_gpr[band(rshift(op, 2), 31)]
++    elseif p == "W" then
++      local uimm = parse_W(op)
++      x = format("%s,%d", "sp", uimm)
++    elseif p == "x" then
++      x = parse_x(op)
++    elseif p == "h" then -- rewrites the previous operand as uimm(reg)
++      local part1 = band(rshift(op, 5), 3) --7:6
++      local part2 = band(rshift(op, 10), 7) --5:3
++      local uimm = bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 3))
++      operands[#operands] = format("%d(%s)", uimm, last)
++    elseif p == "X" then
++      local imm = parse_X(op)
++      x = format("%s,%d", "sp", imm)
++    elseif p == "O" then
++      x = format("(%s)", map_gpr[band(rshift(op, 15), 31)])
++    elseif p == "H" then -- FPR from bits 31:27
++      x = map_fgpr[band(rshift(op, 27), 31)]
++    elseif p == "L" then -- signed 12-bit displacement in bits 31:20
++      local register = map_gpr[band(rshift(op, 15), 31)]
++      local disp = arshift(op, 20)
++      x = format("%d(%s)", disp, register)
++    elseif p == "I" then
++      x = arshift(op, 20)
++      --different for jalr
++      if(name == "jalr") then
++        local reg = map_gpr[band(rshift(op, 15), 31)]
++        if(ctx.reltab[reg] == nil) then
++          operands[#operands] = format("%d(%s)", x, last)
++        else
++          local target = ctx.reltab[reg] + x -- base recorded by an earlier "A" operand
++          operands[#operands] = format("%d(%s) #0x%08x", x, last, target)
++          ctx.rel = target
++          ctx.reltab[reg] = nil --assume no reuses of the register
++        end
++        x = nil --not to add additional operand
++      end
++    elseif p == "i" then
++      --both for RV32I AND RV64I
++      local value = band(arshift(op, 20), 63)
++      x = string.format("0x%x", value)
++    elseif p == "S" then
++      local register = map_gpr[band(rshift(op, 15), 31)] --register
++      local imm = parse_S(op)
++      x = format("%d(%s)", imm, register)
++    elseif p == "n" then -- rewrites the previous operand as uimm(reg)
++      local part1 = band(rshift(op, 5), 1) --6
++      local part2 = band(rshift(op, 10), 7) --5:3
++      local part3 = band(rshift(op, 6), 1) --2
++      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3),
++                       lshift(part3, 2))
++      operands[#operands] = format("%d(%s)", uimm, last)
++    elseif p == "A" then -- records pc + (imm << 12) per destination register for a later jalr
++      local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)]
++      ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12)
++      x = format("0x%x", value)
++    elseif p == "B" then
++      x = ctx.addr + ctx.pos + parse_B(op)
++      ctx.rel = x
++      x = format("0x%08x", x)
++    elseif p == "U" then
++      local value = band(rshift(op, 12), 0xfffff)
++      x = string.format("0x%x", value)
++    elseif p == "Q" then
++      local part1 = band(rshift(op, 2), 7) --8:6
++      local part2 = band(rshift(op, 12), 1) --5
++      local part3 = band(rshift(op, 5), 3) --4:3
++      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5),
++                       lshift(part3, 3))
++      x = format("%d(%s)", uimm, "sp")
++    elseif p == "q" then
++      x = ctx.addr + ctx.pos + parse_q(op)
++      ctx.rel = x
++      x = format("0x%08x", x)
++    elseif p == "J" then
++      x = ctx.addr + ctx.pos + parse_J(op)
++      ctx.rel = x
++      x = format("0x%08x", x)
++    elseif p == "K" then
++      local value = parse_K(op)
++      x = string.format("0x%x", value)
++    elseif p == "Y" then
++      local part1 = band(rshift(op, 2), 3) --7:6
++      local part2 = band(rshift(op, 12), 1) --5
++      local part3 = band(rshift(op, 4), 7) --4:2
++      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5),
++                       lshift(part3, 2))
++      x = format("%d(%s)", uimm, "sp")
++    elseif p == "1" then
++      local part1 = band(rshift(op, 12), 1) --5
++      local part2 = band(rshift(op, 2), 31) --4:0
++      local uimm = bor(lshift(0, 31), lshift(part1, 5), part2)
++      x = string.format("0x%x", uimm)
++    elseif p == "T" then
++      x = ctx.addr + ctx.pos + parse_T(op)
++      ctx.rel = x
++      x = format("0x%08x", x)
++    elseif p == "t" then
++      local part1 = band(rshift(op, 7), 7) --8:6
++      local part2 = band(rshift(op, 10), 7) --5:3
++      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3))
++      x = format("%d(%s)", uimm, "sp")
++    elseif p == "u" then
++      local part1 = band(rshift(op, 7), 3) --7:6
++      local part2 = band(rshift(op, 9), 15) --5:2
++      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2))
++      x = format("%d(%s)", uimm, "sp")
++    elseif p == "V" then
++      x = map_fgpr[band(rshift(op, 2), 31)]
++    elseif p == "0" then --PSEUDOINSTRUCTIONS
++      if (last == "zero" or last == 0) then
++        local n = #operands
++        operands[n] = nil
++        last = operands[n-1]
++        local a1, a2 = match(altname, "([^|]*)|(.*)")
++        if a1 then name, altname = a1, a2
++        else name = altname end
++        alias_done = true
++      end
++    elseif (p == "4") then -- alias when the last operand is -1
++      if(last == -1) then
++        name = altname
++        operands[#operands] = nil
++      end
++    elseif (p == "5") then -- alias when the last operand is 1
++      if(last == 1) then
++        name = altname
++        operands[#operands] = nil
++      end
++    elseif (p == "6") then -- alias when the last two operands are equal
++      if(last == operands[#operands - 1]) then
++        name = altname
++        operands[#operands] = nil
++      end
++    elseif (p == "7") then --jalr rs; NOTE(review): assumes the operand is exactly "d(reg)" -- confirm for the "#0x..." form
++      local value = string.sub(operands[#operands], 1, 1)
++      local reg = string.sub(operands[#operands], 3, #(operands[#operands]) - 1)
++      if(value == "0" and
++         (operands[#operands - 1] == "ra" or operands[#operands - 1] == "zero")) then
++        if(operands[#operands - 1] == "zero") then
++          name = altname
++        end
++        operands[#operands] = nil
++        operands[#operands] = reg
++      end
++    elseif (p == "2" and alias_done == false) then
++      if (last == "zero" or last == 0) then
++        local _, a2 = match(altname, "([^|]*)|(.*)")
++        name = a2
++        operands[#operands] = nil
++      end
++    end
++    if x then operands[#operands+1] = x; last = x end
++  end
++  return putop(ctx, name, operands)
++end
++
++------------------------------------------------------------------------------
++
++-- Disassemble `len` bytes of ctx.code starting at byte offset `ofs`.
++local function disass_block(ctx, ofs, len)
++  -- Missing offset means "from the start"; missing length means "to the end".
++  ofs = ofs or 0
++  local limit = #ctx.code
++  if len then limit = ofs + len end
++  -- Instructions are 16 or 32 bits wide: round both bounds down to even.
++  ctx.pos = ofs - ofs % 2
++  limit = limit - limit % 2
++  ctx.rel = nil
++  while ctx.pos < limit do disass_ins(ctx) end
++end
++
++-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
++local function create(code, addr, out)
++  return {
++    code = code,              -- Machine code (a string).
++    addr = addr or 0,         -- Added to ctx.pos when computing targets.
++    out = out or io.write,    -- Output function.
++    symtab = {},
++    disass = disass_block,
++    hexdump = 8,
++    get = get_le,
++    map_pri = map_pri,
++    map_compr = map_compr,
++    reltab = {},              -- Per-register bases recorded by "A" operands.
++  }
++end
++
++-- Simple API: disassemble code (a string) at address and output via out.
++local function disass(code, addr, out)
++  create(code, addr, out):disass() -- No ofs: it is a byte offset into code, not the address.
++end
++
++-- Return register name for RID.
++local function regname(r)
++  if not (r < 32) then return "f"..(r-32) end -- RIDs from 32 up are named f0, f1, ...
++  return map_gpr[r]
++end
++
++-- Public module functions.
++return {
++ create = create,
++ disass = disass,
++ regname = regname
++}
+--- /dev/null
++++ b/src/jit/dis_riscv64.lua
+@@ -0,0 +1,16 @@
++----------------------------------------------------------------------------
++-- LuaJIT RISC-V 64 disassembler wrapper module.
++--
++-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
++-- Released under the MIT license. See Copyright Notice in luajit.h
++----------------------------------------------------------------------------
++-- This module just exports the default riscv little-endian functions from the
++-- RISC-V disassembler module. All the interesting stuff is there.
++------------------------------------------------------------------------------
++
++local dis_riscv = require((string.match(..., ".*%.") or "").."dis_riscv")
++return {
++ create = dis_riscv.create,
++ disass = dis_riscv.disass,
++ regname = dis_riscv.regname
++}
+\ No newline at end of file
+--- a/src/lib_jit.c
++++ b/src/lib_jit.c
+@@ -697,6 +697,81 @@
+ #include <sys/utsname.h>
+ #endif
+
++#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX
++#include <setjmp.h>
++#include <signal.h>
++static sigjmp_buf sigbuf = {0}; /* Jump buffer armed by riscv_probe(). */
++static void detect_sigill(int sig) /* Signal handler; sig itself is not inspected. */
++{
++ siglongjmp(sigbuf, 1); /* Makes the sigsetjmp() on sigbuf return 1. */
++}
++
++static int riscv_compressed(void) /* Probe: 1 if the two c.nop below execute. */
++{
++#if defined(__riscv_c) || defined(__riscv_compressed)
++  /* Don't bother checking for RVC -- would crash before getting here. */
++  return 1;
++#elif defined(__GNUC__)
++  /* c.nop; c.nop; */
++  __asm__(".4byte 0x00010001");
++  return 1;
++#else
++  return 0;
++#endif
++}
++
++static int riscv_zba(void) /* Probe: 1 if the add.uw below executes. */
++{
++#if defined(__riscv_b) || defined(__riscv_zba)
++  /* Don't bother checking for Zba -- would crash before getting here. */
++  return 1;
++#elif defined(__GNUC__)
++  /* Don't bother verifying the result, just check if the instruction exists. */
++  /* add.uw zero, zero, zero */
++  __asm__(".4byte 0x0800003b");
++  return 1;
++#else
++  return 0;
++#endif
++}
++
++static int riscv_zbb(void) /* Probe: 1 if sext.b yields a negative a0. */
++{
++#if defined(__riscv_b) || defined(__riscv_zbb)
++  /* Don't bother checking for Zbb -- would crash before getting here. */
++  return 1;
++#elif defined(__GNUC__)
++  register int t __asm__("a0");
++  /* addi a0, zero, 255; sext.b a0, a0; -- a0 is declared as the asm output. */
++  __asm__ volatile("addi a0, zero, 255\n\t.4byte 0x60451513" : "=r"(t));
++  return t < 0;
++#else
++  return 0;
++#endif
++}
++
++static int riscv_xthead(void) /* Probe: 1 if th.ext leaves -1 in a0. */
++{
++#if defined(__GNUC__)
++  register int t __asm__("a0");
++  /* C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". */
++  /* Therefore assume XThead* are present if XTheadBb is present. */
++  /* addi a0, zero, 255; th.ext a0, a0, 7, 0; -- a0 is declared as the asm output. */
++  __asm__ volatile("addi a0, zero, 255\n\t.4byte 0x1c05250b" : "=r"(t));
++  return t == -1; /* In case of collision with other vendor extensions. */
++#else
++  return 0;
++#endif
++}
++
++static uint32_t riscv_probe(int (*func)(void), uint32_t flag)
++{
++  /* Non-zero return: detect_sigill() jumped back here, so report no flag. */
++  if (sigsetjmp(sigbuf, 1) != 0) return 0;
++  return func() ? flag : 0;
++}
++#endif
++
+ /* Arch-dependent CPU feature detection. */
+ static uint32_t jit_cpudetect(void)
+ {
+@@ -767,6 +842,21 @@
+ if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
+ }
+ #endif
++#elif LJ_TARGET_RISCV64
++#if LJ_HASJIT
++ /* SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. */
++ struct sigaction old = {0}, act = {0};
++ act.sa_handler = detect_sigill;
++ sigaction(SIGILL, &act, &old);
++ flags |= riscv_probe(riscv_compressed, JIT_F_RVC);
++ flags |= riscv_probe(riscv_zba, JIT_F_RVZba);
++ flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb);
++ flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead);
++ sigaction(SIGILL, &old, NULL);
++
++ /* Detect V/P? */
++ /* V have no hardware available, P not ratified yet. */
++#endif
+ #elif LJ_TARGET_S390X
+ /* No optional CPU features to detect (for now). */
+ #else
+--- a/src/lj_alloc.c
++++ b/src/lj_alloc.c
+@@ -365,7 +365,7 @@
+ #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
+ #define CALL_MREMAP_NOMOVE 0
+ #define CALL_MREMAP_MAYMOVE 1
+-#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
++#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64)
+ #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
+ #else
+ #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -33,6 +33,9 @@
+ #define LUAJIT_ARCH_mips64 7
+ #define LUAJIT_ARCH_S390X 8
+ #define LUAJIT_ARCH_s390x 8
++#define LUAJIT_ARCH_riscv64 9
++#define LUAJIT_ARCH_RISCV64 9
++
+
+ /* Target OS. */
+ #define LUAJIT_OS_OTHER 0
+@@ -69,6 +72,8 @@
+ #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
+ #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
+ #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
++#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
++#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64
+ #else
+ #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
+ #endif
+@@ -473,6 +478,27 @@
+ #define LJ_TARGET_GC64 1
+ #define LJ_ARCH_NOJIT 1 /* NYI */
+
++#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64
++#if defined(__riscv_float_abi_double)
++
++#define LJ_ARCH_NAME "riscv64"
++#define LJ_ARCH_BITS 64
++#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */
++#define LJ_TARGET_RISCV64 1
++#define LJ_TARGET_GC64 1
++#define LJ_TARGET_EHRETREG 10
++#define LJ_TARGET_EHRAREG 1
++#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\
++ AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */
++#define LJ_TARGET_MASKSHIFT 1
++#define LJ_TARGET_MASKROT 1
++#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR, no ROLI */
++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
++
++#else
++#error "No support for RISC-V 64 Soft-float/Single-float"
++#endif
++
+ #else
+ #error "No target architecture defined"
+ #endif
+@@ -556,6 +582,10 @@
+ #error "Only n64 ABI supported for MIPS64"
+ #undef LJ_TARGET_MIPS
+ #endif
++#elif LJ_TARGET_RISCV64
++#if !defined(__riscv_float_abi_double)
++#error "Only RISC-V 64 double float supported for now"
++#endif
+ #endif
+ #endif
+
+--- a/src/lj_asm.c
++++ b/src/lj_asm.c
+@@ -227,6 +227,8 @@
+ #include "lj_emit_ppc.h"
+ #elif LJ_TARGET_MIPS
+ #include "lj_emit_mips.h"
++#elif LJ_TARGET_RISCV64
++#include "lj_emit_riscv.h"
+ #else
+ #error "Missing instruction emitter for target CPU"
+ #endif
+@@ -1708,6 +1710,8 @@
+ #include "lj_asm_ppc.h"
+ #elif LJ_TARGET_MIPS
+ #include "lj_asm_mips.h"
++#elif LJ_TARGET_RISCV64
++#include "lj_asm_riscv64.h"
+ #elif LJ_TARGET_S390X
+ #include "lj_asm_s390x.h"
+ #else
+--- /dev/null
++++ b/src/lj_asm_riscv64.h
+@@ -0,0 +1,1976 @@
++/*
++** RISC-V IR assembler (SSA IR -> machine code).
++** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++/* -- Register allocator extensions --------------------------------------- */
++
++/* Return a register for ref, seeding it with `hint` if it has none yet. */
++static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
++{
++  Reg reg = IR(ref)->r;
++  if (ra_noreg(reg)) {
++    if (!(ra_hashint(reg) || iscrossref(as, ref)))
++      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
++    reg = ra_allocref(as, ref, allow);
++  }
++  ra_noweak(as, reg);
++  return reg;
++}
++
++/* Allocate a register or RID_ZERO. */
++static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
++{
++  Reg reg = IR(ref)->r;
++  if (ra_hasreg(reg)) {
++    ra_noweak(as, reg);
++    return reg;
++  }
++  /* A constant zero requested from a set without FPRs needs no allocation. */
++  if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0)
++    return RID_ZERO;
++  return ra_allocref(as, ref, allow);
++}
++
++/* Allocate two source registers for three-operand instructions. Result packs both: left | (right << 8). */
++static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
++{
++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
++ Reg left = irl->r, right = irr->r;
++ if (ra_hasreg(left)) { /* Left operand already has a register. */
++ ra_noweak(as, left);
++ if (ra_noreg(right))
++ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
++ else
++ ra_noweak(as, right);
++ } else if (ra_hasreg(right)) { /* Only the right operand has a register. */
++ ra_noweak(as, right);
++ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
++ } else if (ra_hashint(right)) { /* Neither has one; right carries a hint, so it is allocated first. */
++ right = ra_alloc1z(as, ir->op2, allow);
++ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
++ } else { /* Neither has one; allocate left first. */
++ left = ra_alloc1z(as, ir->op1, allow);
++ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
++ }
++ return left | (right << 8);
++}
++
++/* -- Guard handling ------------------------------------------------------ */
++
++/* Copied from MIPS, AUIPC+JALR is expensive to setup in-place */
++#define RISCV_SPAREJUMP 4
++
++/* Setup spare long-range jump (trampoline?) slots per mcarea. */
++
++static void asm_sparejump_setup(ASMState *as)
++{
++  MCode *top = as->mctop;
++  /* Only act when mctop is still at the very end of the mcarea. */
++  if ((char *)top != (char *)as->J->mcarea + as->J->szmcarea) return;
++  /* Two instruction words per slot, filled with EBREAK. */
++  for (int n = 0; n < RISCV_SPAREJUMP*2; n++) *--top = RISCVI_EBREAK;
++  as->mctop = top;
++}
++
++static MCode *asm_sparejump_use(MCode *mcarea, MCode *target)
++{
++  /* Start at the end of the mcarea and walk the slots downwards. */
++  MCode *slotp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size);
++  for (int n = 0; n < RISCV_SPAREJUMP; n++) {
++    slotp -= 2;
++    ptrdiff_t delta = (char *)target - (char *)slotp;
++    RISCVIns auipc = RISCVI_AUIPC | RISCVF_D(RID_TMP) |
++                     RISCVF_IMMU(RISCVF_HI(delta));
++    RISCVIns jalr = RISCVI_JALR | RISCVF_S1(RID_TMP) |
++                    RISCVF_IMMI(RISCVF_LO(delta));
++    if (slotp[0] == auipc && slotp[1] == jalr) return slotp;  /* Already set up. */
++    if (slotp[0] == RISCVI_EBREAK) {  /* Slot still holds EBREAK: claim it. */
++      slotp[0] = auipc; slotp[1] = jalr;
++      return slotp;
++    }
++  }
++  return NULL;  /* No matching or unclaimed slot. */
++}
++
++/* Setup exit stub after the end of each trace. Written backwards from as->mctop, which is updated on return. */
++static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
++{
++ ExitNo i;
++ MCode *mxp = as->mctop;
++ if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) /* Need nexits JALs plus the 4-word header. */
++ asm_mclimit(as);
++ for (i = nexits-1; (int32_t)i >= 0; i--) /* One JAL per exit, last exit written first. */
++ *--mxp = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ((uintptr_t)(4*(-4-i)));
++ ptrdiff_t delta = (char *)lj_vm_exit_handler - (char *)(mxp-3); /* mxp-3 is where the AUIPC below ends up. */
++ /* 1: sd ra, 0(sp); auipc+jalr ->vm_exit_handler; lui x0, traceno; jal <1; jal <1; ... */
++ *--mxp = RISCVI_LUI | RISCVF_IMMU(as->T->traceno);
++ *--mxp = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP)
++ | RISCVF_IMMI(RISCVF_LO((uintptr_t)(void *)delta));
++ *--mxp = RISCVI_AUIPC | RISCVF_D(RID_TMP)
++ | RISCVF_IMMU(RISCVF_HI((uintptr_t)(void *)delta));
++ *--mxp = RISCVI_SD | RISCVF_S2(RID_RA) | RISCVF_S1(RID_SP);
++ as->mctop = mxp;
++}
++
++static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
++{
++  MCode *stub = as->mctop + exitno;  /* Indexed by exit number ... */
++  return stub + 4;  /* ... plus 4. Keep this in-sync with exitstub_trace_addr(). */
++}
++
++/* Emit conditional branch to exit for guard: a branch with flipped condition that skips a JAL to the exit stub. */
++static void asm_guard(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2)
++{
++ MCode *target = asm_exitstub_addr(as, as->snapno);
++ MCode *p = as->mcp;
++ if (LJ_UNLIKELY(p == as->invmcp)) { /* Special case: this guard sits at as->invmcp. */
++ as->loopinv = 1;
++ as->mcp = ++p;
++ *p = RISCVI_JAL | RISCVF_IMMJ((char *)target - (char *)p); /* Extra JAL to the exit stub. */
++ riscvi = riscvi^RISCVF_FUNCT3(1); /* Invert cond. */
++ target = p - 1; /* Patch target later in asm_loop_fixup. */
++ }
++ ptrdiff_t delta = (char *)target - (char *)(p - 1); /* Relative to the JAL written next. */
++ *--p = RISCVI_JAL | RISCVF_IMMJ(delta);
++ *--p = (riscvi^RISCVF_FUNCT3(1)) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); /* Flipped branch, +8 bytes: over the JAL. */
++ as->mcp = p;
++}
++
++/* -- Operand fusion ------------------------------------------------------ */
++
++/* Limit linear search to this distance. Avoids O(n^2) behavior. */
++#define CONFLICT_SEARCH_LIM 31
++
++/* Check if there's no conflicting instruction between curins and ref. */
++static int noconflict(ASMState *as, IRRef ref, IROp conflict)
++{
++  IRIns *ins = as->ir;
++  IRRef cur = as->curins;
++  /* Give up if ref is too far away; this is reported like a conflict. */
++  if (cur > ref + CONFLICT_SEARCH_LIM) return 0;
++  for (cur--; cur > ref; cur--) {
++    if (ins[cur].o == conflict) return 0;  /* Conflict found. */
++  }
++  return 1;  /* Ok, no conflict. */
++}
++
++/* Fuse the array base of colocated arrays. */
++static int32_t asm_fuseabase(ASMState *as, IRRef ref)
++{
++  IRIns *ir = IR(ref);
++  /* Only a TNEW within LJ_MAX_COLOSIZE and without a NEWREF conflict qualifies. */
++  if (ir->o != IR_TNEW || ir->op1 > LJ_MAX_COLOSIZE) return 0;
++  if (neverfuse(as) || !noconflict(as, ref, IR_NEWREF)) return 0;
++  return (int32_t)sizeof(GCtab);
++}
++
++/* Fuse array/hash/upvalue reference into register+offset operand. Returns the base register and stores the offset in *ofsp. */
++static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
++{
++ IRIns *ir = IR(ref);
++ if (ra_noreg(ir->r)) { /* Fusion is only tried when ref has no register. */
++ if (ir->o == IR_AREF) {
++ if (mayfuse(as, ref)) {
++ if (irref_isk(ir->op2)) { /* Constant index only. */
++ IRRef tab = IR(ir->op1)->op1;
++ int32_t ofs = asm_fuseabase(as, tab);
++ IRRef refa = ofs ? tab : ir->op1; /* Base is the table itself if its array part got fused. */
++ ofs += 8*IR(ir->op2)->i; /* Constant index scaled by 8. */
++ if (checki12(ofs)) {
++ *ofsp = ofs;
++ return ra_alloc1(as, refa, allow);
++ }
++ }
++ }
++ } else if (ir->o == IR_HREFK) {
++ if (mayfuse(as, ref)) {
++ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); /* Slot number scaled by the node size. */
++ if (checki12(ofs)) {
++ *ofsp = ofs;
++ return ra_alloc1(as, ir->op1, allow);
++ }
++ }
++ } else if (ir->o == IR_UREFC) {
++ if (irref_isk(ir->op1)) { /* Constant function only. */
++ GCfunc *fn = ir_kfunc(IR(ir->op1));
++ GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
++ intptr_t ofs = ((intptr_t)((uintptr_t)(&uv->tv) - (uintptr_t)&J2GG(as->J)->g)); /* Distance of uv->tv from the g member. */
++ if (checki12(ofs)) {
++ *ofsp = (int32_t)ofs;
++ return RID_GL;
++ }
++ }
++ } else if (ir->o == IR_TMPREF) {
++ *ofsp = (int32_t)offsetof(global_State, tmptv);
++ return RID_GL;
++ }
++ }
++ *ofsp = 0; /* Fallback: no fusion, plain register with zero offset. */
++ return ra_alloc1(as, ref, allow);
++}
++
++/* Fuse XLOAD/XSTORE reference into load/store operand. Emits riscvi on rd with a base register and offset. */
++static void asm_fusexref(ASMState *as, RISCVIns riscvi, Reg rd, IRRef ref,
++ RegSet allow, int32_t ofs)
++{
++ IRIns *ir = IR(ref);
++ Reg base;
++ if (ra_noreg(ir->r) && canfuse(as, ir)) {
++ intptr_t ofs2;
++ if (ir->o == IR_ADD) { /* ADD with a constant: fold the constant into the offset if it passes checki12(). */
++ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2),
++ checki12(ofs2))) {
++ ref = ir->op1;
++ ofs = (int32_t)ofs2;
++ }
++ } else if (ir->o == IR_STRREF) {
++ ofs2 = 4096; /* Default if neither operand is constant; presumably outside checki12() range -- confirm. */
++ lj_assertA(ofs == 0, "bad usage");
++ ofs = (int32_t)sizeof(GCstr);
++ if (irref_isk(ir->op2)) {
++ ofs2 = ofs + get_kval(as, ir->op2);
++ ref = ir->op1;
++ } else if (irref_isk(ir->op1)) {
++ ofs2 = ofs + get_kval(as, ir->op1);
++ ref = ir->op2;
++ }
++ if (!checki12(ofs2)) { /* Offset does not fit: add both operands into RID_TMP instead. */
++ /* NYI: Fuse ADD with constant. */
++ Reg right, left = ra_alloc2(as, ir, allow);
++ right = (left >> 8); left &= 255; /* Unpack the ra_alloc2() pair. */
++ emit_lso(as, riscvi, rd, RID_TMP, ofs);
++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, left, right);
++ return;
++ }
++ ofs = ofs2; /* Passed checki12() above. */
++ }
++ }
++ base = ra_alloc1(as, ref, allow);
++ emit_lso(as, riscvi, rd, base, ofs);
++}
++
++/* Fuse Integer multiply-accumulate. */
++
++static int asm_fusemac(ASMState *as, IRIns *ir, RISCVIns riscvi) /* Returns 1 if fused, 0 otherwise. */
++{
++ IRRef lref = ir->op1, rref = ir->op2;
++ IRIns *irm;
++ if (lref != rref &&
++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && /* Left operand is a MUL without a register ... */
++ ra_noreg(irm->r)) ||
++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && /* ... or the right one is; rref then names the addend. */
++ (rref = lref, ra_noreg(irm->r))))) {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg add = ra_hintalloc(as, rref, dest, RSET_GPR); /* Addend, hinted to dest. */
++ Reg left = ra_alloc2(as, irm,
++ rset_exclude(rset_exclude(RSET_GPR, dest), add));
++ Reg right = (left >> 8); left &= 255; /* Unpack the ra_alloc2() pair. */
++ emit_ds1s2(as, riscvi, dest, left, right);
++ if (dest != add) emit_mv(as, dest, add);
++ return 1;
++ }
++ return 0;
++}
++
++/* Fuse FP multiply-add/sub. */
++
++static int asm_fusemadd(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvir) /* Returns 1 if fused, 0 otherwise. */
++{
++ IRRef lref = ir->op1, rref = ir->op2;
++ IRIns *irm;
++ if ((as->flags & JIT_F_OPT_FMA) && /* Only when the FMA optimization flag is set. */
++ lref != rref &&
++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
++ ra_noreg(irm->r)) ||
++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
++ (rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) { /* MUL on the right: use riscvir instead. */
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
++ Reg left = ra_alloc2(as, irm,
++ rset_exclude(rset_exclude(RSET_FPR, dest), add));
++ Reg right = (left >> 8); left &= 255; /* Unpack the ra_alloc2() pair. */
++ emit_ds1s2s3(as, riscvi, dest, left, right, add);
++ return 1;
++ }
++ return 0;
++}
++/* -- Calls --------------------------------------------------------------- */
++
++/* Generate a call to a C function. The call is emitted only if ci->func is non-NULL; arguments are always set up. */
++static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
++{
++ uint32_t n, nargs = CCI_XNARGS(ci);
++ int32_t ofs = 0; /* Running offset for arguments stored via emit_spstore(). */
++ Reg gpr, fpr = REGARG_FIRSTFPR;
++ if ((void *)ci->func)
++ emit_call(as, (void *)ci->func, 1);
++ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
++ as->cost[gpr] = REGCOST(~0u, ASMREF_L);
++ gpr = REGARG_FIRSTGPR;
++ for (n = 0; n < nargs; n++) { /* Setup args. */
++ IRRef ref = args[n];
++ IRIns *ir = IR(ref);
++ if (ref) { /* A zero ref is skipped. */
++ if (irt_isfp(ir->t)) {
++ if (fpr <= REGARG_LASTFPR) {
++ lj_assertA(rset_test(as->freeset, fpr),
++ "reg %d not free", fpr); /* Must have been evicted. */
++ ra_leftov(as, fpr, ref);
++ fpr++; if(ci->flags & CCI_VARARG) gpr++; /* Vararg calls advance both counters. */
++ } else if (!(ci->flags & CCI_VARARG) && gpr <= REGARG_LASTGPR) { /* FPRs exhausted: use a GPR. */
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Must have been evicted. */
++ ra_leftov(as, gpr, ref);
++ gpr++;
++ } else { /* No argument register left: store it instead. */
++ Reg r = ra_alloc1(as, ref, RSET_FPR);
++ emit_spstore(as, ir, r, ofs);
++ ofs += 8;
++ }
++ } else {
++ if (gpr <= REGARG_LASTGPR) {
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Must have been evicted. */
++ ra_leftov(as, gpr, ref);
++ gpr++; if(ci->flags & CCI_VARARG) fpr++; /* Vararg calls advance both counters. */
++ } else { /* No GPR left: store it instead. */
++ Reg r = ra_alloc1z(as, ref, RSET_GPR);
++ emit_spstore(as, ir, r, ofs);
++ ofs += 8;
++ }
++ }
++ }
++ }
++}
++
++/* Setup result reg/sp for call. Evict scratch regs. */
++static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
++{
++ RegSet drop = RSET_SCRATCH;
++ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); /* Next instruction is a non-nil HIOP. */
++ if (ra_hasreg(ir->r))
++ rset_clear(drop, ir->r); /* Dest reg handled below. */
++ if (hiop && ra_hasreg((ir+1)->r))
++ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
++ ra_evictset(as, drop); /* Evictions must be performed first. */
++ if (ra_used(ir)) {
++ lj_assertA(!irt_ispri(ir->t), "PRI dest");
++ if (irt_isfp(ir->t)) {
++ if ((ci->flags & CCI_CASTU64)) { /* FP result taken from RID_RET via FMV. */
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X,
++ dest, RID_RET);
++ } else {
++ ra_destreg(as, ir, RID_FPRET);
++ }
++ } else if (hiop) {
++ ra_destpair(as, ir);
++ } else {
++ ra_destreg(as, ir, RID_RET);
++ }
++ }
++}
++
++static void asm_callx(ASMState *as, IRIns *ir) /* Call to the function referenced by ir->op2: constant address or indirect. */
++{
++ IRRef args[CCI_NARGS_MAX*2];
++ CCallInfo ci;
++ IRRef func;
++ IRIns *irf;
++ ci.flags = asm_callx_flags(as, ir);
++ asm_collectargs(as, ir, &ci, args);
++ asm_setupresult(as, ir, &ci);
++ func = ir->op2; irf = IR(func);
++ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } /* Function ref is wrapped in a CARG: unwrap it. */
++ if (irref_isk(func)) { /* Call to constant address. */
++ ci.func = (ASMFunction)(void *)get_kval(as, func);
++ } else { /* Need specific register for indirect calls. */
++ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); /* NOTE(review): allow set holds only RID_CFUNCADDR, so the else below looks unreachable -- confirm. */
++ MCode *p = as->mcp;
++ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(r);
++ if (r == RID_CFUNCADDR)
++ *--p = RISCVI_ADDI | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r);
++ else
++ *--p = RISCVI_MV | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r);
++ as->mcp = p;
++ ci.func = (ASMFunction)(void *)0; /* NULL func: asm_gencall() then emits no call of its own. */
++ }
++ asm_gencall(as, &ci, args);
++}
++
++static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) /* Call lj_ir_callinfo[id].func on ir->op1; result in RID_FPRET. */
++{
++ /* The modified regs must match with the *.dasc implementation. */
++ RegSet drop = RID2RSET(RID_X6)|RID2RSET(RID_X7)|RID2RSET(RID_F10)|
++ RID2RSET(RID_F14)|RID2RSET(RID_F1)|RID2RSET(RID_F3)|
++ RID2RSET(RID_F4);
++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Keep the destination register out of the evict set. */
++ ra_evictset(as, drop);
++ ra_destreg(as, ir, RID_FPRET);
++ emit_call(as, (void *)lj_ir_callinfo[id].func, 0);
++ ra_leftov(as, REGARG_FIRSTFPR, ir->op1); /* Argument goes in the first FP argument register. */
++}
++
++/* -- Returns ------------------------------------------------------------- */
++
++/* Return to lower frame. Guard that it goes to the right spot. */
++static void asm_retf(ASMState *as, IRIns *ir)
++{
++ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
++ void *pc = ir_kptr(IR(ir->op2));
++ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); /* Slot count from the A operand of the instruction before pc. */
++ as->topslot -= (BCReg)delta;
++ if ((int32_t)as->topslot < 0) as->topslot = 0; /* Clamp at zero. */
++ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
++ emit_setgl(as, base, jit_base);
++ emit_addptr(as, base, -8*delta);
++ asm_guard(as, RISCVI_BNE, RID_TMP,
++ ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
++ emit_lso(as, RISCVI_LD, RID_TMP, base, -8); /* Value at base-8 is what the guard compares against pc. */
++}
++
++/* -- Buffer operations --------------------------------------------------- */
++
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb) /* Rewrite the L field of the SBuf in sb, keeping its SBUF_MASK_FLAG bits. */
++{
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
++ IRIns irgc;
++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
++ emit_ds1s2(as, RISCVI_OR, RID_TMP, RID_TMP, tmp);
++ emit_dsi(as, RISCVI_ANDI, tmp, tmp, SBUF_MASK_FLAG);
++ emit_getgl(as, RID_TMP, cur_L);
++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); /* NOTE(review): emit_* calls look listed in reverse of execution order -- confirm. */
++}
++#endif
++
++/* -- Type conversions ---------------------------------------------------- */
++
++static void asm_tointg(ASMState *as, IRIns *ir, Reg left) /* Guarded FP-to-int conversion of left. */
++{
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
++ Reg dest = ra_dest(as, ir, RSET_GPR), cmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
++ asm_guard(as, RISCVI_BEQ, cmp, RID_ZERO); /* Guard on the FEQ result in cmp. */
++ emit_ds1s2(as, RISCVI_FEQ_D, cmp, tmp, left); /* Compare the round-tripped value with the original. */
++ emit_ds(as, RISCVI_FCVT_D_W, tmp, dest);
++ emit_ds(as, RISCVI_FCVT_W_D, dest, left); /* NOTE(review): emit_* calls look listed in reverse of execution order -- confirm. */
++}
++
++static void asm_tobit(ASMState *as, IRIns *ir) /* dest = FMV_X_W of the FADD_D of op1 and op2. */
++{
++ RegSet allow = RSET_FPR;
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_alloc1(as, ir->op1, allow);
++ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); /* presumably rset_clear() also updates allow -- confirm. */
++ Reg tmp = ra_scratch(as, rset_clear(allow, right));
++ emit_ds(as, RISCVI_FMV_X_W, dest, tmp);
++ emit_ds1s2(as, RISCVI_FADD_D, tmp, left, right);
++}
++
++static void asm_conv(ASMState *as, IRIns *ir)
++{ /* Emit CONV from the source type encoded in ir->op2 to the result type ir->t. */
++ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
++ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
++ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
++ IRRef lref = ir->op1;
++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
++ /* Use GPR to pass floating-point arguments: the FP result is assigned a GPR in X10..X17. */
++ if (irt_isfp(ir->t) && ir->r >= RID_X10 && ir->r <= RID_X17) {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg ftmp = ra_scratch(as, RSET_FPR); /* Conversion happens in this FP scratch; its bits are then moved to dest. */
++ if (stfp) { /* FP to FP conversion. */
++ emit_ds(as, st == IRT_NUM ? RISCVI_FMV_X_W : RISCVI_FMV_X_D, dest, ftmp);
++ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S,
++ ftmp, ra_alloc1(as, lref, RSET_FPR));
++ } else { /* Integer to FP conversion. */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ RISCVIns riscvi = irt_isfloat(ir->t) ?
++ (((IRT_IS64 >> st) & 1) ?
++ (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) :
++ (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) :
++ (((IRT_IS64 >> st) & 1) ?
++ (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) :
++ (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU));
++ emit_ds(as, st64 ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dest, ftmp); /* NOTE(review): move width follows the integer source (st64), whereas the FP-to-FP path above follows the FP result type -- confirm this is intended for a 32 bit int to double CONV. */
++ emit_ds(as, riscvi, ftmp, left);
++ }
++ } else if (irt_isfp(ir->t)) {
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ if (stfp) { /* FP to FP conversion. */
++ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S,
++ dest, ra_alloc1(as, lref, RSET_FPR));
++ } else { /* Integer to FP conversion. */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ RISCVIns riscvi = irt_isfloat(ir->t) ?
++ (((IRT_IS64 >> st) & 1) ?
++ (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) :
++ (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) :
++ (((IRT_IS64 >> st) & 1) ?
++ (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) :
++ (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU));
++ emit_ds(as, riscvi, dest, left);
++ }
++ } else if (stfp) { /* FP to integer conversion. */
++ if (irt_isguard(ir->t)) {
++ /* Checked conversions are only supported from number to int. */
++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
++ "bad type for checked CONV");
++ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
++ } else {
++ Reg left = ra_alloc1(as, lref, RSET_FPR);
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ RISCVIns riscvi = irt_is64(ir->t) ?
++ (st == IRT_NUM ?
++ (irt_isi64(ir->t) ? RISCVI_FCVT_L_D : RISCVI_FCVT_LU_D) :
++ (irt_isi64(ir->t) ? RISCVI_FCVT_L_S : RISCVI_FCVT_LU_S)) :
++ (st == IRT_NUM ?
++ (irt_isint(ir->t) ? RISCVI_FCVT_W_D : RISCVI_FCVT_WU_D) :
++ (irt_isint(ir->t) ? RISCVI_FCVT_W_S : RISCVI_FCVT_WU_S));
++ emit_ds(as, riscvi|RISCVF_RM(RISCVRM_RTZ), dest, left); /* RTZ rounding mode is selected explicitly in the instruction. */
++ }
++ } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ RISCVIns riscvi = st == IRT_I8 ? RISCVI_SEXT_B :
++ st == IRT_U8 ? RISCVI_ZEXT_B :
++ st == IRT_I16 ? RISCVI_SEXT_H : RISCVI_ZEXT_H;
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
++ emit_ext(as, riscvi, dest, left);
++ } else { /* 32/64 bit integer conversions. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ if (irt_is64(ir->t)) {
++ if (st64) {
++ /* 64/64 bit no-op (cast)*/
++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
++ } else { /* 32 to 64 bit extension: sign or zero, selected by IRCONV_SEXT. */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */
++ emit_ext(as, RISCVI_SEXT_W, dest, left);
++ } else { /* 32 to 64 bit zero extension. */
++ emit_ext(as, RISCVI_ZEXT_W, dest, left);
++ }
++ }
++ } else {
++ if (st64 && !(ir->op2 & IRCONV_NONE)) {
++ /* This is either a 32 bit reg/reg mov which zeroes the hiword
++ ** or a load of the loword from a 64 bit address.
++ */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ emit_ext(as, RISCVI_ZEXT_W, dest, left);
++ } else { /* 32/32 bit no-op (cast). */
++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
++ }
++ }
++ }
++}
++
++static void asm_strto(ASMState *as, IRIns *ir)
++{ /* Emit STRTO: call lj_strscan_num(str, n) and guard on its return status. */
++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
++ IRRef args[2];
++ int32_t ofs = SPOFS_TMP;
++ RegSet drop = RSET_SCRATCH;
++ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
++ ra_evictset(as, drop);
++ if (ir->s) ofs = sps_scale(ir->s); /* Prefer the instruction's spill slot over the SPOFS_TMP default. */
++ asm_guard(as, RISCVI_BEQ, RID_RET, RID_ZERO); /* Test return status. */
++ args[0] = ir->op1; /* GCstr *str */
++ args[1] = ASMREF_TMP1; /* TValue *n */
++ asm_gencall(as, ci, args);
++ /* Store the result to the spill slot or temp slots. */
++ Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
++ emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs); /* Presumably tmp = sp + ofs, the address passed as n -- confirm against emit_opk. */
++}
++
++/* -- Memory references --------------------------------------------------- */
++
++/* Store tagged value for ref at base+ofs: constants as one 64 bit immediate, others as value plus (itype << 47). */
++static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
++{
++ RegSet allow = rset_exclude(RSET_GPR, base);
++ IRIns *ir = IR(ref);
++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
++ "store of IR type %d", irt_type(ir->t));
++ if (irref_isk(ref)) {
++ TValue k;
++ lj_ir_kvalue(as->J->L, &k, ir); /* Materialize the constant as a TValue to get its 64 bit image. */
++ emit_lso(as, RISCVI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs);
++ } else {
++ Reg src = ra_alloc1(as, ref, allow);
++ rset_clear(allow, src);
++ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); /* Type tag pre-shifted to bit 47. */
++ emit_lso(as, RISCVI_SD, RID_TMP, base, ofs);
++ if (irt_isinteger(ir->t)) {
++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, RID_TMP, type);
++ emit_ext(as, RISCVI_ZEXT_W, RID_TMP, src); /* Zero-extend the 32 bit payload so it cannot disturb the tag bits. */
++ } else {
++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, type);
++ }
++ }
++}
++
++/* Get pointer to TValue: dest points at g->tmptv (optionally filled from ref) or at a number constant. */
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) // todo-new
++{
++ if ((mode & IRTMPREF_IN1)) { /* Input mode: the value of ref must be stored where dest points. */
++ IRIns *ir = IR(ref);
++ if (irt_isnum(ir->t)) {
++ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
++ /* Use the number constant itself as a TValue. */
++ ra_allockreg(as, igcptr(ir_knum(ir)), dest);
++ return;
++ }
++ emit_lso(as, RISCVI_FSD, ra_alloc1(as, ref, RSET_FPR), dest, 0);
++ } else {
++ asm_tvstore64(as, dest, 0, ref);
++ }
++ }
++ /* g->tmptv holds the TValue(s). */
++ emit_opk(as, RISCVI_ADDI, dest, RID_GL, dest, offsetof(global_State, tmptv)); /* Address is formed relative to RID_GL. */
++}
++
++static void asm_aref(ASMState *as, IRIns *ir)
++{ /* Emit AREF: compute the address of an 8 byte array slot into dest. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg idx, base;
++ if (irref_isk(ir->op2)) { /* Constant index: try a single ADDI with an immediate byte offset. */
++ IRRef tab = IR(ir->op1)->op1;
++ int32_t ofs = asm_fuseabase(as, tab);
++ IRRef refa = ofs ? tab : ir->op1; /* Non-zero ofs: address relative to the table itself instead of the array pointer. */
++ ofs += 8*IR(ir->op2)->i;
++ if (checki12(ofs)) {
++ base = ra_alloc1(as, refa, RSET_GPR);
++ emit_dsi(as, RISCVI_ADDI, dest, base, ofs);
++ return;
++ }
++ }
++ base = ra_alloc1(as, ir->op1, RSET_GPR);
++ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
++ emit_sh3add(as, dest, base, idx, RID_TMP); /* Presumably dest = base + (idx << 3), with RID_TMP as scratch -- confirm against emit_sh3add. */
++}
++
++/* Inlined hash lookup. Specialized for key type and for const keys.
++** The equivalent C code is:
++** Node *n = hashkey(t, key);
++** do {
++** if (lj_obj_equal(&n->key, key)) return &n->val;
++** } while ((n = nextnode(n)));
++** return niltv(L);
++*/
++static void asm_href(ASMState *as, IRIns *ir, IROp merge)
++{ /* merge is compared against IR_EQ / IR_NE below to fold a following comparison into the lookup. */
++ RegSet allow = RSET_GPR;
++ int destused = ra_used(ir);
++ Reg dest = ra_dest(as, ir, allow);
++ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
++ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2;
++ Reg cmp64 = RID_NONE;
++ IRRef refkey = ir->op2;
++ IRIns *irkey = IR(refkey);
++ int isk = irref_isk(refkey);
++ IRType1 kt = irkey->t;
++ uint32_t khash;
++ MCLabel l_end, l_loop, l_next;
++ rset_clear(allow, tab);
++ tmp1 = ra_scratch(as, allow); /* Two GPR scratches, distinct from dest and tab. */
++ rset_clear(allow, tmp1);
++ tmp2 = ra_scratch(as, allow);
++ rset_clear(allow, tmp2);
++
++ if (irt_isnum(kt)) {
++ key = ra_alloc1(as, refkey, RSET_FPR);
++ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
++ } else {
++ /* Allocate cmp64 register used for 64-bit comparisons */
++ if (!isk && irt_isaddr(kt)) {
++ cmp64 = tmp2; /* Filled at runtime with key + (itype << 47), see the ADD emitted further down. */
++ } else {
++ int64_t k;
++ if (isk && irt_isaddr(kt)) {
++ k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
++ } else {
++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
++ k = ~((int64_t)~irt_toitype(kt) << 47);
++ }
++ cmp64 = ra_allock(as, k, allow);
++ rset_clear(allow, cmp64);
++ }
++ if (!irt_ispri(kt)) {
++ key = ra_alloc1(as, refkey, allow);
++ rset_clear(allow, key);
++ }
++ }
++
++ /* Key not found in chain: jump to exit (if merged) or load niltv. */
++ l_end = emit_label(as);
++ int is_lend_exit = 0; /* Set to 1 when l_end is redirected to an exit stub (merge == IR_EQ). */
++ as->invmcp = NULL;
++ if (merge == IR_NE)
++ asm_guard(as, RISCVI_BEQ, RID_ZERO, RID_ZERO); /* zero == zero always holds; NOTE(review): presumably an unconditional exit -- confirm. */
++ else if (destused)
++ emit_loada(as, dest, niltvg(J2G(as->J)));
++
++ /* Follow hash chain until the end. */
++ l_loop = --as->mcp; /* Reserve one instruction slot; it is filled with the loop BNE below once the distance is known. */
++ emit_mv(as, dest, tmp1);
++ emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, next));
++ l_next = emit_label(as);
++
++ /* Type and value comparison. */
++ if (merge == IR_EQ) { /* Must match asm_guard(). */
++ l_end = asm_exitstub_addr(as, as->snapno);
++ is_lend_exit = 1;
++ }
++ if (irt_isnum(kt)) {
++ emit_branch(as, RISCVI_BNE, tmp1, RID_ZERO, l_end, is_lend_exit);
++ emit_ds1s2(as, RISCVI_FEQ_D, tmp1, tmpnum, key);
++ emit_branch(as, RISCVI_BEQ, tmp1, RID_ZERO, l_next, 0);
++ emit_dsi(as, RISCVI_SLTIU, tmp1, tmp1, ((int32_t)LJ_TISNUM));
++ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 47);
++ emit_ds(as, RISCVI_FMV_D_X, tmpnum, tmp1);
++ } else {
++ emit_branch(as, RISCVI_BEQ, tmp1, cmp64, l_end, is_lend_exit);
++ }
++ emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
++ *l_loop = RISCVI_BNE | RISCVF_S1(tmp1) | RISCVF_S2(RID_ZERO)
++ | RISCVF_IMMB((char *)as->mcp-(char *)l_loop); /* Patch the reserved slot: BNE tmp1, zero to the current as->mcp. */
++ if (!isk && irt_isaddr(kt)) {
++ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow);
++ emit_ds1s2(as, RISCVI_ADD, tmp2, key, type);
++ rset_clear(allow, type);
++ }
++
++ /* Load main position relative to tab->node into dest. */
++ khash = isk ? ir_khash(as, irkey) : 1; /* Non-constant keys get 1, so the khash == 0 shortcut applies to constant keys only. */
++ if (khash == 0) {
++ emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node));
++ } else {
++ Reg tmphash = tmp1;
++ if (isk)
++ tmphash = ra_allock(as, khash, allow);
++ /* node = tab->node + (idx*32-idx*8) */
++ emit_ds1s2(as, RISCVI_ADD, dest, dest, tmp1);
++ lj_assertA(sizeof(Node) == 24, "bad Node size");
++ emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp2, tmp1);
++ emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 3);
++ emit_dsshamt(as, RISCVI_SLLIW, tmp2, tmp1, 5);
++ emit_ds1s2(as, RISCVI_AND, tmp1, tmp2, tmphash); // idx = hi & tab->hmask
++ emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node));
++ emit_lso(as, RISCVI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
++ if (isk) {
++ /* Nothing to do. */
++ } else if (irt_isstr(kt)) {
++ emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); /* String keys: the GCstr sid field is loaded as the hash input. */
++ } else { /* Must match with hash*() in lj_tab.c. */
++ emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2);
++ emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f);
++ emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2);
++ emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f);
++ emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest);
++ emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1);
++ emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f);
++ if (irt_isnum(kt)) {
++ emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1);
++ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi
++ emit_ext(as, RISCVI_SEXT_W, tmp2, tmp1); // lo
++ emit_ds(as, RISCVI_FMV_X_D, tmp1, key);
++ } else {
++ checkmclim(as);
++ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi
++ emit_ext(as, RISCVI_SEXT_W, tmp2, key); // lo
++ emit_ds1s2(as, RISCVI_ADD, tmp1, key, type);
++ }
++ }
++ }
++}
++
++static void asm_hrefk(ASMState *as, IRIns *ir)
++{ /* Emit HREFK: guard that the key stored in a constant node slot equals the expected constant key. */
++ IRIns *kslot = IR(ir->op2);
++ IRIns *irkey = IR(kslot->op1);
++ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
++ int32_t kofs = ofs + (int32_t)offsetof(Node, key);
++ int bigofs = !checki12(kofs); /* Key offset does not fit a 12 bit immediate. */
++ Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
++ RegSet allow = rset_exclude(RSET_GPR, node);
++ Reg idx = node;
++ int64_t k;
++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
++ if (bigofs) {
++ idx = dest; /* Key is loaded relative to dest = node + ofs (ADD emitted at the end of this function). */
++ rset_clear(allow, dest);
++ kofs = (int32_t)offsetof(Node, key);
++ } else if (ra_hasreg(dest)) {
++ emit_dsi(as, RISCVI_ADDI, dest, node, ofs);
++ }
++ if (irt_ispri(irkey->t)) {
++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
++ k = ~((int64_t)~irt_toitype(irkey->t) << 47);
++ } else if (irt_isnum(irkey->t)) {
++ k = (int64_t)ir_knum(irkey)->u64;
++ } else {
++ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey);
++ }
++ asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow)); /* Compare the loaded 64 bit key image against k. */
++ emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs);
++ if (bigofs)
++ emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow));
++}
++
++static void asm_uref(ASMState *as, IRIns *ir)
++{ /* Emit an upvalue reference: dest gets the upvalue's v pointer, or for IR_UREFC the address of its tv field. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
++ if (irref_isk(ir->op1) && !guarded) { /* Constant function, no guard: load uv.v straight from its known address. */
++ GCfunc *fn = ir_kfunc(IR(ir->op1));
++ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
++ emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR);
++ } else {
++ if (guarded)
++ asm_guard(as, ir->o == IR_UREFC ? RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO); /* Tests the 'closed' byte loaded into RID_TMP below. */
++ if (ir->o == IR_UREFC)
++ emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
++ else
++ emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v));
++ if (guarded)
++ emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
++ if (irref_isk(ir->op1)) {
++ GCfunc *fn = ir_kfunc(IR(ir->op1));
++ GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
++ emit_loada(as, dest, o);
++ } else {
++ emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR),
++ (int32_t)offsetof(GCfuncL, uvptr) +
++ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); /* Upvalue index is ir->op2 >> 8. */
++ }
++ }
++}
++
++static void asm_fref(ASMState *as, IRIns *ir)
++{ /* Emits nothing: only asserts that the FREF result is unused (an unused FREF means it was fused elsewhere). */
++ UNUSED(as); UNUSED(ir);
++ lj_assertA(!ra_used(ir), "unfused FREF");
++}
++
++static void asm_strref(ASMState *as, IRIns *ir)
++{ /* Emit STRREF: dest = base + index + sizeof(GCstr). */
++ RegSet allow = RSET_GPR;
++ Reg dest = ra_dest(as, ir, allow);
++ Reg base = ra_alloc1(as, ir->op1, allow);
++ IRIns *irr = IR(ir->op2);
++ int32_t ofs = sizeof(GCstr);
++ rset_clear(allow, base);
++ if (irref_isk(ir->op2) && checki12(ofs + irr->i)) { /* Constant index that fits a 12 bit immediate: single ADDI. */
++ emit_dsi(as, RISCVI_ADDI, dest, base, ofs + irr->i);
++ } else {
++ emit_dsi(as, RISCVI_ADDI, dest, dest, ofs);
++ emit_ds1s2(as, RISCVI_ADD, dest, base, ra_alloc1(as, ir->op2, allow));
++ }
++}
++
++/* -- Loads and stores ---------------------------------------------------- */
++
++static RISCVIns asm_fxloadins(IRIns *ir)
++{ /* Map the IR result type to the matching RISC-V load opcode. */
++ switch (irt_type(ir->t)) {
++ case IRT_I8: return RISCVI_LB;
++ case IRT_U8: return RISCVI_LBU;
++ case IRT_I16: return RISCVI_LH;
++ case IRT_U16: return RISCVI_LHU;
++ case IRT_NUM: return RISCVI_FLD;
++ case IRT_FLOAT: return RISCVI_FLW;
++ default: return irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW; /* All remaining types: 64 bit or 32 bit integer load. */
++ }
++}
++
++static RISCVIns asm_fxstoreins(IRIns *ir)
++{ /* Map the IR type to the matching RISC-V store opcode; signedness does not matter for stores. */
++ switch (irt_type(ir->t)) {
++ case IRT_I8: case IRT_U8: return RISCVI_SB;
++ case IRT_I16: case IRT_U16: return RISCVI_SH;
++ case IRT_NUM: return RISCVI_FSD;
++ case IRT_FLOAT: return RISCVI_FSW;
++ default: return irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW; /* All remaining types: 64 bit or 32 bit integer store. */
++ }
++}
++
++static void asm_fload(ASMState *as, IRIns *ir)
++{ /* Emit FLOAD: load a field of the object in op1, or of GG_State when op1 is REF_NIL. */
++ RegSet allow = RSET_GPR;
++ Reg idx, dest = ra_dest(as, ir, allow);
++ rset_clear(allow, dest);
++ RISCVIns riscvi = asm_fxloadins(ir);
++ int32_t ofs;
++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
++ idx = RID_GL;
++ ofs = (ir->op2 << 2) - GG_OFS(g); /* op2 is scaled by 4 and rebased by GG_OFS(g) for addressing off RID_GL. */
++ } else {
++ idx = ra_alloc1(as, ir->op1, allow);
++ if (ir->op2 == IRFL_TAB_ARRAY) {
++ ofs = asm_fuseabase(as, ir->op1);
++ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
++ emit_dsi(as, RISCVI_ADDI, dest, idx, ofs);
++ return;
++ }
++ }
++ ofs = field_ofs[ir->op2];
++ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
++ }
++ rset_clear(allow, idx);
++ emit_lso(as, riscvi, dest, idx, ofs);
++}
++
++static void asm_fstore(ASMState *as, IRIns *ir)
++{ /* Emit FSTORE: store op2 into the field described by the FREF in op1. */
++ if (ir->r != RID_SINK) { /* Nothing is emitted when the store is marked RID_SINK. */
++ Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
++ IRIns *irf = IR(ir->op1);
++ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
++ int32_t ofs = field_ofs[irf->op2];
++ lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE");
++ emit_lso(as, asm_fxstoreins(ir), src, idx, ofs);
++ }
++}
++
++static void asm_xload(ASMState *as, IRIns *ir)
++{ /* Emit XLOAD: load from the address in op1 into an FPR or GPR chosen by the result type. */
++ Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
++ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED),
++ "unaligned XLOAD");
++ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); /* Address computation is delegated to asm_fusexref. */
++}
++
++static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
++{ /* Emit XSTORE: store op2 to the address in op1; ofs is forwarded to asm_fusexref. */
++ if (ir->r != RID_SINK) { /* Nothing is emitted when the store is marked RID_SINK. */
++ Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
++ asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
++ rset_exclude(RSET_GPR, src), ofs);
++ }
++}
++
++#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) /* Plain XSTORE: zero extra offset. */
++
++static void asm_ahuvload(ASMState *as, IRIns *ir)
++{ /* Emit a type-guarded load of a 64 bit tagged value via asm_fuseahuref (IR_VLOAD adds 8*op2 to the offset). */
++ Reg dest = RID_NONE, type = RID_TMP, idx;
++ RegSet allow = RSET_GPR;
++ int32_t ofs = 0;
++ IRType1 t = ir->t;
++ if (ra_used(ir)) {
++ lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t),
++ "bad load type %d", irt_type(ir->t));
++ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
++ rset_clear(allow, dest);
++ if (irt_isaddr(t)) {
++ emit_cleartp(as, dest, dest); /* Presumably strips the type tag from the loaded pointer -- confirm against emit_cleartp. */
++ } else if (irt_isint(t))
++ emit_ext(as, RISCVI_SEXT_W, dest, dest);
++ }
++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
++ rset_clear(allow, idx);
++ if (irt_isnum(t)) {
++ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO);
++ emit_dsi(as, RISCVI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); /* RID_TMP = (type < LJ_TISNUM), unsigned compare. */
++ } else {
++ asm_guard(as, RISCVI_BNE, type,
++ ra_allock(as, (int32_t)irt_toitype(t), allow));
++ }
++ if (ra_hasreg(dest)) {
++ if (irt_isnum(t)) {
++ emit_lso(as, RISCVI_FLD, dest, idx, ofs);
++ dest = type; /* Numbers: the FP value is loaded separately, the raw word goes to the type register. */
++ }
++ } else {
++ dest = type; /* Result unused: load only for the type check. */
++ }
++ emit_dsshamt(as, RISCVI_SRAI, type, dest, 47); /* type = loaded word >> 47 (arithmetic). */
++ emit_lso(as, RISCVI_LD, dest, idx, ofs);
++}
++
++static void asm_ahustore(ASMState *as, IRIns *ir)
++{ /* Emit a store of a tagged value to the reference resolved by asm_fuseahuref. */
++ RegSet allow = RSET_GPR;
++ Reg idx, src = RID_NONE, type = RID_NONE;
++ int32_t ofs = 0;
++ if (ir->r == RID_SINK) /* Nothing is emitted when the store is marked RID_SINK. */
++ return;
++ if (irt_isnum(ir->t)) { /* Numbers are stored directly from an FPR. */
++ src = ra_alloc1(as, ir->op2, RSET_FPR);
++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
++ emit_lso(as, RISCVI_FSD, src, idx, ofs);
++ } else {
++ Reg tmp = RID_TMP;
++ if (irt_ispri(ir->t)) { /* Primitive types: the whole 64 bit image is a constant. */
++ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
++ rset_clear(allow, tmp);
++ } else {
++ src = ra_alloc1(as, ir->op2, allow);
++ rset_clear(allow, src);
++ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
++ rset_clear(allow, type);
++ }
++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
++ emit_lso(as, RISCVI_SD, tmp, idx, ofs);
++ if (ra_hasreg(src)) {
++ if (irt_isinteger(ir->t)) {
++ emit_ds1s2(as, RISCVI_ADD, tmp, tmp, type);
++ emit_ext(as, RISCVI_ZEXT_W, tmp, src); /* Zero-extend the 32 bit payload so it cannot disturb the tag bits. */
++ } else {
++ emit_ds1s2(as, RISCVI_ADD, tmp, src, type);
++ }
++ }
++ }
++}
++
++static void asm_sload(ASMState *as, IRIns *ir)
++{ /* Emit SLOAD: load a stack slot relative to REF_BASE, with optional type check and int/number conversion. */
++ Reg dest = RID_NONE, type = RID_NONE, base;
++ RegSet allow = RSET_GPR;
++ IRType1 t = ir->t;
++ int32_t ofs = 8*((int32_t)ir->op1-2); /* Byte offset of the slot: 8 bytes per slot, biased by 2 slots. */
++ lj_assertA(checki12(ofs), "sload IR operand out of range");
++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
++ "bad parent SLOAD"); /* Handled by asm_head_side(). */
++ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
++ "inconsistent SLOAD variant");
++ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
++ dest = ra_scratch(as, RSET_FPR);
++ asm_tointg(as, ir, dest);
++ t.irt = IRT_NUM; /* Continue with a regular number type check. */
++ } else if (ra_used(ir)) {
++ Reg tmp = RID_NONE;
++ if ((ir->op2 & IRSLOAD_CONVERT))
++ tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
++ lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t),
++ "bad SLOAD type %d", irt_type(t));
++ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
++ rset_clear(allow, dest);
++ base = ra_alloc1(as, REF_BASE, allow);
++ rset_clear(allow, base);
++ if (irt_isaddr(t)) { /* Clear type from pointers. */
++ emit_cleartp(as, dest, dest);
++ } else if (ir->op2 & IRSLOAD_CONVERT) {
++ if (irt_isint(t)) {
++ emit_ds(as, RISCVI_FCVT_W_D|RISCVF_RM(RISCVRM_RTZ), dest, tmp);
++ /* If value is already loaded for type check, move it to FPR. */
++ if ((ir->op2 & IRSLOAD_TYPECHECK))
++ emit_ds(as, RISCVI_FMV_D_X, tmp, dest);
++ else
++ dest = tmp;
++ t.irt = IRT_NUM; /* Check for original type. */
++ } else {
++ emit_ds(as, RISCVI_FCVT_D_W, dest, tmp);
++ dest = tmp;
++ t.irt = IRT_INT; /* Check for original type. */
++ }
++ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
++ /* Sign-extend integers. */
++ emit_ext(as, RISCVI_SEXT_W, dest, dest);
++ }
++ goto dotypecheck; /* base is already allocated on this path. */
++ }
++ base = ra_alloc1(as, REF_BASE, allow);
++ rset_clear(allow, base);
++dotypecheck:
++ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
++ type = dest < RID_MAX_GPR ? dest : RID_TMP; /* Load the raw word into dest when it is a GPR, else into RID_TMP. */
++ if (irt_ispri(t)) {
++ asm_guard(as, RISCVI_BNE, type,
++ ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
++ } else if ((ir->op2 & IRSLOAD_KEYINDEX)) {
++ asm_guard(as, RISCVI_BNE, RID_TMP,
++ ra_allock(as, (int32_t)LJ_KEYINDEX, allow));
++ emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 32); /* Compare the upper 32 bits against LJ_KEYINDEX. */
++ } else {
++ if (irt_isnum(t)) {
++ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO);
++ emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, LJ_TISNUM);
++ if (ra_hasreg(dest)) {
++ emit_lso(as, RISCVI_FLD, dest, base, ofs);
++ }
++ } else {
++ asm_guard(as, RISCVI_BNE, RID_TMP,
++ ra_allock(as, (int32_t)irt_toitype(t), allow));
++ }
++ emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 47); /* RID_TMP = raw word >> 47 (arithmetic). */
++ }
++ emit_lso(as, RISCVI_LD, type, base, ofs);
++ } else if (ra_hasreg(dest)) {
++ emit_lso(as, irt_isnum(t) ? RISCVI_FLD :
++ irt_isint(t) ? RISCVI_LW : RISCVI_LD,
++ dest, base, ofs);
++ }
++}
++
++/* -- Allocations --------------------------------------------------------- */
++
++#if LJ_HASFFI
++static void asm_cnew(ASMState *as, IRIns *ir)
++{ /* Emit CNEW/CNEWI: allocate a cdata object via lj_mem_newgco or lj_cdata_newv and initialize it. */
++ CTState *cts = ctype_ctsG(J2G(as->J));
++ CTypeID id = (CTypeID)IR(ir->op1)->i;
++ CTSize sz;
++ CTInfo info = lj_ctype_info(cts, id, &sz);
++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
++ IRRef args[4];
++ RegSet drop = RSET_SCRATCH;
++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
++ "bad CNEW/CNEWI operands");
++
++ as->gcsteps++;
++ if (ra_hasreg(ir->r))
++ rset_clear(drop, ir->r); /* Dest reg handled below. */
++ ra_evictset(as, drop);
++ if (ra_used(ir))
++ ra_destreg(as, ir, RID_RET); /* GCcdata * */
++
++ /* Initialize immutable cdata object. */
++ if (ir->o == IR_CNEWI) {
++ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
++ emit_lso(as, sz == 8 ? RISCVI_SD : RISCVI_SW, ra_alloc1(as, ir->op2, allow),
++ RID_RET, (sizeof(GCcdata))); /* Payload is stored right after the GCcdata header. */
++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
++ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
++ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
++ args[0] = ASMREF_L; /* lua_State *L */
++ args[1] = ir->op1; /* CTypeID id */
++ args[2] = ir->op2; /* CTSize sz */
++ args[3] = ASMREF_TMP1; /* CTSize align */
++ asm_gencall(as, ci, args);
++ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
++ return;
++ }
++
++ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
++ emit_lso(as, RISCVI_SB, RID_RET+1, RID_RET, (offsetof(GCcdata, gct)));
++ emit_lso(as, RISCVI_SH, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid)));
++ emit_loadk12(as, RID_RET+1, ~LJ_TCDATA); /* RID_RET+1 serves as a temporary for the gct byte. */
++ emit_loadk32(as, RID_TMP, id);
++ args[0] = ASMREF_L; /* lua_State *L */
++ args[1] = ASMREF_TMP1; /* MSize size */
++ asm_gencall(as, ci, args);
++ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
++ ra_releasetmp(as, ASMREF_TMP1)); /* Allocation size = payload size plus GCcdata header. */
++}
++#endif
++
++/* -- Write barriers ------------------------------------------------------ */
++
++static void asm_tbar(ASMState *as, IRIns *ir)
++{ /* Table write barrier: if tab->marked has the black bit, clear it and push tab onto g->gc.grayagain. */
++ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
++ Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
++ Reg link = RID_TMP;
++ MCLabel l_end = emit_label(as); /* NOTE(review): emit calls below appear in reverse of execution order (backwards emission) -- confirm. */
++ emit_lso(as, RISCVI_SD, link, tab, (int32_t)offsetof(GCtab, gclist)); /* tab->gclist = previous grayagain head. */
++ emit_lso(as, RISCVI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); /* Write back marked with the black bit cleared. */
++ emit_setgl(as, tab, gc.grayagain); /* g->gc.grayagain = tab. */
++ emit_getgl(as, link, gc.grayagain);
++ emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, 0); /* Black bit clear: branch to l_end. */
++ emit_ds1s2(as, RISCVI_XOR, mark, mark, RID_TMP); /* mark ^= (mark & LJ_GC_BLACK): clears the black bit. */
++ emit_dsi(as, RISCVI_ANDI, RID_TMP, mark, LJ_GC_BLACK);
++ emit_lso(as, RISCVI_LBU, mark, tab, ((int32_t)offsetof(GCtab, marked)));
++}
++
++static void asm_obar(ASMState *as, IRIns *ir)
++{ /* Upvalue write barrier: call lj_gc_barrieruv(g, tv) unless one of the two mark tests branches to l_end. */
++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
++ IRRef args[2];
++ MCLabel l_end;
++ Reg obj, val, tmp;
++ /* No need for other object barriers (yet). */
++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); /* Only closed upvalues (IR_UREFC) are handled. */
++ ra_evictset(as, RSET_SCRATCH);
++ l_end = emit_label(as); /* NOTE(review): emit calls below appear in reverse of execution order (backwards emission) -- confirm. */
++ args[0] = ASMREF_TMP1; /* global_State *g */
++ args[1] = ir->op1; /* TValue *tv */
++ asm_gencall(as, ci, args);
++ emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
++ obj = IR(ir->op1)->r;
++ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
++ emit_branch(as, RISCVI_BEQ, tmp, RID_ZERO, l_end, 0); /* Upvalue has no black bit: branch to l_end. */
++ emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, 0); /* Value has no white bit: branch to l_end. */
++ emit_dsi(as, RISCVI_ANDI, tmp, tmp, LJ_GC_BLACK);
++ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES);
++ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
++ emit_lso(as, RISCVI_LBU, tmp, obj,
++ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))); /* obj is offset by the tv field; rebase to reach marked. */
++ emit_lso(as, RISCVI_LBU, RID_TMP, val, ((int32_t)offsetof(GChead, marked)));
++}
++
++/* -- Arithmetic and logic operations ------------------------------------- */
++
++static void asm_fparith(ASMState *as, IRIns *ir, RISCVIns riscvi)
++{ /* Emit a two-operand FP instruction riscvi: dest = left op right, all in FPRs. */
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
++ right = (left >> 8); left &= 255; /* ra_alloc2 result is unpacked here: low byte = left, next byte = right. */
++ emit_ds1s2(as, riscvi, dest, left, right);
++}
++
++static void asm_fpunary(ASMState *as, IRIns *ir, RISCVIns riscvi)
++{ /* Emit a one-operand FP instruction; only the opcodes listed in the switch are accepted. */
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
++ switch(riscvi) {
++ case RISCVI_FSQRT_S: case RISCVI_FSQRT_D:
++ emit_ds(as, riscvi, dest, left);
++ break;
++ case RISCVI_FMV_S: case RISCVI_FMV_D:
++ case RISCVI_FABS_S: case RISCVI_FABS_D:
++ case RISCVI_FNEG_S: case RISCVI_FNEG_D:
++ emit_ds1s2(as, riscvi, dest, left, left); /* These opcodes are emitted with the same register in both source fields. */
++ break;
++ default:
++ lj_assertA(0, "bad fp unary instruction");
++ return;
++ }
++}
++
++static void asm_fpmath(ASMState *as, IRIns *ir)
++{ /* Emit FPMATH: the sub-operation is selected by ir->op2. */
++ IRFPMathOp fpm = (IRFPMathOp)ir->op2;
++ if (fpm <= IRFPM_TRUNC)
++ asm_callround(as, ir, IRCALL_lj_vm_floor + fpm); /* Call id is IRCALL_lj_vm_floor offset by fpm. */
++ else if (fpm == IRFPM_SQRT)
++ asm_fpunary(as, ir, RISCVI_FSQRT_D); /* Square root is a single inline FSQRT.D. */
++ else
++ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
++}
++
++static void asm_add(ASMState *as, IRIns *ir)
++{ /* Emit ADD for numbers (FADD.D or a fused multiply-add) and for 32/64 bit integers. */
++ IRType1 t = ir->t;
++ if (irt_isnum(t)) {
++ if (!asm_fusemadd(as, ir, RISCVI_FMADD_D, RISCVI_FMADD_D)) /* Try a fused multiply-add first. */
++ asm_fparith(as, ir, RISCVI_FADD_D);
++ return;
++ } else {
++ if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULA)) /* Only attempted when JIT_F_RVXThead is set. */
++ return;
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++ if (irref_isk(ir->op2)) {
++ intptr_t k = get_kval(as, ir->op2);
++ if (checki12(k)) { /* Constant fits a 12 bit immediate: use ADDI/ADDIW. */
++ if (irt_is64(t)) {
++ emit_dsi(as, RISCVI_ADDI, dest, left, k);
++ } else {
++ emit_dsi(as, RISCVI_ADDIW, dest, left, k);
++ }
++ return;
++ }
++ }
++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
++ emit_ds1s2(as, irt_is64(t) ? RISCVI_ADD : RISCVI_ADDW, dest,
++ left, right);
++ }
++}
++
++static void asm_sub(ASMState *as, IRIns *ir)
++{ /* Emit SUB for numbers (FSUB.D or a fused form) and for 32/64 bit integers (SUBW/SUB). */
++ if (irt_isnum(ir->t)) {
++ if (!asm_fusemadd(as, ir, RISCVI_FMSUB_D, RISCVI_FNMSUB_D)) /* Try a fused multiply-subtract first. */
++ asm_fparith(as, ir, RISCVI_FSUB_D);
++ return;
++ } else {
++ if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULS)) /* Only attempted when JIT_F_RVXThead is set. */
++ return;
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
++ right = (left >> 8); left &= 255; /* ra_alloc2 result is unpacked here: low byte = left, next byte = right. */
++ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest,
++ left, right);
++ }
++}
++
++static void asm_mul(ASMState *as, IRIns *ir)
++{ /* Emit MUL: FMUL.D for numbers, MULW/MUL for 32/64 bit integers. */
++ if (irt_isnum(ir->t)) {
++ asm_fparith(as, ir, RISCVI_FMUL_D);
++ } else {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
++ right = (left >> 8); left &= 255; /* ra_alloc2 result is unpacked here: low byte = left, next byte = right. */
++ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_MUL : RISCVI_MULW, dest,
++ left, right);
++ }
++}
++
++static void asm_fpdiv(ASMState *as, IRIns *ir)
++{ /* Emit a double-precision division (FDIV.D) via asm_fparith. */
++ asm_fparith(as, ir, RISCVI_FDIV_D);
++}
++
++static void asm_neg(ASMState *as, IRIns *ir)
++{ /* Emit NEG: FNEG.D for numbers, otherwise 0 - left with SUBW/SUB. */
++ if (irt_isnum(ir->t)) {
++ asm_fpunary(as, ir, RISCVI_FNEG_D);
++ } else {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest,
++ RID_ZERO, left); /* Subtract from the zero register. */
++ }
++}
++
++#define asm_abs(as, ir) asm_fpunary(as, ir, RISCVI_FABS_D) /* ABS is always emitted as FABS.D. */
++
++static void asm_arithov(ASMState *as, IRIns *ir)
++{ /* Emit ADDOV/SUBOV: 32 bit add/subtract with a guard that exits on signed overflow. */
++ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
++ lj_assertA(!irt_is64(ir->t), "bad usage");
++ if (irref_isk(ir->op2)) {
++ int k = IR(ir->op2)->i;
++ if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); /* x - k == x + (-k); negate in unsigned arithmetic to avoid signed overflow. */
++ if (checki12(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */
++ left = ra_alloc1(as, ir->op1, RSET_GPR);
++ asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? RID_TMP : left);
++ emit_dsi(as, RISCVI_ADDIW, dest, left, k); /* Must wrap at 32 bits like ADDW/SUBW below: with a 64 bit ADDI the sum never crosses left, so the guard could never fire. */
++ if (dest == left) emit_mv(as, RID_TMP, left); /* Keep the original left for the compare when dest aliases it. */
++ return;
++ }
++ }
++ left = ra_alloc2(as, ir, RSET_GPR);
++ right = (left >> 8); left &= 255; /* ra_alloc2 result is unpacked here: low byte = left, next byte = right. */
++ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
++ right), dest));
++ asm_guard(as, RISCVI_BLT, RID_TMP, RID_ZERO); /* Overflow iff the combined sign test below is negative. */
++ emit_ds1s2(as, RISCVI_AND, RID_TMP, RID_TMP, tmp);
++ if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */
++ emit_ds1s2(as, RISCVI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
++ } else { /* ((dest^left) & (dest^~right)) < 0 */
++ emit_xnor(as, RID_TMP, dest, dest == right ? RID_TMP : right);
++ }
++ emit_ds1s2(as, RISCVI_XOR, tmp, dest, dest == left ? RID_TMP : left);
++ emit_ds1s2(as, ir->o == IR_ADDOV ? RISCVI_ADDW : RISCVI_SUBW, dest, left, right);
++ if (dest == left || dest == right)
++ emit_mv(as, RID_TMP, dest == left ? left : right); /* Save the operand that dest overwrites. */
++}
++
++#define asm_addov(as, ir) asm_arithov(as, ir)
++#define asm_subov(as, ir) asm_arithov(as, ir)
++
++static void asm_mulov(ASMState *as, IRIns *ir)
++{ /* Emit MULOV: 64 bit multiply, then guard that the product equals its sign-extended low 32 bits. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
++ right = (left >> 8); left &= 255; /* ra_alloc2 result is unpacked here: low byte = left, next byte = right. */
++ asm_guard(as, RISCVI_BNE, dest, RID_TMP);
++ emit_ext(as, RISCVI_SEXT_W, dest, RID_TMP); // dest: [31:0]+signextend
++ emit_ds1s2(as, RISCVI_MUL, RID_TMP, left, right); // RID_TMP: [63:0]
++}
++
++static void asm_bnot(ASMState *as, IRIns *ir)
++{ /* Emit BNOT; with JIT_F_RVZbb a fusable BXOR operand is folded into a single XNOR. */
++ Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
++ IRIns *irl = IR(ir->op1);
++ if (as->flags & JIT_F_RVZbb && mayfuse(as, ir->op1) && irl->o == IR_BXOR) {
++ left = ra_alloc2(as, irl, RSET_GPR); /* Allocate the operands of the BXOR, not of the BNOT. */
++ right = (left >> 8); left &= 255;
++ emit_ds1s2(as, RISCVI_XNOR, dest, left, right);
++ } else {
++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++ emit_ds(as, RISCVI_NOT, dest, left);
++ }
++}
++
++static void asm_bswap(ASMState *as, IRIns *ir)
++{ /* Emit BSWAP using REV8 (JIT_F_RVZbb), TH.REV/TH.REVW (JIT_F_RVXThead), or a shift/mask sequence. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
++ RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left);
++ if (as->flags & JIT_F_RVZbb) {
++ if (!irt_is64(ir->t))
++ emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32); /* 32 bit case: shift the 64 bit REV8 result right by 32 (arithmetic). */
++ emit_ds(as, RISCVI_REV8, dest, left);
++ } else if (as->flags & JIT_F_RVXThead) {
++ emit_ds(as, irt_is64(ir->t) ? RISCVI_TH_REV : RISCVI_TH_REVW,
++ dest, left);
++ } else if (irt_is64(ir->t)) { /* Generic 64 bit path: four GPR scratches plus RID_TMP for the masks. */
++ Reg tmp1, tmp2, tmp3, tmp4;
++ tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1);
++ tmp2 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp2);
++ tmp3 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp3);
++ tmp4 = ra_scratch(as, allow);
++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp4);
++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp3);
++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2);
++ emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 40);
++ emit_dsshamt(as, RISCVI_SLLI, dest, left, 56);
++ emit_ds1s2(as, RISCVI_OR, tmp3, tmp1, tmp3);
++ emit_ds1s2(as, RISCVI_AND, tmp4, left, RID_TMP);
++ emit_dsshamt(as, RISCVI_SLLI, tmp3, tmp3, 32);
++ emit_dsshamt(as, RISCVI_SLLI, tmp1, tmp1, 24);
++ emit_dsshamt(as, RISCVI_SRLIW, tmp3, left, 24);
++ emit_ds1s2(as, RISCVI_OR, tmp2, tmp3, tmp2);
++ emit_ds1s2(as, RISCVI_AND, tmp1, left, tmp1);
++ emit_ds1s2(as, RISCVI_OR, tmp3, tmp4, tmp3);
++ emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 24);
++ emit_dsshamt(as, RISCVI_SRLIW, tmp4, tmp4, 24);
++ emit_ds1s2(as, RISCVI_AND, tmp3, tmp3, tmp1);
++ emit_dsshamt(as, RISCVI_SRLI, tmp4, left, 8);
++ emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 24);
++ emit_ds1s2(as, RISCVI_OR, tmp2, tmp2, tmp3);
++ emit_du(as, RISCVI_LUI, tmp1, RISCVF_HI(0xff0000u));
++ emit_ds1s2(as, RISCVI_AND, tmp2, tmp2, RID_TMP);
++ emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 56);
++ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00));
++ emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); /* LUI + ADDI build the 0xff00 mask in RID_TMP. */
++ emit_dsshamt(as, RISCVI_SRLI, tmp2, left, 40);
++ } else { /* Generic 32 bit path: two GPR scratches plus RID_TMP for the 0xff00 mask. */
++ Reg tmp1, tmp2;
++ tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1);
++ tmp2 = ra_scratch(as, allow);
++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2);
++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp1);
++ emit_dsshamt(as, RISCVI_SLLI, tmp2, RID_TMP, 8);
++ emit_dsshamt(as, RISCVI_SLLIW, dest, left, 24);
++ emit_ds1s2(as, RISCVI_OR, tmp1, tmp1, tmp2);
++ emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP);
++ emit_ds1s2(as, RISCVI_AND, tmp1, tmp1, RID_TMP);
++ emit_dsshamt(as, RISCVI_SRLIW, tmp2, left, 24);
++ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00));
++ emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); /* LUI + ADDI build the 0xff00 mask in RID_TMP. */
++ emit_dsshamt(as, RISCVI_SRLI, tmp1, left, 8);
++ }
++}
++
++static void asm_bitop(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik, RISCVIns riscvin)
++{ /* Emit a bitwise op: riscvi = register form, riscvik = immediate form, riscvin = Zbb form used to fuse a BNOT operand. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left, right;
++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
++ if (irref_isk(ir->op2)) {
++ intptr_t k = get_kval(as, ir->op2);
++ if (checki12(k)) { /* Constant passes checki12: use the immediate form. */
++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++ emit_dsi(as, riscvik, dest, left, k);
++ return;
++ }
++ } else if (as->flags & JIT_F_RVZbb) { /* Only tried when op2 is not a constant. */
++ if (mayfuse(as, ir->op1) && irl->o == IR_BNOT) {
++ left = ra_alloc1(as, irl->op1, RSET_GPR);
++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
++ emit_ds1s2(as, riscvin, dest, right, left); /* The BNOT input is passed as rs2. */
++ return;
++ } else if (mayfuse(as, ir->op2) && irr->o == IR_BNOT) {
++ left = ra_alloc1(as, ir->op1, RSET_GPR);
++ right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
++ emit_ds1s2(as, riscvin, dest, left, right); /* The BNOT input is passed as rs2. */
++ return;
++ }
++ }
++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); /* Fallback: plain register-register form. */
++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
++ emit_ds1s2(as, riscvi, dest, left, right);
++}
++
++#define asm_band(as, ir) asm_bitop(as, ir, RISCVI_AND, RISCVI_ANDI, RISCVI_ANDN) /* Arguments: register, immediate and BNOT-fusing opcodes. */
++#define asm_bor(as, ir) asm_bitop(as, ir, RISCVI_OR, RISCVI_ORI, RISCVI_ORN)
++#define asm_bxor(as, ir) asm_bitop(as, ir, RISCVI_XOR, RISCVI_XORI, RISCVI_XNOR)
++
++static void asm_bitshift(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik)
++{ /* Emit a shift or rotate: riscvik for a constant count, riscvi for a count held in a register. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
++ uint32_t shmsk = irt_is64(ir->t) ? 63 : 31; /* Constant counts are reduced modulo the operand width. */
++ if (irref_isk(ir->op2)) { /* Constant shifts. */
++ uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmsk);
++ switch (riscvik) {
++ case RISCVI_SRAI: case RISCVI_SRLI: case RISCVI_SLLI:
++ case RISCVI_SRAIW: case RISCVI_SLLIW: case RISCVI_SRLIW:
++ emit_dsshamt(as, riscvik, dest, left, shift);
++ break;
++ case RISCVI_RORI: case RISCVI_RORIW:
++ emit_roti(as, riscvik, dest, left, RID_TMP, shift); /* emit_roti gets RID_TMP as its temporary. */
++ break;
++ default:
++ lj_assertA(0, "bad shift instruction");
++ return;
++ }
++ } else { /* Variable shifts: the count lives in a register different from left. */
++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
++ switch (riscvi) {
++ case RISCVI_SRA: case RISCVI_SRL: case RISCVI_SLL:
++ case RISCVI_SRAW: case RISCVI_SRLW: case RISCVI_SLLW:
++ emit_ds1s2(as, riscvi, dest, left, right);
++ break;
++ case RISCVI_ROR: case RISCVI_ROL:
++ case RISCVI_RORW: case RISCVI_ROLW:
++ emit_rot(as, riscvi, dest, left, right, RID_TMP); /* emit_rot gets RID_TMP as its temporary. */
++ break;
++ default:
++ lj_assertA(0, "bad shift instruction");
++ return;
++ }
++ }
++}
++
++#define asm_bshl(as, ir) (irt_is64(ir->t) ? /* 64 bit types use the plain opcodes, all others the W variants. */ \
++ asm_bitshift(as, ir, RISCVI_SLL, RISCVI_SLLI) : \
++ asm_bitshift(as, ir, RISCVI_SLLW, RISCVI_SLLIW))
++#define asm_bshr(as, ir) (irt_is64(ir->t) ? \
++ asm_bitshift(as, ir, RISCVI_SRL, RISCVI_SRLI) : \
++ asm_bitshift(as, ir, RISCVI_SRLW, RISCVI_SRLIW))
++#define asm_bsar(as, ir) (irt_is64(ir->t) ? \
++ asm_bitshift(as, ir, RISCVI_SRA, RISCVI_SRAI) : \
++ asm_bitshift(as, ir, RISCVI_SRAW, RISCVI_SRAIW))
++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") /* BROL is not handled by this backend; only BROR is. */
++#define asm_bror(as, ir) (irt_is64(ir->t) ? \
++ asm_bitshift(as, ir, RISCVI_ROR, RISCVI_RORI) : \
++ asm_bitshift(as, ir, RISCVI_RORW, RISCVI_RORIW))
++
++static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
++{  /* Emit MIN (ismax == 0) or MAX (ismax != 0) of ir->op1 and ir->op2 into the register of ir. */
++  if (irt_isnum(ir->t)) {
++    Reg dest = ra_dest(as, ir, RSET_FPR);
++    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
++    right = (left >> 8); left &= 255;
++    emit_ds1s2(as, ismax ? RISCVI_FMAX_D : RISCVI_FMIN_D, dest, left, right);
++  } else {
++    Reg dest = ra_dest(as, ir, RSET_GPR);
++    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
++    if (as->flags & JIT_F_RVZbb) {
++      emit_ds1s2(as, ismax ? RISCVI_MAX : RISCVI_MIN, dest, left, right);
++    } else {  /* RID_TMP = 1 iff 'right' is the result (SLT below executes first: code is emitted backwards). */
++      if (as->flags & JIT_F_RVXThead) {
++        if (left == right) {
++          if (dest != left) emit_mv(as, dest, left);
++        } else {
++          if (dest == left) {
++            emit_ds1s2(as, RISCVI_TH_MVNEZ, dest, right, RID_TMP);
++          } else {
++            emit_ds1s2(as, RISCVI_TH_MVEQZ, dest, left, RID_TMP);
++            if (dest != right) emit_mv(as, dest, right);
++          }
++        }
++      } else {
++        /* Branchless select: dest = (a & mask) | (b & ~mask), where a is the first AND operand executed. */
++        emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP);
++        emit_ds1s2(as, RISCVI_AND, RID_TMP, dest != right ? right : left, RID_TMP);
++        emit_ds(as, RISCVI_NOT, RID_TMP, RID_TMP);
++        emit_ds1s2(as, RISCVI_AND, dest, dest != right ? left : right, RID_TMP);
++        /* a = left needs mask = SLT-1; if dest aliases right, a = right is consumed first and needs mask = -SLT. */
++        if (dest != right)
++          emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1);  /* 1 -> 0, 0 -> all ones: mask selects left. */
++        else
++          emit_ds1s2(as, RISCVI_SUB, RID_TMP, RID_ZERO, RID_TMP);  /* 1 -> all ones, 0 -> 0: mask selects right. */
++      }
++      emit_ds1s2(as, RISCVI_SLT, RID_TMP,
++                 ismax ? left : right, ismax ? right : left);
++    }
++  }
++}
++
++#define asm_min(as, ir) asm_min_max(as, ir, 0) /* ismax = 0 */
++#define asm_max(as, ir) asm_min_max(as, ir, 1) /* ismax = 1 */
++
++/* -- Comparisons --------------------------------------------------------- */
++
++/* FP comparisons: compute the predicate into RID_TMP with FLT.D/FLE.D/FEQ.D, then guard on RID_TMP against zero. */
++static void asm_fpcomp(ASMState *as, IRIns *ir)
++{
++ IROp op = ir->o;
++ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
++ right = (left >> 8); left &= 255; /* ra_alloc2 packs both registers: low byte = left, next byte = right. */
++ asm_guard(as, (op < IR_EQ ? (op&4) : (op&1)) /* Branch sense: op bit 2 for ops below IR_EQ, else op bit 0. */
++ ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO);
++ switch (op) {
++ case IR_LT: case IR_UGE:
++ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, left, right);
++ break;
++ case IR_LE: case IR_UGT: case IR_ABC:
++ emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, left, right);
++ break;
++ case IR_GT: case IR_ULE: /* Same compare as LT/UGE with the operands swapped. */
++ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, right, left);
++ break;
++ case IR_GE: case IR_ULT: /* Same compare as LE/UGT with the operands swapped. */
++ emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, right, left);
++ break;
++ case IR_EQ: case IR_NE:
++ emit_ds1s2(as, RISCVI_FEQ_D, RID_TMP, left, right);
++ break;
++ default:
++ break;
++ }
++}
++
++/* Integer comparisons: branch against zero, use SLTI/SLTIU for small constants, else a register-register branch. */
++static void asm_intcomp(ASMState *as, IRIns *ir)
++{
++ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
++ /* 00 01 10 11 100 101 110 111 */
++ IROp op = ir->o;
++ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
++ if (op == IR_ABC) op = IR_UGT; /* ABC is handled like UGT. */
++ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { /* Signed compare with constant 0: branch against RID_ZERO. */
++ switch (op) {
++ case IR_LT: asm_guard(as, RISCVI_BGE, left, RID_ZERO); break;
++ case IR_GE: asm_guard(as, RISCVI_BLT, left, RID_ZERO); break;
++ case IR_LE: asm_guard(as, RISCVI_BLT, RID_ZERO, left); break;
++ case IR_GT: asm_guard(as, RISCVI_BGE, RID_ZERO, left); break;
++ default: break;
++ }
++ return;
++ }
++ if (irref_isk(ir->op2)) {
++ intptr_t k = get_kval(as, ir->op2);
++ if ((op&2)) k++; /* LE/GT/ULE/UGT (bit 1 set) are tested as a less-than against k+1. */
++ if (checki12(k)) {
++ asm_guard(as, (op&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO);
++ emit_dsi(as, (op&4) ? RISCVI_SLTIU : RISCVI_SLTI, RID_TMP, left, k); /* Bit 2 selects the unsigned compare. */
++ return;
++ }
++ }
++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
++ asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1), /* Low funct3 bit is toggled from the low two op bits; operands swap when bit 1 is set. */
++ (op&2) ? right : left, (op&2) ? left : right);
++}
++
++static void asm_comp(ASMState *as, IRIns *ir)
++{ /* Dispatch an ordered comparison by operand type: number vs. everything else. */
++ if (irt_isnum(ir->t))
++ asm_fpcomp(as, ir);
++ else
++ asm_intcomp(as, ir);
++}
++
++static void asm_equal(ASMState *as, IRIns *ir)
++{ /* Equality comparison: numbers go through asm_fpcomp, other types compare two GPRs directly. */
++ if (irt_isnum(ir->t)) {
++ asm_fpcomp(as, ir);
++ } else {
++ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
++ right = (left >> 8); left &= 255; /* Unpack the register pair returned by ra_alloc2. */
++ asm_guard(as, (ir->o & 1) ? RISCVI_BEQ : RISCVI_BNE, left, right); /* The low opcode bit selects the guard branch. */
++ }
++}
++
++/* -- Split register ops -------------------------------------------------- */
++
++/* Hiword op of a split 64 bit op. Previous op must be the loword op. Only call ops are accepted here. */
++static void asm_hiop(ASMState *as, IRIns *ir)
++{
++ /* HIOP is marked as a store because it needs its own DCE logic. */
++ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
++ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; /* Without DCE both halves count as used. */
++ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
++ switch ((ir-1)->o) {
++ case IR_CALLN:
++ case IR_CALLL:
++ case IR_CALLS:
++ case IR_CALLXS:
++ if (!uselo)
++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
++ break;
++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
++ }
++}
++
++/* -- Profiling ----------------------------------------------------------- */
++
++static void asm_prof(ASMState *as, IRIns *ir)
++{ /* Guard on the HOOK_PROFILE bit of the global hookmask being clear. */
++ UNUSED(ir);
++ asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO);
++ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
++ emit_lsglptr(as, RISCVI_LBU, RID_TMP, /* Runs first: loads the hookmask byte into RID_TMP. */
++ (int32_t)offsetof(global_State, hookmask));
++}
++
++/* -- Stack handling ------------------------------------------------------ */
++
++/* Check Lua stack size for overflow. Use exit handler as fallback. */
++static void asm_stack_check(ASMState *as, BCReg topslot,
++ IRIns *irp, RegSet allow, ExitNo exitno)
++{
++ /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */
++ Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
++ ExitNo oldsnap = as->snapno;
++ rset_clear(allow, pbase);
++ as->snapno = exitno; /* The guard below is emitted with exitno as the snapshot number. */
++ asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO);
++ as->snapno = oldsnap;
++ if (allow) {
++ tmp = rset_pickbot(allow);
++ ra_modified(as, tmp);
++ } else { /* allow == RSET_EMPTY: borrow RID_RET and reload it from the stack slot afterwards. */
++ tmp = RID_RET;
++ emit_lso(as, RISCVI_LD, tmp, RID_SP, 0); /* Restore tmp1 register. */
++ }
++ emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); /* RID_TMP = (maxstack - pbase) < 8*topslot, unsigned. */
++ emit_ds1s2(as, RISCVI_SUB, RID_TMP, tmp, pbase);
++ emit_lso(as, RISCVI_LD, tmp, tmp, offsetof(lua_State, maxstack));
++ if (pbase == RID_TMP)
++ emit_getgl(as, RID_TMP, jit_base); /* No register holds the parent base: load jit_base into RID_TMP. */
++ emit_getgl(as, tmp, cur_L);
++ if (allow == RSET_EMPTY) /* Spill temp register. */
++ emit_lso(as, RISCVI_SD, tmp, RID_SP, 0);
++}
++
++/* Restore Lua stack from on-trace state: store every snapshot slot not marked SNAP_NORESTORE relative to RID_BASE. */
++static void asm_stack_restore(ASMState *as, SnapShot *snap)
++{
++ SnapEntry *map = &as->T->snapmap[snap->mapofs];
++#ifdef LUA_USE_ASSERT
++ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
++#endif
++ MSize n, nent = snap->nent;
++ /* Store the value of all modified slots to the Lua stack. */
++ for (n = 0; n < nent; n++) {
++ SnapEntry sn = map[n];
++ BCReg s = snap_slot(sn);
++ int32_t ofs = 8*((int32_t)s-1-LJ_FR2); /* Byte offset of slot s from RID_BASE. */
++ IRRef ref = snap_ref(sn);
++ IRIns *ir = IR(ref);
++ if ((sn & SNAP_NORESTORE))
++ continue;
++ if (irt_isnum(ir->t)) { /* Numbers are stored straight from an FPR. */
++ Reg src = ra_alloc1(as, ref, RSET_FPR);
++ emit_lso(as, RISCVI_FSD, src, RID_BASE, ofs);
++ } else {
++ if ((sn & SNAP_KEYINDEX)) { /* Store LJ_KEYINDEX in the upper 32 bits, combined with the value. */
++ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
++ int64_t kki = (int64_t)LJ_KEYINDEX << 32;
++ if (irref_isk(ref)) {
++ emit_lso(as, RISCVI_SD,
++ ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow),
++ RID_BASE, ofs);
++ } else {
++ Reg src = ra_alloc1(as, ref, allow);
++ Reg rki = ra_allock(as, kki, rset_exclude(allow, src));
++ emit_lso(as, RISCVI_SD, RID_TMP, RID_BASE, ofs);
++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, rki); /* Runs before the store: RID_TMP = src + kki. */
++ }
++ } else {
++ asm_tvstore64(as, RID_BASE, ofs, ref);
++ }
++ }
++ checkmclim(as);
++ }
++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
++}
++
++/* -- GC handling --------------------------------------------------------- */
++
++/* Marker to prevent patching the GC check exit. Encoded as OR with RID_TMP as destination and both sources. */
++#define RISCV_NOPATCH_GC_CHECK \
++ (RISCVI_OR|RISCVF_D(RID_TMP)|RISCVF_S1(RID_TMP)|RISCVF_S2(RID_TMP))
++
++/* Check GC threshold and do one or more GC steps. */
++static void asm_gc_check(ASMState *as)
++{
++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
++ IRRef args[2];
++ MCLabel l_end;
++ Reg tmp;
++ ra_evictset(as, RSET_SCRATCH);
++ l_end = emit_label(as); /* Code is emitted backwards, so this label marks the end of the check. */
++ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
++ asm_guard(as, RISCVI_BNE, RID_RET, RID_ZERO); /* Assumes asm_snap_prep() already done. */
++ *--as->mcp = RISCV_NOPATCH_GC_CHECK; /* Marker word right before the guard; lj_asm_patchexit tests for it. */
++ args[0] = ASMREF_TMP1; /* global_State *g */
++ args[1] = ASMREF_TMP2; /* MSize steps */
++ asm_gencall(as, ci, args);
++ emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
++ tmp = ra_releasetmp(as, ASMREF_TMP2);
++ emit_loadi(as, tmp, as->gcsteps);
++ /* Jump around GC step if GC total < GC threshold. */
++ emit_branch(as, RISCVI_BLTU, RID_TMP, tmp, l_end, 0);
++ emit_getgl(as, tmp, gc.threshold);
++ emit_getgl(as, RID_TMP, gc.total);
++ as->gcsteps = 0; /* The accumulated step count has been consumed by the emit_loadi above. */
++ checkmclim(as);
++}
++
++/* -- Loop handling ------------------------------------------------------- */
++
++/* Fixup the loop branch: patch the jump at the end of the trace to target as->mcp. */
++static void asm_loop_fixup(ASMState *as)
++{
++  MCode *p = as->mctop;
++  MCode *target = as->mcp;
++  ptrdiff_t delta;
++  if (as->loopinv) {  /* Inverted loop branch? */
++    delta = (char *)target - (char *)(p - 2);
++    /* asm_guard* already inverted the branch, and patched the final b. */
++    lj_assertA(checki21(delta), "branch target out of range");
++    p[-2] = (p[-2]&0x00000fff) | RISCVF_IMMJ(delta);
++  } else {
++    delta = (char *)target - (char *)(p - 1);
++    lj_assertA(checki21(delta), "branch target out of range");  /* Same JAL range check as above. */
++    p[-1] = RISCVI_JAL | RISCVF_IMMJ(delta);  /* J */
++  }
++}
++
++/* Fixup the tail of the loop. Currently a no-op on this target. */
++static void asm_loop_tail_fixup(ASMState *as)
++{
++ UNUSED(as); /* Nothing to do(?) */
++}
++
++/* -- Head of trace ------------------------------------------------------- */
++
++/* Coalesce BASE register for a root trace: free its register and copy RID_BASE into it if they differ. */
++static void asm_head_root_base(ASMState *as)
++{
++ IRIns *ir = IR(REF_BASE);
++ Reg r = ir->r;
++ if (ra_hasreg(r)) { /* Nothing to do if BASE has no register. */
++ ra_free(as, r);
++ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */
++ if (r != RID_BASE)
++ emit_mv(as, r, RID_BASE);
++ }
++}
++
++/* Coalesce BASE register for a side trace. Returns the parent register used as BASE, or RID_NONE. */
++static Reg asm_head_side_base(ASMState *as, IRIns *irp)
++{
++ IRIns *ir = IR(REF_BASE);
++ Reg r = ir->r;
++ if (ra_hasreg(r)) {
++ ra_free(as, r);
++ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */
++ if (irp->r == r) {
++ return r; /* Same BASE register already coalesced. */
++ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
++ emit_mv(as, r, irp->r); /* Move from coalesced parent reg. */
++ return irp->r;
++ } else {
++ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
++ }
++ }
++ return RID_NONE; /* BASE unused, or reloaded from jit_base. */
++}
++
++/* -- Tail of trace ------------------------------------------------------- */
++
++/* Fixup the tail code: patch the stack adjustment slot and the final AUIPC+JALR exit jump. */
++static void asm_tail_fixup(ASMState *as, TraceNo lnk)
++{
++ MCode *p = as->mctop;
++ MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; /* Linked trace, or back to the interpreter. */
++ int32_t spadj = as->T->spadjust;
++ if (spadj == 0) {
++ p[-3] = RISCVI_NOP;
++ /* The slot is kept as a NOP; as->mctop is left unchanged. */
++ } else {
++ /* Patch stack adjustment. */
++ p[-3] = RISCVI_ADDI | RISCVF_D(RID_SP) | RISCVF_S1(RID_SP) | RISCVF_IMMI(spadj);
++ }
++ /* Patch exit jump. */
++ ptrdiff_t delta = (char *)target - (char *)(p - 2); /* Relative to the AUIPC at p[-2]. */
++ p[-2] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
++ p[-1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
++}
++
++/* Prepare tail of code: reserve the last instruction slots of the trace and set the initial emit position. */
++static void asm_tail_prep(ASMState *as)
++{
++ MCode *p = as->mctop - 2; /* Leave room for exitstub. */
++ if (as->loopref) {
++ as->invmcp = as->mcp = p;
++ } else {
++ as->mcp = p-1; /* Leave room for stack pointer adjustment. */
++ as->invmcp = NULL;
++ }
++ p[0] = p[1] = RISCVI_NOP; /* Prevent load/store merging. */
++}
++
++/* -- Trace setup --------------------------------------------------------- */
++
++/* Ensure there are enough stack slots for call arguments. Returns the register hint for the call result. */
++static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
++{
++ IRRef args[CCI_NARGS_MAX*2];
++ uint32_t i, nargs = CCI_XNARGS(ci);
++ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
++ asm_collectargs(as, ir, ci, args);
++ for (i = 0; i < nargs; i++) { /* Count down free argument registers; the rest take two spill slots each. */
++ if (args[i] && irt_isfp(IR(args[i])->t)) {
++ if (nfpr > 0) {
++ nfpr--; if(ci->flags & CCI_VARARG) ngpr--; /* Vararg calls decrement both counters. */
++ } else if (!(ci->flags & CCI_VARARG) && ngpr > 0) ngpr--; /* FP argument falls back to a GPR. */
++ else nslots += 2;
++ } else {
++ if (ngpr > 0) {
++ ngpr--; if(ci->flags & CCI_VARARG) nfpr--;
++ } else nslots += 2;
++ }
++ }
++ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
++ as->evenspill = nslots;
++ return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
++}
++
++static void asm_setup_target(ASMState *as)
++{ /* Target-specific setup: spare jump slots, then exit stubs. */
++ asm_sparejump_setup(as);
++ asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); /* One extra stub when the trace has a parent. */
++}
++
++/* -- Trace patching ------------------------------------------------------ */
++
++/* Patch exit jumps of existing machine code to a new target: every jump to the stub of exitno in trace T is retargeted. */
++void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
++{
++ MCode *p = T->mcode;
++ MCode *pe = (MCode *)((char *)p + T->szmcode);
++ MCode *px = exitstub_trace_addr(T, exitno);
++ MCode *cstart = NULL; /* First patched instruction, for the final sync. */
++ MCode *mcarea = lj_mcode_patch(J, p, 0); /* NOTE(review): presumably makes the area writable; undone by the call with 1 at the end. */
++
++ for (; p < pe; p++) {
++ /* Look for exitstub branch, replace with branch to target. */
++ ptrdiff_t odelta = (char *)px - (char *)(p+1),
++ ndelta = (char *)target - (char *)(p+1);
++ if ((((p[0] ^ RISCVF_IMMB(8)) & 0xfe000f80u) == 0 &&
++ ((p[0] & 0x0000007fu) == 0x63u) &&
++ ((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 &&
++ ((p[1] & 0x0000007fu) == 0x6fu) && p[-1] != RISCV_NOPATCH_GC_CHECK) ||
++ (((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 &&
++ ((p[1] & 0x0000007fu) == 0x6fu) && p[0] != RISCV_NOPATCH_GC_CHECK)) {
++ lj_assertJ(checki32(ndelta), "branch target out of range");
++ /* Patch jump, if within range. */
++ patchbranch:
++ if (checki21(ndelta)) { /* Patch jump */
++ p[1] = RISCVI_JAL | RISCVF_IMMJ(ndelta);
++ if (!cstart) cstart = p + 1;
++ } else { /* Branch out of range. Use spare jump slot in mcarea. */
++ MCode *mcjump = asm_sparejump_use(mcarea, target);
++ if (mcjump) {
++ lj_mcode_sync(mcjump, mcjump+2);
++ ndelta = (char *)mcjump - (char *)(p+1); /* Retry with the spare jump as the new destination. */
++ if (checki21(ndelta)) {
++ goto patchbranch;
++ } else {
++ lj_assertJ(0, "spare jump out of range: -Osizemcode too big");
++ }
++ }
++ /* Ignore jump slot overflow. Child trace is simply not attached. */
++ }
++ } else if (p+2 == pe) { /* Last two words of the trace: a NOP pair is turned into an AUIPC+JALR jump to target. */
++ if (p[0] == RISCVI_NOP && p[1] == RISCVI_NOP) {
++ ptrdiff_t delta = (char *)target - (char *)p;
++ lj_assertJ(checki32(delta), "jump target out of range");
++ p[0] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
++ p[1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
++ if (!cstart) cstart = p;
++ }
++ }
++ }
++ if (cstart) lj_mcode_sync(cstart, px+1); /* Sync only if something was patched. */
++ lj_mcode_patch(J, mcarea, 1);
++}
+--- a/src/lj_ccall.c
++++ b/src/lj_ccall.c
+@@ -687,6 +687,97 @@
+ if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
+ }
+
++#elif LJ_TARGET_RISCV64
++/* -- RISC-V lp64d calling conventions ------------------------------------ */
++
++#define CCALL_HANDLE_STRUCTRET \
++ /* Return structs of size > 16 by reference. */ \
++ cc->retref = !(sz <= 16); \
++ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; /* The result pointer takes the next GPR argument slot. */
++
++#define CCALL_HANDLE_STRUCTRET2 \
++  unsigned int cl = ccall_classify_struct(cts, ctr); \
++  if ((cl & 4) && (cl >> 8) <= 2) {  /* At most two floats: one per FPR. */ \
++    CTSize i = (cl >> 8) - 1; \
++    do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \
++  } else { \
++    if (cl > 1) {  /* Other FP aggregates: copy from the FPRs, or from the GPRs with more than 2 elements. */ \
++      sp = (uint8_t *)&cc->fpr[0]; \
++      if ((cl >> 8) > 2) \
++        sp = (uint8_t *)&cc->gpr[0]; \
++    } \
++    memcpy(dp, sp, ctr->size); \
++  }
++
++#define CCALL_HANDLE_COMPLEXRET \
++ /* Complex values are returned in 1 or 2 FPRs, never by reference. */ \
++ cc->retref = 0;
++
++#define CCALL_HANDLE_COMPLEXRET2 \
++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float: one float each from fpr[0] and fpr[1]. */ \
++ ((float *)dp)[0] = cc->fpr[0].f; \
++ ((float *)dp)[1] = cc->fpr[1].f; \
++ } else { /* Copy complex double from FPRs. */ \
++ ((double *)dp)[0] = cc->fpr[0].d; \
++ ((double *)dp)[1] = cc->fpr[1].d; \
++ }
++
++#define CCALL_HANDLE_COMPLEXARG \
++ /* Pass long double complex by reference. */ \
++ if (sz == 2*sizeof(long double)) { \
++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
++ sz = CTSIZE_PTR; \
++ } \
++ /* Pass complex in two FPRs or on stack. */ \
++ else if (sz == 2*sizeof(float)) { \
++ isfp = 2; /* Complex float: isfp == 2, widened to two pointer-sized slots. */ \
++ sz = 2*CTSIZE_PTR; \
++ } else { \
++ isfp = 1; /* All remaining sizes: isfp == 1. */ \
++ sz = 2*CTSIZE_PTR; \
++ }
++
++#define CCALL_HANDLE_RET \
++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) /* A float result is read from the f member of fpr[0]. */ \
++ sp = (uint8_t *)&cc->fpr[0].f;
++
++#define CCALL_HANDLE_STRUCTARG \
++ /* Pass structs of size >16 by reference. */ \
++ unsigned int cl = ccall_classify_struct(cts, d); \
++ nff = cl >> 8; /* Number of FP elements reported by the classifier. */ \
++ if (sz > 16) { \
++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
++ sz = CTSIZE_PTR; \
++ } \
++ /* Pass struct in FPRs. */ \
++ if (cl > 1) { \
++ isfp = (cl & 4) ? 2 : 1; /* Element size 4 (float) gives isfp == 2, size 8 gives isfp == 1. */ \
++ }
++
++
++#define CCALL_HANDLE_REGARG \
++ if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \
++ int n2 = ctype_isvector(d->info) ? 1 : \
++ isfp == 1 ? n : 2; /* Register count: 1 for vectors, n for isfp == 1, else 2. */ \
++ if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
++ dp = &cc->fpr[nfpr]; \
++ nfpr += n2; \
++ goto done; \
++ } else { /* No room in the FPRs, or more than two FP elements: try the GPRs. */ \
++ if (ngpr + n2 <= maxgpr) { \
++ dp = &cc->gpr[ngpr]; \
++ ngpr += n2; \
++ goto done; \
++ } \
++ } \
++ } else { /* Try to pass argument in GPRs. */ \
++ if (ngpr + n <= maxgpr) { \
++ dp = &cc->gpr[ngpr]; \
++ ngpr += n; \
++ goto done; \
++ } \
++ }
++
+ #else
+ #error "Missing calling convention definitions for this architecture"
+ #endif
+@@ -1047,6 +1138,51 @@
+
+ #endif
+
++/* -- RISC-V ABI struct classification ---------------------------- */
++
++#if LJ_TARGET_RISCV64
++
++static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
++{ /* Returns elemsize + (count << 8) for aggregates of only 4 or only 8 byte FP elements (count <= 4); else 1 if size <= 16, else 0. */
++ CTSize sz = ct->size;
++ unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); /* r ORs the element sizes, n counts elements. */
++ while (ct->sib) {
++ CType *sct;
++ ct = ctype_get(cts, ct->sib);
++ if (ctype_isfield(ct->info)) {
++ sct = ctype_rawchild(cts, ct);
++ if (ctype_isfp(sct->info)) {
++ r |= sct->size;
++ if (!isu) n++; else if (n == 0) n = 1; /* Union members overlap: keep the maximum, not the sum. */
++ } else if (ctype_iscomplex(sct->info)) {
++ r |= (sct->size >> 1); /* A complex counts as two elements of half its size. */
++ if (!isu) n += 2; else if (n < 2) n = 2;
++ } else if (ctype_isstruct(sct->info)) {
++ goto substruct;
++ } else {
++ goto noth;
++ }
++ } else if (ctype_isbitfield(ct->info)) {
++ goto noth;
++ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
++ sct = ctype_rawchild(cts, ct);
++ substruct:
++ if (sct->size > 0) { /* Nested aggregate: classify recursively and merge its result. */
++ unsigned int s = ccall_classify_struct(cts, sct);
++ if (s <= 1) goto noth;
++ r |= (s & 255);
++ if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
++ }
++ }
++ }
++ if ((r == 4 || r == 8) && n <= 4) /* Mixed 4 and 8 byte elements give r == 12 and fall through. */
++ return r + (n << 8);
++noth: /* Not a homogeneous float/double aggregate. */
++ return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
++}
++
++#endif
++
+ /* -- Common C call handling ---------------------------------------------- */
+
+ /* Infer the destination CTypeID for a vararg argument. */
+@@ -1093,6 +1229,10 @@
+ #endif
+ #endif
+
++#if LJ_TARGET_RISCV64
++ int nff = 0;
++#endif
++
+ /* Clear unused regs to get some determinism in case of misdeclaration. */
+ memset(cc->gpr, 0, sizeof(cc->gpr));
+ #if CCALL_NUM_FPR
+@@ -1282,7 +1422,11 @@
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+ }
+ #endif
+-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
++#if LJ_TARGET_RISCV64
++ if (isfp && d->size == sizeof(float))
++ ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */
++#endif
++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
+ #if LJ_TARGET_MIPS64
+ || (isfp && nsp == 0)
+@@ -1322,6 +1466,14 @@
+ CTSize i = (sz >> 2) - 1;
+ do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
+ }
++#elif LJ_TARGET_RISCV64
++ if (isfp == 2 && nff <= 2) {
++ /* Split complex float into separate registers. */
++ CTSize i = (sz >> 2) - 1;
++ do {
++ ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i];
++ } while (i--);
++ }
+ #else
+ UNUSED(isfp);
+ #endif
+@@ -1331,7 +1483,7 @@
+ if ((int32_t)nsp < 0) nsp = 0;
+ #endif
+
+-#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
++#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64
+ cc->nfpr = nfpr; /* Required for vararg functions. */
+ #endif
+ cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
+--- a/src/lj_ccall.h
++++ b/src/lj_ccall.h
+@@ -157,6 +157,21 @@
+ float f;
+ } FPRArg;
+
++#elif LJ_TARGET_RISCV64
++
++#define CCALL_NARG_GPR 8
++#define CCALL_NARG_FPR 8 /* Upper bound checked by CCALL_HANDLE_REGARG when assigning FPRs. */
++#define CCALL_NRET_GPR 2
++#define CCALL_NRET_FPR 2
++#define CCALL_SPS_EXTRA 3
++#define CCALL_SPS_FREE 1
++
++typedef intptr_t GPRArg;
++typedef union FPRArg { /* One FP register slot, viewed as a double or as two floats. */
++ double d;
++ struct { LJ_ENDIAN_LOHI(float f; , float g;) }; /* Member order is chosen by LJ_ENDIAN_LOHI. */
++} FPRArg;
++
+ #else
+ #error "Missing calling convention definitions for this architecture"
+ #endif
+@@ -204,7 +219,7 @@
+ uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
+ #elif LJ_TARGET_ARM64
+ void *retp; /* Aggregate return pointer in x8. */
+-#elif LJ_TARGET_PPC
++#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64
+ uint8_t nfpr; /* Number of arguments in FPRs. */
+ #endif
+ #if LJ_32
+--- a/src/lj_ccallback.c
++++ b/src/lj_ccallback.c
+@@ -91,6 +91,10 @@
+
+ #define CALLBACK_MCODE_HEAD 52
+
++#elif LJ_TARGET_RISCV64
++
++#define CALLBACK_MCODE_HEAD 68 /* Bytes: the 17 instruction words callback_mcode_init writes before the per-slot stubs. */
++
+ #else
+
+ /* Missing support for this architecture. */
+@@ -293,6 +297,39 @@
+ }
+ return p;
+ }
++#elif LJ_TARGET_RISCV64
++static void *callback_mcode_init(global_State *g, uint32_t *page)
++{  /* Write the shared callback head plus one LUI+JAL stub per slot into page; returns the end of the written code. */
++  uint32_t *p = page;
++  uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
++  uintptr_t ug = (uintptr_t)(void *)g;
++  uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL;
++  uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL;
++  MSize slot;
++  *p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi));  /* Head: X6 = target, X7 = g. */
++  *p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi));
++  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi));
++  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi));
++  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);  /* Low 32 bits are added in chunks of 11, 11 and 10 bits. */
++  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
++  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21);
++  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21);
++  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
++  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
++  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff);
++  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff);
++  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10);
++  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10);
++  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff);
++  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff);
++  *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMI(0);  /* JALR is I-type: use the I-immediate encoder. */
++  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {  /* Each stub loads its slot number via LUI into X5, then jumps to the head. */
++    *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot);
++    *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p));
++    p++;
++  }
++  return p;
++}
+ #else
+ /* Missing support for this architecture. */
+ #define callback_mcode_init(g, p) (p)
+@@ -571,6 +608,31 @@
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ ((float *)dp)[1] = *(float *)dp;
+
++#elif LJ_TARGET_RISCV64
++
++#define CALLBACK_HANDLE_REGARG \
++ if (isfp) { /* FP arguments: FPRs first, then GPRs. */ \
++ if (nfpr + n <= CCALL_NARG_FPR) { \
++ sp = &cts->cb.fpr[nfpr]; \
++ nfpr += n; \
++ goto done; \
++ } else if (ngpr + n <= maxgpr) { \
++ sp = &cts->cb.gpr[ngpr]; \
++ ngpr += n; \
++ goto done; \
++ } \
++ } else { /* All other arguments: GPRs only. */ \
++ if (ngpr + n <= maxgpr) { \
++ sp = &cts->cb.gpr[ngpr]; \
++ ngpr += n; \
++ goto done; \
++ } \
++ }
++
++#define CALLBACK_HANDLE_RET \
++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) /* NOTE(review): duplicates the float into the next word; the call path NaN-boxes with 0xffffffff instead -- confirm. */ \
++ ((float *)dp)[1] = *(float *)dp;
++
+ #elif LJ_TARGET_S390X
+
+ #define CALLBACK_HANDLE_REGARG \
+@@ -735,7 +797,7 @@
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+ }
+ #endif
+-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
+ /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
+ if (ctr->size <= 4 &&
+ (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
+--- /dev/null
++++ b/src/lj_emit_riscv.h
+@@ -0,0 +1,519 @@
++/*
++** RISC-V instruction emitter.
++** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++static intptr_t get_k64val(ASMState *as, IRRef ref)
++{ /* Return the value of the constant IR instruction at ref as an intptr_t. */
++ IRIns *ir = IR(ref);
++ if (ir->o == IR_KINT64) {
++ return (intptr_t)ir_kint64(ir)->u64;
++ } else if (ir->o == IR_KGC) {
++ return (intptr_t)ir_kgc(ir);
++ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
++ return (intptr_t)ir_kptr(ir);
++ } else { /* Only KINT and KNULL are expected to remain. */
++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
++ "bad 64 bit const IR op %d", ir->o);
++ return ir->i; /* Sign-extended. */
++ }
++}
++
++#define get_kval(as, ref) get_k64val(as, ref) /* Alias: every constant is fetched through the 64 bit getter. */
++
++/* -- Emit basic instructions --------------------------------------------- */
++
++static void emit_r(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2)
++{ /* Three-register form. Code is written backwards: each emit stores at the pre-decremented as->mcp. */
++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2);
++}
++
++#define emit_ds(as, riscvi, rd, rs1) emit_r(as, riscvi, rd, rs1, 0) /* rs2 field left as 0. */
++#define emit_ds2(as, riscvi, rd, rs2) emit_r(as, riscvi, rd, 0, rs2) /* rs1 field left as 0. */
++#define emit_ds1s2(as, riscvi, rd, rs1, rs2) emit_r(as, riscvi, rd, rs1, rs2)
++
++static void emit_r4(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg rs3)
++{ /* Four-register form: like emit_r with an additional rs3 field. */
++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_S3(rs3);
++}
++
++#define emit_ds1s2s3(as, riscvi, rd, rs1, rs2, rs3) emit_r4(as, riscvi, rd, rs1, rs2, rs3)
++
++static void emit_i(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t i)
++{  /* Register-immediate form: only the low 12 bits of i are encoded. */
++  *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_IMMI((uint32_t)i & 0xfff);
++}
++
++#define emit_di(as, riscvi, rd, i) emit_i(as, riscvi, rd, 0, i)
++#define emit_dsi(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i)
++#define emit_dsshamt(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, (i)&0x3f)  /* Shift amount limited to 6 bits. */
++
++static void emit_s(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i)
++{ /* Store form: two source registers and an immediate, of which only the low 12 bits are encoded. */
++ *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMS((uint32_t)i & 0xfff);
++}
++
++#define emit_s1s2i(as, riscvi, rs1, rs2, i) emit_s(as, riscvi, rs1, rs2, i)
++
++/*
++static void emit_b(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i)
++{
++ *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB((uint32_t)i & 0x1ffe);
++}
++*/
++
++static void emit_u(ASMState *as, RISCVIns riscvi, Reg rd, uint32_t i)
++{ /* Upper-immediate form: only the low 20 bits of i are encoded. */
++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMU(i & 0xfffff);
++}
++
++#define emit_du(as, riscvi, rd, i) emit_u(as, riscvi, rd, i)
++
++/*
++static void emit_j(ASMState *as, RISCVIns riscvi, Reg rd, int32_t i)
++{
++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMJ((uint32_t)i & 0x1fffffe);
++}
++*/
++
++static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Forward declarations; the definitions are not in this header. */
++static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
++static Reg ra_scratch(ASMState *as, RegSet allow);
++
++static void emit_lso(ASMState *as, RISCVIns riscvi, Reg data, Reg base, int32_t ofs)
++{
++ lj_assertA(checki12(ofs), "load/store offset %d out of range", ofs);
++ switch (riscvi) {
++ case RISCVI_SB: case RISCVI_SH: case RISCVI_SW: case RISCVI_SD:
++ case RISCVI_FSW: case RISCVI_FSD:
++ emit_s1s2i(as, riscvi, base, data, ofs); /* Stores: S-type, base in rs1, data in rs2. */
++ break;
++ case RISCVI_LB: case RISCVI_LH: case RISCVI_LW: case RISCVI_LD:
++ case RISCVI_LBU: case RISCVI_LHU: case RISCVI_LWU:
++ case RISCVI_FLW: case RISCVI_FLD:
++ emit_dsi(as, riscvi, data, base, ofs); /* Loads: I-type, data in rd, base in rs1. */
++ break;
++ default: lj_assertA(0, "invalid lso"); break;
++ }
++}
++
++static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp,
++ int32_t shamt)
++{
++ if (as->flags & (JIT_F_RVZbb | JIT_F_RVXThead)) { /* A native rotate-immediate exists. */
++ if (as->flags & JIT_F_RVXThead) switch (riscvi) {
++ case RISCVI_RORIW: riscvi = RISCVI_TH_SRRIW; break;
++ case RISCVI_RORI: riscvi = RISCVI_TH_SRRI; break;
++ default: lj_assertA(0, "invalid roti op"); break;
++ }
++ emit_dsshamt(as, riscvi, rd, rs1, shamt);
++ } else {
++ RISCVIns shr, shl;
++ int32_t width, mask;
++ switch (riscvi) {
++ case RISCVI_RORIW:
++ shr = RISCVI_SRLIW, shl = RISCVI_SLLIW;
++ width = 32, mask = 31;
++ break;
++ case RISCVI_RORI:
++ shr = RISCVI_SRLI, shl = RISCVI_SLLI;
++ width = 64, mask = 63;
++ break;
++ default:
++ lj_assertA(0, "invalid roti op");
++ return;
++ }
++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); /* Emitted first, so it sits last in memory: rd |= tmp. */
++ emit_dsshamt(as, shl, rd, rs1, (width - shamt)&mask);
++ emit_dsshamt(as, shr, tmp, rs1, shamt&mask);
++ }
++}
++
++static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp)
++{
++ if (as->flags & JIT_F_RVZbb) {
++ emit_ds1s2(as, riscvi, rd, rs1, rs2); /* Zbb: the rotate is a single instruction. */
++ } else {
++ RISCVIns fwd, rev; /* fwd shifts by rs2, rev shifts the other way by -rs2. */
++ switch (riscvi) {
++ case RISCVI_RORW:
++ fwd = RISCVI_SRLW, rev = RISCVI_SLLW;
++ break;
++ case RISCVI_ROLW:
++ fwd = RISCVI_SLLW, rev = RISCVI_SRLW;
++ break;
++ case RISCVI_ROR:
++ fwd = RISCVI_SRL, rev = RISCVI_SLL;
++ break;
++ case RISCVI_ROL:
++ fwd = RISCVI_SLL, rev = RISCVI_SRL;
++ break;
++ default:
++ lj_assertA(0, "invalid rot op");
++ return;
++ }
++ if (rd != rs2) {
++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp);
++ emit_ds1s2(as, fwd, rd, rs1, rs2);
++ emit_ds1s2(as, rev, tmp, rs1, tmp);
++ emit_ds2(as, RISCVI_NEG, tmp, rs2);
++ } else { /* rd aliases rs2: different emission order for the two shifts. */
++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp);
++ emit_ds1s2(as, rev, tmp, rs1, tmp);
++ emit_ds1s2(as, fwd, rd, rs1, rs2);
++ emit_ds2(as, RISCVI_NEG, tmp, rs2);
++ }
++ }
++}
++
++static void emit_ext(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1)
++{
++ if (as->flags &
++ (riscvi == RISCVI_ZEXT_W ? JIT_F_RVZba : JIT_F_RVZbb)) { /* zext.w needs Zba, the rest Zbb. */
++ emit_ds(as, riscvi, rd, rs1);
++ } else if (as->flags & JIT_F_RVXThead) {
++ uint32_t msb, sgn;
++ switch (riscvi) {
++ case RISCVI_ZEXT_B:
++ case RISCVI_SEXT_W:
++ emit_ds(as, riscvi, rd, rs1); /* Emitted unchanged, no bit-field extract. */
++ return;
++ case RISCVI_ZEXT_H:
++ msb = 15, sgn = 0;
++ break;
++ case RISCVI_ZEXT_W:
++ msb = 31, sgn = 0;
++ break;
++ case RISCVI_SEXT_B:
++ msb = 7, sgn = 1;
++ break;
++ case RISCVI_SEXT_H:
++ msb = 15, sgn = 1;
++ break;
++ default:
++ lj_assertA(0, "invalid ext op");
++ return;
++ }
++ emit_dsi(as, sgn ? RISCVI_TH_EXT : RISCVI_TH_EXTU,
++ rd, rs1, msb << 6); /* msb goes to imm[11:6]; imm[5:0] stays 0. */
++ } else {
++ RISCVIns shl, shr;
++ int32_t n;
++ switch (riscvi) {
++ case RISCVI_ZEXT_B:
++ case RISCVI_SEXT_W:
++ emit_ds(as, riscvi, rd, rs1); /* Emitted unchanged, no shift pair. */
++ return;
++ case RISCVI_ZEXT_H:
++ shl = RISCVI_SLLI, shr = RISCVI_SRLI;
++ n = 48;
++ break;
++ case RISCVI_ZEXT_W:
++ shl = RISCVI_SLLI, shr = RISCVI_SRLI;
++ n = 32;
++ break;
++ case RISCVI_SEXT_B:
++ shl = RISCVI_SLLI, shr = RISCVI_SRAI;
++ n = 56;
++ break;
++ case RISCVI_SEXT_H:
++ shl = RISCVI_SLLI, shr = RISCVI_SRAI;
++ n = 48;
++ break;
++ default:
++ lj_assertA(0, "invalid ext op");
++ return;
++ }
++ emit_dsshamt(as, shr, rd, rd, n); /* Sits after the shift-left in memory. */
++ emit_dsshamt(as, shl, rd, rs1, n);
++ }
++}
++
++static void emit_cleartp(ASMState *as, Reg rd, Reg rs1)
++{
++ if (!(as->flags & JIT_F_RVXThead)) {
++ emit_dsshamt(as, RISCVI_SRLI, rd, rd, 17); /* Shift pair: left by 17, then logical right by 17. */
++ emit_dsshamt(as, RISCVI_SLLI, rd, rs1, 17);
++ } else {
++ emit_dsi(as, RISCVI_TH_EXTU, rd, rs1, 46u << 6); /* Single extract with msb = 46, low field 0. */
++ }
++}
++
++/*
++static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp)
++{
++ if (as->flags & JIT_F_RVZbb) {
++ emit_ds1s2(as, RISCVI_ANDN, rd, rs1, rs2);
++ } else {
++ emit_ds1s2(as, RISCVI_AND, rd, rs1, tmp);
++ emit_ds(as, RISCVI_NOT, tmp, rs2);
++ }
++}
++*/
++
++/*
++static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp)
++{
++ if (as->flags & JIT_F_RVZbb) {
++ emit_ds1s2(as, RISCVI_ORN, rd, rs1, rs2);
++ } else {
++ emit_ds1s2(as, RISCVI_OR, rd, rs1, tmp);
++ emit_ds(as, RISCVI_NOT, tmp, rs2);
++ }
++}
++*/
++
++static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2)
++{
++ if (!(as->flags & JIT_F_RVZbb)) {
++ emit_ds(as, RISCVI_NOT, rd, rd); /* Sits after the XOR in memory: rd = ~rd. */
++ emit_ds1s2(as, RISCVI_XOR, rd, rs1, rs2);
++ } else {
++ emit_ds1s2(as, RISCVI_XNOR, rd, rs1, rs2); /* Zbb provides xnor directly. */
++ }
++}
++
++static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt)
++{
++ if (as->flags & JIT_F_RVZba) {
++ switch (shamt) { /* shNadd shifts its first source, hence rs2/rs1 are passed swapped. */
++ case 1: emit_ds1s2(as, RISCVI_SH1ADD, rd, rs2, rs1); break;
++ case 2: emit_ds1s2(as, RISCVI_SH2ADD, rd, rs2, rs1); break;
++ case 3: emit_ds1s2(as, RISCVI_SH3ADD, rd, rs2, rs1); break;
++ default: lj_assertA(0, "invalid shxadd shamt"); return; /* Was a silent no-op. */
++ }
++ } else if (as->flags & JIT_F_RVXThead) {
++ emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2); /* rs2 lands in bits 24:20, shamt from bit 25 up. */
++ } else {
++ emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp); /* Sits second in memory: rd = rs1 + tmp. */
++ emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt); /* Sits first in memory: tmp = rs2 << shamt. */
++ }
++}
++
++#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1)
++#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2)
++#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3)
++
++static void emit_loadk12(ASMState *as, Reg rd, int32_t i)
++{
++ emit_dsi(as, RISCVI_ADDI, rd, RID_ZERO, i); /* addi rd, x0, i: only the low 12 bits of i are encoded. */
++}
++
++static void emit_loadk32(ASMState *as, Reg rd, int32_t i)
++{
++ if (!checki12((int64_t)i)) {
++ RISCVIns lowop = RISCVI_ADDI; /* Op that merges the low 12 bits after the LUI. */
++ if(LJ_UNLIKELY(RISCVF_HI((uint32_t)i) == 0x80000u && i > 0))
++ lowop = RISCVI_XORI; /* Positive i whose rounded-up upper part is 0x80000. */
++ emit_dsi(as, lowop, rd, rd, RISCVF_LO(i));
++ emit_du(as, RISCVI_LUI, rd, RISCVF_HI((uint32_t)i)); /* Emitted last, so it sits first in memory. */
++ } else {
++ emit_loadk12(as, rd, i);
++ }
++}
++
++/* -- Emit loads/stores --------------------------------------------------- */
++
++/* Prefer rematerialization of BASE/L from global_State over spills. */
++#define emit_canremat(ref) ((ref) <= REF_BASE)
++
++
++/* Load a 32 bit constant into a GPR. No trailing ';' so it is safe in if/else. */
++#define emit_loadi(as, r, i) emit_loadk32(as, r, i)
++
++/* Load a 64 bit constant into a GPR. Values that pass checki32 are delegated to emit_loadk32. */
++static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
++{
++ if (checki32((int64_t)u64)) {
++ emit_loadk32(as, r, (int32_t)u64);
++ } else {
++ uint32_t lo32 = u64 & 0xfffffffful;
++ RISCVIns instrs[7] = {0}; /* Pre-built SLLI/ADDI words for the low 32 bits. */
++ int shamt = 0, step = 0;
++ for(int bit = 0; bit < 32; bit++) { /* Scan lo32 from bit 0 upwards. */
++ if (lo32 & (1u << bit)) {
++ if (shamt) instrs[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt);
++ int inc = bit+10 > 31 ? 31-bit : 10; /* Chunk spans at most 11 bits, clipped at bit 31. */
++ bit += inc, shamt = inc+1;
++ uint32_t msk = ((1ul << (bit+1))-1)^((1ul << (((bit-inc) >= 0) ? (bit-inc) : 0))-1);
++ uint16_t payload = (lo32 & msk) >> (((bit-inc) >= 0) ? (bit-inc) : 0);
++ instrs[step++] = RISCVI_ADDI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(payload);
++ } else shamt++; /* Zero bit: just widen the pending shift. */
++ }
++ if (shamt) instrs[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt);
++
++ if (step < 6) { /* Fewer than 6 words: use the pre-built sequence. */
++ for(int i = 0; i < step; i++)
++ *--as->mcp = instrs[i]; /* instrs[0] ends up at the highest address. */
++ } else { /* Otherwise a fixed 6-word sequence feeding in 11+11+10 bits. */
++ emit_dsi(as, RISCVI_ADDI, r, r, u64 & 0x3ff);
++ emit_dsshamt(as, RISCVI_SLLI, r, r, 10);
++ emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 10) & 0x7ff);
++ emit_dsshamt(as, RISCVI_SLLI, r, r, 11);
++ emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 21) & 0x7ff);
++ emit_dsshamt(as, RISCVI_SLLI, r, r, 11);
++ }
++
++ uint32_t hi32 = u64 >> 32; /* Upper half is emitted last, i.e. placed first in memory. */
++ if (hi32 & 0xfff) emit_loadk32(as, r, hi32);
++ else emit_du(as, RISCVI_LUI, r, hi32 >> 12); /* Low 12 bits are zero: a lone LUI suffices. */
++ }
++}
++
++#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
++
++/* Get/set from constant pointer: p is handed to ra_allock() (restricted to allow) and used as base. */
++static void emit_lsptr(ASMState *as, RISCVIns riscvi, Reg r, void *p, RegSet allow)
++{
++ emit_lso(as, riscvi, r, ra_allock(as, igcptr(p), allow), 0); /* Offset is always 0 here. */
++}
++
++/* Load the 64 bit constant of an IR instruction into r (GPR or FPR). */
++static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
++{
++ uint64_t k = ir_k64(ir)->u64;
++ Reg gpr = rset_test(RSET_FPR, r) ? RID_TMP : r;
++ if (gpr != r) {
++ /* FPR target: build the bits in RID_TMP, then move them across. */
++ emit_ds(as, RISCVI_FMV_D_X, r, gpr);
++ }
++ emit_loadu64(as, gpr, k);
++}
++
++/* Get/set global_State fields, addressed as RID_GL + ofs. */
++static void emit_lsglptr(ASMState *as, RISCVIns riscvi, Reg r, int32_t ofs)
++{
++ emit_lso(as, riscvi, r, RID_GL, ofs); /* emit_lso asserts that ofs passes checki12. */
++}
++
++#define emit_getgl(as, r, field) \
++ emit_lsglptr(as, RISCVI_LD, (r), (int32_t)offsetof(global_State, field))
++#define emit_setgl(as, r, field) \
++ emit_lsglptr(as, RISCVI_SD, (r), (int32_t)offsetof(global_State, field))
++
++/* Trace number is determined from per-trace exit stubs. */
++#define emit_setvmstate(as, i) UNUSED(i)
++
++/* -- Emit control-flow instructions -------------------------------------- */
++
++/* Label for internal jumps. */
++typedef MCode *MCLabel;
++
++/* Return label pointing to current PC. */
++#define emit_label(as) ((as)->mcp)
++
++static void emit_branch(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, MCode *target, int jump)
++{
++ MCode *mc = as->mcp;
++ ptrdiff_t delta = (char *)target - (char *)(mc - 1);
++ /* The range checked is that of the JAL fallback (signed 21 bit), not of a bare branch. */
++ lj_assertA(((delta + 0x100000) >> 21) == 0, "branch target out of range"); /* ^B+J */
++ if (jump || !checki13(delta)) {
++ *--mc = RISCVI_JAL | RISCVF_IMMJ(delta); /* Poorman's trampoline */
++ *--mc = (riscvi^0x00001000) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8);
++ } else {
++ *--mc = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta);
++ *--mc = RISCVI_NOP;
++ }
++ as->mcp = mc;
++}
++
++static void emit_jmp(ASMState *as, MCode *target)
++{
++ MCode *mc = as->mcp;
++ ptrdiff_t delta = (char *)target - (char *)(mc - 2);
++ /* delta is relative to the lower of the two words written below. */
++ lj_assertA(checki32(delta), "jump target out of range"); /* AUIPC+JALR */
++ if (!checki21(delta)) {
++ *--mc = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
++ *--mc = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
++ } else {
++ *--mc = RISCVI_NOP;
++ *--mc = RISCVI_JAL | RISCVF_IMMJ(delta);
++ }
++ as->mcp = mc;
++}
++
++#define emit_mv(as, dst, src) \
++ emit_ds(as, RISCVI_MV, (dst), (src))
++
++static void emit_call(ASMState *as, void *target, int needcfa)
++{
++ MCode *mc = as->mcp;
++ ptrdiff_t delta = (char *)target - (char *)(mc - 2);
++ if (checki21(delta)) { /* Near: JAL, padded with a NOP to two words. */
++ *--mc = RISCVI_NOP;
++ *--mc = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ(delta);
++ } else if (checki32(delta)) { /* Medium: pc-relative AUIPC+JALR through RID_TMP. */
++ *--mc = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
++ *--mc = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
++ needcfa = 1;
++ } else { /* Far: indirect call through RID_CFUNCADDR. */
++ *--mc = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_CFUNCADDR) | RISCVF_IMMI(0);
++ needcfa = 2;
++ }
++ as->mcp = mc;
++ if (needcfa >= 2) /* Far call, or the caller passed needcfa > 1. */
++ ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
++}
++
++/* -- Emit generic operations --------------------------------------------- */
++
++/* Generic register move; the opcode depends on which side is an FPR and on ir->t. */
++static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
++{
++ if (src >= RID_MAX_GPR && dst >= RID_MAX_GPR)
++ emit_ds1s2(as, irt_isnum(ir->t) ? RISCVI_FMV_D : RISCVI_FMV_S, dst, src, src);
++ else if (src >= RID_MAX_GPR)
++ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dst, src);
++ else if (dst >= RID_MAX_GPR)
++ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, dst, src);
++ else
++ emit_mv(as, dst, src);
++}
++
++/* Emit dest = src <op> k; k goes through tmp when it does not fit a 12 bit immediate. */
++static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src,
++ Reg tmp, intptr_t k)
++{
++ if (checki12(k)) {
++ emit_dsi(as, riscvi, dest, src, k); return;
++ }
++ switch (riscvi) {
++ case RISCVI_ANDI: riscvi = RISCVI_AND; break;
++ case RISCVI_ORI: riscvi = RISCVI_OR; break;
++ case RISCVI_XORI: riscvi = RISCVI_XOR; break;
++ case RISCVI_ADDI: riscvi = RISCVI_ADD; break;
++ default: lj_assertA(0, "NYI arithmetic RISCVIns"); return;
++ }
++ emit_ds1s2(as, riscvi, dest, src, tmp);
++ emit_loadu64(as, tmp, (uintptr_t)k);
++}
++
++/* Load r from base+ofs; the opcode width follows ir->t (LD/LW for GPRs, FLD/FLW for FPRs). */
++static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
++{
++ if (r >= RID_MAX_GPR)
++ emit_lso(as, irt_isnum(ir->t) ? RISCVI_FLD : RISCVI_FLW, r, base, ofs);
++ else
++ emit_lso(as, irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW, r, base, ofs);
++}
++
++/* Store r to base+ofs; the opcode width follows ir->t (SD/SW for GPRs, FSD/FSW for FPRs). */
++static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
++{
++ if (r >= RID_MAX_GPR)
++ emit_lso(as, irt_isnum(ir->t) ? RISCVI_FSD : RISCVI_FSW, r, base, ofs);
++ else
++ emit_lso(as, irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW, r, base, ofs);
++}
++
++/* Add a constant offset to pointer register r; a zero offset emits nothing. */
++static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
++{
++ if (ofs != 0)
++ emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs); /* RID_TMP is used when ofs exceeds 12 bits. */
++}
++
++
++#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
+--- a/src/lj_frame.h
++++ b/src/lj_frame.h
+@@ -281,6 +281,15 @@
+ #define CFRAME_OFS_PC 168
+ #define CFRAME_OFS_MULTRES 160
+ #define CFRAME_SIZE 240
++#elif LJ_TARGET_RISCV64
++#define CFRAME_OFS_ERRF 252
++#define CFRAME_OFS_NRES 248
++#define CFRAME_OFS_PREV 240
++#define CFRAME_OFS_L 232
++#define CFRAME_OFS_PC 224
++#define CFRAME_OFS_MULTRES 0
++#define CFRAME_SIZE 256
++#define CFRAME_SHIFT_MULTRES 3
+ /*
+ ** TODO: it would be good if we always decoded param*8 like
+ ** the RISC architectures do. If so then SHIFT_MULTRES will
+--- a/src/lj_gdbjit.c
++++ b/src/lj_gdbjit.c
+@@ -306,6 +306,9 @@
+ #elif LJ_TARGET_MIPS
+ DW_REG_SP = 29,
+ DW_REG_RA = 31,
++#elif LJ_TARGET_RISCV64
++ DW_REG_SP = 2,
++ DW_REG_RA = 1,
+ #else
+ #error "Unsupported target architecture"
+ #endif
+@@ -383,6 +386,8 @@
+ .machine = 20,
+ #elif LJ_TARGET_MIPS
+ .machine = 8,
++#elif LJ_TARGET_RISCV64
++ .machine = 243,
+ #else
+ #error "Unsupported target architecture"
+ #endif
+@@ -591,6 +596,16 @@
+ for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); }
+ for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); }
+ }
++#elif LJ_TARGET_RISCV64
++ {
++ int i;
++ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); }
++ DB(DW_CFA_offset|9); DUV(17);
++ DB(DW_CFA_offset|8); DUV(18);
++ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); }
++ DB(DW_CFA_offset|32|9); DUV(29);
++ DB(DW_CFA_offset|32|8); DUV(30);
++ }
+ #else
+ #error "Unsupported target architecture"
+ #endif
+--- a/src/lj_jit.h
++++ b/src/lj_jit.h
+@@ -68,6 +68,15 @@
+ #endif
+ #endif
+
++#elif LJ_TARGET_RISCV64
++
++#define JIT_F_RVC (JIT_F_CPU << 0)
++#define JIT_F_RVZba (JIT_F_CPU << 1)
++#define JIT_F_RVZbb (JIT_F_CPU << 2)
++#define JIT_F_RVXThead (JIT_F_CPU << 3)
++
++#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006XThead"
++
+ #else
+
+ #define JIT_F_CPUSTRING ""
+--- a/src/lj_mcode.c
++++ b/src/lj_mcode.c
+@@ -38,6 +38,12 @@
+ void sys_icache_invalidate(void *start, size_t len);
+ #endif
+
++#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX
++#include <unistd.h>
++#include <sys/syscall.h>
++#include <sys/cachectl.h>
++#endif
++
+ /* Synchronize data/instruction cache. */
+ void lj_mcode_sync(void *start, void *end)
+ {
+@@ -52,6 +58,17 @@
+ sys_icache_invalidate(start, (char *)end-(char *)start);
+ #elif LJ_TARGET_PPC
+ lj_vm_cachesync(start, end);
++#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX
++#if (defined(__GNUC__) || defined(__clang__))
++ __asm__ volatile("fence rw, rw");
++#else
++ lj_vm_fence_rw_rw();
++#endif
++#ifdef __GLIBC__
++ __riscv_flush_icache(start, end, 0);
++#else
++ syscall(__NR_riscv_flush_icache, start, end, 0UL);
++#endif
+ #elif defined(__GNUC__) || defined(__clang__)
+ __clear_cache(start, end);
+ #else
+--- a/src/lj_target.h
++++ b/src/lj_target.h
+@@ -55,7 +55,7 @@
+ /* Bitset for registers. 32 registers suffice for most architectures.
+ ** Note that one set holds bits for both GPRs and FPRs.
+ */
+-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64
+ typedef uint64_t RegSet;
+ #define RSET_BITS 6
+ #define rset_picktop_(rs) ((Reg)lj_fls64(rs))
+@@ -143,6 +143,8 @@
+ #include "lj_target_ppc.h"
+ #elif LJ_TARGET_MIPS
+ #include "lj_target_mips.h"
++#elif LJ_TARGET_RISCV64
++#include "lj_target_riscv.h"
+ #elif LJ_TARGET_S390X
+ #include "lj_target_s390x.h"
+ #else
+--- /dev/null
++++ b/src/lj_target_riscv.h
+@@ -0,0 +1,509 @@
++/*
++** Definitions for RISC-V CPUs.
++** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++#ifndef _LJ_TARGET_RISCV_H
++#define _LJ_TARGET_RISCV_H
++
++/* -- Registers IDs ------------------------------------------------------- */
++
++#if LJ_ARCH_EMBEDDED
++#define GPRDEF(_) \
++ _(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \
++ _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15)
++#else
++#define GPRDEF(_) \
++ _(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \
++ _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
++ _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
++ _(X24) _(X25) _(X26) _(X27) _(X28) _(X29) _(X30) _(X31)
++#endif
++#if LJ_SOFTFP
++#define FPRDEF(_)
++#else
++#define FPRDEF(_) \
++ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
++ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
++ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
++ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
++#endif
++#define VRIDDEF(_)
++
++#define RIDENUM(name) RID_##name,
++
++enum {
++ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
++ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
++ RID_MAX,
++ RID_ZERO = RID_X0,
++ RID_TMP = RID_RA, /* The JIT temporary is the same register as RA. */
++ RID_GP = RID_X3,
++ RID_TP = RID_X4,
++
++ /* Calling conventions. */
++ RID_RET = RID_X10,
++#if LJ_LE
++ RID_RETHI = RID_X11,
++ RID_RETLO = RID_X10,
++#else
++ RID_RETHI = RID_X10,
++ RID_RETLO = RID_X11,
++#endif
++#if LJ_SOFTFP
++ RID_FPRET = RID_X10,
++#else
++ RID_FPRET = RID_F10,
++#endif
++ RID_CFUNCADDR = RID_X5, /* Base register of the far-call JALR in emit_call(). */
++
++ /* These definitions must match with the *.dasc file(s): */
++ RID_BASE = RID_X18, /* Interpreter BASE. */
++ RID_LPC = RID_X20, /* Interpreter PC. */
++ RID_GL = RID_X21, /* Interpreter GL. */
++ RID_LREG = RID_X23, /* Interpreter L. */
++
++ /* Register ranges [min, max) and number of registers. */
++ RID_MIN_GPR = RID_X0,
++ RID_MAX_GPR = RID_X31+1,
++ RID_MIN_FPR = RID_MAX_GPR,
++#if LJ_SOFTFP
++ RID_MAX_FPR = RID_MIN_FPR,
++#else
++ RID_MAX_FPR = RID_F31+1,
++#endif
++ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* All of F0..F31, or 0 with LJ_SOFTFP. */
++};
++
++#define RID_NUM_KREF RID_NUM_GPR
++#define RID_MIN_KREF RID_X0
++
++/* -- Register sets ------------------------------------------------------- */
++
++/* Make use of all registers, except ZERO, TMP, SP, GP, TP and GL. */
++#define RSET_FIXED \
++ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\
++ RID2RSET(RID_GP)|RID2RSET(RID_TP)|RID2RSET(RID_GL))
++#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
++#if LJ_SOFTFP
++#define RSET_FPR 0
++#else
++#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
++#endif
++
++#define RSET_ALL (RSET_GPR|RSET_FPR)
++#define RSET_INIT RSET_ALL
++
++#define RSET_SCRATCH_GPR \
++ (RSET_RANGE(RID_X5, RID_X7+1)|RSET_RANGE(RID_X28, RID_X31+1)|\
++ RSET_RANGE(RID_X10, RID_X17+1))
++
++#if LJ_SOFTFP
++#define RSET_SCRATCH_FPR 0
++#else
++#define RSET_SCRATCH_FPR \
++ (RSET_RANGE(RID_F0, RID_F7+1)|RSET_RANGE(RID_F10, RID_F17+1)|\
++ RSET_RANGE(RID_F28, RID_F31+1))
++#endif
++#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
++
++#define REGARG_FIRSTGPR RID_X10
++#define REGARG_LASTGPR RID_X17
++#define REGARG_NUMGPR 8
++
++#if LJ_ABI_SOFTFP
++#define REGARG_FIRSTFPR 0
++#define REGARG_LASTFPR 0
++#define REGARG_NUMFPR 0
++#else
++#define REGARG_FIRSTFPR RID_F10
++#define REGARG_LASTFPR RID_F17
++#define REGARG_NUMFPR 8
++#endif
++
++/* -- Spill slots --------------------------------------------------------- */
++
++/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
++**
++** SPS_FIXED: Available fixed spill slots in interpreter frame.
++** This definition must match with the *.dasc file(s).
++**
++** SPS_FIRST: First spill slot for general use.
++*/
++#if LJ_32
++#define SPS_FIXED 5
++#else
++#define SPS_FIXED 4
++#endif
++#define SPS_FIRST 4
++
++#define SPOFS_TMP 0
++
++#define sps_scale(slot) (4 * (int32_t)(slot))
++#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
++
++/* -- Exit state ---------------------------------------------------------- */
++/* This definition must match with the *.dasc file(s). Field order is part of that contract. */
++typedef struct {
++#if !LJ_SOFTFP
++ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
++#endif
++ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
++ int32_t spill[256]; /* Spill slots. */
++} ExitState;
++
++/* Highest exit + 1 indicates stack check. */
++#define EXITSTATE_CHECKEXIT 1
++
++/* Return the address of a per-trace exit stub: skip NOP words at p, then index 4 + exitno words on. */
++static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
++{
++ for (; *p == 0x00000013; p++) ; /* Skip RISCVI_NOP. */
++ return p + 4 + exitno;
++}
++/* Avoid dependence on lj_jit.h if only including lj_target.h. */
++#define exitstub_trace_addr(T, exitno) \
++ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
++
++/* -- Instructions -------------------------------------------------------- */
++
++/* Instruction fields. Each macro expands to one fully parenthesized expression. */
++#define RISCVF_D(d) (((d)&31) << 7)
++#define RISCVF_S1(r) (((r)&31) << 15)
++#define RISCVF_S2(r) (((r)&31) << 20)
++#define RISCVF_S3(r) (((r)&31) << 27)
++#define RISCVF_FUNCT2(f) (((f)&3) << 25)
++#define RISCVF_FUNCT3(f) (((f)&7) << 12)
++#define RISCVF_FUNCT7(f) (((f)&127) << 25)
++#define RISCVF_SHAMT(s) ((s) << 20)
++#define RISCVF_RM(m) (((m)&7) << 12)
++#define RISCVF_IMMI(i) ((i) << 20)
++#define RISCVF_IMMS(i) ((((i)&0xfe0) << 20) | (((i)&0x1f) << 7))
++#define RISCVF_IMMB(i) ((((i)&0x1000) << 19) | (((i)&0x800) >> 4) | (((i)&0x7e0) << 20) | (((i)&0x1e) << 7))
++#define RISCVF_IMMU(i) (((i)&0xfffff) << 12)
++#define RISCVF_IMMJ(i) ((((i)&0x100000) << 11) | ((i)&0xff000) | (((i)&0x800) << 9) | (((i)&0x7fe) << 20))
++
++/* Encode helpers. */
++#define RISCVF_W_HI(w) ((w) - ((((w)&0xfff)^0x800) - 0x800))
++#define RISCVF_W_LO(w) ((w)&0xfff)
++#define RISCVF_HI(i) ((RISCVF_W_HI(i) >> 12) & 0xfffff)
++#define RISCVF_LO(i) RISCVF_W_LO(i)
++
++/* Check for valid field range: true iff x fits a signed b-bit immediate. */
++#define RISCVF_SIMM_OK(x, b) ((((x) + (1 << ((b)-1))) >> (b)) == 0)
++#define checki12(i) RISCVF_SIMM_OK(i, 12)
++#define checki13(i) RISCVF_SIMM_OK(i, 13)
++#define checki20(i) RISCVF_SIMM_OK(i, 20)
++#define checki21(i) RISCVF_SIMM_OK(i, 21)
++
++typedef enum RISCVIns {
++ /* Opcode words with empty operand fields; RISCVF_*() values are OR-ed in by the emitters. */
++ /* --- RVI --- */
++ RISCVI_LUI = 0x00000037,
++ RISCVI_AUIPC = 0x00000017,
++
++ RISCVI_JAL = 0x0000006f,
++ RISCVI_JALR = 0x00000067,
++
++ RISCVI_ADDI = 0x00000013,
++ RISCVI_SLTI = 0x00002013,
++ RISCVI_SLTIU = 0x00003013,
++ RISCVI_XORI = 0x00004013,
++ RISCVI_ORI = 0x00006013,
++ RISCVI_ANDI = 0x00007013,
++
++ RISCVI_SLLI = 0x00001013,
++ RISCVI_SRLI = 0x00005013,
++ RISCVI_SRAI = 0x40005013,
++
++ RISCVI_ADD = 0x00000033,
++ RISCVI_SUB = 0x40000033,
++ RISCVI_SLL = 0x00001033,
++ RISCVI_SLT = 0x00002033,
++ RISCVI_SLTU = 0x00003033,
++ RISCVI_XOR = 0x00004033,
++ RISCVI_SRL = 0x00005033,
++ RISCVI_SRA = 0x40005033,
++ RISCVI_OR = 0x00006033,
++ RISCVI_AND = 0x00007033,
++
++ RISCVI_LB = 0x00000003,
++ RISCVI_LH = 0x00001003,
++ RISCVI_LW = 0x00002003,
++ RISCVI_LBU = 0x00004003,
++ RISCVI_LHU = 0x00005003,
++ RISCVI_SB = 0x00000023,
++ RISCVI_SH = 0x00001023,
++ RISCVI_SW = 0x00002023,
++
++ RISCVI_BEQ = 0x00000063,
++ RISCVI_BNE = 0x00001063,
++ RISCVI_BLT = 0x00004063,
++ RISCVI_BGE = 0x00005063,
++ RISCVI_BLTU = 0x00006063,
++ RISCVI_BGEU = 0x00007063,
++
++ RISCVI_ECALL = 0x00000073,
++ RISCVI_EBREAK = 0x00100073,
++
++ RISCVI_NOP = 0x00000013,
++ RISCVI_MV = 0x00000013,
++ RISCVI_NOT = 0xfff04013,
++ RISCVI_NEG = 0x40000033,
++ RISCVI_RET = 0x00008067,
++ RISCVI_ZEXT_B = 0x0ff07013,
++
++#if LJ_TARGET_RISCV64
++ RISCVI_LWU = 0x00006003, /* funct3 = 110; 111 is not a valid RV64 load. */
++ RISCVI_LD = 0x00003003,
++ RISCVI_SD = 0x00003023,
++
++ RISCVI_ADDIW = 0x0000001b,
++
++ RISCVI_SLLIW = 0x0000101b,
++ RISCVI_SRLIW = 0x0000501b,
++ RISCVI_SRAIW = 0x4000501b,
++
++ RISCVI_ADDW = 0x0000003b,
++ RISCVI_SUBW = 0x4000003b,
++ RISCVI_SLLW = 0x0000103b,
++ RISCVI_SRLW = 0x0000503b,
++ RISCVI_SRAW = 0x4000503b,
++
++ RISCVI_NEGW = 0x4000003b,
++ RISCVI_SEXT_W = 0x0000001b,
++#endif
++
++ /* --- RVM --- */
++ RISCVI_MUL = 0x02000033,
++ RISCVI_MULH = 0x02001033,
++ RISCVI_MULHSU = 0x02002033,
++ RISCVI_MULHU = 0x02003033,
++ RISCVI_DIV = 0x02004033,
++ RISCVI_DIVU = 0x02005033,
++ RISCVI_REM = 0x02006033,
++ RISCVI_REMU = 0x02007033,
++#if LJ_TARGET_RISCV64
++ RISCVI_MULW = 0x0200003b,
++ RISCVI_DIVW = 0x0200403b,
++ RISCVI_DIVUW = 0x0200503b,
++ RISCVI_REMW = 0x0200603b,
++ RISCVI_REMUW = 0x0200703b,
++#endif
++
++ /* --- RVF --- */
++ RISCVI_FLW = 0x00002007,
++ RISCVI_FSW = 0x00002027,
++
++ RISCVI_FMADD_S = 0x00000043,
++ RISCVI_FMSUB_S = 0x00000047,
++ RISCVI_FNMSUB_S = 0x0000004b,
++ RISCVI_FNMADD_S = 0x0000004f,
++
++ RISCVI_FADD_S = 0x00000053,
++ RISCVI_FSUB_S = 0x08000053,
++ RISCVI_FMUL_S = 0x10000053,
++ RISCVI_FDIV_S = 0x18000053,
++ RISCVI_FSQRT_S = 0x58000053,
++
++ RISCVI_FSGNJ_S = 0x20000053,
++ RISCVI_FSGNJN_S = 0x20001053,
++ RISCVI_FSGNJX_S = 0x20002053,
++
++ RISCVI_FMIN_S = 0x28000053,
++ RISCVI_FMAX_S = 0x28001053,
++
++ RISCVI_FCVT_W_S = 0xc0000053,
++ RISCVI_FCVT_WU_S = 0xc0100053,
++
++ RISCVI_FMV_X_W = 0xe0000053,
++
++ RISCVI_FEQ_S = 0xa0002053,
++ RISCVI_FLT_S = 0xa0001053,
++ RISCVI_FLE_S = 0xa0000053,
++
++ RISCVI_FCLASS_S = 0xe0001053,
++
++ RISCVI_FCVT_S_W = 0xd0000053,
++ RISCVI_FCVT_S_WU = 0xd0100053,
++ RISCVI_FMV_W_X = 0xf0000053, /* OP-FP major opcode (0x53), like FMV_X_W and FMV_D_X. */
++
++ RISCVI_FMV_S = 0x20000053,
++ RISCVI_FNEG_S = 0x20001053,
++ RISCVI_FABS_S = 0x20002053,
++#if LJ_TARGET_RISCV64
++ RISCVI_FCVT_L_S = 0xc0200053,
++ RISCVI_FCVT_LU_S = 0xc0300053,
++ RISCVI_FCVT_S_L = 0xd0200053,
++ RISCVI_FCVT_S_LU = 0xd0300053,
++#endif
++
++ /* --- RVD --- */
++ RISCVI_FLD = 0x00003007,
++ RISCVI_FSD = 0x00003027,
++
++ RISCVI_FMADD_D = 0x02000043,
++ RISCVI_FMSUB_D = 0x02000047,
++ RISCVI_FNMSUB_D = 0x0200004b,
++ RISCVI_FNMADD_D = 0x0200004f,
++
++ RISCVI_FADD_D = 0x02000053,
++ RISCVI_FSUB_D = 0x0a000053,
++ RISCVI_FMUL_D = 0x12000053,
++ RISCVI_FDIV_D = 0x1a000053,
++ RISCVI_FSQRT_D = 0x5a000053,
++
++ RISCVI_FSGNJ_D = 0x22000053,
++ RISCVI_FSGNJN_D = 0x22001053,
++ RISCVI_FSGNJX_D = 0x22002053,
++
++ RISCVI_FMIN_D = 0x2a000053,
++ RISCVI_FMAX_D = 0x2a001053,
++
++ RISCVI_FCVT_S_D = 0x40100053,
++ RISCVI_FCVT_D_S = 0x42000053,
++
++ RISCVI_FEQ_D = 0xa2002053,
++ RISCVI_FLT_D = 0xa2001053,
++ RISCVI_FLE_D = 0xa2000053,
++
++ RISCVI_FCLASS_D = 0xe2001053,
++
++ RISCVI_FCVT_W_D = 0xc2000053,
++ RISCVI_FCVT_WU_D = 0xc2100053,
++ RISCVI_FCVT_D_W = 0xd2000053,
++ RISCVI_FCVT_D_WU = 0xd2100053,
++
++ RISCVI_FMV_D = 0x22000053,
++ RISCVI_FNEG_D = 0x22001053,
++ RISCVI_FABS_D = 0x22002053,
++#if LJ_TARGET_RISCV64
++ RISCVI_FCVT_L_D = 0xc2200053,
++ RISCVI_FCVT_LU_D = 0xc2300053,
++ RISCVI_FMV_X_D = 0xe2000053,
++ RISCVI_FCVT_D_L = 0xd2200053,
++ RISCVI_FCVT_D_LU = 0xd2300053,
++ RISCVI_FMV_D_X = 0xf2000053,
++#endif
++
++ /* --- Zifencei --- */
++ RISCVI_FENCE = 0x0000000f,
++ RISCVI_FENCE_I = 0x0000100f,
++
++ /* --- Zicsr --- */
++ RISCVI_CSRRW = 0x00001073,
++ RISCVI_CSRRS = 0x00002073,
++ RISCVI_CSRRC = 0x00003073,
++ RISCVI_CSRRWI = 0x00005073,
++ RISCVI_CSRRSI = 0x00006073,
++ RISCVI_CSRRCI = 0x00007073,
++
++ /* --- RVB --- */
++ /* Zba */
++ RISCVI_SH1ADD = 0x20002033,
++ RISCVI_SH2ADD = 0x20004033,
++ RISCVI_SH3ADD = 0x20006033,
++#if LJ_TARGET_RISCV64
++ RISCVI_ADD_UW = 0x0800003b,
++
++ RISCVI_SH1ADD_UW = 0x2000203b,
++ RISCVI_SH2ADD_UW = 0x2000403b,
++ RISCVI_SH3ADD_UW = 0x2000603b,
++
++ RISCVI_SLLI_UW = 0x0800101b,
++
++ RISCVI_ZEXT_W = 0x0800003b,
++#endif
++ /* Zbb */
++ RISCVI_ANDN = 0x40007033,
++ RISCVI_ORN = 0x40006033,
++ RISCVI_XNOR = 0x40004033,
++
++ RISCVI_CLZ = 0x60001013,
++ RISCVI_CTZ = 0x60101013,
++
++ RISCVI_CPOP = 0x60201013,
++
++ RISCVI_MAX = 0x0a006033,
++ RISCVI_MAXU = 0x0a007033,
++ RISCVI_MIN = 0x0a004033,
++ RISCVI_MINU = 0x0a005033,
++
++ RISCVI_SEXT_B = 0x60401013,
++ RISCVI_SEXT_H = 0x60501013,
++#if LJ_TARGET_RISCV64
++ RISCVI_ZEXT_H = 0x0800403b,
++#endif
++
++ RISCVI_ROL = 0x60001033,
++ RISCVI_ROR = 0x60005033,
++ RISCVI_RORI = 0x60005013,
++
++ RISCVI_ORC_B = 0x28705013,
++
++#if LJ_TARGET_RISCV64
++ RISCVI_REV8 = 0x6b805013,
++
++ RISCVI_CLZW = 0x6000101b,
++ RISCVI_CTZW = 0x6010101b,
++
++ RISCVI_CPOPW = 0x6020101b,
++
++ RISCVI_ROLW = 0x6000103b,
++ RISCVI_RORIW = 0x6000501b,
++ RISCVI_RORW = 0x6000503b,
++#endif
++ /* NYI: Zbc, Zbs */
++
++ /* TBD: RVV?, RVP?, RVJ? */
++
++ /* --- XThead* --- */
++ /* XTHeadBa */
++ RISCVI_TH_ADDSL = 0x0000100b,
++
++ /* XTHeadBb */
++ RISCVI_TH_SRRI = 0x1000100b,
++#if LJ_TARGET_RISCV64
++ RISCVI_TH_SRRIW = 0x1400100b,
++#endif
++ RISCVI_TH_EXT = 0x0000200b,
++ RISCVI_TH_EXTU = 0x0000300b,
++ RISCVI_TH_FF0 = 0x8400100b,
++ RISCVI_TH_FF1 = 0x8600100b,
++ RISCVI_TH_REV = 0x8200100b,
++#if LJ_TARGET_RISCV64
++ RISCVI_TH_REVW = 0x9000100b,
++#endif
++ RISCVI_TH_TSTNBZ = 0x8000100b,
++
++ /* XTHeadBs */
++ RISCVI_TH_TST = 0x8800100b,
++
++ /* XTHeadCondMov */
++ RISCVI_TH_MVEQZ = 0x4000100b,
++ RISCVI_TH_MVNEZ = 0x4200100b,
++
++ /* XTHeadMac */
++ RISCVI_TH_MULA = 0x2000100b,
++ RISCVI_TH_MULAH = 0x2800100b,
++#if LJ_TARGET_RISCV64
++ RISCVI_TH_MULAW = 0x2400100b,
++#endif
++ RISCVI_TH_MULS = 0x2200100b,
++ RISCVI_TH_MULSH = 0x2a00100b,
++ RISCVI_TH_MULSW = 0x2600100b,
++
++ /* NYI: XTHeadMemIdx, XTHeadFMemIdx, XTHeadMemPair */
++} RISCVIns;
++
++typedef enum RISCVRM {
++ RISCVRM_RNE = 0,
++ RISCVRM_RTZ = 1,
++ RISCVRM_RDN = 2,
++ RISCVRM_RUP = 3,
++ RISCVRM_RMM = 4,
++ RISCVRM_DYN = 7,
++} RISCVRM;
++
++#endif
+--- a/src/lj_vm.h
++++ b/src/lj_vm.h
+@@ -37,6 +37,9 @@
+ #if LJ_TARGET_PPC
+ void lj_vm_cachesync(void *start, void *end);
+ #endif
++#if LJ_TARGET_RISCV64
++void lj_vm_fence_rw_rw(void); /* Called by lj_mcode_sync() when GNU inline asm is unavailable. */
++#endif
+ LJ_ASMF double lj_vm_foldarith(double x, double y, int op);
+ #if LJ_HASJIT
+ LJ_ASMF double lj_vm_foldfpm(double x, int op);
+--- a/src/lj_vmmath.c
++++ b/src/lj_vmmath.c
+@@ -69,7 +69,8 @@
+
+ /* -- Helper functions for generated machine code ------------------------- */
+
+-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
++#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \
++ || LJ_TARGET_RISCV64
+ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
+ {
+ uint32_t y, ua, ub;
+--- /dev/null
++++ b/src/vm_riscv64.dasc
+@@ -0,0 +1,4866 @@
++|// Low-level VM code for RISC-V 64 CPUs.
++|// Bytecode interpreter, fast functions and helper functions.
++|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
++|//
++|// Contributed by gns from PLCT Lab, ISCAS.
++|// Sponsored by PLCT Lab, ISCAS.
++|
++|.arch riscv64
++|.section code_op, code_sub
++|
++|.actionlist build_actionlist
++|.globals GLOB_
++|.globalnames globnames
++|.externnames extnames
++|
++|// Note: The ragged indentation of the instructions is intentional.
++|// The starting columns indicate data dependencies.
++|
++|//-----------------------------------------------------------------------
++|
++|// Fixed register assignments for the interpreter.
++|// Don't use: x0 = 0, x1 = ra, x2 = sp, x3 = gp, x4 = tp
++|
++|
++|// The following must be C callee-save (but BASE is often refetched).
++|.define BASE, x18 // Base of current Lua stack frame.
++|.define KBASE, x19 // Constants of current Lua function.
++|.define PC, x20 // Next PC.
++|.define GLREG, x21 // Global state.
++|.define DISPATCH, x22 // Opcode dispatch table.
++|.define LREG, x23 // Register holding lua_State (also in SAVE_L).
++|.define MULTRES, x24 // Size of multi-result: (nresults+1)*8.
++|
++|// Constants for type-comparisons, stores and conversions. C callee-save.
++|.define TISNIL, x8
++|.define TISNUM, x25
++|.define TOBIT, f27 // 2^52 + 2^51.
++|
++|// The following temporaries are not saved across C calls, except for RA.
++|.define RA, x9 // Callee-save.
++|.define RB, x14
++|.define RC, x15
++|.define RD, x16
++|.define INS, x17
++|
++|.define TMP0, x6
++|.define TMP1, x7
++|.define TMP2, x28
++|.define TMP3, x29
++|.define TMP4, x30
++|
++|// RISC-V lp64d calling convention.
++|.define CFUNCADDR, x5
++|.define CARG1, x10
++|.define CARG2, x11
++|.define CARG3, x12
++|.define CARG4, x13
++|.define CARG5, x14
++|.define CARG6, x15
++|.define CARG7, x16
++|.define CARG8, x17
++|
++|.define CRET1, x10
++|.define CRET2, x11
++|
++|.define FARG1, f10
++|.define FARG2, f11
++|.define FARG3, f12
++|.define FARG4, f13
++|.define FARG5, f14
++|.define FARG6, f15
++|.define FARG7, f16
++|.define FARG8, f17
++|
++|.define FRET1, f10
++|.define FRET2, f11
++|
++|.define FTMP0, f0
++|.define FTMP1, f1
++|.define FTMP2, f2
++|.define FTMP3, f3
++|.define FTMP4, f4
++|
++|// Stack layout while in interpreter. Must match with lj_frame.h.
++|// RISC-V 64 lp64d.
++|
++|.define CFRAME_SPACE, 256 // Delta for sp.
++|
++|//----- 16 byte aligned, <-- sp entering interpreter
++|.define SAVE_ERRF, 252 // 32 bit values.
++|.define SAVE_NRES, 248
++|.define SAVE_CFRAME, 240 // 64 bit values.
++|.define SAVE_L, 232
++|.define SAVE_PC, 224
++|//----- 16 byte aligned
++|// Padding 216
++|.define SAVE_GPR_, 112 // .. 112+13*8: 64 bit GPR saves.
++|.define SAVE_FPR_, 16 // .. 16+12*8: 64 bit FPR saves.
++|
++|
++|.define TMPD, 0
++|//----- 16 byte aligned
++|
++|.define TMPD_OFS, 0
++|
++|//-----------------------------------------------------------------------
++|
++|.macro saveregs
++| addi sp, sp, -CFRAME_SPACE
++| fsd f27, SAVE_FPR_+11*8(sp)
++| fsd f26, SAVE_FPR_+10*8(sp)
++| fsd f25, SAVE_FPR_+9*8(sp)
++| fsd f24, SAVE_FPR_+8*8(sp)
++| fsd f23, SAVE_FPR_+7*8(sp)
++| fsd f22, SAVE_FPR_+6*8(sp)
++| fsd f21, SAVE_FPR_+5*8(sp)
++| fsd f20, SAVE_FPR_+4*8(sp)
++| fsd f19, SAVE_FPR_+3*8(sp)
++| fsd f18, SAVE_FPR_+2*8(sp)
++| fsd f9, SAVE_FPR_+1*8(sp)
++| fsd f8, SAVE_FPR_+0*8(sp)
++| sd ra, SAVE_GPR_+12*8(sp)
++| sd x27, SAVE_GPR_+11*8(sp)
++| sd x26, SAVE_GPR_+10*8(sp)
++| sd x25, SAVE_GPR_+9*8(sp)
++| sd x24, SAVE_GPR_+8*8(sp)
++| sd x23, SAVE_GPR_+7*8(sp)
++| sd x22, SAVE_GPR_+6*8(sp)
++| sd x21, SAVE_GPR_+5*8(sp)
++| sd x20, SAVE_GPR_+4*8(sp)
++| sd x19, SAVE_GPR_+3*8(sp)
++| sd x18, SAVE_GPR_+2*8(sp)
++| sd x9, SAVE_GPR_+1*8(sp)
++| sd x8, SAVE_GPR_+0*8(sp)
++|.endmacro
++|
++|.macro restoreregs_ret
++| ld ra, SAVE_GPR_+12*8(sp)
++| ld x27, SAVE_GPR_+11*8(sp)
++| ld x26, SAVE_GPR_+10*8(sp)
++| ld x25, SAVE_GPR_+9*8(sp)
++| ld x24, SAVE_GPR_+8*8(sp)
++| ld x23, SAVE_GPR_+7*8(sp)
++| ld x22, SAVE_GPR_+6*8(sp)
++| ld x21, SAVE_GPR_+5*8(sp)
++| ld x20, SAVE_GPR_+4*8(sp)
++| ld x19, SAVE_GPR_+3*8(sp)
++| ld x18, SAVE_GPR_+2*8(sp)
++| ld x9, SAVE_GPR_+1*8(sp)
++| ld x8, SAVE_GPR_+0*8(sp)
++| fld f27, SAVE_FPR_+11*8(sp)
++| fld f26, SAVE_FPR_+10*8(sp)
++| fld f25, SAVE_FPR_+9*8(sp)
++| fld f24, SAVE_FPR_+8*8(sp)
++| fld f23, SAVE_FPR_+7*8(sp)
++| fld f22, SAVE_FPR_+6*8(sp)
++| fld f21, SAVE_FPR_+5*8(sp)
++| fld f20, SAVE_FPR_+4*8(sp)
++| fld f19, SAVE_FPR_+3*8(sp)
++| fld f18, SAVE_FPR_+2*8(sp)
++| fld f9, SAVE_FPR_+1*8(sp)
++| fld f8, SAVE_FPR_+0*8(sp)
++| addi sp, sp, CFRAME_SPACE
++| ret
++|.endmacro
++|
++|//-----------------------------------------------------------------------
++|
++|// Pseudo-instruction macros
++|// Be cautious with local label 9: every bx* macro below defines it and branches to it!
++|.macro bxeq, a, b, tgt
++| bne a, b, >9
++| j tgt
++|9:
++|.endmacro
++|
++|.macro bxne, a, b, tgt
++| beq a, b, >9
++| j tgt
++|9:
++|.endmacro
++|
++|.macro bxlt, a, b, tgt
++| bge a, b, >9
++| j tgt
++|9:
++|.endmacro
++|
++|.macro bxge, a, b, tgt
++| blt a, b, >9
++| j tgt
++|9:
++|.endmacro
++|
++|.macro bxgt, a, b, tgt
++| bge b, a, >9
++| j tgt
++|9:
++|.endmacro
++|
++|.macro bxle, a, b, tgt
++| blt b, a, >9
++| j tgt
++|9:
++|.endmacro
++|
++|.macro bxltu, a, b, tgt
++| bgeu a, b, >9
++| j tgt
++|9:
++|.endmacro
++|
++|.macro bxgeu, a, b, tgt
++| bltu a, b, >9
++| j tgt
++|9:
++|.endmacro
++|
++|.macro bxgtu, a, b, tgt
++| bgeu b, a, >9
++| j tgt
++|9:
++|.endmacro
++|
++|.macro bxleu, a, b, tgt
++| bltu b, a, >9
++| j tgt
++|9:
++|.endmacro
++|
++|.macro bxeqz, a, tgt
++| bxeq a, x0, tgt
++|.endmacro
++|
++|.macro bxnez, a, tgt
++| bxne a, x0, tgt
++|.endmacro
++|
++|.macro bxlez, a, tgt
++| bxge x0, a, tgt
++|.endmacro
++|
++|.macro bxgez, a, tgt
++| bxge a, x0, tgt
++|.endmacro
++|
++|.macro bxltz, a, tgt
++| bxlt a, x0, tgt
++|.endmacro
++|
++|.macro bxgtz, a, tgt
++| bxlt x0, a, tgt
++|.endmacro
++|
++|.macro lxi, a, b
++| lui a, (b)&0xfffff // Low 20 bits of b go into bits 31..12 of a.
++| srai a, a, 12 // Arithmetic shift back down: a = those 20 bits, sign-extended.
++|.endmacro
++|
++|.macro lzi, a, b
++| lui a, (b)&0xfffff // Low 20 bits of b go into bits 31..12; RV64 lui sign-extends bit 31 upwards.
++| srliw a, a, 12 // 32-bit logical shift, so a = those 20 bits zero-extended even when bit 19 of b is set.
++|.endmacro
++|
++|.macro addxi, a, b, c
++| lui x31, (c)&0xfffff // x31 is used as scratch and is clobbered.
++| srai x31, x31, 12 // x31 = low 20 bits of c, sign-extended.
++| add a, x31, b // a = b + that immediate.
++|.endmacro
++|
++|.macro sext.b, a, b
++| slli a, b, 56
++| srai a, a, 56
++|.endmacro
++|
++|.macro sext.h, a, b
++| slli a, b, 48
++| srai a, a, 48
++|.endmacro
++|
++|.macro zext.h, a, b
++| slli a, b, 48
++| srli a, a, 48
++|.endmacro
++|
++|.macro zext.w, a, b
++| slli a, b, 32
++| srli a, a, 32
++|.endmacro
++|
++|.macro bfextri, a, b, c, d
++| slli a, b, (63-c)
++| srli a, a, (d+63-c)
++|.endmacro
++|
++|//-----------------------------------------------------------------------
++|
++|// Type definitions. Some of these are only used for documentation.
++|.type L, lua_State, LREG
++|.type GL, global_State, GLREG
++|.type TVALUE, TValue
++|.type GCOBJ, GCobj
++|.type STR, GCstr
++|.type TAB, GCtab
++|.type LFUNC, GCfuncL
++|.type CFUNC, GCfuncC
++|.type PROTO, GCproto
++|.type UPVAL, GCupval
++|.type NODE, Node
++|.type NARGS8, int
++|.type TRACE, GCtrace
++|.type SBUF, SBuf
++|
++|//-----------------------------------------------------------------------
++|
++|// Trap for not-yet-implemented parts.
++|.macro NYI; .long 0x00100073; .endmacro
++|
++|//-----------------------------------------------------------------------
++|
++|// Access to frame relative to BASE.
++|.define FRAME_PC, -8
++|.define FRAME_FUNC, -16
++|
++|//-----------------------------------------------------------------------
++|
++|// Endian-specific defines. RISC-V only has little endian ABI for now.
++|.define OFS_RD, 2
++|.define OFS_RA, 1
++|.define OFS_OP, 0
++|
++|// Instruction decode.
++|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
++|.macro decode_BC4b, dst; slliw dst, dst, 2; .endmacro
++|.macro decode_BC8b, dst; slliw dst, dst, 3; .endmacro
++|.macro decode_RX8b, dst; andi dst, dst, 0x7f8; .endmacro
++|
++|.macro decode_OP8a, dst, ins; decode_OP1 dst, ins; .endmacro
++|.macro decode_OP8b, dst; decode_BC8b dst; .endmacro
++|.macro decode_RA8a, dst, ins; srliw dst, ins, 5; .endmacro
++|.macro decode_RA8b, dst; decode_RX8b dst; .endmacro
++|.macro decode_RB8a, dst, ins; srliw dst, ins, 21; .endmacro
++|.macro decode_RB8b, dst; decode_RX8b dst; .endmacro
++|.macro decode_RC8a, dst, ins; srliw dst, ins, 13; .endmacro
++|.macro decode_RC8b, dst; decode_RX8b dst; .endmacro
++|.macro decode_RD8a, dst, ins; srliw dst, ins, 16; .endmacro
++|.macro decode_RD4b, dst; decode_BC4b dst; .endmacro
++|.macro decode_RD8b, dst; decode_BC8b dst; .endmacro
++|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro
++|
++|.macro decode_OP8, dst, ins; decode_OP1 dst, ins; decode_BC8b dst; .endmacro
++|.macro decode_RA8, dst, ins; decode_RA8a dst, ins; decode_RA8b dst; .endmacro
++|.macro decode_RB8, dst, ins; decode_RB8a dst, ins; decode_RB8b dst; .endmacro
++|.macro decode_RC8, dst, ins; decode_RC8a dst, ins; decode_RC8b dst; .endmacro
++|.macro decode_RD8, dst, ins; decode_RD8a dst, ins; decode_RD8b dst; .endmacro
++|
++|// Instruction fetch.
++|.macro ins_NEXT1
++| lw INS, 0(PC)
++| addi PC, PC, 4
++|.endmacro
++|// Instruction decode+dispatch.
++|.macro ins_NEXT2
++| decode_OP8 TMP1, INS
++| add TMP0, DISPATCH, TMP1
++| decode_RD8a RD, INS
++| ld TMP4, 0(TMP0)
++| decode_RA8a RA, INS
++| decode_RD8b RD
++| decode_RA8b RA
++| jr TMP4
++|.endmacro
++|.macro ins_NEXT
++| ins_NEXT1
++| ins_NEXT2
++|.endmacro
++|
++|// Instruction footer.
++|.if 1
++| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
++| .define ins_next, ins_NEXT
++| .define ins_next_, ins_NEXT
++| .define ins_next1, ins_NEXT1
++| .define ins_next2, ins_NEXT2
++|.else
++| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
++| // Affects only certain kinds of benchmarks (and only with -j off).
++| .macro ins_next
++| j ->ins_next
++| .endmacro
++| .macro ins_next1
++| .endmacro
++| .macro ins_next2
++| j ->ins_next
++| .endmacro
++| .macro ins_next_
++| ->ins_next:
++| ins_NEXT
++| .endmacro
++|.endif
++|
++|// Call decode and dispatch.
++|.macro ins_callt
++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
++| ld PC, LFUNC:RB->pc
++| lw INS, 0(PC)
++| addi PC, PC, 4
++| decode_OP8 TMP1, INS
++| decode_RA8 RA, INS
++| add TMP0, DISPATCH, TMP1
++| ld TMP0, 0(TMP0)
++| add RA, RA, BASE
++| jr TMP0
++|.endmacro
++|
++|.macro ins_call
++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
++| sd PC, FRAME_PC(BASE)
++| ins_callt
++|.endmacro
++|
++|//-----------------------------------------------------------------------
++|
++|.macro branch_RD
++| srliw TMP0, RD, 1
++| lui TMP4, (-(BCBIAS_J*4 >> 12)) & 0xfffff
++| addw TMP0, TMP0, TMP4
++| add PC, PC, TMP0
++|.endmacro
++|
++|// Assumes J is relative to GL. Some J members might be out of range though.
++#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field))
++|
++#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
++|
++|.macro call_intern, curfunc, func
++|->curfunc .. _pcrel_ .. func:
++| auipc CFUNCADDR, extern %pcrel_hi(func)
++| jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func)
++|.endmacro
++|.macro call_extern, func
++| call extern func
++| empty
++|.endmacro
++|
++|// Set current VM state. Uses TMP0.
++|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
++|.macro st_vmstate; sw TMP0, GL->vmstate; .endmacro
++|
++|.macro hotcheck, delta, target
++| srli TMP1, PC, 1 // Derive a counter slot from PC. Uses TMP1 and TMP2.
++| andi TMP1, TMP1, 126 // Even byte offset 0..126, i.e. one of 64 halfword slots.
++| add TMP1, TMP1, DISPATCH
++| lhu TMP2, GG_DISP2HOT(TMP1) // Load the 16 bit counter, zero-extended.
++| addiw TMP2, TMP2, -delta // Count down by delta.
++| sh TMP2, GG_DISP2HOT(TMP1) // Store the new count before testing it.
++| bxltz TMP2, target // Counter dropped below zero: go to target (expands to local label 9).
++|.endmacro
++|
++|.macro hotloop
++| hotcheck HOTCOUNT_LOOP, ->vm_hotloop
++|.endmacro
++|
++|.macro hotcall
++| hotcheck HOTCOUNT_CALL, ->vm_hotcall
++|.endmacro
++|
++|// Move table write barrier back. Overwrites mark and tmp.
++|.macro barrierback, tab, mark, tmp, target
++| ld tmp, GL->gc.grayagain
++| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab)
++| sd tab, GL->gc.grayagain
++| sb mark, tab->marked
++| sd tmp, tab->gclist
++| j target
++|.endmacro
++|
++|// Clear type tag. Isolate lowest 64-17=47 bits of reg.
++|.macro cleartp, reg; slli reg, reg, 17; srli reg, reg, 17; .endmacro
++|.macro cleartp, dst, reg; slli dst, reg, 17; srli dst, dst, 17; .endmacro
++|
++|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. settp_a clears the old tag, settp_b ORs in tp << 47 and clobbers x31.
++|.macro settp_a, dst; cleartp dst; .endmacro
++|.macro settp_a, dst, src; cleartp dst, src; .endmacro
++|.macro settp_b, dst, tp;
++| slli x31, tp, 47 // x31 = tag moved to the top 17 bits (scratch register).
++| or dst, dst, x31
++|.endmacro
++|.macro settp_b, dst, src, tp;
++| slli x31, tp, 47 // x31 = tag moved to the top 17 bits (scratch register).
++| or dst, src, x31
++|.endmacro
++|.macro settp, dst, tp; settp_a dst; settp_b dst, tp; .endmacro
++|.macro settp, dst, src, tp; settp_a dst, src; settp_b dst, dst, tp; .endmacro
++|
++|// Extract (negative) type tag.
++|.macro gettp, dst, src; srai dst, src, 47; .endmacro
++|
++|// Macros to check the TValue type and extract the GCobj. Branch on failure. All of them clobber TMP4.
++|.macro checktp, reg, tp, target
++| gettp TMP4, reg // TMP4 = tag of reg (arithmetic shift right by 47).
++| addi TMP4, TMP4, tp // Callers pass the negated tag, so the sum is zero on a match.
++| cleartp reg // The tag is stripped from reg even if the check fails.
++| bxnez TMP4, target // Expands to a local label 9.
++|.endmacro
++|.macro checktp, dst, reg, tp, target
++| gettp TMP4, reg // TMP4 = tag of reg (arithmetic shift right by 47).
++| addi TMP4, TMP4, tp // Callers pass the negated tag, so the sum is zero on a match.
++| cleartp dst, reg // dst = reg without its tag; reg itself is left untouched.
++| bxnez TMP4, target // Expands to a local label 9.
++|.endmacro
++|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro
++|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro
++|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro
++|.macro checkint, reg, target
++| gettp TMP4, reg
++| bxne TMP4, TISNUM, target // Fail unless the tag equals TISNUM. reg keeps its tag.
++|.endmacro
++|.macro checknum, reg, target
++| gettp TMP4, reg
++| sltiu TMP4, TMP4, LJ_TISNUM // TMP4 = 1 if the tag is below LJ_TISNUM (unsigned compare), else 0.
++| bxeqz TMP4, target // Fail when the compare gave 0. reg keeps its tag.
++|.endmacro
++|
++|.macro mov_false, reg
++| li reg, 0x001
++| slli reg, reg, 47
++| not reg, reg
++|.endmacro
++|.macro mov_true, reg
++| li reg, 0x001
++| slli reg, reg, 48
++| not reg, reg
++|.endmacro
++|
++|//-----------------------------------------------------------------------
++
++/* Generate subroutines used by opcodes and other parts of the VM. */
++/* The .code_sub section should be last to help static branch prediction. */
++static void build_subroutines(BuildCtx *ctx)
++{
++ |.code_sub
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Return handling ----------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_returnp:
++ | // See vm_return. Also: TMP2 = previous base.
++ | andi TMP0, PC, FRAME_P
++ |
++ | // Return from pcall or xpcall fast func.
++ | mov_true TMP1
++ | bxeqz TMP0, ->cont_dispatch
++ | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
++ | mv BASE, TMP2 // Restore caller base.
++ | // Prepending may overwrite the pcall frame, so do it at the end.
++ | sd TMP1, -8(RA) // Prepend true to results.
++ | addi RA, RA, -8
++ |
++ |->vm_returnc:
++ | addiw RD, RD, 8 // RD = (nresults+1)*8.
++ | andi TMP0, PC, FRAME_TYPE
++ | li CRET1, LUA_YIELD
++ | bxeqz RD, ->vm_unwind_c_eh
++ | mv MULTRES, RD
++ | bxeqz TMP0, ->BC_RET_Z // Handle regular return to Lua.
++ |
++ |->vm_return:
++ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
++ | // TMP0 = PC & FRAME_TYPE
++ | andi TMP2, PC, ~FRAME_TYPEP
++ | xori TMP0, TMP0, FRAME_C
++ | sub TMP2, BASE, TMP2 // TMP2 = previous base.
++ | bxnez TMP0, ->vm_returnp
++ |
++ | addiw TMP1, RD, -8
++ | sd TMP2, L->base
++ | li_vmstate C
++ | lw TMP2, SAVE_NRES(sp)
++ | addi BASE, BASE, -16
++ | st_vmstate
++ | slliw TMP2, TMP2, 3
++ | beqz TMP1, >2
++ |1:
++ | addiw TMP1, TMP1, -8
++ | ld CRET1, 0(RA)
++ | addi RA, RA, 8
++ | sd CRET1, 0(BASE)
++ | addi BASE, BASE, 8
++ | bnez TMP1, <1
++ |
++ |2:
++ | bne TMP2, RD, >6
++ |3:
++ | sd BASE, L->top // Store new top.
++ |
++ |->vm_leave_cp:
++ | ld TMP0, SAVE_CFRAME(sp) // Restore previous C frame.
++ | mv CRET1, x0 // Ok return status for vm_pcall.
++ | sd TMP0, L->cframe
++ |
++ |->vm_leave_unw:
++ | restoreregs_ret
++ |
++ |6:
++ | ld TMP1, L->maxstack
++ | blt TMP2, RD, >7
++ | // More results wanted. Check stack size and fill up results with nil.
++ | bge BASE, TMP1, >9
++ | sd TISNIL, 0(BASE)
++ | addiw RD, RD, 8
++ | addi BASE, BASE, 8
++ | j <2
++ |
++ |7: // Less results wanted.
++ | subw TMP0, RD, TMP2
++ | sub TMP0, BASE, TMP0 // Either keep top or shrink it.
++ | beqz TMP2, >8
++ | mv BASE, TMP0 // LUA_MULTRET+1 case
++ |8:
++ | j <3
++ |
++ |9: // Corner case: need to grow stack for filling up results.
++ | // This can happen if:
++ | // - A C function grows the stack (a lot).
++ | // - The GC shrinks the stack in between.
++ | // - A return back from a lua_call() with (high) nresults adjustment.
++ |
++ | sd BASE, L->top // Save current top held in BASE (yes).
++ | mv MULTRES, RD
++ | srliw CARG2, TMP2, 3
++ | mv CARG1, L
++ | call_intern vm_leave_unw, lj_state_growstack // (lua_State *L, int n)
++ | lw TMP2, SAVE_NRES(sp)
++ | ld BASE, L->top // Need the (realloced) L->top in BASE.
++ | mv RD, MULTRES
++ | slliw TMP2, TMP2, 3
++ | j <2
++ |
++ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
++ | // (void *cframe, int errcode)
++ | mv sp, CARG1
++ | mv CRET1, CARG2
++ |->vm_unwind_c_eh: // Landing pad for external unwinder.
++ | ld L, SAVE_L(sp)
++ | li TMP0, ~LJ_VMST_C
++ | ld GL, L->glref
++ | sw TMP0, GL->vmstate
++ | j ->vm_leave_unw
++ |
++ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
++ | // (void *cframe)
++ | andi sp, CARG1, CFRAME_RAWMASK
++ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
++ | ld L, SAVE_L(sp)
++ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
++ | li TISNIL, LJ_TNIL
++ | li TISNUM, LJ_TISNUM
++ | ld BASE, L->base
++ | ld GL, L->glref // Setup pointer to global state.
++ | slli TMP3, TMP3, 32
++ | mov_false TMP1
++ | li_vmstate INTERP
++ | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame.
++ | fmv.d.x TOBIT, TMP3
++ | addi RA, BASE, -8 // Results start at BASE-8.
++ | addxi DISPATCH, GL, GG_G2DISP
++ | sd TMP1, 0(RA) // Prepend false to error message.
++ | st_vmstate
++ | li RD, 16 // 2 results: false + error message.
++ | j ->vm_returnc
++ |
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Grow stack for calls -----------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_growstack_c: // Grow stack for C function.
++ | li CARG2, LUA_MINSTACK
++ | j >2
++ |
++ |->vm_growstack_l: // Grow stack for Lua function.
++ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
++ | add RC, BASE, RC
++ | sub RA, RA, BASE
++ | sd BASE, L->base
++ | addi PC, PC, 4 // Must point after first instruction.
++ | sd RC, L->top
++ | srliw CARG2, RA, 3
++ |2:
++ | // L->base = new base, L->top = top
++ | sd PC, SAVE_PC(sp)
++ | mv CARG1, L
++ | call_intern vm_growstack_l, lj_state_growstack // (lua_State *L, int n)
++ | ld BASE, L->base
++ | ld RC, L->top
++ | ld LFUNC:RB, FRAME_FUNC(BASE)
++ | sub RC, RC, BASE
++ | cleartp LFUNC:RB
++ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
++ | ins_callt // Just retry the call.
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Entry points into the assembler VM ---------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_resume: // Setup C frame and resume thread.
++ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
++ | saveregs
++ | mv L, CARG1
++ | ld GL, L->glref // Setup pointer to global state.
++ | mv BASE, CARG2
++ | lbu TMP1, L->status
++ | sd L, SAVE_L(sp)
++ | li PC, FRAME_CP
++ | addi TMP0, sp, CFRAME_RESUME
++ | addxi DISPATCH, GL, GG_G2DISP
++ | sw x0, SAVE_NRES(sp)
++ | sw x0, SAVE_ERRF(sp)
++ | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
++ | sd x0, SAVE_CFRAME(sp)
++ | sd TMP0, L->cframe
++ | beqz TMP1, >3
++ |
++ | // Resume after yield (like a return).
++ | sd L, GL->cur_L
++ | mv RA, BASE
++ | ld BASE, L->base
++ | ld TMP1, L->top
++ | ld PC, FRAME_PC(BASE)
++ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
++ | sub RD, TMP1, BASE
++ | slli TMP3, TMP3, 32
++ | sb x0, L->status
++ | fmv.d.x TOBIT, TMP3
++ | li_vmstate INTERP
++ | addi RD, RD, 8
++ | st_vmstate
++ | mv MULTRES, RD
++ | andi TMP0, PC, FRAME_TYPE
++ | li TISNIL, LJ_TNIL
++ | li TISNUM, LJ_TISNUM
++ | bxeqz TMP0, ->BC_RET_Z
++ | j ->vm_return
++ |
++ |->vm_pcall: // Setup protected C frame and enter VM.
++ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
++ | saveregs
++ | sw CARG4, SAVE_ERRF(sp)
++ | li PC, FRAME_CP
++ | j >1
++ |
++ |->vm_call: // Setup C frame and enter VM.
++ | // (lua_State *L, TValue *base, int nres1)
++ | saveregs
++ | li PC, FRAME_C
++ |
++ |1: // Entry point for vm_pcall above (PC = ftype).
++ | ld TMP1, L:CARG1->cframe
++ | mv L, CARG1
++ | sw CARG3, SAVE_NRES(sp)
++ | ld GL, L->glref // Setup pointer to global state.
++ | sd CARG1, SAVE_L(sp)
++ | mv BASE, CARG2
++ | addxi DISPATCH, GL, GG_G2DISP
++ | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
++ | sd TMP1, SAVE_CFRAME(sp)
++ | sd sp, L->cframe // Add our C frame to cframe chain.
++ |
++ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
++ | sd L, GL->cur_L
++ | ld TMP2, L->base // TMP2 = old base (used in vmeta_call).
++ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
++ | ld TMP1, L->top
++ | slli TMP3, TMP3, 32
++ | add PC, PC, BASE
++ | sub NARGS8:RC, TMP1, BASE
++ | li TISNUM, LJ_TISNUM
++ | sub PC, PC, TMP2 // PC = frame delta + frame type
++ | fmv.d.x TOBIT, TMP3
++ | li_vmstate INTERP
++ | li TISNIL, LJ_TNIL
++ | st_vmstate
++ |
++ |->vm_call_dispatch:
++ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
++ | ld LFUNC:RB, FRAME_FUNC(BASE)
++ | checkfunc LFUNC:RB, ->vmeta_call
++ |
++ |->vm_call_dispatch_f:
++ | ins_call
++ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
++ |
++ |->vm_cpcall: // Setup protected C frame, call C.
++ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
++ | saveregs
++ | mv L, CARG1
++ | ld TMP0, L:CARG1->stack
++ | sd CARG1, SAVE_L(sp)
++ | ld TMP1, L->top
++ | ld GL, L->glref // Setup pointer to global state.
++ | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
++ | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
++ | ld TMP1, L->cframe
++ | addxi DISPATCH, GL, GG_G2DISP
++ | sw TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame.
++ | sw x0, SAVE_ERRF(sp) // No error function.
++ | sd TMP1, SAVE_CFRAME(sp)
++ | sd sp, L->cframe // Add our C frame to cframe chain.
++ | sd L, GL->cur_L
++ | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
++ | mv BASE, CRET1
++ | li PC, FRAME_CP
++ | bnez CRET1, <3 // Else continue with the call.
++ | j ->vm_leave_cp // No base? Just remove C frame.
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Metamethod handling ------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |//-- Continuation dispatch ----------------------------------------------
++ |
++ |->cont_dispatch:
++ | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
++ | ld TMP0, -32(BASE) // Continuation.
++ | mv RB, BASE
++ | mv BASE, TMP2 // Restore caller BASE.
++ | ld LFUNC:TMP1, FRAME_FUNC(TMP2)
++ | ld PC, -24(RB) // Restore PC from [cont|PC].
++ |.if FFI
++ | sltiu TMP3, TMP0, 2
++ |.endif
++ | cleartp LFUNC:TMP1
++ | add TMP2, RA, RD
++ | ld TMP1, LFUNC:TMP1->pc
++ | sd TISNIL, -8(TMP2) // Ensure one valid arg.
++ |.if FFI
++ | bnez TMP3, >1
++ |.endif
++ | // BASE = base, RA = resultptr, RB = meta base
++ | ld KBASE, PC2PROTO(k)(TMP1)
++ | jr TMP0 // Jump to continuation.
++ |
++ |.if FFI
++ |1:
++ | addi TMP1, RB, -32
++ | bxnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback.
++ | // cont = 0: tailcall from C function.
++ | sub RC, TMP1, BASE
++ | j ->vm_call_tail
++ |.endif
++ |
++ |->cont_cat: // RA = resultptr, RB = meta base
++ | lw INS, -4(PC)
++ | addi CARG2, RB, -32
++ | ld TMP0, 0(RA)
++ | decode_RB8 MULTRES, INS
++ | decode_RA8 RA, INS
++ | add TMP1, BASE, MULTRES
++ | sd BASE, L->base
++ | sub CARG3, CARG2, TMP1
++ | sd TMP0, 0(CARG2)
++ | bxne TMP1, CARG2, ->BC_CAT_Z
++ | add RA, BASE, RA
++ | sd TMP0, 0(RA)
++ | j ->cont_nop
++ |
++ |//-- Table indexing metamethods -----------------------------------------
++ |
++ |->vmeta_tgets1:
++ | addi CARG3, GL, offsetof(global_State, tmptv)
++ | li TMP0, LJ_TSTR
++ | settp STR:RC, TMP0
++ | sd STR:RC, 0(CARG3)
++ | j >1
++ |
++ |->vmeta_tgets:
++ | addi CARG2, GL, offsetof(global_State, tmptv)
++ | addi CARG3, GL, offsetof(global_State, tmptv2)
++ | li TMP0, LJ_TTAB
++ | li TMP1, LJ_TSTR
++ | settp TAB:RB, TMP0
++ | settp STR:RC, TMP1
++ | sd TAB:RB, 0(CARG2)
++ | sd STR:RC, 0(CARG3)
++ | j >1
++ |
++ |->vmeta_tgetb: // TMP0 = index
++ | addi CARG3, GL, offsetof(global_State, tmptv)
++ | settp TMP0, TISNUM
++ | sd TMP0, 0(CARG3)
++ |
++ |->vmeta_tgetv:
++ |1:
++ | sd BASE, L->base
++ | mv CARG1, L
++ | sd PC, SAVE_PC(sp)
++ | // (lua_State *L, TValue *o, TValue *k)
++ | call_intern vmeta_tgetv, lj_meta_tget
++ | // Returns TValue * (finished) or NULL (metamethod).
++ | beqz CRET1, >3
++ | ld TMP0, 0(CRET1)
++ | ins_next1
++ | sd TMP0, 0(RA)
++ | ins_next2
++ |
++ |3: // Call __index metamethod.
++ | // BASE = base, L->top = new base, stack = cont/func/t/k
++ | addi TMP1, BASE, -FRAME_CONT
++ | li NARGS8:RC, 16 // 2 args for func(t, k).
++ | ld BASE, L->top
++ | sd PC, -24(BASE) // [cont|PC]
++ | sub PC, BASE, TMP1
++ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
++ | cleartp LFUNC:RB
++ | j ->vm_call_dispatch_f
++ |
++ |->vmeta_tgetr:
++ | call_intern vmeta_tgetr, lj_tab_getinth // (GCtab *t, int32_t key)
++ | // Returns cTValue * or NULL.
++ | mv TMP1, TISNIL
++ | bxeqz CRET1, ->BC_TGETR_Z
++ | ld TMP1, 0(CRET1)
++ | j ->BC_TGETR_Z
++ |
++ |//-----------------------------------------------------------------------
++ |
++ |->vmeta_tsets1:
++ | addi CARG3, GL, offsetof(global_State, tmptv) // CARG3 = address of GL->tmptv, used for the key.
++ | li TMP0, LJ_TSTR
++ | settp STR:RC, TMP0 // Tag the string in RC with LJ_TSTR.
++ | sd STR:RC, 0(CARG3) // Store the tagged key where CARG3 points.
++ | j >1 // Join the common path under vmeta_tsetv.
++ |
++ |->vmeta_tsets:
++ | addi CARG2, GL, offsetof(global_State, tmptv)
++ | addi CARG3, GL, offsetof(global_State, tmptv2)
++ | li TMP0, LJ_TTAB
++ | li TMP1, LJ_TSTR
++ | settp TAB:RB, TMP0
++ | settp STR:RC, TMP1
++ | sd TAB:RB, 0(CARG2)
++ | sd STR:RC, 0(CARG3)
++ | j >1
++ |
++ |->vmeta_tsetb: // TMP0 = index
++ | addi CARG3, GL, offsetof(global_State, tmptv)
++ | settp TMP0, TISNUM
++ | sd TMP0, 0(CARG3)
++ |
++ |->vmeta_tsetv:
++ |1:
++ | sd BASE, L->base
++ | mv CARG1, L
++ | sd PC, SAVE_PC(sp)
++ | // (lua_State *L, TValue *o, TValue *k)
++ | call_intern vmeta_tsetv, lj_meta_tset
++ | // Returns TValue * (finished) or NULL (metamethod).
++ | ld TMP2, 0(RA)
++ | beqz CRET1, >3
++ | ins_next1
++ | // NOBARRIER: lj_meta_tset ensures the table is not black.
++ | sd TMP2, 0(CRET1)
++ | ins_next2
++ |
++ |3: // Call __newindex metamethod.
++ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
++ | addi TMP1, BASE, -FRAME_CONT
++ | ld BASE, L->top
++ | sd PC, -24(BASE) // [cont|PC]
++ | sub PC, BASE, TMP1
++ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
++ | li NARGS8:RC, 24 // 3 args for func(t, k, v)
++ | cleartp LFUNC:RB
++ | sd TMP2, 16(BASE) // Copy value to third argument.
++ | j ->vm_call_dispatch_f
++ |
++ |->vmeta_tsetr:
++ | sd BASE, L->base
++ | mv CARG1, L
++ | sd PC, SAVE_PC(sp)
++ | // (lua_State *L, GCtab *t, int32_t key)
++ | call_intern vmeta_tsetr, lj_tab_setinth
++ | // Returns TValue *.
++ | j ->BC_TSETR_Z
++ |
++ |//-- Comparison metamethods ---------------------------------------------
++ |
++ |->vmeta_comp:
++ | // RA/RD point to o1/o2.
++ | mv CARG2, RA
++ | mv CARG3, RD
++ | addi PC, PC, -4
++ | sd BASE, L->base
++ | mv CARG1, L
++ | decode_OP1 CARG4, INS
++ | sd PC, SAVE_PC(sp)
++ | // (lua_State *L, TValue *o1, *o2, int op)
++ | call_intern vmeta_comp, lj_meta_comp
++ | // Returns 0/1 or TValue * (metamethod).
++ |3:
++ | sltiu TMP1, CRET1, 2
++ | bxeqz TMP1, ->vmeta_binop
++ | negw TMP2, CRET1
++ |4:
++ | lhu RD, OFS_RD(PC)
++ | addi PC, PC, 4
++ | lui TMP1, (-(BCBIAS_J*4 >> 12)) & 0xfffff
++ | slliw RD, RD, 2
++ | addw RD, RD, TMP1
++ | and RD, RD, TMP2
++ | add PC, PC, RD
++ |->cont_nop:
++ | ins_next
++ |
++ |->cont_ra: // RA = resultptr
++ | lbu TMP1, -4+OFS_RA(PC)
++ | ld TMP2, 0(RA)
++ | slliw TMP1, TMP1, 3
++ | add TMP1, BASE, TMP1
++ | sd TMP2, 0(TMP1)
++ | j ->cont_nop
++ |
++ |->cont_condt: // RA = resultptr
++ | ld TMP0, 0(RA)
++ | gettp TMP0, TMP0
++ | sltiu TMP1, TMP0, LJ_TISTRUECOND
++ | negw TMP2, TMP1 // Branch if result is true.
++ | j <4
++ |
++ |->cont_condf: // RA = resultptr
++ | ld TMP0, 0(RA)
++ | gettp TMP0, TMP0
++ | sltiu TMP1, TMP0, LJ_TISTRUECOND
++ | addiw TMP2, TMP1, -1 // Branch if result is false.
++ | j <4
++ |
++ |->vmeta_equal:
++ | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1.
++ | cleartp LFUNC:CARG3, CARG2
++ | cleartp LFUNC:CARG2, CARG1
++ | mv CARG4, TMP0
++ | addi PC, PC, -4
++ | sd BASE, L->base
++ | mv CARG1, L
++ | sd PC, SAVE_PC(sp)
++ | // (lua_State *L, GCobj *o1, *o2, int ne)
++ | call_intern vmeta_equal, lj_meta_equal
++ | // Returns 0/1 or TValue * (metamethod).
++ | j <3
++ |
++ |->vmeta_equal_cd:
++ |.if FFI
++ | addi PC, PC, -4
++ | mv CARG1, L
++ | mv CARG2, INS
++ | sd BASE, L->base
++ | sd PC, SAVE_PC(sp)
++ | call_intern vmeta_equal_cd, lj_meta_equal_cd // (lua_State *L, BCIns op)
++ | // Returns 0/1 or TValue * (metamethod).
++ | j <3
++ |.endif
++ |
++ |->vmeta_istype:
++ | addi PC, PC, -4 // Step PC back by one instruction before saving it.
++ | sd BASE, L->base
++ | mv CARG1, L
++ | srliw CARG2, RA, 3 // Integer argument: RA / 8 (presumably RA still holds ra*8 here -- confirm at the caller).
++ | srliw CARG3, RD, 3 // Integer argument: RD / 8.
++ | sd PC, SAVE_PC(sp)
++ | // (lua_State *L, BCReg ra, BCReg tp)
++ | call_intern vmeta_istype, lj_meta_istype
++ | j ->cont_nop
++ |
++ |//-- Arithmetic metamethods ---------------------------------------------
++ |
++ |->vmeta_unm:
++ | mv RC, RB
++ |
++ |->vmeta_arith:
++ | mv CARG1, L
++ | sd BASE, L->base
++ | mv CARG2, RA
++ | sd PC, SAVE_PC(sp)
++ | mv CARG3, RB
++ | mv CARG4, RC
++ | decode_OP1 CARG5, INS
++ | // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
++ | call_intern vmeta_arith, lj_meta_arith
++ | // Returns NULL (finished) or TValue * (metamethod).
++ | bxeqz CRET1, ->cont_nop
++ |
++ | // Call metamethod for binary op.
++ |->vmeta_binop:
++ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
++ | sub TMP1, CRET1, BASE
++ | sd PC, -24(CRET1) // [cont|PC]
++ | mv TMP2, BASE
++ | addi PC, TMP1, FRAME_CONT
++ | mv BASE, CRET1
++ | li NARGS8:RC, 16 // 2 args for func(o1, o2).
++ | j ->vm_call_dispatch
++ |
++ |->vmeta_len:
++ | // CARG2 already set by BC_LEN.
++#if LJ_52
++ | mv MULTRES, CARG1
++#endif
++ | sd BASE, L->base
++ | mv CARG1, L
++ | sd PC, SAVE_PC(sp)
++ | call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o)
++ | // Returns NULL (retry) or TValue * (metamethod base).
++#if LJ_52
++ | bxnez CRET1, ->vmeta_binop // Binop call for compatibility.
++ | mv CARG1, MULTRES
++ | j ->BC_LEN_Z
++#else
++ | j ->vmeta_binop // Binop call for compatibility.
++#endif
++ |
++ |//-- Call metamethod ----------------------------------------------------
++ |
++ |->vmeta_call: // Resolve and call __call metamethod.
++ | // TMP2 = old base, BASE = new base, RC = nargs*8
++ | mv CARG1, L
++ | sd TMP2, L->base // This is the caller's base!
++ | addi CARG2, BASE, -16 // func = slot at BASE-16, the same offset as FRAME_FUNC.
++ | sd PC, SAVE_PC(sp)
++ | add CARG3, BASE, RC // top = BASE + nargs*8.
++ | mv MULTRES, NARGS8:RC // Keep nargs*8 in callee-save MULTRES across the C call.
++ | // (lua_State *L, TValue *func, TValue *top)
++ | call_intern vmeta_call, lj_meta_call
++ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
++ | addi NARGS8:RC, MULTRES, 8 // Got one more argument now.
++ | cleartp LFUNC:RB
++ | ins_call
++ |
++ |->vmeta_callt: // Resolve __call for BC_CALLT.
++ | // BASE = old base, RA = new base, RC = nargs*8
++ | mv CARG1, L
++ | sd BASE, L->base
++ | addi CARG2, RA, -16
++ | sd PC, SAVE_PC(sp)
++ | add CARG3, RA, RC
++ | mv MULTRES, NARGS8:RC
++ | // (lua_State *L, TValue *func, TValue *top)
++ | call_intern vmeta_callt, lj_meta_call
++ | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
++ | ld TMP1, FRAME_PC(BASE)
++ | addi NARGS8:RC, MULTRES, 8 // Got one more argument now.
++ | cleartp LFUNC:CARG3, RB
++ | j ->BC_CALLT_Z
++ |
++ |//-- Argument coercion for 'for' statement ------------------------------
++ |
++ |->vmeta_for: // Calls lj_meta_for(L, RA), then re-dispatches to BC_FORI/BC_JFORI.
++ | mv CARG1, L
++ | sd BASE, L->base
++ | mv CARG2, RA
++ | sd PC, SAVE_PC(sp)
++ | mv MULTRES, INS // Keep the instruction word in MULTRES across the call.
++ | call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base)
++ |.if JIT
++ | decode_OP1 TMP0, MULTRES
++ | li TMP1, BC_JFORI
++ |.endif
++ | decode_RA8 RA, MULTRES // Re-decode operands from the saved instruction word.
++ | decode_RD8 RD, MULTRES
++ |.if JIT
++ | bxeq TMP0, TMP1, =>BC_JFORI // Opcode is BC_JFORI: continue there instead.
++ |.endif
++ | j =>BC_FORI
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Fast functions -----------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |.macro .ffunc, name
++ |->ff_ .. name:
++ |.endmacro
++ |
++ |.macro .ffunc_1, name
++ |->ff_ .. name:
++ | ld CARG1, 0(BASE)
++ | bxeqz NARGS8:RC, ->fff_fallback
++ |.endmacro
++ |
++ |.macro .ffunc_2, name
++ |->ff_ .. name:
++ | sltiu TMP0, NARGS8:RC, 16
++ | ld CARG1, 0(BASE)
++ | ld CARG2, 8(BASE)
++ | bxnez TMP0, ->fff_fallback
++ |.endmacro
++ |
++ |.macro .ffunc_n, name
++ |->ff_ .. name:
++ | ld CARG1, 0(BASE)
++ | fld FARG1, 0(BASE)
++ | bxeqz NARGS8:RC, ->fff_fallback
++ | checknum CARG1, ->fff_fallback
++ |.endmacro
++ |
++ |.macro .ffunc_nn, name
++ |->ff_ .. name:
++ | ld CARG1, 0(BASE)
++ | sltiu TMP0, NARGS8:RC, 16 // TMP0 = 1 if NARGS8 < 16, i.e. fewer than 2 args.
++ | ld CARG2, 8(BASE)
++ | bxnez TMP0, ->fff_fallback
++ | gettp TMP1, CARG1
++ | gettp TMP2, CARG2
++ | sltiu TMP1, TMP1, LJ_TISNUM // Unsigned compare of each tag against LJ_TISNUM.
++ | sltiu TMP2, TMP2, LJ_TISNUM
++ | fld FARG1, 0(BASE) // Same two slots, loaded again into FP registers.
++ | and TMP1, TMP1, TMP2
++ | fld FARG2, 8(BASE)
++ | bxeqz TMP1, ->fff_fallback // Fall back unless both tag compares succeeded.
++ |.endmacro
++ |
++ |// Inlined GC threshold check.
++ |.macro ffgccheck
++ | ld TMP0, GL->gc.total
++ | ld TMP1, GL->gc.threshold
++ | bltu TMP0, TMP1, >1 // total < threshold (unsigned): skip the GC step.
++ | jal ->fff_gcstep // Subroutine call; fff_gcstep ends with 'ret'.
++ |1:
++ |.endmacro
++ |
++ |//-- Base library: checks -----------------------------------------------
++ |.ffunc_1 assert
++ | gettp TMP1, CARG1
++ | sltiu TMP1, TMP1, LJ_TISTRUECOND
++ | addi RA, BASE, -16
++ | bxeqz TMP1, ->fff_fallback // Tag compare against LJ_TISTRUECOND failed: fall back.
++ | ld PC, FRAME_PC(BASE)
++ | addiw RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
++ | addi TMP1, BASE, 8 // TMP1 = address of the 2nd argument.
++ | add TMP2, RA, RD // TMP2 = BASE + NARGS8 - 8 = address of the last argument.
++ | sd CARG1, 0(RA) // 1st argument is stored as the 1st result.
++ | bne BASE, TMP2, >1
++ | j ->fff_res // Done if exactly 1 argument.
++ |1: // Copy the remaining arguments down by 16 bytes, one slot per iteration.
++ | ld TMP0, 0(TMP1)
++ | sd TMP0, -16(TMP1)
++ | mv TMP3, TMP1 // Loop test uses the slot just copied, not the next one.
++ | addi TMP1, TMP1, 8
++ | bne TMP3, TMP2, <1
++ | j ->fff_res
++ |
++ |.ffunc_1 type
++ | gettp TMP0, CARG1
++ | not TMP3, TMP0 // Default index: ~tag.
++ | bltu TISNUM, TMP0, >1 // Keep ~tag when TISNUM < tag (unsigned).
++ | li TMP3, ~LJ_TISNUM // Otherwise use the index ~LJ_TISNUM.
++ |1:
++ | slli TMP3, TMP3, 3 // Scale the index by 8.
++ | add TMP3, CFUNC:RB, TMP3
++ | ld CARG1, CFUNC:TMP3->upvalue // Result is loaded from the function's upvalues.
++ | j ->fff_restv
++ |
++ |//-- Base library: getters and setters ---------------------------------
++ |
++ |.ffunc_1 getmetatable
++ | gettp TMP2, CARG1
++ | addi TMP0, TMP2, -LJ_TTAB // 0 iff tag == LJ_TTAB.
++ | addi TMP1, TMP2, -LJ_TUDATA // 0 iff tag == LJ_TUDATA.
++ | snez TMP0, TMP0
++ | neg TMP0, TMP0 // All-ones mask if not a table, else 0.
++ | and TMP0, TMP0, TMP1 // Non-zero only if neither table nor userdata.
++ | cleartp TAB:CARG1
++ | bnez TMP0, >6
++ |1: // Field metatable must be at same offset for GCtab and GCudata!
++ | ld TAB:RB, TAB:CARG1->metatable
++ |2:
++ | ld STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
++ | li CARG1, LJ_TNIL
++ | bxeqz TAB:RB, ->fff_restv // No metatable: return the LJ_TNIL value loaded above.
++ | lw TMP0, TAB:RB->hmask
++ | lw TMP1, STR:RC->sid
++ | ld NODE:TMP2, TAB:RB->node
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
++ | slli TMP0, TMP1, 5
++ | slli TMP1, TMP1, 3
++ | sub TMP1, TMP0, TMP1 // idx*32 - idx*8 = idx*24.
++ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
++ | li CARG4, LJ_TSTR
++ | settp STR:RC, CARG4 // Tagged key to look for.
++ |3: // Rearranged logic, because we expect _not_ to find the key.
++ | ld TMP0, NODE:TMP2->key
++ | ld CARG1, NODE:TMP2->val
++ | ld NODE:TMP2, NODE:TMP2->next
++ | li TMP3, LJ_TTAB
++ | beq RC, TMP0, >5 // Key matches the tagged string.
++ | bnez NODE:TMP2, <3 // Follow the chain until next == NULL.
++ |4:
++ | settp CARG1, RB, TMP3 // Default result: RB (the metatable) tagged with LJ_TTAB.
++ | j ->fff_restv // Not found, keep default result.
++ |5:
++ | bxne CARG1, TISNIL, ->fff_restv // Found a non-nil value: return it.
++ | j <4 // Ditto for nil value.
++ |
++ |6: // Neither table nor userdata: pick the metatable from GL->gcroot by tag.
++ | sltiu TMP3, TMP2, LJ_TISNUM
++ | neg TMP3, TMP3 // All-ones mask if tag < LJ_TISNUM (unsigned), else 0.
++ | and TMP0, TISNUM, TMP3
++ | not TMP3, TMP3
++ | and TMP2, TMP2, TMP3
++ | or TMP2, TMP2, TMP0 // TMP2 = (tag < LJ_TISNUM) ? TISNUM : tag.
++ | slli TMP2, TMP2, 3
++ | sub TMP0, GL, TMP2 // TMP0 = GL - TMP2*8.
++ | ld TAB:RB, (offsetof(global_State, gcroot[GCROOT_BASEMT])-8)(TMP0)
++ | j <2
++ |
++ |.ffunc_2 setmetatable
++ | // Fast path: no mt for table yet and not clearing the mt.
++ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback
++ | gettp TMP3, CARG2
++ | ld TAB:TMP0, TAB:TMP1->metatable
++ | lbu TMP2, TAB:TMP1->marked
++ | addi TMP3, TMP3, -LJ_TTAB
++ | cleartp TAB:CARG2
++ | or TMP3, TMP3, TAB:TMP0
++ | bxnez TMP3, ->fff_fallback
++ | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table)
++ | sd TAB:CARG2, TAB:TMP1->metatable
++ | bxeqz TMP3, ->fff_restv
++ | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv
++ |
++ |.ffunc rawget
++ | ld CARG2, 0(BASE)
++ | sltiu TMP0, NARGS8:RC, 16
++ | gettp TMP1, CARG2
++ | cleartp CARG2
++ | addi TMP1, TMP1, -LJ_TTAB
++ | or TMP0, TMP0, TMP1
++ | addi CARG3, BASE, 8
++ | bxnez TMP0, ->fff_fallback
++ | mv CARG1, L
++ | call_intern ff_rawget, lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
++ | // Returns cTValue *.
++ | ld CARG1, 0(CRET1)
++ | j ->fff_restv
++ |
++ |//-- Base library: conversions ------------------------------------------
++ |
++ |.ffunc tonumber
++ | // Only handles the number case inline (without a base argument).
++ | ld CARG1, 0(BASE)
++ | xori TMP0, NARGS8:RC, 8 // Exactly one number argument. (TMP0 = 0 iff NARGS8 == 8.)
++ | gettp TMP1, CARG1
++ | sltu TMP1, TISNUM, TMP1 // TMP1 = 1 if TISNUM < tag (unsigned).
++ | or TMP0, TMP0, TMP1
++ | bxnez TMP0, ->fff_fallback // No args or CARG1 is not number
++ | j ->fff_restv // Return the argument unchanged (still in CARG1).
++ |
++ |.ffunc_1 tostring
++ | // Only handles the string or number case inline.
++ | gettp TMP0, CARG1
++ | addi TMP1, TMP0, -LJ_TSTR
++ | // A __tostring method in the string base metatable is ignored.
++ | bxeqz TMP1, ->fff_restv // String key?
++ | // Handle numbers inline, unless a number base metatable is present.
++ | ld TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
++ | sltu TMP0, TISNUM, TMP0
++ | sd BASE, L->base // Add frame since C call can throw.
++ | or TMP0, TMP0, TMP1
++ | bxnez TMP0, ->fff_fallback
++ | sd PC, SAVE_PC(sp) // Redundant (but a defined value).
++ | ffgccheck
++ | mv CARG1, L
++ | mv CARG2, BASE
++ | call_intern ff_tostring, lj_strfmt_number // (lua_State *L, cTValue *o)
++ | // Returns GCstr *.
++ | li TMP1, LJ_TSTR
++ |// ld BASE, L->base
++ | settp CARG1, TMP1
++ | j ->fff_restv
++ |
++ |//-- Base library: iterators -------------------------------------------
++ |
++ |.ffunc_1 next
++ | checktp CARG1, -LJ_TTAB, ->fff_fallback
++ | add TMP0, BASE, NARGS8:RC
++ | ld PC, FRAME_PC(BASE)
++ | sd TISNIL, 0(TMP0) // Set missing 2nd arg to nil.
++ | addi CARG2, BASE, 8
++ | addi CARG3, BASE, -16
++ | call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
++ | // Returns 1=found, 0=end, -1=error.
++ |// addi RA, BASE, -16
++ | li RD, (2+1)*8
++ | bxgtz CRET1, ->fff_res // Found key/value.
++ | mv TMP1, CRET1
++ | mv CARG1, TISNIL
++ | bxeqz TMP1, ->fff_restv // End of traversal: return nil.
++ | ld CFUNC:RB, FRAME_FUNC(BASE)
++ | li RC, 2*8
++ | cleartp CFUNC:RB
++ | j ->fff_fallback // Invalid key.
++ |
++ |.ffunc_1 pairs
++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
++ | ld PC, FRAME_PC(BASE)
++#if LJ_52
++ | ld TAB:TMP2, TAB:TMP1->metatable
++ | ld TMP0, CFUNC:RB->upvalue[0]
++ | addi RA, BASE, -16
++ | bxnez TAB:TMP2, ->fff_fallback
++#else
++ | ld TMP0, CFUNC:RB->upvalue[0]
++ | addi RA, BASE, -16
++#endif
++ | sd TISNIL, 0(BASE)
++ | sd CARG1, -8(BASE)
++ | sd TMP0, 0(RA)
++ | li RD, (3+1)*8
++ | j ->fff_res
++ |
++ |.ffunc_2 ipairs_aux
++ | checktab CARG1, ->fff_fallback
++ | checkint CARG2, ->fff_fallback
++ | lw TMP0, TAB:CARG1->asize
++ | ld TMP1, TAB:CARG1->array
++ | ld PC, FRAME_PC(BASE)
++ | sext.w TMP2, CARG2
++ | addiw TMP2, TMP2, 1
++ | sltu TMP3, TMP2, TMP0
++ | addi RA, BASE, -16
++ | zext.w TMP0, TMP2
++ | settp_b TMP0, TISNUM
++ | sd TMP0, 0(RA)
++ | beqz TMP3, >2 // Not in array part?
++ | slli TMP3, TMP2, 3
++ | add TMP3, TMP1, TMP3
++ | ld TMP1, 0(TMP3)
++ |1:
++ | li RD, (0+1)*8
++ | bxeq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
++ | sd TMP1, -8(BASE)
++ | li RD, (2+1)*8
++ | j ->fff_res
++ |2: // Check for empty hash part first. Otherwise call C function.
++ | lw TMP0, TAB:CARG1->hmask
++ | li RD, (0+1)*8
++ | bxeqz TMP0, ->fff_res
++ | mv CARG2, TMP2
++ | call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key)
++ | // Returns cTValue * or NULL.
++ | li RD, (0+1)*8
++ | bxeqz CRET1, ->fff_res
++ | ld TMP1, 0(CRET1)
++ | j <1
++ |
++ |.ffunc_1 ipairs
++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
++ | ld PC, FRAME_PC(BASE)
++#if LJ_52
++ | ld TAB:TMP2, TAB:TMP1->metatable
++#endif
++ | ld CFUNC:TMP0, CFUNC:RB->upvalue[0]
++ | addi RA, BASE, -16
++#if LJ_52
++ | bxnez TAB:TMP2, ->fff_fallback
++#endif
++ | slli TMP1, TISNUM, 47
++ | sd CARG1, -8(BASE)
++ | sd TMP1, 0(BASE)
++ | sd CFUNC:TMP0, 0(RA)
++ | li RD, (3+1)*8
++ | j ->fff_res
++ |
++ |//-- Base library: catch errors ----------------------------------------
++ |
++ |.ffunc pcall
++ | ld TMP1, L->maxstack
++ | add TMP2, BASE, NARGS8:RC
++ | bxltu TMP1, TMP2, ->fff_fallback // Fall back if maxstack < BASE+NARGS8.
++ | addi NARGS8:RC, NARGS8:RC, -8 // NARGS8 -= 8; negative means no arguments at all.
++ | lbu TMP3, GL->hookmask // TMP3 = hookmask, reduced to the active bit below.
++ | mv TMP2, BASE
++ | bxltz NARGS8:RC, ->fff_fallback
++ | addi BASE, BASE, 16
++ | // Remember active hook before pcall.
++ | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT
++ | andi TMP3, TMP3, 1
++ | addi PC, TMP3, 16+FRAME_PCALL // PC = 16+FRAME_PCALL plus the hook-active bit.
++ | bxeqz NARGS8:RC, ->vm_call_dispatch
++ |1: // Also the target of the '<1' branch at the end of xpcall.
++ | add TMP0, BASE, NARGS8:RC
++ |2: // Move slots up by 8 bytes, from BASE+NARGS8 down to BASE.
++ | ld TMP1, -16(TMP0)
++ | sd TMP1, -8(TMP0)
++ | addi TMP0, TMP0, -8
++ | bne TMP0, BASE, <2
++ | j ->vm_call_dispatch
++ |
++ |.ffunc xpcall
++ | ld TMP1, L->maxstack
++ | add TMP2, BASE, NARGS8:RC
++ | bxltu TMP1, TMP2, ->fff_fallback // Fall back if maxstack < BASE+NARGS8.
++ | addi NARGS8:TMP0, NARGS8:RC, -16
++ | ld CARG1, 0(BASE)
++ | ld CARG2, 8(BASE)
++ | lbu TMP3, GL->hookmask // Into TMP3 (as in pcall): it is shifted and masked below.
++ | bxltz NARGS8:TMP0, ->fff_fallback // Fewer than 2 arguments.
++ | gettp TMP2, CARG2
++ | addi TMP2, TMP2, -LJ_TFUNC
++ | bxnez TMP2, ->fff_fallback // Traceback must be a function.
++ | mv TMP2, BASE
++ | mv NARGS8:RC, NARGS8:TMP0
++ | addi BASE, BASE, 24
++ | // Remember active hook before pcall.
++ | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT
++ | sd CARG2, 0(TMP2) // Swap function and traceback.
++ | andi TMP3, TMP3, 1
++ | sd CARG1, 8(TMP2)
++ | addi PC, TMP3, 24+FRAME_PCALL // PC = 24+FRAME_PCALL plus the hook-active bit.
++ | bnez NARGS8:RC, <1 // Arguments left: reuse the copy loop at label 1 in pcall.
++ | j ->vm_call_dispatch
++ |
++ |//-- Coroutine library --------------------------------------------------
++ |
++ |.macro coroutine_resume_wrap, resume
++ |.if resume
++ |.ffunc_1 coroutine_resume
++ | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
++ |.else
++ |.ffunc coroutine_wrap_aux
++ | ld L:CARG1, CFUNC:RB->upvalue[0].gcr
++ | cleartp L:CARG1
++ |.endif
++ | lbu TMP0, L:CARG1->status
++ | ld TMP1, L:CARG1->cframe
++ | ld CARG2, L:CARG1->top
++ | ld TMP2, L:CARG1->base
++ | addiw CARG4, TMP0, -LUA_YIELD
++ | add CARG3, CARG2, TMP0
++ | addi TMP3, CARG2, 8
++ | seqz TMP4, CARG4
++ | neg TMP4, TMP4
++ | and CARG2, CARG2, TMP4
++ | not TMP4, TMP4
++ | and TMP3, TMP3, TMP4
++ | or CARG2, CARG2, TMP3
++ | bxgtz CARG4, ->fff_fallback // st > LUA_YIELD?
++ | xor TMP2, TMP2, CARG3
++ | or CARG4, TMP2, TMP0
++ | bxnez TMP1, ->fff_fallback // cframe != 0?
++ | ld TMP0, L:CARG1->maxstack
++ | ld PC, FRAME_PC(BASE)
++ | bxeqz CARG4, ->fff_fallback // base == top && st == 0?
++ | add TMP2, CARG2, NARGS8:RC
++ | sd BASE, L->base
++ | sd PC, SAVE_PC(sp)
++ | bxltu TMP0, TMP2, ->fff_fallback // Stack overflow?
++ |1:
++ |.if resume
++ | addi BASE, BASE, 8 // Keep resumed thread in stack for GC.
++ | addi NARGS8:RC, NARGS8:RC, -8
++ | addi TMP2, TMP2, -8
++ |.endif
++ | sd TMP2, L:CARG1->top
++ | sd BASE, L->top
++ | add TMP1, BASE, NARGS8:RC
++ | mv CARG3, CARG2
++ |2: // Move args to coroutine.
++ | ld TMP0, 0(BASE)
++ | sltu TMP3, BASE, TMP1
++ | addi BASE, BASE, 8
++ | beqz TMP3, >3
++ | sd TMP0, 0(CARG3)
++ | addi CARG3, CARG3, 8
++ | j <2
++ |3:
++ | mv L:RA, L:CARG1
++ | jal ->vm_resume // (lua_State *L, TValue *base, 0, 0)
++ | // Returns thread status.
++ |4:
++ | ld TMP2, L:RA->base
++ | sltiu TMP1, CRET1, LUA_YIELD+1
++ | ld TMP3, L:RA->top
++ | li_vmstate INTERP
++ | ld BASE, L->base
++ | sd L, GL->cur_L
++ | st_vmstate
++ | sub RD, TMP3, TMP2
++ | beqz TMP1, >8
++ | ld TMP0, L->maxstack
++ | add TMP1, BASE, RD
++ | beqz RD, >6 // No results?
++ | add TMP3, TMP2, RD
++ | bltu TMP0, TMP1, >9 // Need to grow stack?
++ | sd TMP2, L:RA->top // Clear coroutine stack.
++ | mv TMP1, BASE
++ |5: // Move results from coroutine.
++ | ld TMP0, 0(TMP2)
++ | addi TMP2, TMP2, 8
++ | sd TMP0, 0(TMP1)
++ | addi TMP1, TMP1, 8
++ | bltu TMP2, TMP3, <5
++ |6:
++ |.if resume
++ | mov_true TMP1
++ | addi RD, RD, 16
++ |7:
++ | sd TMP1, -8(BASE) // Prepend true/false to results.
++ | addi RA, BASE, -8
++ |.else
++ | mv RA, BASE
++ | addi RD, RD, 8
++ |.endif
++ | andi TMP0, PC, FRAME_TYPE
++ | sd PC, SAVE_PC(sp)
++ | mv MULTRES, RD
++ |// bxeqz TMP0, ->BC_RET_Z // Local label 9 in use
++ | bnez TMP0, >6
++ | j ->BC_RET_Z
++ |6:
++ | j ->vm_return
++ |
++ |8: // Coroutine returned with error (at co->top-1).
++ |.if resume
++ | addi TMP3, TMP3, -8
++ | mov_false TMP1
++ | li RD, (2+1)*8
++ | ld TMP0, 0(TMP3)
++ | sd TMP3, L:RA->top // Remove error from coroutine stack.
++ | sd TMP0, 0(BASE) // Copy error message.
++ | j <7
++ |.else
++ | mv CARG1, L
++ | mv CARG2, L:RA
++ | // (lua_State *L, lua_State *co)
++ | call_intern ff_coroutine_wrap_aux, lj_ffh_coroutine_wrap_err
++ |.endif
++ |
++ |9: // Handle stack expansion on return from yield.
++ | mv CARG1, L
++ | srliw CARG2, RD, 3
++ | // (lua_State *L, int n)
++ |.if resume
++ | call_intern ff_coroutine_resume, lj_state_growstack
++ |.else
++ | call_intern ff_coroutine_wrap_aux, lj_state_growstack
++ |.endif
++ | mv CRET1, x0
++ | j <4
++ |.endmacro
++ |
++ | coroutine_resume_wrap 1 // coroutine.resume
++ | coroutine_resume_wrap 0 // coroutine.wrap
++ |
++ |.ffunc coroutine_yield
++ | ld TMP0, L->cframe
++ | add TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8, stored as L->top below.
++ | li CRET1, LUA_YIELD
++ | sd BASE, L->base
++ | andi TMP0, TMP0, CFRAME_RESUME
++ | sd TMP1, L->top
++ | bxeqz TMP0, ->fff_fallback // CFRAME_RESUME bit clear in L->cframe: fall back.
++ | sd x0, L->cframe // Clear L->cframe.
++ | sb CRET1, L->status // L->status = LUA_YIELD.
++ | j ->vm_leave_unw
++ |
++ |//-- Math library -------------------------------------------------------
++ |
++ |.macro math_round, func
++ |->ff_math_ .. func:
++ | ld CARG1, 0(BASE)
++ | gettp TMP0, CARG1
++ | bxeqz NARGS8:RC, ->fff_fallback // No arguments.
++ | bxeq TMP0, TISNUM, ->fff_restv // Tag == TISNUM: return the argument unchanged.
++ | fld FARG1, 0(BASE)
++ | bxgeu TMP0, TISNUM, ->fff_fallback // Tag >= TISNUM (unsigned): fall back.
++ | jal ->vm_ .. func // e.g. ->vm_floor for 'math_round floor'.
++ | j ->fff_resn
++ |.endmacro
++ |
++ | math_round floor
++ | math_round ceil
++ |
++ |.ffunc_1 math_abs
++ | gettp CARG2, CARG1
++ | addi TMP2, CARG2, -LJ_TISNUM // 0 iff tag == LJ_TISNUM.
++ | sext.w TMP1, CARG1
++ | bnez TMP2, >1 // Not the LJ_TISNUM tag: take the path at label 1.
++ | sraiw TMP0, TMP1, 31 // Extract sign (0 or -1) of the 32 bit value.
++ | xor TMP1, TMP1, TMP0
++ | sub CARG1, TMP1, TMP0 // |x| = (x ^ sign) - sign.
++ | slli TMP3, CARG1, 32 // Move bit 31 of the result into the sign bit.
++ | settp CARG1, TISNUM
++ | bxgez TMP3, ->fff_restv // Bit 31 clear: return the tagged 32 bit result.
++ | lui CARG1, 0x41e00 // 2^31 as a double.
++ | slli CARG1, CARG1, 32 // 0x41e00000 << 32 = 0x41e0000000000000.
++ | j ->fff_restv
++ |1:
++ | sltiu TMP2, CARG2, LJ_TISNUM // TMP2 = 1 if tag < LJ_TISNUM (unsigned).
++ | slli CARG1, CARG1, 1 // Shift bit 63 out ...
++ | srli CARG1, CARG1, 1 // ... and zero-fill: clears the sign bit.
++ | bxeqz TMP2, ->fff_fallback // Tag not below LJ_TISNUM: fall back.
++ |// fallthrough
++ |
++ |->fff_restv:
++ | // CARG1 = TValue result.
++ | ld PC, FRAME_PC(BASE)
++ | sd CARG1, -16(BASE) // Store the single result at BASE-16.
++ |->fff_res1:
++ | // RA = results, PC = return.
++ | li RD, (1+1)*8
++ |->fff_res:
++ | // RA = results, RD = (nresults+1)*8, PC = return.
++ | andi TMP0, PC, FRAME_TYPE
++ | mv MULTRES, RD
++ | addi RA, BASE, -16 // RA is (re)set to BASE-16 here in all cases.
++ | bxnez TMP0, ->vm_return // Frame type bits set in PC: leave via vm_return.
++ | lw INS, -4(PC) // Load the instruction word just before PC.
++ | decode_RB8 RB, INS
++ |5:
++ | bltu RD, RB, >6 // More results expected?
++ | decode_RA8a TMP0, INS
++ | ins_next1
++ | decode_RA8b TMP0
++ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
++ | sub BASE, RA, TMP0
++ | ins_next2
++ |
++ |6: // Fill up results with nil.
++ | add TMP1, RA, RD
++ | addi RD, RD, 8
++ | sd TISNIL, -8(TMP1) // Store nil at RA + old RD - 8.
++ | j <5
++ |
++ |.macro math_extern, func
++ | .ffunc_n math_ .. func
++ | call_extern func
++ | j ->fff_resn
++ |.endmacro
++ |
++ |.macro math_extern2, func
++ | .ffunc_nn math_ .. func
++ | call_extern func
++ | j ->fff_resn
++ |.endmacro
++ |
++ |.ffunc_n math_sqrt
++ | fsqrt.d FRET1, FARG1
++ |->fff_resn:
++ | ld PC, FRAME_PC(BASE)
++ | fsd FRET1, -16(BASE)
++ | j ->fff_res1
++ |
++ |.ffunc math_log
++ | li TMP1, 8
++ | ld CARG1, 0(BASE)
++ | fld FARG1, 0(BASE)
++ | bxne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument.
++ | checknum CARG1, ->fff_fallback
++ | call_extern log
++ | j ->fff_resn
++ |
++ | math_extern log10
++ | math_extern exp
++ | math_extern sin
++ | math_extern cos
++ | math_extern tan
++ | math_extern asin
++ | math_extern acos
++ | math_extern atan
++ | math_extern sinh
++ | math_extern cosh
++ | math_extern tanh
++ | math_extern2 pow
++ | math_extern2 atan2
++ | math_extern2 fmod
++ |
++ |.ffunc_2 math_ldexp
++ | checknum CARG1, ->fff_fallback
++ | checkint CARG2, ->fff_fallback
++ | fld FARG1, 0(BASE)
++ | lw CARG1, 8(BASE)
++ | call_extern ldexp // (double x, int exp)
++ | j ->fff_resn
++ |
++ |.ffunc_n math_frexp
++ | ld PC, FRAME_PC(BASE)
++ | addi CARG1, GL, offsetof(global_State, tmptv)
++ | call_extern frexp
++ | lw TMP1, GL->tmptv
++ | fcvt.d.w FARG2, TMP1
++ | fsd FRET1, -16(BASE)
++ | fsd FARG2, -8(BASE)
++ | li RD, (2+1)*8
++ | j ->fff_res
++ |
++ |.ffunc_n math_modf
++ | addi CARG1, BASE, -16
++ | ld PC, FRAME_PC(BASE)
++ | call_extern modf
++ | fsd FRET1, -8(BASE)
++ | li RD, (2+1)*8
++ | j ->fff_res
++ |
++ |.macro math_minmax, name, ismax, fpins
++ | .ffunc_1 name
++ | add RB, BASE, NARGS8:RC // RB = BASE + nargs*8 (end of arguments).
++ | addi RA, BASE, 8 // RA = address of the 2nd argument.
++ | checkint CARG1, >4
++ |1: // Handle integers.
++ | ld CARG2, 0(RA)
++ | bxeq RA, RB, ->fff_restv // All arguments consumed: return CARG1.
++ | sext.w CARG1, CARG1
++ | checkint CARG2, >3
++ | sext.w CARG2, CARG2
++ | slt TMP0, CARG1, CARG2 // TMP0 = (a < b), signed.
++ |.if ismax
++ | addi TMP1, TMP0, -1 // max: mask = 0 if a < b, else all ones.
++ |.else
++ | neg TMP1, TMP0 // min: mask = all ones if a < b, else 0.
++ |.endif
++ | and CARG1, CARG1, TMP1
++ | not TMP1, TMP1
++ | and CARG2, CARG2, TMP1
++ | or CARG1, CARG1, CARG2 // CARG1 = mask ? a : b.
++ | addi RA, RA, 8
++ | zext.w CARG1, CARG1
++ | settp_b CARG1, TISNUM // Re-tag the intermediate result.
++ | j <1
++ |3: // Convert intermediate result to number and continue below.
++ | fcvt.d.w FARG1, CARG1
++ | checknum CARG2, ->fff_fallback
++ | fld FARG2, 0(RA)
++ | j >6
++ |
++ |4:
++ | fld FARG1, 0(BASE)
++ | checknum CARG1, ->fff_fallback
++ |5: // Handle numbers.
++ | ld CARG2, 0(RA)
++ | fld FARG2, 0(RA)
++ | bxgeu RA, RB, ->fff_resn // All arguments consumed: return the FP result.
++ | checknum CARG2, >7
++ |6:
++ | fpins FARG1, FARG1, FARG2 // fmin.d / fmax.d as passed in 'fpins'.
++ | addi RA, RA, 8
++ | j <5
++ |7: // Convert integer to number and continue above.
++ | checkint CARG2, ->fff_fallback
++ | fcvt.d.w FARG2, CARG2
++ | j <6
++ |.endmacro
++ |
++ | math_minmax math_min, 0, fmin.d
++ | math_minmax math_max, 1, fmax.d
++ |
++ |//-- String library -----------------------------------------------------
++ |
++ |.ffunc string_byte // Only handle the 1-arg case here.
++ | ld CARG1, 0(BASE)
++ | gettp TMP0, CARG1
++ | xori TMP1, NARGS8:RC, 8 // 0 iff NARGS8 == 8.
++ | addi TMP0, TMP0, -LJ_TSTR // 0 iff tag == LJ_TSTR.
++ | or TMP1, TMP1, TMP0
++ | cleartp STR:CARG1
++ | bxnez TMP1, ->fff_fallback // Need exactly 1 string argument.
++ | lw TMP0, STR:CARG1->len
++ | ld PC, FRAME_PC(BASE)
++ | snez RD, TMP0
++ | lbu TMP2, STR:CARG1[1] // Access is always ok (NUL at end).
++ | addiw RD, RD, 1
++ | slliw RD, RD, 3 // RD = ((str->len != 0)+1)*8
++ | settp_b TMP2, TISNUM
++ | sd TMP2, -16(BASE) // Stored unconditionally; RD = (0+1)*8 when len == 0.
++ | j ->fff_res
++ |
++ |.ffunc string_char // Only handle the 1-arg case here.
++ | ffgccheck
++ | ld CARG1, 0(BASE)
++ | gettp TMP0, CARG1
++ | xori TMP1, NARGS8:RC, 8 // Need exactly 1 argument.
++ | addi TMP0, TMP0, -LJ_TISNUM // Integer.
++ | li TMP2, 255
++ | sext.w CARG1, CARG1
++ | or TMP1, TMP1, TMP0
++ | sltu TMP2, TMP2, CARG1 // !(255 < n).
++ | or TMP1, TMP1, TMP2
++ | li CARG3, 1
++ | bxnez TMP1, ->fff_fallback
++ | addi CARG2, sp, TMPD_OFS
++ | sb CARG1, TMPD(sp)
++ |->fff_newstr:
++ | sd BASE, L->base
++ | sd PC, SAVE_PC(sp)
++ | mv CARG1, L
++ | // (lua_State *L, const char *str, size_t l)
++ | call_intern fff_newstr, lj_str_new
++ | // Returns GCstr *.
++ | ld BASE, L->base
++ |->fff_resstr:
++ | li TMP1, LJ_TSTR
++ | settp CRET1, TMP1
++ | j ->fff_restv
++ |
++ |.ffunc string_sub
++ | ffgccheck
++ | ld CARG1, 0(BASE)
++ | ld CARG2, 8(BASE)
++ | ld CARG3, 16(BASE)
++ | addi TMP0, NARGS8:RC, -16
++ | gettp TMP1, CARG1
++ | bxltz TMP0, ->fff_fallback
++ | cleartp STR:CARG1, CARG1
++ | li CARG4, -1
++ | beqz TMP0, >1
++ | sext.w CARG4, CARG3
++ | checkint CARG3, ->fff_fallback
++ |1:
++ | checkint CARG2, ->fff_fallback
++ | addi TMP0, TMP1, -LJ_TSTR
++ | sext.w CARG3, CARG2
++ | bxnez TMP0, ->fff_fallback
++ | lw CARG2, STR:CARG1->len
++ | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
++ | addiw TMP0, CARG2, 1
++ | bgez CARG4, >2
++ | addw CARG4, CARG4, TMP0 // if (end < 0) end += len+1
++ |2:
++ | bgez CARG3, >3
++ | addw CARG3, CARG3, TMP0 // if (start < 0) start += len+1
++ |3:
++ | bgez CARG4, >4
++ | mv CARG4, x0 // if (end < 0) end = 0
++ |4:
++ | bgtz CARG3, >5
++ | li CARG3, 1 // if (start < 1) start = 1
++ |5:
++ | ble CARG4, CARG2, >6
++ | mv CARG4, CARG2 // if (end > len) end = len
++ |6:
++ | add CARG2, STR:CARG1, CARG3
++ | sub CARG3, CARG4, CARG3 // len = end - start
++ | addi CARG2, CARG2, sizeof(GCstr)-1
++ | addiw CARG3, CARG3, 1 // len += 1
++ | bxgez CARG3, ->fff_newstr
++ |->fff_emptystr: // Return empty string.
++ | li TMP1, LJ_TSTR
++ | addi STR:CARG1, GL, offsetof(global_State, strempty)
++ | settp CARG1, TMP1
++ | j ->fff_restv
++ |
++ |.macro ffstring_op, name
++ | .ffunc string_ .. name
++ | ffgccheck
++ | ld CARG2, 0(BASE)
++ | bxeqz NARGS8:RC, ->fff_fallback
++ | checkstr STR:CARG2, ->fff_fallback
++ | addi SBUF:CARG1, GL, offsetof(global_State, tmpbuf)
++ | ld TMP0, SBUF:CARG1->b
++ | sd L, SBUF:CARG1->L
++ | sd BASE, L->base
++ | sd TMP0, SBUF:CARG1->w
++ | sd PC, SAVE_PC(sp)
++ | call_intern ff_string_ .. name, lj_buf_putstr_ .. name
++ |// mv SBUF:CARG1, SBUF:CRET1
++ | call_intern ff_string_ .. name, lj_buf_tostr
++ | ld BASE, L->base
++ | j ->fff_resstr
++ |.endmacro
++ |
++ |ffstring_op reverse
++ |ffstring_op lower
++ |ffstring_op upper
++ |
++ |//-- Bit library --------------------------------------------------------
++ |
++ |->vm_tobit_fb: // Reached via 'jal'. In: TMP1 = 0 forces fallback. Out: CRET1.
++ | fld FARG1, 0(BASE)
++ | bxeqz TMP1, ->fff_fallback
++ | fadd.d FARG1, FARG1, TOBIT // Add the TOBIT constant to the double at 0(BASE).
++ | fmv.x.w CRET1, FARG1 // Take the low 32 bits of the sum.
++ | zext.w CRET1, CRET1
++ | ret
++ |
++ |.macro .ffunc_bit, name
++ | .ffunc_1 bit_..name
++ | gettp TMP0, CARG1
++ | zext.w CRET1, CARG1
++ | beq TMP0, TISNUM, >1
++ | sltiu TMP1, TMP0, LJ_TISNUM
++ | jal ->vm_tobit_fb
++ |1:
++ |.endmacro
++ |
++ |.macro .ffunc_bit_op, name, bins
++ | .ffunc_bit name
++ | addi TMP2, BASE, 8
++ | add TMP3, BASE, NARGS8:RC
++ |1:
++ | ld TMP1, 0(TMP2)
++ | bxeq TMP2, TMP3, ->fff_resi
++ | gettp TMP0, TMP1
++ | addi TMP2, TMP2, 8
++ | bne TMP0, TISNUM, >2
++ | zext.w TMP1, TMP1
++ | bins CRET1, CRET1, TMP1
++ | j <1
++ |2:
++ | fld FARG1, -8(TMP2)
++ | sltiu TMP0, TMP0, LJ_TISNUM
++ | fadd.d FARG1, FARG1, TOBIT
++ | bxeqz TMP0, ->fff_fallback
++ | fmv.x.w TMP1, FARG1
++ | zext.w TMP1, TMP1
++ | bins CRET1, CRET1, TMP1
++ | j <1
++ |.endmacro
++ |
++ |.ffunc_bit_op band, and
++ |.ffunc_bit_op bor, or
++ |.ffunc_bit_op bxor, xor
++ |
++ |.ffunc_bit bswap
++ | srliw CARG2, CARG1, 8
++ | lui CARG3, 16 // CARG3 = 16 << 12 = 0x10000 ...
++ | addiw CARG3, CARG3, -256 // ... - 0x100 = 0xff00.
++ | and CARG2, CARG2, CARG3 // (x >> 8) & 0xff00
++ | srliw CARG3, CARG1, 24
++ | or CARG2, CARG2, CARG3 // | (x >> 24)
++ | slli CARG3, CARG1, 8
++ | lui CARG4, 0x00ff0 // CARG4 = 0xff0 << 12 = 0x00ff0000.
++ | and CARG3, CARG3, CARG4 // (x << 8) & 0x00ff0000
++ | slli CARG1, CARG1, 24
++ | or CARG1, CARG1, CARG3
++ | or CARG1, CARG1, CARG2
++ | slli CARG1, CARG1, 32 // Shift up and back down by 32 ...
++ | srli CARG1, CARG1, 32 // ... to clear the upper 32 bits.
++ | j ->fff_resi
++ |
++ |.ffunc_bit tobit
++ |->fff_resi:
++ | settp CARG1, TISNUM // CARG1 = CRET1
++ | j ->fff_restv
++ |
++ |.ffunc_bit bnot
++ | not CRET1, CRET1
++ | zext.w CRET1, CRET1
++ | j ->fff_resi
++ |
++ |.macro .ffunc_bit_sh, name, shins
++ | .ffunc_2 bit_..name
++ | gettp TMP0, CARG1
++ | beq TMP0, TISNUM, >1
++ | sltiu TMP1, TMP0, LJ_TISNUM
++ | jal ->vm_tobit_fb
++ |// mv CARG1, CRET1 // CARG1 = CRET1
++ |1:
++ | gettp TMP0, CARG2
++ | zext.w CARG2, CARG2
++ | bxne TMP0, TISNUM, ->fff_fallback
++ | sext.w CARG1, CARG1
++ | shins CRET1, CARG1, CARG2
++ | zext.w CRET1, CRET1
++ | j ->fff_resi
++ |.endmacro
++ |
++ |.ffunc_bit_sh lshift, sllw
++ |.ffunc_bit_sh rshift, srlw
++ |.ffunc_bit_sh arshift, sraw
++ |
++ |.macro .ffunc_bit_rot, name, rotinsa, rotinsb
++ | .ffunc_2 bit_..name
++ | gettp TMP0, CARG1
++ | beq TMP0, TISNUM, >1
++ | sltiu TMP1, TMP0, LJ_TISNUM
++ | jal ->vm_tobit_fb
++ |// mv CARG1, CRET1 // CARG1 = CRET1
++ |1:
++ | gettp TMP0, CARG2
++ | zext.w CARG2, CARG2
++ | bxne TMP0, TISNUM, ->fff_fallback
++ | sext.w CARG1, CARG1
++ | neg TMP2, CARG2
++ | rotinsa TMP1, CARG1, CARG2
++ | rotinsb TMP0, CARG1, TMP2
++ | or CRET1, TMP0, TMP1
++ | zext.w CRET1, CRET1
++ | j ->fff_resi
++ |.endmacro
++ |
++ |.ffunc_bit_rot rol, sllw, srlw
++ |.ffunc_bit_rot ror, srlw, sllw
++ |
++ |//-----------------------------------------------------------------------
++ |
++ |->fff_fallback: // Call fast function fallback handler.
++ | // BASE = new base, RB = CFUNC, RC = nargs*8
++ | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC.
++ | ld CARG3, CFUNC:RB->f
++ | add TMP1, BASE, NARGS8:RC
++ | sd BASE, L->base
++ | addi TMP0, TMP1, 8*LUA_MINSTACK
++ | ld TMP2, L->maxstack
++ | sd PC, SAVE_PC(sp) // Redundant (but a defined value).
++ | sd TMP1, L->top
++ | mv CARG1, L
++ | bltu TMP2, TMP0, >5 // Need to grow stack.
++ | jalr CARG3 // (lua_State *L)
++ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
++ | ld BASE, L->base
++ | slliw RD, CRET1, 3
++ | bxgtz CRET1, ->fff_res // Returned nresults+1?
++ |1: // Returned 0 or -1: retry fast path.
++ | ld LFUNC:RB, FRAME_FUNC(BASE)
++ | ld TMP0, L->top
++ | sub NARGS8:RC, TMP0, BASE
++ | cleartp LFUNC:RB
++ | bxnez CRET1, ->vm_call_tail // Returned -1?
++ | ins_callt // Returned 0: retry fast path.
++ |
++ |// Reconstruct previous base for vmeta_call during tailcall.
++ |->vm_call_tail:
++ | andi TMP0, PC, FRAME_TYPE
++ | andi TMP1, PC, ~FRAME_TYPEP // TODO
++ | bnez TMP0, >3
++ | lbu TMP1, OFS_RA(PC)
++ | slliw TMP1, TMP1, 3
++ | addiw TMP1, TMP1, 16
++ |3:
++ | sub TMP2, BASE, TMP1
++ | j ->vm_call_dispatch // Resolve again for tailcall.
++ |
++ |5: // Grow stack for fallback handler.
++ | li CARG2, LUA_MINSTACK
++ | mv CARG1, L
++ | call_intern vm_call_tail, lj_state_growstack // (lua_State *L, int n)
++ | ld BASE, L->base
++ | mv CRET1, x0 // Set zero-flag to force retry.
++ | j <1
++ |
++ |->fff_gcstep: // Call GC step function.
++ | // BASE = new base, RC = nargs*8
++ | mv MULTRES, ra // Keep the return address in MULTRES; restored before 'ret'.
++ | add TMP0, BASE, NARGS8:RC // Calculate L->top.
++ | sd BASE, L->base
++ | sd PC, SAVE_PC(sp) // Redundant (but a defined value).
++ | mv CARG1, L
++ | sd TMP0, L->top
++ | call_intern fff_gc_step, lj_gc_step // (lua_State *L)
++ | ld BASE, L->base
++ | mv ra, MULTRES // Help return address predictor.
++ | ld TMP0, L->top
++ | ld CFUNC:RB, FRAME_FUNC(BASE) // Reload RB from the frame after the call.
++ | cleartp CFUNC:RB
++ | sub NARGS8:RC, TMP0, BASE // Recompute nargs*8 = L->top - BASE.
++ | ret
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Special dispatch targets -------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_record: // Dispatch target for recording phase.
++ |.if JIT
++ | lbu TMP3, GL->hookmask
++ | andi TMP1, TMP3, HOOK_VMEVENT // No recording while in vmevent.
++ | bnez TMP1, >5
++ | // Decrement the hookcount for consistency, but always do the call.
++ | lw TMP2, GL->hookcount
++ | andi TMP1, TMP3, HOOK_ACTIVE
++ | bnez TMP1, >1
++ | addiw TMP2, TMP2, -1
++ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
++ | beqz TMP1, >1
++ | sw TMP2, GL->hookcount
++ | j >1
++ |.endif
++ |
++ |->vm_rethook: // Dispatch target for return hooks.
++ | lbu TMP3, GL->hookmask
++ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active?
++ | beqz TMP1, >1
++ |5: // Re-dispatch to static ins.
++ | ld TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4.
++ | jr TMP1
++ |
++ |->vm_inshook: // Dispatch target for instr/line hooks.
++ | lbu TMP3, GL->hookmask
++ | lw TMP2, GL->hookcount
++ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active?
++ | bnez TMP1, <5
++ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
++ | addiw TMP2, TMP2, -1
++ | beqz TMP1, <5
++ | sw TMP2, GL->hookcount
++ | beqz TMP2, >1
++ | andi TMP1, TMP3, LUA_MASKLINE
++ | beqz TMP1, <5
++ |1:
++ | sw MULTRES, TMPD(sp)
++ | mv CARG2, PC
++ | sd BASE, L->base
++ | mv CARG1, L
++ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
++ | call_intern vm_inshook, lj_dispatch_ins // (lua_State *L, const BCIns *pc)
++ |3:
++ | ld BASE, L->base
++ |4: // Re-dispatch to static ins.
++ | lw INS, -4(PC)
++ | decode_OP8 TMP1, INS
++ | add TMP0, DISPATCH, TMP1
++ | decode_RD8a RD, INS
++ | ld TMP1, GG_DISP2STATIC(TMP0)
++ | decode_RA8 RA, INS
++ | decode_RD8b RD
++ | jr TMP1
++ |
++ |->cont_hook: // Continue from hook yield.
++ | addi PC, PC, 4
++ | lw MULTRES, -24(RB) // Restore MULTRES for *M ins.
++ | j <4
++ |
++ |->vm_hotloop: // Hot loop counter underflow.
++ |.if JIT
++ | ld LFUNC:TMP1, FRAME_FUNC(BASE)
++ | addi CARG1, GL, GG_G2J
++ | cleartp LFUNC:TMP1
++ | sd PC, SAVE_PC(sp)
++ | ld TMP1, LFUNC:TMP1->pc
++ | mv CARG2, PC
++ | sd L, (offsetof(jit_State, L))(CARG1)
++ | lbu TMP1, PC2PROTO(framesize)(TMP1)
++ | sd BASE, L->base
++ | slli TMP1, TMP1, 3
++ | add TMP1, BASE, TMP1
++ | sd TMP1, L->top
++ | call_intern vm_hotloop, lj_trace_hot // (jit_State *J, const BCIns *pc)
++ | j <3
++ |.endif
++ |
++ |
++ |->vm_callhook: // Dispatch target for call hooks.
++ | mv CARG2, PC
++ |.if JIT
++ | j >1
++ |.endif
++ |
++ |->vm_hotcall: // Hot call counter underflow.
++ |.if JIT
++ | ori CARG2, PC, 1
++ |1:
++ |.endif
++ | add TMP0, BASE, RC
++ | sd PC, SAVE_PC(sp)
++ | sd BASE, L->base
++ | sub RA, RA, BASE
++ | sd TMP0, L->top
++ | mv CARG1, L
++ | call_intern vm_hotcall, lj_dispatch_call // (lua_State *L, const BCIns *pc)
++ | // Returns ASMFunction.
++ | ld BASE, L->base
++ | ld TMP0, L->top
++ | sd x0, SAVE_PC(sp) // Invalidate for subsequent line hook.
++ | add RA, BASE, RA
++ | sub NARGS8:RC, TMP0, BASE
++ | ld LFUNC:RB, FRAME_FUNC(BASE)
++ | cleartp LFUNC:RB
++ | lw INS, -4(PC)
++ | jr CRET1
++ |
++ |->cont_stitch: // Trace stitching.
++ |.if JIT
++ | // RA = resultptr, RB = meta base
++ | lw INS, -4(PC)
++ | ld TRACE:TMP2, -40(RB) // Save previous trace.
++ | decode_RA8 RC, INS
++ | addi TMP1, MULTRES, -8
++ | cleartp TRACE:TMP2
++ | add RC, BASE, RC // Call base.
++ | beqz TMP1, >2
++ |1: // Move results down.
++ | ld CARG1, 0(RA)
++ | addi TMP1, TMP1, -8
++ | addi RA, RA, 8
++ | sd CARG1, 0(RC)
++ | addi RC, RC, 8
++ | bnez TMP1, <1
++ |2:
++ | decode_RA8 RA, INS
++ | decode_RB8 RB, INS
++ | add RA, RA, RB
++ | add RA, BASE, RA
++ |3:
++ | bltu RC, RA, >8 // More results wanted?
++ |
++ | lhu TMP3, TRACE:TMP2->traceno
++ | lhu RD, TRACE:TMP2->link
++ | bxeq RD, TMP3, ->cont_nop // Blacklisted.
++ | slliw RD, RD, 3
++ | bxnez RD, =>BC_JLOOP // Jump to stitched trace.
++ |
++ | // Stitch a new trace to the previous trace.
++ | addi CARG1, GL, GG_G2J
++ | // addi CARG2, CARG1, 1 // We don't care what's on the verge.
++ | addi CARG2, CARG1, 2047 // jit_State too large.
++ | sw TMP3, (offsetof(jit_State, exitno)-2047)(CARG2)
++ | sd L, (offsetof(jit_State, L)-2047)(CARG2)
++ | sd BASE, L->base
++ | mv CARG2, PC
++ | // (jit_State *J, const BCIns *pc)
++ | call_intern cont_stitch, lj_dispatch_stitch
++ | ld BASE, L->base
++ | j ->cont_nop
++ |
++ |8:
++ | sd TISNIL, 0(RC)
++ | addi RC, RC, 8
++ | j <3
++ |.endif
++ |
++ |->vm_profhook: // Dispatch target for profiler hook.
++#if LJ_HASPROFILE
++ | mv CARG1, L
++ | mv CARG2, PC
++ | sd BASE, L->base
++ | sw MULTRES, TMPD(sp)
++ | // (lua_State *L, const BCIns *pc)
++ | call_intern vm_profhook, lj_dispatch_profile
++ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
++ | addi PC, PC, -4
++ | ld BASE, L->base
++ | j ->cont_nop
++#endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Trace exit handler -------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |.macro savex_, a, b
++ | fsd f..a, a*8(sp)
++ | fsd f..b, b*8(sp)
++ | sd x..a, 32*8+a*8(sp)
++ | sd x..b, 32*8+b*8(sp)
++ |.endmacro
++ |
++ |->vm_exit_handler:
++ |.if JIT
++ | addi sp, sp, -(32*8+32*8)
++ | savex_ 0, 5
++ | savex_ 6, 7
++ | savex_ 8, 9
++ | savex_ 10, 11
++ | savex_ 12, 13
++ | savex_ 14, 15
++ | savex_ 16, 17
++ | savex_ 18, 19
++ | savex_ 20, 21
++ | savex_ 22, 23
++ | savex_ 24, 25
++ | savex_ 26, 27
++ | savex_ 28, 29
++ | savex_ 30, 31
++ | fsd f1, 1*8(sp)
++ | fsd f2, 2*8(sp)
++ | fsd f3, 3*8(sp)
++ | fsd f4, 4*8(sp)
++ | sd x0, 32*8+1*8(sp) // Clear RID_TMP.
++ | ld TMP1, 32*8+32*8(sp) // Load exit pc.
++ | addi TMP2, sp, 32*8+32*8 // Recompute original value of sp.
++ | addxi DISPATCH, GL, GG_G2DISP
++ | sd TMP2, 32*8+2*8(sp) // Store sp in RID_SP
++ | addi CARG1, GL, GG_G2J
++ | li_vmstate EXIT
++ | // addi CARG2, CARG1, 1 // We don't care what's on the verge.
++ | addi CARG2, CARG1, 2047 // jit_State too large.
++ | sub TMP1, TMP1, ra
++ | lw TMP2, 0(ra) // Load trace number.
++ | st_vmstate
++ | srli TMP1, TMP1, 2
++ | ld L, GL->cur_L
++ | ld BASE, GL->jit_base
++ | srli TMP2, TMP2, 12
++ | addi TMP1, TMP1, -2
++ | sd L, (offsetof(jit_State, L)-2047)(CARG2)
++ | sw TMP2, (offsetof(jit_State, parent)-2047)(CARG2) // Store trace number.
++ | sd BASE, L->base
++ | sw TMP1, (offsetof(jit_State, exitno)-2047)(CARG2) // Store exit number.
++ | sd x0, GL->jit_base
++ | mv CARG2, sp
++ | call_intern vm_exit_handler, lj_trace_exit // (jit_State *J, ExitState *ex)
++ | // Returns MULTRES (unscaled) or negated error code.
++ | ld TMP1, L->cframe
++ | ld BASE, L->base
++ | andi sp, TMP1, CFRAME_RAWMASK
++ | ld PC, SAVE_PC(sp) // Get SAVE_PC.
++ | sd L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield).
++ | j >1
++ |.endif
++ |
++ |->vm_exit_interp:
++ |.if JIT
++ | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
++ | ld L, SAVE_L(sp)
++ | addxi DISPATCH, GL, GG_G2DISP
++ | sd BASE, L->base
++ |1:
++ | ld LFUNC:RB, FRAME_FUNC(BASE)
++ | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit.
++ | beqz TMP0, >9
++ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
++ | slli MULTRES, CRET1, 3
++ | cleartp LFUNC:RB
++ | sw MULTRES, TMPD(sp)
++ | li TISNIL, LJ_TNIL
++ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
++ | slli TMP3, TMP3, 32
++ | ld TMP1, LFUNC:RB->pc
++ | sd x0, GL->jit_base
++ | ld KBASE, PC2PROTO(k)(TMP1)
++ | fmv.d.x TOBIT, TMP3
++ | // Modified copy of ins_next which handles function header dispatch, too.
++ | lw INS, 0(PC)
++ | addi PC, PC, 4
++ | addiw CRET1, CRET1, 17 // Static dispatch?
++ | // Assumes TISNIL == ~LJ_VMST_INTERP == -1
++ | sw TISNIL, GL->vmstate
++ | decode_RD8a RD, INS
++ | beqz CRET1, >5
++ | decode_OP8 TMP1, INS
++ | add TMP0, DISPATCH, TMP1
++ | sltiu TMP2, TMP1, BC_FUNCF*8
++ | ld TMP3, 0(TMP0)
++ | decode_RA8 RA, INS
++ | beqz TMP2, >2
++ | decode_RD8b RD
++ | jr TMP3
++ |2:
++ | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function?
++ | ld TMP1, FRAME_PC(BASE)
++ | bnez TMP2, >3
++ | // Check frame below fast function.
++ | andi TMP0, TMP1, FRAME_TYPE
++ | bnez TMP0, >3 // Trace stitching continuation?
++ | // Otherwise set KBASE for Lua function below fast function.
++ | lw TMP2, -4(TMP1)
++ | decode_RA8 TMP0, TMP2
++ | sub TMP1, BASE, TMP0
++ | ld LFUNC:TMP2, -32(TMP1)
++ | cleartp LFUNC:TMP2
++ | ld TMP1, LFUNC:TMP2->pc
++ | ld KBASE, PC2PROTO(k)(TMP1)
++ |3:
++ | addi RC, MULTRES, -8
++ | add RA, RA, BASE
++ | jr TMP3
++ |
++ |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
++ | ld TMP0, GL_J(trace)(GL)
++ | decode_RD8b RD
++ | add TMP0, TMP0, RD
++ | ld TRACE:TMP2, 0(TMP0)
++ | lw INS, TRACE:TMP2->startins
++ | decode_OP8 TMP1, INS
++ | add TMP0, DISPATCH, TMP1
++ | decode_RD8a RD, INS
++ | ld TMP3, GG_DISP2STATIC(TMP0)
++ | decode_RA8a RA, INS
++ | decode_RD8b RD
++ | decode_RA8b RA
++ | jr TMP3
++ |
++ |9: // Rethrow error from the right C frame.
++ | negw CARG2, CRET1
++ | mv CARG1, L
++ | call_intern vm_exit_interp, lj_err_trace // (lua_State *L, int errcode)
++ |.endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Math helper functions ----------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |
++ |// Hard-float round to integer.
++ |// Modifies TMP0, TMP1, FARG1, FARG5, FTMP1, FTMP3, FTMP4
++ |.macro vm_round_hf, func
++ | lui TMP0, 0x43300 // Hiword of 2^52 (double).
++ | slli TMP0, TMP0, 32
++ | fmv.d.x FARG5, TMP0
++ | fabs.d FTMP4, FARG1 // |x|
++ | fmv.x.d TMP1, FARG1
++ | flt.d TMP0, FTMP4, FARG5
++ | fadd.d FTMP3, FTMP4, FARG5 // (|x| + 2^52) - 2^52
++ | fsub.d FTMP3, FTMP3, FARG5
++ | beqz TMP0, >5 // Truncate only if |x| < 2^52.
++ | sltz TMP1, TMP1
++ |.if "func" == "ceil"
++ | lui TMP0, 0xbff00 // Hiword of -1 (double).
++ |.else
++ | lui TMP0, 0x3ff00 // Hiword of +1 (double).
++ |.endif
++ |.if "func" == "trunc"
++ | slli TMP0, TMP0, 32
++ | fmv.d.x FARG5, TMP0
++ | flt.d TMP0, FTMP4, FRET1 // |x| < result? NOTE(review): rounded |x| was written to FTMP3 above, not FRET1 -- confirm operand.
++ | fsub.d FTMP4, FTMP3, FARG5
++ | beqz TMP0, >1
++ | fmv.d FTMP1, FTMP4
++ | j >2
++ |1:
++ | fmv.d FTMP1, FTMP3
++ |2:
++ | fneg.d FTMP4, FTMP1
++ | beqz TMP1, >3
++ | fmv.d FTMP3, FTMP4
++ | j >4
++ |3:
++ | fmv.d FTMP3, FTMP1
++ |4:
++ | ret
++ |.else
++ | fneg.d FTMP4, FTMP3
++ | slli TMP0, TMP0, 32
++ | fmv.d.x FARG5, TMP0
++ | beqz TMP1, >1
++ | fmv.d FTMP1, FTMP4
++ | j >2
++ |1:
++ | fmv.d FTMP1, FTMP3
++ |2:
++ |.if "func" == "ceil"
++ | flt.d TMP0, FTMP1, FARG1 // x > result?
++ |.else
++ | flt.d TMP0, FARG1, FTMP1 // x < result?
++ |.endif
++ | beqz TMP0, >3
++ | fsub.d FTMP4, FTMP1, FARG5 // If yes, subtract +-1.
++ | fmv.d FRET1, FTMP4
++ | j >4
++ |3:
++ | fmv.d FRET1, FTMP1
++ |4:
++ | ret
++ |.endif
++ |5:
++ | fmv.d FTMP3, FARG1
++ | ret
++ |.endmacro
++ |
++ |
++ |->vm_floor:
++ | vm_round_hf floor
++ |->vm_ceil:
++ | vm_round_hf ceil
++ |->vm_trunc:
++ |.if JIT
++ | vm_round_hf trunc
++ |.endif
++ |
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Miscellaneous functions --------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |// void lj_vm_fence_rw_rw()
++ |->vm_fence_rw_rw:
++ |.if JIT or FFI
++ | .long 0x0330000f
++ | ret
++ |.endif
++ |
++ |.define NEXT_TAB, TAB:CARG1
++ |.define NEXT_IDX, CARG2
++ |.define NEXT_ASIZE, CARG3
++ |.define NEXT_NIL, CARG4
++ |.define NEXT_TMP0, TMP0
++ |.define NEXT_TMP1, TMP1
++ |.define NEXT_TMP2, TMP2
++ |.define NEXT_RES_VK, CRET1
++ |.define NEXT_RES_IDX, CRET2
++ |.define NEXT_RES_PTR, sp
++ |.define NEXT_RES_VAL, 0(sp)
++ |.define NEXT_RES_KEY, 8(sp)
++ |
++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
++ |// Next idx returned in CRET2.
++ |->vm_next:
++ |.if JIT
++ | lw NEXT_ASIZE, NEXT_TAB->asize
++ | ld NEXT_TMP0, NEXT_TAB->array
++ | li NEXT_NIL, LJ_TNIL
++ |1: // Traverse array part.
++ | bgeu NEXT_IDX, NEXT_ASIZE, >5
++ | slliw NEXT_TMP1, NEXT_IDX, 3
++ | add NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
++ | li TMP3, LJ_TISNUM
++ | ld NEXT_TMP2, 0(NEXT_TMP1)
++ | slli TMP3, TMP3, 47
++ | or NEXT_TMP1, NEXT_IDX, TMP3
++ | addiw NEXT_IDX, NEXT_IDX, 1
++ | beq NEXT_TMP2, NEXT_NIL, <1
++ | sd NEXT_TMP2, NEXT_RES_VAL
++ | sd NEXT_TMP1, NEXT_RES_KEY
++ | mv NEXT_RES_VK, NEXT_RES_PTR
++ | mv NEXT_RES_IDX, NEXT_IDX
++ | ret
++ |
++ |5: // Traverse hash part.
++ | subw NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
++ | lw NEXT_TMP0, NEXT_TAB->hmask
++ | ld NODE:NEXT_RES_VK, NEXT_TAB->node
++ | slliw NEXT_TMP2, NEXT_RES_IDX, 5
++ | slliw TMP3, NEXT_RES_IDX, 3
++ | subw TMP3, NEXT_TMP2, TMP3
++ | add NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, TMP3
++ |6:
++ | bltu NEXT_TMP0, NEXT_RES_IDX, >8
++ | ld NEXT_TMP2, NODE:NEXT_RES_VK->val
++ | addiw NEXT_RES_IDX, NEXT_RES_IDX, 1
++ | bne NEXT_TMP2, NEXT_NIL, >9
++ | // Skip holes in hash part.
++ | addi NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
++ | j <6
++ |
++ |8: // End of iteration. Set the key to nil (not the value).
++ | sd NEXT_NIL, NEXT_RES_KEY
++ | mv NEXT_RES_VK, NEXT_RES_PTR
++ |9:
++ | addw NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
++ | ret
++ |.endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- FFI helper functions -----------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |// Handler for callback functions. Callback slot number in x5, g in x7.
++ |->vm_ffi_callback:
++ |.if FFI
++ |.type CTSTATE, CTState, PC
++ | saveregs
++ | ld CTSTATE, GL:x7->ctype_state
++ | mv GL, x7
++ | addxi DISPATCH, x7, GG_G2DISP
++ | srli x5, x5, 12
++ | sw x5, CTSTATE->cb.slot
++ | sd CARG1, CTSTATE->cb.gpr[0]
++ | fsd FARG1, CTSTATE->cb.fpr[0]
++ | sd CARG2, CTSTATE->cb.gpr[1]
++ | fsd FARG2, CTSTATE->cb.fpr[1]
++ | sd CARG3, CTSTATE->cb.gpr[2]
++ | fsd FARG3, CTSTATE->cb.fpr[2]
++ | sd CARG4, CTSTATE->cb.gpr[3]
++ | fsd FARG4, CTSTATE->cb.fpr[3]
++ | sd CARG5, CTSTATE->cb.gpr[4]
++ | fsd FARG5, CTSTATE->cb.fpr[4]
++ | sd CARG6, CTSTATE->cb.gpr[5]
++ | fsd FARG6, CTSTATE->cb.fpr[5]
++ | sd CARG7, CTSTATE->cb.gpr[6]
++ | fsd FARG7, CTSTATE->cb.fpr[6]
++ | sd CARG8, CTSTATE->cb.gpr[7]
++ | fsd FARG8, CTSTATE->cb.fpr[7]
++ | addi TMP0, sp, CFRAME_SPACE
++ | sd TMP0, CTSTATE->cb.stack
++ | sd x0, SAVE_PC(sp) // Any value outside of bytecode is ok.
++ | mv CARG1, CTSTATE
++ | mv CARG2, sp
++ | call_intern vm_ffi_callback, lj_ccallback_enter // (CTState *cts, void *cf)
++ | // Returns lua_State *.
++ | ld BASE, L:CRET1->base
++ | ld RC, L:CRET1->top
++ | mv L, CRET1
++ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
++ | ld LFUNC:RB, FRAME_FUNC(BASE)
++ | li TISNIL, LJ_TNIL
++ | li TISNUM, LJ_TISNUM
++ | slli TMP3, TMP3, 32
++ | li_vmstate INTERP
++ | subw RC, RC, BASE
++ | cleartp LFUNC:RB
++ | st_vmstate
++ | fmv.d.x TOBIT, TMP3
++ | ins_callt
++ |.endif
++ |
++ |->cont_ffi_callback: // Return from FFI callback.
++ |.if FFI
++ | ld CTSTATE, GL->ctype_state
++ | sd BASE, L->base
++ | sd RB, L->top
++ | sd L, CTSTATE->L
++ | mv CARG1, CTSTATE
++ | mv CARG2, RA
++ | // (CTState *cts, TValue *o)
++ | call_intern cont_ffi_callback, lj_ccallback_leave
++ | fld FRET1, CTSTATE->cb.fpr[0]
++ | ld CRET1, CTSTATE->cb.gpr[0]
++ | fld FRET2, CTSTATE->cb.fpr[1]
++ | ld CRET2, CTSTATE->cb.gpr[1]
++ | j ->vm_leave_unw
++ |.endif
++ |
++ |->vm_ffi_call: // Call C function via FFI.
++ | // Caveat: needs special frame unwinding, see below.
++ |.if FFI
++ | .type CCSTATE, CCallState, CARG1
++ | lw TMP1, CCSTATE->spadj
++ | lbu CARG2, CCSTATE->nsp
++ | lbu CARG3, CCSTATE->nfpr
++ | mv TMP2, sp
++ | sub sp, sp, TMP1
++ | sd ra, -8(TMP2)
++ | sd x18, -16(TMP2)
++ | sd CCSTATE, -24(TMP2)
++ | mv x18, TMP2
++ | addi TMP1, CCSTATE, offsetof(CCallState, stack)
++ | mv TMP2, sp
++ | add TMP3, TMP1, CARG2
++ | beqz CARG2, >2
++ |1:
++ | ld TMP0, 0(TMP1)
++ | addi TMP1, TMP1, 8
++ | sd TMP0, 0(TMP2)
++ | addi TMP2, TMP2, 8
++ | bltu TMP1, TMP3, <1
++ |2:
++ | beqz CARG3, >3
++ | fld FARG1, CCSTATE->fpr[0]
++ | fld FARG2, CCSTATE->fpr[1]
++ | fld FARG3, CCSTATE->fpr[2]
++ | fld FARG4, CCSTATE->fpr[3]
++ | fld FARG5, CCSTATE->fpr[4]
++ | fld FARG6, CCSTATE->fpr[5]
++ | fld FARG7, CCSTATE->fpr[6]
++ | fld FARG8, CCSTATE->fpr[7]
++ |3:
++ | ld CFUNCADDR, CCSTATE->func
++ | ld CARG2, CCSTATE->gpr[1]
++ | ld CARG3, CCSTATE->gpr[2]
++ | ld CARG4, CCSTATE->gpr[3]
++ | ld CARG5, CCSTATE->gpr[4]
++ | ld CARG6, CCSTATE->gpr[5]
++ | ld CARG7, CCSTATE->gpr[6]
++ | ld CARG8, CCSTATE->gpr[7]
++ | ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
++ | jalr CFUNCADDR
++ | ld CCSTATE:TMP1, -24(x18)
++ | ld TMP0, -16(x18)
++ | ld ra, -8(x18)
++ | sd CRET1, CCSTATE:TMP1->gpr[0]
++ | sd CRET2, CCSTATE:TMP1->gpr[1]
++ | fsd FRET1, CCSTATE:TMP1->fpr[0]
++ | fsd FRET2, CCSTATE:TMP1->fpr[1]
++ | mv sp, x18
++ | mv x18, TMP0
++ | ret
++ |.endif
++ |// Note: vm_ffi_call must be the last function in this object file!
++ |
++ |//-----------------------------------------------------------------------
++}
++
++/* Generate the code for a single instruction. */
++static void build_ins(BuildCtx *ctx, BCOp op, int defop)
++{
++ int vk = 0;
++ |=>defop:
++
++ switch (op) {
++
++ /* -- Comparison ops ---------------------------------------------------- */
++
++ /* Remember: all ops branch for a true comparison, fall through otherwise. */
++
++ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
++ | // RA = src1*8, RD = src2*8, JMP with RD = target
++ | add RA, BASE, RA
++ | add RD, BASE, RD
++ if (op == BC_ISLT || op == BC_ISGE) {
++ | ld CARG1, 0(RA)
++ | ld CARG2, 0(RD)
++ | gettp CARG3, CARG1
++ | gettp CARG4, CARG2
++ } else {
++ | ld CARG2, 0(RA)
++ | ld CARG1, 0(RD)
++ | gettp CARG3, CARG2
++ | gettp CARG4, CARG1
++ }
++ | lhu TMP2, OFS_RD(PC) // TMP2=jump
++ | addi PC, PC, 4
++ | bne CARG3, TISNUM, >2
++ | decode_BC4b TMP2
++ | bne CARG4, TISNUM, >5
++ | sext.w CARG1, CARG1
++ | sext.w CARG2, CARG2
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ | slt TMP1, CARG1, CARG2
++ | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
++ if (op == BC_ISLT || op == BC_ISGT) {
++ | neg TMP1, TMP1
++ } else {
++ | addi TMP1, TMP1, -1
++ }
++ | and TMP2, TMP2, TMP1
++ |1:
++ | add PC, PC, TMP2
++ | ins_next
++ |
++ |2: // RA is not an integer.
++ | sltiu TMP1, CARG3, LJ_TISNUM
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ | bxeqz TMP1, ->vmeta_comp
++ | sltiu TMP1, CARG4, LJ_TISNUM
++ | decode_BC4b TMP2
++ | beqz TMP1, >4
++ | fmv.d.x FTMP0, CARG1
++ | fmv.d.x FTMP2, CARG2
++ |3: // RA and RD are both numbers.
++ | addw TMP2, TMP2, TMP3
++ if (op == BC_ISLT) {
++ | flt.d TMP3, FTMP0, FTMP2
++ | neg TMP3, TMP3
++ } else if (op == BC_ISGE) {
++ | flt.d TMP3, FTMP0, FTMP2
++ | addi TMP3, TMP3, -1
++ } else if (op == BC_ISLE) {
++ | fle.d TMP3, FTMP2, FTMP0
++ | neg TMP3, TMP3
++ } else if (op == BC_ISGT) {
++ | fle.d TMP3, FTMP2, FTMP0
++ | addi TMP3, TMP3, -1
++ }
++ | and TMP2, TMP2, TMP3
++ | j <1
++ |
++ |4: // RA is a number, RD is not a number.
++ | // RA is a number, RD is an integer. Convert RD to a number.
++ | bxne CARG4, TISNUM, ->vmeta_comp
++ if (op == BC_ISLT || op == BC_ISGE) {
++ | fcvt.d.w FTMP2, CARG2
++ | fmv.d.x FTMP0, CARG1
++ } else {
++ | fcvt.d.w FTMP0, CARG1
++ | fmv.d.x FTMP2, CARG2
++ }
++ | j <3
++ |
++ |5: // RA is an integer, RD is not an integer
++ | sltiu TMP1, CARG4, LJ_TISNUM
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ | bxeqz TMP1, ->vmeta_comp
++ | // RA is an integer, RD is a number. Convert RA to a number.
++ if (op == BC_ISLT || op == BC_ISGE) {
++ | fcvt.d.w FTMP0, CARG1
++ | fmv.d.x FTMP2, CARG2
++ } else {
++ | fcvt.d.w FTMP2, CARG2
++ | fmv.d.x FTMP0, CARG1
++ }
++ | j <3
++ break;
++
++ case BC_ISEQV: case BC_ISNEV:
++ vk = op == BC_ISEQV;
++ | // RA = src1*8, RD = src2*8, JMP with RD = target
++ | add RA, BASE, RA
++ | add RD, BASE, RD
++ | addi PC, PC, 4
++ | ld CARG1, 0(RA)
++ | ld CARG2, 0(RD)
++ | lhu TMP2, -4+OFS_RD(PC)
++ | gettp CARG3, CARG1
++ | gettp CARG4, CARG2
++ | sltu TMP0, TISNUM, CARG3
++ | sltu TMP1, TISNUM, CARG4
++ | or TMP0, TMP0, TMP1
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ if (vk) {
++ | beqz TMP0, ->BC_ISEQN_Z
++ } else {
++ | beqz TMP0, ->BC_ISNEN_Z
++ }
++ |// Either or both types are not numbers.
++ |.if FFI
++ | // Check if RA or RD is a cdata.
++ | xori TMP0, CARG3, LJ_TCDATA
++ | xori TMP1, CARG4, LJ_TCDATA
++ | and TMP0, TMP0, TMP1
++ | bxeqz TMP0, ->vmeta_equal_cd
++ |.endif
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ | decode_BC4b TMP2
++ | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2
++ | bne CARG1, CARG2, >2
++ | // Tag and value are equal.
++ if (vk) {
++ |->BC_ISEQV_Z:
++ | add PC, PC, TMP2
++ }
++ |1:
++ | ins_next
++ |
++ |2: // Check if the tags are the same and it's a table or userdata.
++ | xor TMP3, CARG3, CARG4 // Same type?
++ | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? TMP0=1
++ | beqz TMP3, >3
++ | mv TMP0, x0 // TMP0=0: different type, or same type that is not table/userdata
++ |3:
++ | cleartp TAB:TMP1, CARG1
++ if (vk) {
++ | beqz TMP0, <1
++ } else {
++ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction.
++ }
++ | // Different tables or userdatas. Need to check __eq metamethod.
++ | // Field metatable must be at same offset for GCtab and GCudata!
++ | ld TAB:TMP3, TAB:TMP1->metatable
++ if (vk) {
++ | beqz TAB:TMP3, <1 // No metatable?
++ | lbu TMP3, TAB:TMP3->nomm
++ | andi TMP3, TMP3, 1<<MM_eq
++ | li TMP0, 0 // ne = 0
++ | bnez TMP3, <1 // Or 'no __eq' flag set?
++ } else {
++ | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable?
++ | lbu TMP3, TAB:TMP3->nomm
++ | andi TMP3, TMP3, 1<<MM_eq
++ | li TMP0, 1 // ne = 1
++ | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set?
++ }
++ | j ->vmeta_equal // Handle __eq metamethod.
++ break;
++
++ case BC_ISEQS: case BC_ISNES:
++ vk = op == BC_ISEQS;
++ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
++ | add RA, BASE, RA
++ | addi PC, PC, 4
++ | ld CARG1, 0(RA)
++ | sub RD, KBASE, RD
++ | lhu TMP2, -4+OFS_RD(PC)
++ | ld CARG2, -8(RD) // KBASE-8-str_const*8
++ |.if FFI
++ | gettp CARG3, CARG1
++ | li TMP1, LJ_TCDATA
++ |.endif
++ | li TMP0, LJ_TSTR
++ | decode_BC4b TMP2
++ | settp CARG2, TMP0
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ |.if FFI
++ | bxeq CARG3, TMP1, ->vmeta_equal_cd
++ |.endif
++ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
++ | addw TMP2, TMP2, TMP3
++ if (vk) {
++ | seqz TMP4, TMP0
++ } else {
++ | snez TMP4, TMP0
++ }
++ | neg TMP4, TMP4
++ | and TMP2, TMP2, TMP4
++ | add PC, PC, TMP2
++ | ins_next
++ break;
++
++ case BC_ISEQN: case BC_ISNEN:
++ vk = op == BC_ISEQN;
++ | // RA = src*8, RD = num_const*8, JMP with RD = target
++ | add RA, BASE, RA
++ | add RD, KBASE, RD
++ | ld CARG1, 0(RA)
++ | ld CARG2, 0(RD)
++ | lhu TMP2, OFS_RD(PC)
++ | gettp CARG3, CARG1
++ | gettp CARG4, CARG2
++ | addi PC, PC, 4
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ if (vk) {
++ |->BC_ISEQN_Z:
++ } else {
++ |->BC_ISNEN_Z:
++ }
++ | decode_BC4b TMP2
++ | bne CARG3, TISNUM, >4
++ | addw TMP2, TMP2, TMP3
++ | bne CARG4, TISNUM, >6
++ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
++ |1:
++ if (vk) {
++ | seqz TMP4, TMP0
++ | neg TMP4, TMP4
++ | and TMP2, TMP2, TMP4
++ | add PC, PC, TMP2
++ |2:
++ } else {
++ | snez TMP4, TMP0
++ | neg TMP4, TMP4
++ | and TMP2, TMP2, TMP4
++ |2:
++ | add PC, PC, TMP2
++ }
++ |3:
++ | ins_next
++ |
++ |4: // RA is not an integer.
++ | addw TMP2, TMP2, TMP3
++ |.if FFI
++ | bgeu CARG3, TISNUM, >7
++ |.else
++ | bgeu CARG3, TISNUM, <2
++ |.endif
++ | fmv.d.x FTMP0, CARG1
++ | fmv.d.x FTMP2, CARG2
++ | bne CARG4, TISNUM, >5
++ |// RA is a number, RD is an integer.
++ | fcvt.d.w FTMP2, CARG2
++ |
++ |5: // RA and RD are both numbers.
++ | feq.d TMP0, FTMP0, FTMP2
++ | seqz TMP0, TMP0
++ | j <1
++ |
++ |6: // RA is an integer, RD is a number.
++ |.if FFI
++ | bgeu CARG4, TISNUM, >8
++ |.else
++ | bgeu CARG4, TISNUM, <2
++ |.endif
++ | fcvt.d.w FTMP0, CARG1
++ | fmv.d.x FTMP2, CARG2
++ | j <5
++ |
++ |.if FFI
++ |7: // RA not int, not number
++ | li TMP0, LJ_TCDATA
++ | bne CARG3, TMP0, <2
++ | j ->vmeta_equal_cd
++ |
++ |8: // RD not int, not number
++ | li TMP0, LJ_TCDATA
++ | bne CARG4, TMP0, <2
++ | j ->vmeta_equal_cd
++ |.endif
++ break;
++
++ case BC_ISEQP: case BC_ISNEP:
++ vk = op == BC_ISEQP;
++ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
++ | add RA, BASE, RA
++ | srliw TMP0, RD, 3
++ | ld TMP1, 0(RA)
++ | not TMP0, TMP0 // ~TMP0: ~0 ~1 ~2
++ | lhu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target
++ | gettp TMP1, TMP1
++ | addi PC, PC, 4
++ | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D
++ |.if FFI
++ | li TMP3, LJ_TCDATA
++ | bxeq TMP1, TMP3, ->vmeta_equal_cd
++ |.endif
++ | decode_BC4b TMP2
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
++ if (vk) {
++ | seqz TMP4, TMP0
++ } else {
++ | snez TMP4, TMP0
++ }
++ | neg TMP4, TMP4
++ | and TMP2, TMP2, TMP4
++ | add PC, PC, TMP2
++ | ins_next
++ break;
++
++ /* -- Unary test and copy ops ------------------------------------------- */
++
++ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
++ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
++ | add RD, BASE, RD
++ | lhu TMP2, OFS_RD(PC)
++ | ld TMP0, 0(RD)
++ | addi PC, PC, 4
++ | gettp TMP0, TMP0
++ | add RA, BASE, RA
++ | sltiu TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false
++ | decode_BC4b TMP2
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ | ld CRET1, 0(RD)
++ | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2
++ if (op == BC_IST || op == BC_ISTC) {
++ | beqz TMP0, >1
++ if (op == BC_ISTC) {
++ | sd CRET1, 0(RA)
++ }
++ } else {
++ | bnez TMP0, >1
++ if (op == BC_ISFC) {
++ | sd CRET1, 0(RA)
++ }
++ }
++ | add PC, PC, TMP2
++ |1:
++ | ins_next
++ break;
++
++ case BC_ISTYPE:
++ | // RA = src*8, RD = -type*8
++ | add TMP0, BASE, RA
++ | srliw TMP1, RD, 3
++ | ld TMP0, 0(TMP0)
++ | gettp TMP0, TMP0
++ | add TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0
++ | bxnez TMP0, ->vmeta_istype
++ | ins_next
++ break;
++ case BC_ISNUM:
++ | // RA = src*8, RD = -(TISNUM-1)*8
++ | add TMP0, BASE, RA
++ | ld TMP0, 0(TMP0)
++ | checknum TMP0, ->vmeta_istype
++ | ins_next
++ break;
++
++ /* -- Unary ops --------------------------------------------------------- */
++
++ case BC_MOV:
++ | // RA = dst*8, RD = src*8
++ | add RD, BASE, RD
++ | add RA, BASE, RA
++ | ld TMP0, 0(RD)
++ | ins_next1
++ | sd TMP0, 0(RA)
++ | ins_next2
++ break;
++ case BC_NOT:
++ | // RA = dst*8, RD = src*8
++ | add RD, BASE, RD
++ | add RA, BASE, RA
++ | ld TMP0, 0(RD)
++ | li TMP1, LJ_TTRUE
++ | ins_next1
++ | gettp TMP0, TMP0
++ | sltu TMP0, TMP1, TMP0
++ | addiw TMP0, TMP0, 1
++ | slli TMP0, TMP0, 47
++ | not TMP0, TMP0
++ | sd TMP0, 0(RA)
++ | ins_next2
++ break;
++ case BC_UNM:
++ | // RA = dst*8, RD = src*8
++ | add RB, BASE, RD
++ | add RA, BASE, RA
++ | ld TMP0, 0(RB)
++ | lui TMP1, 0x80000
++ | gettp CARG3, TMP0
++ | bne CARG3, TISNUM, >1
++ | negw TMP0, TMP0
++ | bxeq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
++ | zext.w TMP0, TMP0
++ | settp_b TMP0, TISNUM
++ | j >2
++ |1:
++ | sltiu TMP3, CARG3, LJ_TISNUM
++ | slli TMP1, TMP1, 32
++ | bxeqz TMP3, ->vmeta_unm
++ | xor TMP0, TMP0, TMP1 // sign => ~sign
++ |2:
++ | sd TMP0, 0(RA)
++ | ins_next
++ break;
++ case BC_LEN:
++ | // RA = dst*8, RD = src*8
++ | add CARG2, BASE, RD
++ | ld TMP0, 0(CARG2)
++ | add RA, BASE, RA
++ | gettp TMP1, TMP0
++ | addi TMP2, TMP1, -LJ_TSTR
++ | cleartp STR:CARG1, TMP0
++ | bnez TMP2, >2
++ | lwu CARG1, STR:CARG1->len
++ |1:
++ | settp_b CARG1, TISNUM
++ | sd CARG1, 0(RA)
++ | ins_next
++ |2:
++ | addi TMP2, TMP1, -LJ_TTAB
++ | bxnez TMP2, ->vmeta_len
++#if LJ_52
++ | ld TAB:TMP2, TAB:CARG1->metatable
++ | bnez TAB:TMP2, >9
++ |3:
++#endif
++ |->BC_LEN_Z:
++ | call_intern BC_LEN, lj_tab_len // (GCtab *t)
++ | // Returns uint32_t (but less than 2^31).
++ | j <1
++#if LJ_52
++ |9:
++ | lbu TMP0, TAB:TMP2->nomm
++ | andi TMP0, TMP0, 1<<MM_len
++ | bnez TMP0, <3 // 'no __len' flag set: done.
++ | j ->vmeta_len
++#endif
++ break;
++
++ /* -- Binary ops -------------------------------------------------------- */
++
++ |.macro fpmod, a, b, c
++ | fdiv.d FARG1, b, c
++ | jal ->vm_floor // floor(b/c)
++ | fmul.d a, FRET1, c
++ | fsub.d a, b, a // b - floor(b/c)*c
++ |.endmacro
++ |
++ |.macro ins_arithpre
++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
++ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
++ ||if (vk == 1) {
++ | // RA = dst*8, RB = num_const*8, RC = src1*8
++ | decode_RB8 RC, INS
++ | decode_RDtoRC8 RB, RD
++ ||} else {
++ | // RA = dst*8, RB = src1*8, RC = num_const*8
++ | decode_RB8 RB, INS
++ | decode_RDtoRC8 RC, RD
++ ||}
++ ||switch (vk) {
++ ||case 0: // suffix is VN
++ | add RB, BASE, RB
++ | add RC, KBASE, RC
++ || break;
++ ||case 1: // suffix is NV
++ | add RC, BASE, RC
++ | add RB, KBASE, RB
++ || break;
++ ||default: // CAT or suffix is VV
++ | add RB, BASE, RB
++ | add RC, BASE, RC
++ || break;
++ ||}
++ |.endmacro
++ |
++ |.macro ins_arithfp, fpins, itype1, itype2
++ | fld FTMP0, 0(RB)
++ | sltu itype1, itype1, TISNUM
++ | sltu itype2, itype2, TISNUM
++ | fld FTMP2, 0(RC)
++ | and itype1, itype1, itype2
++ | add RA, BASE, RA
++ | bxeqz itype1, ->vmeta_arith
++ | fpins FRET1, FTMP0, FTMP2
++ | ins_next1
++ | fsd FRET1, 0(RA)
++ | ins_next2
++ |.endmacro
++ |
++ |.macro ins_arithead, itype1, itype2, tval1, tval2
++ | ld tval1, 0(RB)
++ | ld tval2, 0(RC)
++ | // Check for two integers.
++ | gettp itype1, tval1
++ | gettp itype2, tval2
++ |.endmacro
++ |
++ |.macro ins_arithdn, intins, fpins
++ | ins_arithpre
++ | ins_arithead TMP0, TMP1, CARG1, CARG2
++ | bne TMP0, TISNUM, >1
++ | bne TMP1, TISNUM, >1
++ | sext.w CARG3, CARG1
++ | sext.w CARG4, CARG2
++ |.if "intins" == "addw"
++ | intins CRET1, CARG3, CARG4
++ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow.
++ | xor TMP2, CRET1, CARG4
++ | and TMP1, TMP1, TMP2
++ | add RA, BASE, RA
++ | bxltz TMP1, ->vmeta_arith
++ |.elif "intins" == "subw"
++ | intins CRET1, CARG3, CARG4
++ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow.
++ | xor TMP2, CARG3, CARG4
++ | and TMP1, TMP1, TMP2
++ | add RA, BASE, RA
++ | bxltz TMP1, ->vmeta_arith
++ |.elif "intins" == "mulw"
++ | mul TMP2, CARG3, CARG4
++ | add RA, BASE, RA
++ | sext.w CRET1, TMP2
++ | bxne CRET1, TMP2, ->vmeta_arith // 63-32bit not all 0 or 1: overflow.
++ |.endif
++ | zext.w CRET1, CRET1
++ | settp_b CRET1, TISNUM
++ | sd CRET1, 0(RA)
++ | ins_next
++ |1: // Check for two numbers.
++ | ins_arithfp, fpins, TMP0, TMP1
++ |.endmacro
++ |
++ |.macro ins_arithdiv, fpins
++ | ins_arithpre
++ | ins_arithead TMP0, TMP1, CARG1, CARG2
++ | ins_arithfp, fpins, TMP0, TMP1
++ |.endmacro
++ |
++ |.macro ins_arithmod, fpins, BC
++ | ins_arithpre
++ | ins_arithead TMP0, TMP1, CARG1, CARG2
++ | bne TMP0, TISNUM, >1
++ | bne TMP1, TISNUM, >1
++ | sext.w CARG1, CARG1
++ | sext.w CARG2, CARG2
++ | add RA, BASE, RA
++ | bxeqz CARG2, ->vmeta_arith
++ | call_intern BC, lj_vm_modi
++ | zext.w CRET1, CRET1
++ | settp_b CRET1, TISNUM
++ | sd CRET1, 0(RA)
++ | ins_next
++ |1: // Check for two numbers.
++ | ins_arithfp, fpins, TMP0, TMP1
++ |.endmacro
++
++ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
++ | ins_arithdn addw, fadd.d
++ break;
++ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
++ | ins_arithdn subw, fsub.d
++ break;
++ case BC_MULVN: case BC_MULNV: case BC_MULVV:
++ | ins_arithdn mulw, fmul.d
++ break;
++ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
++ | ins_arithdiv fdiv.d
++ break;
++ case BC_MODVN:
++ | ins_arithmod fpmod, BC_MODVN
++ break;
++ case BC_MODNV:
++ | ins_arithmod fpmod, BC_MODNV
++ break;
++ case BC_MODVV:
++ | ins_arithmod fpmod, BC_MODVV
++ break;
++ case BC_POW:
++ | ins_arithpre
++ | ld CARG1, 0(RB)
++ | ld CARG2, 0(RC)
++ | gettp TMP0, CARG1
++ | gettp TMP1, CARG2
++ | sltiu TMP0, TMP0, LJ_TISNUM
++ | sltiu TMP1, TMP1, LJ_TISNUM
++ | and TMP0, TMP0, TMP1
++ | add RA, BASE, RA
++ | bxeqz TMP0, ->vmeta_arith
++ | fld FARG1, 0(RB)
++ | fld FARG2, 0(RC)
++ | call_extern pow
++ | ins_next1
++ | fsd FRET1, 0(RA)
++ | ins_next2
++ break;
++
++ case BC_CAT:
++ | // RA = dst*8, RB = src_start*8, RC = src_end*8
++ | decode_RB8 RB, INS
++ | decode_RDtoRC8 RC, RD
++ | sub CARG3, RC, RB
++ | sd BASE, L->base
++ | add CARG2, BASE, RC
++ | mv MULTRES, RB
++ |->BC_CAT_Z:
++ | srliw CARG3, CARG3, 3
++ | sd PC, SAVE_PC(sp)
++ | mv CARG1, L
++ | call_intern BC_CAT, lj_meta_cat // (lua_State *L, TValue *top, int left)
++ | // Returns NULL (finished) or TValue * (metamethod).
++ | ld BASE, L->base
++ | bxnez CRET1, ->vmeta_binop
++ | add RB, BASE, MULTRES
++ | ld TMP0, 0(RB)
++ | add RA, BASE, RA
++ | sd TMP0, 0(RA)
++ | ins_next
++ break;
++
++ /* -- Constant ops ------------------------------------------------------ */
++
++ case BC_KSTR:
++ | // RA = dst*8, RD = str_const*8 (~)
++ | sub TMP1, KBASE, RD
++ | li TMP2, LJ_TSTR
++ | ld TMP0, -8(TMP1) // KBASE-8-str_const*8
++ | add RA, BASE, RA
++ | settp TMP0, TMP2
++ | sd TMP0, 0(RA)
++ | ins_next
++ break;
++ case BC_KCDATA:
++ |.if FFI
++ | // RA = dst*8, RD = cdata_const*8 (~)
++ | sub TMP1, KBASE, RD
++ | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8
++ | li TMP2, LJ_TCDATA
++ | add RA, BASE, RA
++ | settp TMP0, TMP2
++ | sd TMP0, 0(RA)
++ | ins_next
++ |.endif
++ break;
++ case BC_KSHORT:
++ | // RA = dst*8, RD = int16_literal*8
++ | sraiw RD, INS, 16
++ | add RA, BASE, RA
++ | zext.w RD, RD
++ | ins_next1
++ | settp_b RD, TISNUM
++ | sd RD, 0(RA)
++ | ins_next2
++ break;
++ case BC_KNUM:
++ | // RA = dst*8, RD = num_const*8
++ | add RD, KBASE, RD
++ | add RA, BASE, RA
++ | ld TMP0, 0(RD)
++ | ins_next1
++ | sd TMP0, 0(RA)
++ | ins_next2
++ break;
++ case BC_KPRI:
++ | // RA = dst*8, RD = primitive_type*8 (~)
++ | add RA, BASE, RA
++ | slli TMP0, RD, 44 // 44+3
++ | not TMP0, TMP0
++ | ins_next1
++ | sd TMP0, 0(RA)
++ | ins_next2
++ break;
++ case BC_KNIL:
++ | // RA = base*8, RD = end*8
++ | add RA, BASE, RA
++ | sd TISNIL, 0(RA)
++ | addi RA, RA, 8
++ | add RD, BASE, RD
++ |1:
++ | sd TISNIL, 0(RA)
++ | slt TMP0, RA, RD
++ | addi RA, RA, 8
++ | bnez TMP0, <1
++ | ins_next
++ break;
++
++ /* -- Upvalue and function ops ------------------------------------------ */
++
++ case BC_UGET:
++ | // RA = dst*8, RD = uvnum*8
++ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
++ | add RA, BASE, RA
++ | cleartp LFUNC:TMP0
++ | add RD, RD, LFUNC:TMP0
++ | ld UPVAL:TMP0, LFUNC:RD->uvptr
++ | ld TMP1, UPVAL:TMP0->v
++ | ld TMP2, 0(TMP1)
++ | ins_next1
++ | sd TMP2, 0(RA)
++ | ins_next2
++ break;
++ case BC_USETV:
++ | // RA = uvnum*8, RD = src*8
++ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
++ | add RD, BASE, RD
++ | cleartp LFUNC:TMP0
++ | add RA, RA, LFUNC:TMP0
++ | ld UPVAL:TMP0, LFUNC:RA->uvptr
++ | ld CRET1, 0(RD)
++ | lbu TMP3, UPVAL:TMP0->marked
++ | ld CARG2, UPVAL:TMP0->v
++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
++ | lbu TMP0, UPVAL:TMP0->closed
++ | gettp TMP2, CRET1
++ | sd CRET1, 0(CARG2)
++ | or TMP3, TMP3, TMP0
++ | li TMP0, LJ_GC_BLACK|1
++ | addi TMP2, TMP2, -(LJ_TNUMX+1)
++ | beq TMP3, TMP0, >2 // Upvalue is closed and black?
++ |1:
++ | ins_next
++ |
++ |2: // Check if new value is collectable.
++ | sltiu TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
++ | cleartp GCOBJ:CRET1, CRET1
++ | beqz TMP0, <1 // tvisgcv(v)
++ | lbu TMP3, GCOBJ:CRET1->gch.marked
++ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
++ | beqz TMP3, <1
++ | // Crossed a write barrier. Move the barrier forward.
++ | mv CARG1, GL
++ | call_intern BC_USETV, lj_gc_barrieruv // (global_State *g, TValue *tv)
++ | j <1
++ break;
++ case BC_USETS:
++ | // RA = uvnum*8, RD = str_const*8 (~)
++ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
++ | sub TMP1, KBASE, RD
++ | cleartp LFUNC:TMP0
++ | add RA, RA, LFUNC:TMP0
++ | ld UPVAL:TMP0, LFUNC:RA->uvptr
++ | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8
++ | lbu TMP2, UPVAL:TMP0->marked
++ | ld CARG2, UPVAL:TMP0->v
++ | lbu TMP3, STR:TMP1->marked
++ | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv)
++ | lbu TMP2, UPVAL:TMP0->closed
++ | li TMP0, LJ_TSTR
++ | settp TMP1, TMP0
++ | sd TMP1, 0(CARG2)
++ | bnez TMP4, >2
++ |1:
++ | ins_next
++ |
++ |2: // Check if string is white and ensure upvalue is closed.
++ | beqz TMP2, <1
++ | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str)
++ | beqz TMP0, <1
++ | // Crossed a write barrier. Move the barrier forward.
++ | mv CARG1, GL
++ | call_intern BC_USETS, lj_gc_barrieruv // (global_State *g, TValue *tv)
++ | j <1
++ break;
++ case BC_USETN:
++ | // RA = uvnum*8, RD = num_const*8
++ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
++ | add RD, KBASE, RD
++ | cleartp LFUNC:TMP0
++ | add TMP0, RA, LFUNC:TMP0
++ | ld UPVAL:TMP0, LFUNC:TMP0->uvptr
++ | ld TMP1, 0(RD)
++ | ld TMP0, UPVAL:TMP0->v
++ | sd TMP1, 0(TMP0)
++ | ins_next
++ break;
++ case BC_USETP:
++ | // RA = uvnum*8, RD = primitive_type*8 (~)
++ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
++ | slli TMP2, RD, 44
++ | cleartp LFUNC:TMP0
++ | add TMP0, RA, LFUNC:TMP0
++ | not TMP2, TMP2
++ | ld UPVAL:TMP0, LFUNC:TMP0->uvptr
++ | ld TMP1, UPVAL:TMP0->v
++ | sd TMP2, 0(TMP1)
++ | ins_next
++ break;
++
++ case BC_UCLO:
++ | // RA = level*8, RD = target
++ | ld TMP2, L->openupval
++ | branch_RD // Do this first since RD is not saved.
++ | sd BASE, L->base
++ | mv CARG1, L
++ | beqz TMP2, >1
++ | add CARG2, BASE, RA
++ | call_intern BC_UCLO, lj_func_closeuv // (lua_State *L, TValue *level)
++ | ld BASE, L->base
++ |1:
++ | ins_next
++ break;
++
++ case BC_FNEW:
++ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
++ | sub TMP1, KBASE, RD
++ | ld CARG3, FRAME_FUNC(BASE)
++ | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8
++ | sd BASE, L->base
++ | sd PC, SAVE_PC(sp)
++ | cleartp CARG3
++ | mv CARG1, L
++ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
++ | call_intern BC_FNEW, lj_func_newL_gc
++ | // Returns GCfuncL *.
++ | li TMP0, LJ_TFUNC
++ | ld BASE, L->base
++ | settp CRET1, TMP0
++ | add RA, BASE, RA
++ | sd CRET1, 0(RA)
++ | ins_next
++ break;
++
++ /* -- Table ops --------------------------------------------------------- */
++
++ case BC_TNEW:
++ case BC_TDUP:
++ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
++ | ld TMP0, GL->gc.total
++ | ld TMP1, GL->gc.threshold
++ | sd BASE, L->base
++ | sd PC, SAVE_PC(sp)
++ | bgeu TMP0, TMP1, >5
++ |1:
++ if (op == BC_TNEW) {
++ | srliw CARG2, RD, 3
++ | andi CARG2, CARG2, 0x7ff
++ | lzi TMP0, 0x801
++ | addiw TMP2, CARG2, -0x7ff
++ | srliw CARG3, RD, 14
++ | seqz TMP4, TMP2
++ | neg TMP4, TMP4
++ | and TMP0, TMP0, TMP4
++ | not TMP4, TMP4
++ | and CARG2, CARG2, TMP4
++ | or CARG2, CARG2, TMP0
++ | mv CARG1, L
++ | // (lua_State *L, int32_t asize, uint32_t hbits)
++ | call_intern BC_TNEW, lj_tab_new
++ | // Returns Table *.
++ } else {
++ | sub TMP1, KBASE, RD
++ | mv CARG1, L
++ | ld CARG2, -8(TMP1) // KBASE-8-str_const*8
++ | call_intern BC_TDUP, lj_tab_dup // (lua_State *L, Table *kt)
++ | // Returns Table *.
++ }
++ | li TMP0, LJ_TTAB
++ | ld BASE, L->base
++ | ins_next1
++ | settp CRET1, TMP0
++ | add RA, BASE, RA
++ | sd CRET1, 0(RA)
++ | ins_next2
++ |5:
++ | mv MULTRES, RD
++ | mv CARG1, L
++ if (op == BC_TNEW) {
++ | call_intern BC_TNEW, lj_gc_step_fixtop // (lua_State *L)
++ } else {
++ | call_intern BC_TDUP, lj_gc_step_fixtop // (lua_State *L)
++ }
++ | mv RD, MULTRES
++ | j <1
++ break;
++
++ case BC_GGET:
++ | // RA = dst*8, RD = str_const*8 (~)
++ case BC_GSET:
++ | // RA = src*8, RD = str_const*8 (~)
++ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
++ | sub TMP1, KBASE, RD
++ | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8
++ | cleartp LFUNC:TMP0
++ | ld TAB:RB, LFUNC:TMP0->env
++ | add RA, BASE, RA
++ if (op == BC_GGET) {
++ | j ->BC_TGETS_Z
++ } else {
++ | j ->BC_TSETS_Z
++ }
++ break;
++
++ case BC_TGETV:
++ | // RA = dst*8, RB = table*8, RC = key*8
++ | decode_RB8 RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add CARG2, BASE, RB
++ | add CARG3, BASE, RC
++ | ld TAB:RB, 0(CARG2)
++ | ld TMP2, 0(CARG3)
++ | add RA, BASE, RA
++ | checktab TAB:RB, ->vmeta_tgetv
++ | gettp TMP3, TMP2
++ | lw TMP0, TAB:RB->asize
++ | bne TMP3, TISNUM, >5 // Integer key?
++ | sext.w TMP2, TMP2
++ | ld TMP1, TAB:RB->array
++ | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part?
++ | slliw TMP2, TMP2, 3
++ | add TMP2, TMP1, TMP2
++ | ld CRET1, 0(TMP2)
++ | beq CRET1, TISNIL, >2
++ |1:
++ | sd CRET1, 0(RA)
++ | ins_next
++ |
++ |2: // Check for __index if table value is nil.
++ | ld TAB:TMP2, TAB:RB->metatable
++ | beqz TAB:TMP2, <1 // No metatable: done.
++ | lbu TMP0, TAB:TMP2->nomm
++ | andi TMP0, TMP0, 1<<MM_index
++ | bnez TMP0, <1 // 'no __index' flag set: done.
++ | j ->vmeta_tgetv
++ |
++ |5:
++ | li TMP0, LJ_TSTR
++ | cleartp RC, TMP2
++ | bxne TMP3, TMP0, ->vmeta_tgetv // String key?
++ | j ->BC_TGETS_Z
++ break;
++ case BC_TGETS:
++ | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
++ | decode_RB8 RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add CARG2, BASE, RB
++ | sub CARG3, KBASE, RC
++ | ld TAB:RB, 0(CARG2)
++ | add RA, BASE, RA
++ | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8
++ | checktab TAB:RB, ->vmeta_tgets1
++ |->BC_TGETS_Z:
++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
++ | lw TMP0, TAB:RB->hmask
++ | lw TMP1, STR:RC->sid
++ | ld NODE:TMP2, TAB:RB->node
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
++ | slliw TMP0, TMP1, 5
++ | slliw TMP1, TMP1, 3
++ | subw TMP1, TMP0, TMP1
++ | li TMP3, LJ_TSTR
++ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
++ | settp STR:RC, TMP3 // Tagged key to look for.
++ |1:
++ | ld CARG1, NODE:TMP2->key
++ | ld CARG2, NODE:TMP2->val
++ | ld NODE:TMP1, NODE:TMP2->next
++ | ld TAB:TMP3, TAB:RB->metatable
++ | bne CARG1, RC, >4
++ | beq CARG2, TISNIL, >5 // Key found, but nil value?
++ |3:
++ | sd CARG2, 0(RA)
++ | ins_next
++ |
++ |4: // Follow hash chain.
++ | mv NODE:TMP2, NODE:TMP1
++ | bnez NODE:TMP1, <1
++ | // End of hash chain: key not found, nil result.
++ |
++ |5: // Check for __index if table value is nil.
++ | mv CARG2, TISNIL
++ | beqz TAB:TMP3, <3 // No metatable: done.
++ | lbu TMP0, TAB:TMP3->nomm
++ | andi TMP0, TMP0, 1<<MM_index
++ | bnez TMP0, <3 // 'no __index' flag set: done.
++ | j ->vmeta_tgets
++ break;
++ case BC_TGETB:
++ | // RA = dst*8, RB = table*8, RC = index*8
++ | decode_RB8 RB, INS
++ | add CARG2, BASE, RB
++ | decode_RDtoRC8 RC, RD
++ | ld TAB:RB, 0(CARG2)
++ | add RA, BASE, RA
++ | srliw TMP0, RC, 3
++ | checktab TAB:RB, ->vmeta_tgetb
++ | lw TMP1, TAB:RB->asize
++ | ld TMP2, TAB:RB->array
++ | bxgeu TMP0, TMP1, ->vmeta_tgetb
++ | add RC, TMP2, RC
++ | ld CRET1, 0(RC)
++ | beq CRET1, TISNIL, >5
++ |1:
++ | sd CRET1, 0(RA)
++ | ins_next
++ |
++ |5: // Check for __index if table value is nil.
++ | ld TAB:TMP2, TAB:RB->metatable
++ | beqz TAB:TMP2, <1 // No metatable: done.
++ | lbu TMP1, TAB:TMP2->nomm
++ | andi TMP1, TMP1, 1<<MM_index
++ | bnez TMP1, <1 // 'no __index' flag set: done.
++ | j ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
++ break;
++ case BC_TGETR:
++ | // RA = dst*8, RB = table*8, RC = key*8
++ | decode_RB8 RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add RB, BASE, RB
++ | add RC, BASE, RC
++ | ld TAB:CARG1, 0(RB)
++ | lw CARG2, 0(RC)
++ | add RA, BASE, RA
++ | cleartp TAB:CARG1
++ | lw TMP0, TAB:CARG1->asize
++ | ld TMP1, TAB:CARG1->array
++ | bxgeu CARG2, TMP0, ->vmeta_tgetr // In array part?
++ | slliw TMP2, CARG2, 3
++ | add TMP3, TMP1, TMP2
++ | ld TMP1, 0(TMP3)
++ |->BC_TGETR_Z:
++ | ins_next1
++ | sd TMP1, 0(RA)
++ | ins_next2
++ break;
++
++ case BC_TSETV:
++ | // RA = src*8, RB = table*8, RC = key*8
++ | decode_RB8 RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add CARG2, BASE, RB
++ | add CARG3, BASE, RC
++ | ld TAB:RB, 0(CARG2)
++ | ld TMP2, 0(CARG3)
++ | add RA, BASE, RA
++ | checktab TAB:RB, ->vmeta_tsetv
++ | sext.w RC, TMP2
++ | checkint TMP2, >5
++ | lw TMP0, TAB:RB->asize
++ | ld TMP1, TAB:RB->array
++ | bxgeu RC, TMP0, ->vmeta_tsetv // Integer key and in array part?
++ | slliw TMP2, RC, 3
++ | add TMP1, TMP1, TMP2
++ | lbu TMP3, TAB:RB->marked
++ | ld TMP0, 0(TMP1)
++ | ld CRET1, 0(RA)
++ | beq TMP0, TISNIL, >3
++ |1:
++ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table)
++ | sd CRET1, 0(TMP1)
++ | bnez TMP2, >7
++ |2:
++ | ins_next
++ |
++ |3: // Check for __newindex if previous value is nil.
++ | ld TAB:TMP2, TAB:RB->metatable
++ | beqz TAB:TMP2, <1 // No metatable: done.
++ | lbu TMP2, TAB:TMP2->nomm
++ | andi TMP2, TMP2, 1<<MM_newindex
++ | bnez TMP2, <1 // 'no __newindex' flag set: done.
++ | j ->vmeta_tsetv
++ |5:
++ | gettp TMP0, TMP2
++ | addi TMP0, TMP0, -LJ_TSTR
++ | bxnez TMP0, ->vmeta_tsetv
++ | cleartp STR:RC, TMP2
++ | j ->BC_TSETS_Z // String key?
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, TMP3, TMP0, <2
++ break;
++ case BC_TSETS:
++ | // RA = src*8, RB = table*8, RC = str_const*8 (~)
++ | decode_RB8 RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add CARG2, BASE, RB
++ | sub CARG3, KBASE, RC
++ | ld TAB:RB, 0(CARG2)
++ | ld RC, -8(CARG3) // KBASE-8-str_const*8
++ | add RA, BASE, RA
++ | cleartp STR:RC
++ | checktab TAB:RB, ->vmeta_tsets1
++ |->BC_TSETS_Z:
++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
++ | lw TMP0, TAB:RB->hmask
++ | lw TMP1, STR:RC->sid
++ | ld NODE:TMP2, TAB:RB->node
++ | sb x0, TAB:RB->nomm // Clear metamethod cache.
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
++ | slliw TMP0, TMP1, 5
++ | slliw TMP1, TMP1, 3
++ | subw TMP1, TMP0, TMP1
++ | li TMP3, LJ_TSTR
++ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
++ | settp STR:RC, TMP3 // Tagged key to look for.
++ | fld FTMP0, 0(RA)
++ |1:
++ | ld TMP0, NODE:TMP2->key
++ | ld CARG2, NODE:TMP2->val
++ | ld NODE:TMP1, NODE:TMP2->next
++ | lbu TMP3, TAB:RB->marked
++ | bne TMP0, RC, >5
++ | ld TAB:TMP0, TAB:RB->metatable
++ | beq CARG2, TISNIL, >4 // Key found, but nil value?
++ |2:
++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(table)
++ | fsd FTMP0, NODE:TMP2->val
++ | bnez TMP3, >7
++ |3:
++ | ins_next
++ |
++ |4: // Check for __newindex if previous value is nil.
++ | beqz TAB:TMP0, <2 // No metatable: done.
++ | lbu TMP0, TAB:TMP0->nomm
++ | andi TMP0, TMP0, 1<<MM_newindex
++ | bnez TMP0, <2 // 'no __newindex' flag set: done.
++ | j ->vmeta_tsets
++ |
++ |5: // Follow hash chain.
++ | mv NODE:TMP2, NODE:TMP1
++ | bnez NODE:TMP1, <1
++ | // End of hash chain: key not found, add a new one
++ |
++ | // But check for __newindex first.
++ | ld TAB:TMP2, TAB:RB->metatable
++ | addi CARG3, GL, offsetof(global_State, tmptv)
++ | beqz TAB:TMP2, >6 // No metatable: continue.
++ | lbu TMP0, TAB:TMP2->nomm
++ | andi TMP0, TMP0, 1<<MM_newindex
++ | bxeqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
++ |6:
++ | sd RC, 0(CARG3)
++ | sd BASE, L->base
++ | mv CARG2, TAB:RB
++ | sd PC, SAVE_PC(sp)
++ | mv CARG1, L
++ | // (lua_State *L, GCtab *t, TValue *k)
++ | call_intern BC_TSETS, lj_tab_newkey
++ | // Returns TValue *.
++ | ld BASE, L->base
++ | fsd FTMP0, 0(CRET1)
++ | j <3 // No 2nd write barrier needed.
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, TMP3, TMP0, <3
++ break;
++ case BC_TSETB:
++ | // RA = src*8, RB = table*8, RC = index*8
++ | decode_RB8 RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add CARG2, BASE, RB
++ | add RA, BASE, RA
++ | ld TAB:RB, 0(CARG2)
++ | srliw TMP0, RC, 3
++ | checktab RB, ->vmeta_tsetb
++ | lw TMP1, TAB:RB->asize
++ | ld TMP2, TAB:RB->array
++ | bxgeu TMP0, TMP1, ->vmeta_tsetb
++ | add RC, TMP2, RC
++ | ld TMP1, 0(RC)
++ | lbu TMP3, TAB:RB->marked
++ | beq TMP1, TISNIL, >5
++ |1:
++ | ld CRET1, 0(RA)
++ | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table)
++ | sd CRET1, 0(RC)
++ | bnez TMP1, >7
++ |2:
++ | ins_next
++ |
++ |5: // Check for __newindex if previous value is nil.
++ | ld TAB:TMP2, TAB:RB->metatable
++ | beqz TAB:TMP2, <1 // No metatable: done.
++ | lbu TMP1, TAB:TMP2->nomm
++ | andi TMP1, TMP1, 1<<MM_newindex
++ | bnez TMP1, <1 // 'no __newindex' flag set: done.
++ | j ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, TMP3, TMP0, <2
++ break;
++ case BC_TSETR:
++ | // RA = dst*8, RB = table*8, RC = key*8
++ | decode_RB8 RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add CARG1, BASE, RB
++ | add CARG3, BASE, RC
++ | ld TAB:CARG2, 0(CARG1)
++ | lw CARG3, 0(CARG3)
++ | cleartp TAB:CARG2
++ | lbu TMP3, TAB:CARG2->marked
++ | lw TMP0, TAB:CARG2->asize
++ | ld TMP1, TAB:CARG2->array
++ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table)
++ | add RA, BASE, RA
++ | bnez TMP2, >7
++ |2:
++ | bxgeu CARG3, TMP0, ->vmeta_tsetr // In array part?
++ | slliw TMP2, CARG3, 3
++ | add CRET1, TMP1, TMP2
++ |->BC_TSETR_Z:
++ | ld TMP1, 0(RA)
++ | ins_next1
++ | sd TMP1, 0(CRET1)
++ | ins_next2
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:CARG2, TMP3, CRET1, <2
++ break;
++
++ case BC_TSETM:
++ | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
++ | add RA, BASE, RA
++ |1:
++ | add TMP3, KBASE, RD
++ | ld TAB:CARG2, -8(RA) // Guaranteed to be a table.
++ | addiw TMP0, MULTRES, -8
++ | lw TMP3, 0(TMP3) // Integer constant is in lo-word.
++ | srliw CARG3, TMP0, 3
++ | beqz TMP0, >4 // Nothing to copy?
++ | cleartp TAB:CARG2
++ | addw CARG3, CARG3, TMP3
++ | lw TMP2, TAB:CARG2->asize
++ | slliw TMP1, TMP3, 3
++ | lbu TMP3, TAB:CARG2->marked
++ | ld CARG1, TAB:CARG2->array
++ | bltu TMP2, CARG3, >5
++ | add TMP2, RA, TMP0
++ | add TMP1, TMP1, CARG1
++ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
++ |3: // Copy result slots to table.
++ | ld CRET1, 0(RA)
++ | addi RA, RA, 8
++ | sd CRET1, 0(TMP1)
++ | addi TMP1, TMP1, 8
++ | bltu RA, TMP2, <3
++ | bnez TMP0, >7
++ |4:
++ | ins_next
++ |
++ |5: // Need to resize array part.
++ | sd BASE, L->base
++ | sd PC, SAVE_PC(sp)
++ | mv BASE, RD
++ | mv CARG1, L
++ | // (lua_State *L, GCtab *t, int nasize)
++ | call_intern BC_TSETM, lj_tab_reasize
++ | // Must not reallocate the stack.
++ | mv RD, BASE
++ | ld BASE, L->base // Reload BASE for lack of a saved register.
++ | j <1
++ |
++ |7: // Possible table write barrier for any value. Skip valiswhite check.
++ | barrierback TAB:CARG2, TMP3, TMP0, <4
++ break;
++
++ /* -- Calls and vararg handling ----------------------------------------- */
++
++ case BC_CALLM:
++ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
++ | decode_RDtoRC8 NARGS8:RC, RD
++ | addw NARGS8:RC, NARGS8:RC, MULTRES
++ | j ->BC_CALL_Z
++ break;
++ case BC_CALL:
++ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
++ | decode_RDtoRC8 NARGS8:RC, RD
++ |->BC_CALL_Z:
++ | mv TMP2, BASE
++ | add BASE, BASE, RA
++ | ld LFUNC:RB, 0(BASE)
++ | addi BASE, BASE, 16
++ | addiw NARGS8:RC, NARGS8:RC, -8
++ | checkfunc RB, ->vmeta_call
++ | ins_call
++ break;
++
++ case BC_CALLMT:
++ | // RA = base*8, (RB = 0,) RC = extra_nargs*8
++ | addw NARGS8:RD, NARGS8:RD, MULTRES
++ | j ->BC_CALLT_Z1
++ break;
++ case BC_CALLT:
++ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
++ |->BC_CALLT_Z1:
++ | add RA, BASE, RA
++ | ld LFUNC:RB, 0(RA)
++ | mv NARGS8:RC, RD
++ | ld TMP1, FRAME_PC(BASE)
++ | addi RA, RA, 16
++ | addiw NARGS8:RC, NARGS8:RC, -8
++ | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt
++ |->BC_CALLT_Z:
++ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'.
++ | lbu TMP3, LFUNC:CARG3->ffid
++ | xori TMP2, TMP1, FRAME_VARG
++ | bnez TMP0, >7
++ |1:
++ | sd LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
++ | sltiu CARG4, TMP3, 2 // (> FF_C) Calling a fast function?
++ | mv TMP2, BASE
++ | mv RB, CARG3
++ | mv TMP3, NARGS8:RC
++ | beqz NARGS8:RC, >3
++ |2:
++ | ld CRET1, 0(RA)
++ | addi RA, RA, 8
++ | addiw TMP3, TMP3, -8
++ | sd CRET1, 0(TMP2)
++ | addi TMP2, TMP2, 8
++ | bnez TMP3, <2
++ |3:
++ | or TMP0, TMP0, CARG4
++ | beqz TMP0, >5
++ |4:
++ | ins_callt
++ |
++ |5: // Tailcall to a fast function with a Lua frame below.
++ | lw INS, -4(TMP1)
++ | decode_RA8 RA, INS
++ | sub TMP1, BASE, RA
++ | ld TMP1, -32(TMP1)
++ | cleartp LFUNC:TMP1
++ | ld TMP1, LFUNC:TMP1->pc
++ | ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
++ | j <4
++ |
++ |7: // Tailcall from a vararg function.
++ | andi CARG4, TMP2, FRAME_TYPEP
++ | sub TMP2, BASE, TMP2 // Relocate BASE down.
++ | bnez CARG4, <1 // Vararg frame below?
++ | mv BASE, TMP2
++ | ld TMP1, FRAME_PC(TMP2)
++ | andi TMP0, TMP1, FRAME_TYPE
++ | j <1
++ break;
++
++ case BC_ITERC:
++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
++ | mv TMP2, BASE // Save old BASE for vmeta_call.
++ | add BASE, BASE, RA
++ | ld RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1.
++ | ld CARG1, -16(BASE)
++ | ld CARG2, -8(BASE)
++ | li NARGS8:RC, 16 // Iterators get 2 arguments.
++ | sd RB, 0(BASE) // Copy callable.
++ | sd CARG1, 16(BASE) // Copy state.
++ | sd CARG2, 24(BASE) // Copy control var.
++ | addi BASE, BASE, 16
++ | checkfunc RB, ->vmeta_call
++ | ins_call
++ break;
++
++ case BC_ITERN:
++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
++ |.if JIT
++ | hotloop
++ |.endif
++ |->vm_IITERN:
++ | add RA, BASE, RA
++ | ld TAB:RB, -16(RA)
++ | lw RC, -8(RA) // Get index from control var.
++ | cleartp TAB:RB
++ | addi PC, PC, 4
++ | lw TMP0, TAB:RB->asize
++ | ld TMP1, TAB:RB->array
++ | slli CARG3, TISNUM, 47
++ |1: // Traverse array part.
++ | bleu TMP0, RC, >5 // Index points after array part?
++ | slliw TMP3, RC, 3
++ | add TMP3, TMP1, TMP3
++ | ld CARG1, 0(TMP3)
++ | lhu RD, -4+OFS_RD(PC) // ITERL RD
++ | or TMP2, RC, CARG3
++ | addiw RC, RC, 1
++ | beq CARG1, TISNIL, <1 // Skip holes in array part.
++ | sd TMP2, 0(RA)
++ | sd CARG1, 8(RA)
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ | decode_BC4b RD
++ | add RD, RD, TMP3
++ | sw RC, -8(RA) // Update control var.
++ | add PC, PC, RD
++ |3:
++ | ins_next
++ |
++ |5: // Traverse hash part.
++ | lw TMP1, TAB:RB->hmask
++ | subw RC, RC, TMP0
++ | ld TMP2, TAB:RB->node
++ |6:
++ | bltu TMP1, RC, <3 // End of iteration? Branch to ITERL+1.
++ | slliw TMP3, RC, 5
++ | slliw RB, RC, 3
++ | subw TMP3, TMP3, RB
++ | add NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8)
++ | ld CARG1, 0(NODE:TMP3)
++ | lhu RD, -4+OFS_RD(PC) // ITERL RD
++ | addiw RC, RC, 1
++ | beq CARG1, TISNIL, <6 // Skip holes in hash part.
++ | ld CARG2, NODE:TMP3->key
++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ | sd CARG1, 8(RA)
++ | addw RC, RC, TMP0
++ | decode_BC4b RD
++ | addw RD, RD, TMP3
++ | sd CARG2, 0(RA)
++ | add PC, PC, RD
++ | sw RC, -8(RA) // Update control var.
++ | j <3
++ break;
++
++ case BC_ISNEXT:
++ | // RA = base*8, RD = target (points to ITERN)
++ | add RA, BASE, RA
++ | srliw TMP0, RD, 1
++ | ld CFUNC:CARG1, -24(RA)
++ | add TMP0, PC, TMP0
++ | ld CARG2, -16(RA)
++ | ld CARG3, -8(RA)
++ | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
++ | checkfunc CFUNC:CARG1, >5
++ | gettp CARG2, CARG2
++ | addi CARG2, CARG2, -LJ_TTAB
++ | lbu TMP1, CFUNC:CARG1->ffid
++ | addi CARG3, CARG3, -LJ_TNIL
++ | or TMP3, CARG2, CARG3
++ | addi TMP1, TMP1, -FF_next_N
++ | or TMP3, TMP3, TMP1
++ | lui TMP1, ((LJ_KEYINDEX - (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)) >> 12) & 0xfffff
++ | bnez TMP3, >5
++ | add PC, TMP0, TMP2
++ | addi TMP1, TMP1, (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)
++ | slli TMP1, TMP1, 32
++ | sd TMP1, -8(RA)
++ |1:
++ | ins_next
++ |5: // Despecialize bytecode if any of the checks fail.
++ | li TMP3, BC_JMP
++ | li TMP1, BC_ITERC
++ | sb TMP3, -4+OFS_OP(PC)
++ | add PC, TMP0, TMP2
++ |.if JIT
++ | lb TMP0, OFS_OP(PC)
++ | li TMP3, BC_ITERN
++ | lhu TMP2, OFS_RD(PC)
++ | bne TMP0, TMP3, >6
++ |.endif
++ | sb TMP1, OFS_OP(PC)
++ | j <1
++ |.if JIT
++ |6: // Unpatch JLOOP.
++ | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL.
++ | slliw TMP2, TMP2, 3
++ | add TMP0, TMP0, TMP2
++ | ld TRACE:TMP2, 0(TMP0)
++ | lw TMP0, TRACE:TMP2->startins
++ | andi TMP0, TMP0, -256
++ | or TMP0, TMP0, TMP1
++ | sw TMP0, 0(PC)
++ | j <1
++ |.endif
++ break;
++
++ case BC_VARG:
++ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
++ | ld TMP0, FRAME_PC(BASE)
++ | decode_RDtoRC8 RC, RD
++ | decode_RB8 RB, INS
++ | add RC, BASE, RC
++ | add RA, BASE, RA
++ | addi RC, RC, FRAME_VARG
++ | add TMP2, RA, RB
++ | addi TMP3, BASE, -16 // TMP3 = vtop
++ | sub RC, RC, TMP0 // RC = vbase
++ | // Note: RC may now be even _above_ BASE if nargs was < numparams.
++ | sub TMP1, TMP3, RC
++ | beqz RB, >5 // Copy all varargs?
++ | addi TMP2, TMP2, -16
++ |1: // Copy vararg slots to destination slots.
++ | ld CARG1, 0(RC)
++ | sltu TMP0, RC, TMP3
++ | addi RC, RC, 8
++ | bnez TMP0, >2
++ | mv CARG1, TISNIL
++ |2:
++ | sd CARG1, 0(RA)
++ | sltu TMP0, RA, TMP2
++ | addi RA, RA, 8
++ | bnez TMP0, <1
++ |3:
++ | ins_next
++ |
++ |5: // Copy all varargs.
++ | ld TMP0, L->maxstack
++ | li MULTRES, 8 // MULTRES = (0+1)*8
++ | blez TMP1, <3 // No vararg slots?
++ | add TMP2, RA, TMP1
++ | addi MULTRES, TMP1, 8
++ | bltu TMP0, TMP2, >7
++ |6:
++ | ld CRET1, 0(RC)
++ | addi RC, RC, 8
++ | sd CRET1, 0(RA)
++ | addi RA, RA, 8
++ | bltu RC, TMP3, <6 // More vararg slots?
++ | j <3
++ |
++ |7: // Grow stack for varargs.
++ | sd RA, L->top
++ | sub RA, RA, BASE
++ | sd BASE, L->base
++ | sub BASE, RC, BASE // Need delta, because BASE may change.
++ | sd PC, SAVE_PC(sp)
++ | srliw CARG2, TMP1, 3
++ | mv CARG1, L
++ | call_intern BC_VARG, lj_state_growstack // (lua_State *L, int n)
++ | mv RC, BASE
++ | ld BASE, L->base
++ | add RA, BASE, RA
++ | add RC, BASE, RC
++ | addi TMP3, BASE, -16
++ | j <6
++ break;
++
++ /* -- Returns ----------------------------------------------------------- */
++
++ case BC_RETM:
++ | // RA = results*8, RD = extra_nresults*8
++ | addw RD, RD, MULTRES
++ | j ->BC_RET_Z1
++ break;
++
++ case BC_RET:
++ | // RA = results*8, RD = (nresults+1)*8
++ |->BC_RET_Z1:
++ | ld PC, FRAME_PC(BASE)
++ | add RA, BASE, RA
++ | mv MULTRES, RD
++ |1:
++ | andi TMP0, PC, FRAME_TYPE
++ | xori TMP1, PC, FRAME_VARG
++ | bnez TMP0, ->BC_RETV_Z
++ |
++ |->BC_RET_Z:
++ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
++ | lw INS, -4(PC)
++ | addi TMP2, BASE, -16
++ | addi RC, RD, -8
++ | decode_RA8 TMP0, INS
++ | decode_RB8 RB, INS
++ | sub BASE, TMP2, TMP0
++ | add TMP3, TMP2, RB
++ | beqz RC, >3
++ |2:
++ | ld CRET1, 0(RA)
++ | addi RA, RA, 8
++ | addi RC, RC, -8
++ | sd CRET1, 0(TMP2)
++ | addi TMP2, TMP2, 8
++ | bnez RC, <2
++ |3:
++ | addi TMP3, TMP3, -8
++ |5:
++ | bltu TMP2, TMP3, >6
++ | ld LFUNC:TMP1, FRAME_FUNC(BASE)
++ | cleartp LFUNC:TMP1
++ | ld TMP1, LFUNC:TMP1->pc
++ | ld KBASE, PC2PROTO(k)(TMP1)
++ | ins_next
++ |
++ |6: // Fill up results with nil.
++ | sd TISNIL, 0(TMP2)
++ | addi TMP2, TMP2, 8
++ | j <5
++ |
++ |->BC_RETV_Z: // Non-standard return case.
++ | andi TMP2, TMP1, FRAME_TYPEP
++ | bxnez TMP2, ->vm_return
++ | // Return from vararg function: relocate BASE down.
++ | sub BASE, BASE, TMP1
++ | ld PC, FRAME_PC(BASE)
++ | j <1
++ break;
++
++ case BC_RET0: case BC_RET1:
++ | // RA = results*8, RD = (nresults+1)*8
++ | ld PC, FRAME_PC(BASE)
++ | add RA, BASE, RA
++ | mv MULTRES, RD
++ | andi TMP0, PC, FRAME_TYPE
++ | xori TMP1, PC, FRAME_VARG
++ | bnez TMP0, ->BC_RETV_Z
++ | lw INS, -4(PC)
++ | addi TMP2, BASE, -16
++ if (op == BC_RET1) {
++ | ld CRET1, 0(RA)
++ }
++ | decode_RB8 RB, INS
++ | decode_RA8 RA, INS
++ | sub BASE, TMP2, RA
++ if (op == BC_RET1) {
++ | sd CRET1, 0(TMP2)
++ }
++ |5:
++ | bltu RD, RB, >6
++ | ld TMP1, FRAME_FUNC(BASE)
++ | cleartp LFUNC:TMP1
++ | ld TMP1, LFUNC:TMP1->pc
++ | ins_next1
++ | ld KBASE, PC2PROTO(k)(TMP1)
++ | ins_next2
++ |
++ |6: // Fill up results with nil.
++ | addi TMP2, TMP2, 8
++ | addi RD, RD, 8
++ if (op == BC_RET1) {
++ | sd TISNIL, 0(TMP2)
++ } else {
++ | sd TISNIL, -8(TMP2)
++ }
++ | j <5
++ break;
++
++ /* -- Loops and branches ------------------------------------------------ */
++
++ case BC_FORL:
++ |.if JIT
++ | hotloop
++ |.endif
++ | // Fall through. Assumes BC_IFORL follows.
++ break;
++
++ case BC_JFORI:
++ case BC_JFORL:
++#if !LJ_HASJIT
++ break;
++#endif
++ case BC_FORI:
++ case BC_IFORL:
++ | // RA = base*8, RD = target (after end of loop or start of loop)
++ vk = (op == BC_IFORL || op == BC_JFORL);
++ | add RA, BASE, RA
++ | ld CARG1, FORL_IDX*8(RA) // CARG1 = IDX
++ | ld CARG2, FORL_STEP*8(RA) // CARG2 = STEP
++ | ld CARG3, FORL_STOP*8(RA) // CARG3 = STOP
++ | gettp CARG4, CARG1
++ | gettp CARG5, CARG2
++ | gettp CARG6, CARG3
++ if (op != BC_JFORL) {
++ | srliw RD, RD, 1
++ | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J<<2
++ | add TMP2, RD, TMP2
++ }
++ | bne CARG4, TISNUM, >3
++ | sext.w CARG4, CARG1 // start
++ | sext.w CARG3, CARG3 // stop
++ if (!vk) { // init
++ | bxne CARG6, TISNUM, ->vmeta_for
++ | bxne CARG5, TISNUM, ->vmeta_for
++ | bfextri TMP0, CARG2, 31, 31 // sign
++ | slt CARG2, CARG3, CARG4
++ | slt TMP1, CARG4, CARG3
++ | neg TMP4, TMP0
++ | and TMP1, TMP1, TMP4
++ | not TMP4, TMP4
++ | and CARG2, CARG2, TMP4
++ | or CARG2, CARG2, TMP1 // CARG2=0: +,start <= stop or -,start >= stop
++ } else {
++ | sext.w CARG5, CARG2 // step
++ | addw CARG1, CARG4, CARG5 // start + step
++ | xor TMP3, CARG1, CARG4 // y^a
++ | xor TMP1, CARG1, CARG5 // y^b
++ | and TMP3, TMP3, TMP1
++ | slt TMP1, CARG1, CARG3 // start+step < stop ?
++ | slt CARG3, CARG3, CARG1 // stop < start+step ?
++ | sltz TMP0, CARG5 // step < 0 ?
++ | sltz TMP3, TMP3 // ((y^a) & (y^b)) < 0: overflow.
++ | neg TMP4, TMP0
++ | and TMP1, TMP1, TMP4
++ | not TMP4, TMP4
++ | and CARG3, CARG3, TMP4
++ | or CARG3, CARG3, TMP1
++ | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue
++ | zext.w CARG1, CARG1
++ | settp_b CARG1, TISNUM
++ | sd CARG1, FORL_IDX*8(RA)
++ }
++ |1:
++ if (op == BC_FORI) {
++ | neg TMP4, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS
++ | and TMP2, TMP2, TMP4
++ | add PC, PC, TMP2
++ } else if (op == BC_JFORI) {
++ | add PC, PC, TMP2
++ | lhu RD, -4+OFS_RD(PC)
++ } else if (op == BC_IFORL) {
++ | addi TMP4, CARG2, -1 // CARG2!=0: next INS; CARG2==0: jump back
++ | and TMP2, TMP2, TMP4
++ | add PC, PC, TMP2
++ }
++ | ins_next1
++ | sd CARG1, FORL_EXT*8(RA)
++ |2:
++ if (op == BC_JFORI) {
++ | decode_RD8b RD
++ | beqz CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop
++ } else if (op == BC_JFORL) {
++ | beqz CARG2, =>BC_JLOOP
++ }
++ | ins_next2
++ |
++ |3: // FP loop.
++ | fld FTMP0, FORL_IDX*8(RA) // start
++ | fld FTMP1, FORL_STOP*8(RA) // stop
++ | ld TMP0, FORL_STEP*8(RA) // step
++ | sltz CARG2, TMP0 // step < 0 ?
++ | neg CARG2, CARG2
++ if (!vk) {
++ | sltiu TMP3, CARG4, LJ_TISNUM // start is number ?
++ | sltiu TMP0, CARG5, LJ_TISNUM // step is number ?
++ | sltiu TMP1, CARG6, LJ_TISNUM // stop is number ?
++ | and TMP3, TMP3, TMP1
++ | and TMP0, TMP0, TMP3
++ | bxeqz TMP0, ->vmeta_for // if start or step or stop isn't number
++ | flt.d TMP3, FTMP0, FTMP1 // start < stop ?
++ | flt.d TMP4, FTMP1, FTMP0 // stop < start ?
++ | and TMP3, TMP3, CARG2
++ | not CARG2, CARG2
++ | and TMP4, TMP4, CARG2
++ | or CARG2, TMP3, TMP4 // CARG2=0:+,start<stop or -,start>stop
++ | j <1
++ } else {
++ | fld FTMP3, FORL_STEP*8(RA)
++ | fadd.d FTMP0, FTMP0, FTMP3 // start + step
++ | flt.d TMP3, FTMP0, FTMP1 // start + step < stop ?
++ | flt.d TMP4, FTMP1, FTMP0
++ | and TMP3, TMP3, CARG2
++ | not CARG2, CARG2
++ | and TMP4, TMP4, CARG2
++ | or CARG2, TMP3, TMP4
++ if (op == BC_IFORL) {
++ | addi TMP3, CARG2, -1
++ | and TMP2, TMP2, TMP3
++ | add PC, PC, TMP2
++ }
++ | fsd FTMP0, FORL_IDX*8(RA)
++ | ins_next1
++ | fsd FTMP0, FORL_EXT*8(RA)
++ | j <2
++ }
++ break;
++
++ case BC_ITERL:
++ |.if JIT
++ | hotloop
++ |.endif
++ | // Fall through. Assumes BC_IITERL follows.
++ break;
++
++ case BC_JITERL:
++#if !LJ_HASJIT
++ break;
++#endif
++ case BC_IITERL:
++ | // RA = base*8, RD = target
++ | add RA, BASE, RA
++ | ld TMP1, 0(RA)
++ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil.
++ if (op == BC_JITERL) {
++ | sd TMP1,-8(RA)
++ | j =>BC_JLOOP
++ } else {
++ | branch_RD // Otherwise save control var + branch.
++ | sd TMP1, -8(RA)
++ }
++ |1:
++ | ins_next
++ break;
++
++ case BC_LOOP:
++ | // RA = base*8, RD = target (loop extent)
++ | // Note: RA/RD is only used by trace recorder to determine scope/extent
++ | // This opcode does NOT jump, its only purpose is to detect a hot loop.
++ |.if JIT
++ | hotloop
++ |.endif
++ | // Fall through. Assumes BC_ILOOP follows.
++ break;
++
++ case BC_ILOOP:
++ | // RA = base*8, RD = target (loop extent)
++ | ins_next
++ break;
++
++ case BC_JLOOP:
++ |.if JIT
++ | // RA = base*8 (ignored), RD = traceno*8
++ | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL.
++ | add TMP0, TMP0, RD
++ | // Traces on RISC-V don't store the trace number, so use 0.
++ | sd x0, GL->vmstate
++ | ld TRACE:TMP1, 0(TMP0)
++ | sd BASE, GL->jit_base // store Current JIT code L->base
++ | ld TMP1, TRACE:TMP1->mcode
++ | sd L, GL->tmpbuf.L
++ | jr TMP1
++ |.endif
++ break;
++
++ case BC_JMP:
++ | // RA = base*8 (only used by trace recorder), RD = target
++ | branch_RD // PC + (jump - 0x8000)<<2
++ | ins_next
++ break;
++
++ /* -- Function headers -------------------------------------------------- */
++
++ case BC_FUNCF:
++ |.if JIT
++ | hotcall
++ |.endif
++ case BC_FUNCV: /* NYI: compiled vararg functions. */
++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
++ break;
++
++ case BC_JFUNCF:
++#if !LJ_HASJIT
++ break;
++#endif
++ case BC_IFUNCF:
++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
++ | ld TMP2, L->maxstack
++ | lbu TMP1, -4+PC2PROTO(numparams)(PC)
++ | ld KBASE, -4+PC2PROTO(k)(PC)
++ | bxltu TMP2, RA, ->vm_growstack_l
++ | slliw TMP1, TMP1, 3 // numparams*8
++ |2:
++ | bltu NARGS8:RC, TMP1, >3 // Check for missing parameters.
++ if (op == BC_JFUNCF) {
++ | decode_RD8 RD, INS
++ | j =>BC_JLOOP
++ } else {
++ | ins_next
++ }
++ |
++ |3: // Clear missing parameters.
++ | add TMP0, BASE, NARGS8:RC
++ | sd TISNIL, 0(TMP0)
++ | addiw NARGS8:RC, NARGS8:RC, 8
++ | j <2
++ break;
++
++ case BC_JFUNCV:
++#if !LJ_HASJIT
++ break;
++#endif
++ | NYI // NYI: compiled vararg functions
++ break; /* NYI: compiled vararg functions. */
++
++ case BC_IFUNCV:
++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
++ | li TMP0, LJ_TFUNC
++ | add TMP1, BASE, RC
++ | ld TMP2, L->maxstack
++ | settp LFUNC:RB, TMP0
++ | add TMP0, RA, RC
++ | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
++ | addi TMP2, TMP2, -8
++ | addi TMP3, RC, 16+FRAME_VARG
++ | ld KBASE, -4+PC2PROTO(k)(PC)
++ | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG.
++ | bxgeu TMP0, TMP2, ->vm_growstack_l
++ | lbu TMP2, -4+PC2PROTO(numparams)(PC)
++ | mv RA, BASE
++ | mv RC, TMP1
++ | ins_next1
++ | addi BASE, TMP1, 16
++ | beqz TMP2, >2
++ |1:
++ | ld TMP0, 0(RA)
++ | sltu CARG2, RA, RC // Less args than parameters?
++ | mv CARG1, TMP0
++ | addi RA, RA, 8
++ | addi TMP1, TMP1, 8
++ | addiw TMP2, TMP2, -1
++ | beqz CARG2, >3
++ | neg TMP4, CARG2 // Clear old fixarg slot (help the GC).
++ | and TMP3, TISNIL, TMP4
++ | not TMP4, TMP4
++ | and CARG1, CARG1, TMP4
++ | or CARG1, CARG1, TMP3
++ | sd CARG1, -8(RA)
++ | sd TMP0, 8(TMP1)
++ | bnez TMP2, <1
++ |2:
++ | ins_next2
++ |3:
++ | neg TMP4, CARG2 // Clear missing fixargs.
++ | and TMP0, TMP0, TMP4
++ | not TMP4, TMP4
++ | and TMP3, TISNIL, TMP4
++ | or TMP0, TMP0, TMP3
++ | sd TMP0, 8(TMP1)
++ | bnez TMP2, <1
++ | j <2
++ break;
++
++ case BC_FUNCC:
++ case BC_FUNCCW:
++ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
++ if (op == BC_FUNCC) {
++ | ld CARG4, CFUNC:RB->f
++ } else {
++ | ld CARG4, GL->wrapf
++ }
++ | add TMP1, RA, NARGS8:RC
++ | ld TMP2, L->maxstack
++ | add RC, BASE, NARGS8:RC
++ | sd BASE, L->base // base of currently excuting function
++ | sd RC, L->top
++ | bxgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack.
++ | li_vmstate C // li TMP0, ~LJ_VMST_C
++ if (op == BC_FUNCCW) {
++ | ld CARG2, CFUNC:RB->f
++ }
++ | mv CARG1, L
++ | st_vmstate // sw TMP0, GL->vmstate
++ | jalr CARG4 // (lua_State *L [, lua_CFunction f])
++ | // Returns nresults.
++ | ld BASE, L->base
++ | ld TMP1, L->top
++ | sd L, GL->cur_L
++ | slliw RD, CRET1, 3
++ | li_vmstate INTERP
++ | ld PC, FRAME_PC(BASE) // Fetch PC of caller.
++ | sub RA, TMP1, RD // RA = L->top - nresults*8
++ | st_vmstate
++ | j ->vm_returnc
++ break;
++
++ /* ---------------------------------------------------------------------- */
++
++ default:
++ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
++ exit(2);
++ break;
++ }
++}
++
++/* Build the complete backend: the subroutines first, then one handler per
++** bytecode opcode in the .code_op section. Returns BC__MAX.
++*/
++static int build_backend(BuildCtx *ctx)
++{
++ int op;
++
++ /* One PC label slot per opcode. */
++ dasm_growpc(Dst, BC__MAX);
++
++ build_subroutines(ctx);
++
++ |.code_op
++ for (op = 0; op < BC__MAX; op++)
++ build_ins(ctx, (BCOp)op, op);
++
++ return BC__MAX;
++}
++
++/* Emit pseudo frame-info for all assembler functions.
++**
++** Only BUILD_elfasm mode produces output; every other mode is a no-op.
++** Writes a .debug_frame section (one CIE, one FDE for the code starting at
++** .Lbegin and, with LJ_HASFFI, one FDE for lj_vm_ffi_call) to ctx->fp.
++** Unless LJ_NO_UNWIND is set, an equivalent .eh_frame section follows.
++*/
++static void emit_asm_debug(BuildCtx *ctx)
++{
++ /* Byte offset of lj_vm_ffi_call from the start of the code. The first FDE
++ ** covers fcofs bytes from .Lbegin, the FFI FDE covers codesz - fcofs bytes.
++ */
++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
++ int i;
++ switch (ctx->mode) {
++ case BUILD_elfasm:
++ /* The section type must be spelled @progbits for the assembler. */
++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
++ fprintf(ctx->fp,
++ ".Lframe0:\n"
++ "\t.4byte .LECIE0-.LSCIE0\n"
++ ".LSCIE0:\n"
++ "\t.4byte 0xffffffff\n"
++ "\t.byte 0x1\n"
++ "\t.string \"\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -4\n"
++ "\t.byte 1\n" /* Return address is in ra. */
++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */
++ "\t.align 3\n"
++ ".LECIE0:\n\n");
++ fprintf(ctx->fp,
++ ".LSFDE0:\n"
++ "\t.4byte .LEFDE0-.LASFDE0\n"
++ ".LASFDE0:\n"
++ "\t.4byte .Lframe0\n"
++ "\t.8byte .Lbegin\n"
++ "\t.8byte %d\n"
++ "\t.byte 0xe\n\t.uleb128 %d\n"
++ "\t.byte 0x81\n\t.uleb128 2*6\n" /* offset ra */,
++ fcofs, CFRAME_SIZE);
++ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */
++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7));
++ fprintf(ctx->fp,
++ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */
++ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */);
++ for (i = 27; i >= 18; i--) /* offset f27-f18 (fs11-fs2) */
++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19));
++ fprintf(ctx->fp,
++ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */
++ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */
++ "\t.align 3\n"
++ ".LEFDE0:\n\n");
++#if LJ_HASFFI
++ /* NOTE(review): this FDE uses .4byte for address and range while the FDE
++ ** above uses .8byte -- confirm which width is intended for .debug_frame.
++ */
++ fprintf(ctx->fp,
++ ".LSFDE1:\n"
++ "\t.4byte .LEFDE1-.LASFDE1\n"
++ ".LASFDE1:\n"
++ "\t.4byte .Lframe0\n"
++ "\t.4byte lj_vm_ffi_call\n"
++ "\t.4byte %d\n"
++ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */
++ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */
++ "\t.byte 0xd\n\t.uleb128 0x12\n"
++ "\t.align 3\n"
++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
++#endif
++#if !LJ_NO_UNWIND
++ /* The section type must be spelled @progbits for the assembler. */
++ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
++ fprintf(ctx->fp,
++ ".Lframe1:\n"
++ "\t.4byte .LECIE1-.LSCIE1\n"
++ ".LSCIE1:\n"
++ "\t.4byte 0\n"
++ "\t.byte 0x1\n"
++ "\t.string \"zPR\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -4\n"
++ "\t.byte 1\n" /* Return address is in ra. */
++ "\t.uleb128 6\n" /* augmentation length */
++ "\t.byte 0x1b\n"
++ "\t.4byte lj_err_unwind_dwarf-.\n"
++ "\t.byte 0x1b\n"
++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */
++ "\t.align 2\n"
++ ".LECIE1:\n\n");
++ fprintf(ctx->fp,
++ ".LSFDE2:\n"
++ "\t.4byte .LEFDE2-.LASFDE2\n"
++ ".LASFDE2:\n"
++ "\t.4byte .LASFDE2-.Lframe1\n"
++ "\t.4byte .Lbegin-.\n"
++ "\t.4byte %d\n"
++ "\t.uleb128 0\n" /* augmentation length */
++ "\t.byte 0xe\n\t.uleb128 %d\n"
++ "\t.byte 0x81\n\t.uleb128 2*6\n", /* offset ra */
++ fcofs, CFRAME_SIZE);
++ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */
++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7));
++ fprintf(ctx->fp,
++ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */
++ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */);
++ for (i = 27; i >= 18; i--) /* offset f27-f18 (fs11-fs2) */
++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19));
++ fprintf(ctx->fp,
++ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */
++ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */
++ "\t.align 2\n"
++ ".LEFDE2:\n\n");
++#if LJ_HASFFI
++ fprintf(ctx->fp,
++ ".Lframe2:\n"
++ "\t.4byte .LECIE2-.LSCIE2\n"
++ ".LSCIE2:\n"
++ "\t.4byte 0\n"
++ "\t.byte 0x1\n"
++ "\t.string \"zR\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -4\n"
++ "\t.byte 1\n" /* Return address is in ra. */
++ "\t.uleb128 1\n" /* augmentation length */
++ "\t.byte 0x1b\n"
++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */
++ "\t.align 2\n"
++ ".LECIE2:\n\n");
++ fprintf(ctx->fp,
++ ".LSFDE3:\n"
++ "\t.4byte .LEFDE3-.LASFDE3\n"
++ ".LASFDE3:\n"
++ "\t.4byte .LASFDE3- .Lframe2\n"
++ "\t.4byte lj_vm_ffi_call-.\n"
++ "\t.4byte %d\n"
++ "\t.uleb128 0\n" /* augmentation length */
++ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */
++ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */
++ "\t.byte 0xd\n\t.uleb128 0x12\n"
++ "\t.align 2\n"
++ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
++#endif
++#endif
++ break;
++ default:
++ break;
++ }
++}
+--- a/src/jit/bcsave.lua
++++ b/src/jit/bcsave.lua
+@@ -102,6 +102,7 @@
+ mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
+ mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
+ mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
++ riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, },
+ s390x = { e = "be", b = 64, m = 22, },
+ }
+
diff -Nru luajit-2.1.0+openresty20240815/debian/patches/0004_support_loong64.patch luajit-2.1.0+openresty20240815/debian/patches/0004_support_loong64.patch
--- luajit-2.1.0+openresty20240815/debian/patches/0004_support_loong64.patch 1970-01-01 01:00:00.000000000 +0100
+++ luajit-2.1.0+openresty20240815/debian/patches/0004_support_loong64.patch 2024-11-02 10:02:32.000000000 +0100
@@ -0,0 +1,10126 @@
+From 231ff652c227f4ae08a51b8feba0364553c2cbde Mon Sep 17 00:00:00 2001
+From: Xiaolin Zhao <zhaoxiaolin at loongson.cn>
+Date: Thu, 28 Mar 2024 07:37:07 +0000
+Subject: [PATCH] Add support for LoongArch64
+
+---
+ Makefile | 1 +
+ dynasm/dasm_loongarch64.h | 443 ++++
+ dynasm/dasm_loongarch64.lua | 979 ++++++++
+ src/Makefile | 10 +-
+ src/host/buildvm.c | 2 +
+ src/host/buildvm_asm.c | 9 +
+ src/jit/bcsave.lua | 1 +
+ src/jit/dis_loongarch64.lua | 697 ++++++
+ src/lib_jit.c | 2 +
+ src/lj_arch.h | 32 +
+ src/lj_asm.c | 4 +
+ src/lj_asm_loongarch64.h | 1990 +++++++++++++++
+ src/lj_ccall.c | 152 +-
+ src/lj_ccall.h | 17 +-
+ src/lj_ccallback.c | 58 +-
+ src/lj_emit_loongarch64.h | 306 +++
+ src/lj_frame.h | 9 +
+ src/lj_gdbjit.c | 12 +
+ src/lj_jit.h | 15 +-
+ src/lj_target.h | 4 +-
+ src/lj_target_loongarch64.h | 313 +++
+ src/lj_trace.c | 6 +-
+ src/lj_vmmath.c | 3 +-
+ src/vm_loongarch64.dasc | 4625 +++++++++++++++++++++++++++++++++++
+ 24 files changed, 9674 insertions(+), 16 deletions(-)
+ create mode 100644 dynasm/dasm_loongarch64.h
+ create mode 100644 dynasm/dasm_loongarch64.lua
+ create mode 100644 src/jit/dis_loongarch64.lua
+ create mode 100644 src/lj_asm_loongarch64.h
+ create mode 100644 src/lj_emit_loongarch64.h
+ create mode 100644 src/lj_target_loongarch64.h
+ create mode 100644 src/vm_loongarch64.dasc
+
+Index: luajit-2.1.0+openresty20240815/Makefile
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/Makefile
++++ luajit-2.1.0+openresty20240815/Makefile
+@@ -101,6 +101,7 @@
+ dis_mips64.lua dis_mips64el.lua \
+ dis_mips64r6.lua dis_mips64r6el.lua \
+ dis_riscv.lua dis_riscv64.lua \
++ dis_loongarch64.lua \
+ vmdef.lua
+
+ ifeq (,$(findstring Windows,$(OS)))
+Index: luajit-2.1.0+openresty20240815/dynasm/dasm_loongarch64.h
+===================================================================
+--- /dev/null
++++ luajit-2.1.0+openresty20240815/dynasm/dasm_loongarch64.h
+@@ -0,0 +1,443 @@
++/*
++** DynASM LoongArch encoding engine.
++** Copyright (C) 2005-2022 Mike Pall. All rights reserved.
++** Released under the MIT license. See dynasm.lua for full copyright notice.
++*/
++
++#include <stddef.h>
++#include <stdarg.h>
++#include <string.h>
++#include <stdlib.h>
++
++#define DASM_ARCH "loongarch64"
++
++#ifndef DASM_EXTERN
++#define DASM_EXTERN(a,b,c,d) 0
++#endif
++
++/* Action definitions.
++** The order must match the action_names list in dasm_loongarch64.lua, which
++** derives the action numbers from the list position.
++*/
++enum {
++ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
++ /* The following actions need a buffer position. */
++ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
++ /* The following actions also have an argument. */
++ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMM2,
++ DASM__MAX
++};
++
++/* Maximum number of section buffer positions for a single dasm_put() call. */
++#define DASM_MAXSECPOS 25
++
++/* DynASM encoder status codes. Action list offset or number are or'ed in. */
++#define DASM_S_OK 0x00000000
++#define DASM_S_NOMEM 0x01000000
++#define DASM_S_PHASE 0x02000000
++#define DASM_S_MATCH_SEC 0x03000000
++#define DASM_S_RANGE_I 0x11000000
++#define DASM_S_RANGE_SEC 0x12000000
++#define DASM_S_RANGE_LG 0x13000000
++#define DASM_S_RANGE_PC 0x14000000
++#define DASM_S_RANGE_REL 0x15000000
++#define DASM_S_UNDEF_LG 0x21000000
++#define DASM_S_UNDEF_PC 0x22000000
++
++/* Macros to convert positions (8 bit section + 24 bit index). */
++#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
++#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
++#define DASM_SEC2POS(sec) ((sec)<<24)
++#define DASM_POS2SEC(pos) ((pos)>>24)
++#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
++
++/* Action list type. */
++typedef const unsigned int *dasm_ActList;
++
++/* Per-section structure. */
++typedef struct dasm_Section {
++ int *rbuf; /* Biased buffer pointer (negative section bias). */
++ int *buf; /* True buffer pointer. */
++ size_t bsize; /* Buffer size in bytes. */
++ int pos; /* Biased buffer position. */
++ int epos; /* End of biased buffer position - max single put. */
++ int ofs; /* Byte offset into section. */
++} dasm_Section;
++
++/* Core structure holding the DynASM encoding state. */
++struct dasm_State {
++ size_t psize; /* Allocated size of this structure. */
++ dasm_ActList actionlist; /* Current actionlist pointer. */
++ int *lglabels; /* Local/global chain/pos ptrs. */
++ size_t lgsize; /* Size of lglabels in bytes. */
++ int *pclabels; /* PC label chains/pos ptrs. */
++ size_t pcsize; /* Size of pclabels in bytes. */
++ void **globals; /* Array of globals. */
++ dasm_Section *section; /* Pointer to active section. */
++ size_t codesize; /* Total size of all code sections. */
++ int maxsection; /* 0 <= sectionidx < maxsection. */
++ int status; /* Status code. */
++ dasm_Section sections[1]; /* All sections. Alloc-extended. */
++};
++
++/* The size of the core structure depends on the max. number of sections. */
++#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
++
++
++/* Initialize DynASM state.
++** Grows Dst_REF from NULL to DASM_PSZ(maxsection) bytes via DASM_M_GROW
++** (defined outside this file), clears the label arrays and the globals
++** pointer and zeroes the section table. The actionlist, status and active
++** section are not set here; dasm_setup() assigns them.
++*/
++void dasm_init(Dst_DECL, int maxsection)
++{
++ dasm_State *D;
++ size_t psz = 0;
++ Dst_REF = NULL;
++ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
++ D = Dst_REF;
++ D->psize = psz;
++ D->lglabels = NULL;
++ D->lgsize = 0;
++ D->pclabels = NULL;
++ D->pcsize = 0;
++ D->globals = NULL;
++ D->maxsection = maxsection;
++ memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
++}
++
++/* Free DynASM state.
++** Releases every allocated section buffer, then the PC and local/global
++** label arrays, and finally the state structure itself.
++*/
++void dasm_free(Dst_DECL)
++{
++ dasm_State *D = Dst_REF;
++ int i;
++ for (i = 0; i < D->maxsection; i++)
++ if (D->sections[i].buf)
++ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
++ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
++ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
++ /* D is passed last because D->psize is read from the structure being freed. */
++ DASM_M_FREE(Dst, D, D->psize);
++}
++
++/* Setup global label array. Must be called before dasm_setup().
++** gl is stored as the globals array; the label chain array is grown to
++** 10+maxgl ints. The other functions in this file treat indices below 10 as
++** local labels (dasm_checkstep scans 1..9) and indices from 10 on as globals
++** (dasm_link starts at 10).
++*/
++void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
++{
++ dasm_State *D = Dst_REF;
++ D->globals = gl;
++ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
++}
++
++/* Grow PC label array. Can be called after dasm_setup(), too.
++** Requests room for maxpc labels and zeroes only the bytes between the old
++** and the new size, so existing label entries are kept.
++*/
++void dasm_growpc(Dst_DECL, unsigned int maxpc)
++{
++ dasm_State *D = Dst_REF;
++ size_t osz = D->pcsize; /* Size before growing, in bytes. */
++ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
++ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
++}
++
++/* Setup encoder.
++** Installs the action list, resets the status to DASM_S_OK, selects section 0
++** and clears all label chains. Every section restarts at its biased start
++** position with a byte offset of 0.
++*/
++void dasm_setup(Dst_DECL, const void *actionlist)
++{
++ dasm_State *D = Dst_REF;
++ int i;
++ D->actionlist = (dasm_ActList)actionlist;
++ D->status = DASM_S_OK;
++ D->section = &D->sections[0];
++ /* lglabels is cleared unconditionally, pclabels only if it was allocated. */
++ memset((void *)D->lglabels, 0, D->lgsize);
++ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
++ for (i = 0; i < D->maxsection; i++) {
++ D->sections[i].pos = DASM_SEC2POS(i);
++ /* rbuf is biased so that rbuf[pos] addresses buf[index part of pos]. */
++ D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
++ D->sections[i].ofs = 0;
++ }
++}
++
++
++/* Pass 1 check macros. With DASM_CHECKS a failed check stores the status code
++** or'ed with the action list offset in D->status and returns from the
++** enclosing function. Without DASM_CHECKS both macros expand to nothing.
++** CK tests an arbitrary condition; CKPL tests that label pointer pl lies
++** inside the lg or pc label array.
++*/
++#ifdef DASM_CHECKS
++#define CK(x, st) \
++ do { if (!(x)) { \
++ D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
++#define CKPL(kind, st) \
++ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
++ D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
++#else
++#define CK(x, st) ((void)0)
++#define CKPL(kind, st) ((void)0)
++#endif
++
++/* Range check for a DASM_IMM2 argument.
++** Returns n unchanged if it fits in 26 bits, otherwise -1.
++** NOTE(review): the 21 bit and the 26 bit branch both return n, so the first
++** test is currently redundant -- confirm whether they were meant to differ.
++*/
++static int dasm_imm2(unsigned int n)
++{
++ if ((n >> 21) == 0)
++ return n;
++ else if ((n >> 26) == 0)
++ return n;
++ else
++ return -1;
++}
++
++/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
++** start is the offset into the action list; the variadic arguments are ints,
++** one for each action from DASM_REL_PC upwards that is encountered. Results
++** are appended to the buffer of the currently active section.
++*/
++void dasm_put(Dst_DECL, int start, ...)
++{
++ va_list ap;
++ dasm_State *D = Dst_REF;
++ dasm_ActList p = D->actionlist + start;
++ dasm_Section *sec = D->section;
++ int pos = sec->pos, ofs = sec->ofs;
++ int *b;
++
++ /* Grow the section buffer up front so that one call never overruns it. */
++ if (pos >= sec->epos) {
++ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
++ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
++ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
++ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
++ }
++
++ b = sec->rbuf;
++ b[pos++] = start; /* The action list offset takes the first position. */
++
++ va_start(ap, start);
++ while (1) {
++ unsigned int ins = *p++;
++ /* Actions are words of the form 0xffAAxxxx; anything else is an instruction. */
++ unsigned int action = (ins >> 16) - 0xff00;
++ if (action >= DASM__MAX) {
++ ofs += 4;
++ } else {
++ /* Only actions from DASM_REL_PC upwards consume a variadic argument. */
++ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
++ switch (action) {
++ case DASM_STOP: goto stop;
++ case DASM_SECTION:
++ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
++ D->section = &D->sections[n]; goto stop;
++ case DASM_ESC: p++; ofs += 4; break;
++ case DASM_REL_EXT: break;
++ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
++ case DASM_REL_LG:
++ n = (ins & 2047) - 10; pl = D->lglabels + n;
++ /* Bkwd rel or global. */
++ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
++ pl += 10; n = *pl;
++ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
++ goto linkrel;
++ case DASM_REL_PC:
++ pl = D->pclabels + n; CKPL(pc, PC);
++ putrel:
++ n = *pl;
++ if (n < 0) { /* Label exists. Get label pos and store it. */
++ b[pos] = -n;
++ } else {
++ linkrel:
++ b[pos] = n; /* Else link to rel chain, anchored at label. */
++ *pl = pos;
++ }
++ pos++;
++ break;
++ case DASM_LABEL_LG:
++ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
++ case DASM_LABEL_PC:
++ pl = D->pclabels + n; CKPL(pc, PC);
++ putlabel:
++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
++ }
++ *pl = -pos; /* Label exists now. */
++ b[pos++] = ofs; /* Store pass1 offset estimate. */
++ break;
++ case DASM_IMM:
++#ifdef DASM_CHECKS
++ /* The bits removed by the scale shift below must all be zero. */
++ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
++#endif
++ n >>= ((ins>>10)&31);
++#ifdef DASM_CHECKS
++ /* Bit 15 selects the signed range check, otherwise the unsigned one. */
++ if (ins & 0x8000)
++ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
++ else
++ CK((n>>((ins>>5)&31)) == 0, RANGE_I);
++#endif
++ b[pos++] = n;
++ break;
++ case DASM_IMM2:
++ CK(dasm_imm2((unsigned int)n) != -1, RANGE_I);
++ b[pos++] = n;
++ break;
++ }
++ }
++ }
++stop:
++ va_end(ap);
++ sec->pos = pos;
++ sec->ofs = ofs;
++}
++#undef CK
++
++/* Pass 2: Link sections, shrink aligns, fix label offsets.
++** Stores the total code size in *szp and D->codesize and returns DASM_S_OK.
++** With DASM_CHECKS it first returns a pending error status or
++** DASM_S_UNDEF_PC for a PC label that is referenced but never defined; *szp
++** is 0 in those cases.
++*/
++int dasm_link(Dst_DECL, size_t *szp)
++{
++ dasm_State *D = Dst_REF;
++ int secnum;
++ int ofs = 0;
++
++#ifdef DASM_CHECKS
++ *szp = 0;
++ if (D->status != DASM_S_OK) return D->status;
++ {
++ int pc;
++ /* A positive entry is a rel chain that was never collapsed by a label. */
++ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
++ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
++ }
++#endif
++
++ { /* Handle globals not defined in this translation unit. */
++ int idx;
++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
++ int n = D->lglabels[idx];
++ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
++ }
++ }
++
++ /* Combine all code sections. No support for data sections (yet). */
++ for (secnum = 0; secnum < D->maxsection; secnum++) {
++ dasm_Section *sec = D->sections + secnum;
++ int *b = sec->rbuf;
++ int pos = DASM_SEC2POS(secnum);
++ int lastpos = sec->pos;
++
++ /* Replay each stored dasm_put() call; ofs accumulates the adjustment. */
++ while (pos != lastpos) {
++ dasm_ActList p = D->actionlist + b[pos++];
++ while (1) {
++ unsigned int ins = *p++;
++ unsigned int action = (ins >> 16) - 0xff00;
++ switch (action) {
++ case DASM_STOP: case DASM_SECTION: goto stop;
++ case DASM_ESC: p++; break;
++ case DASM_REL_EXT: break;
++ /* Pass 1 reserved the full padding; subtract the part not needed. */
++ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
++ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
++ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
++ case DASM_IMM: case DASM_IMM2: pos++; break;
++ }
++ }
++ stop: (void)0;
++ }
++ ofs += sec->ofs; /* Next section starts right after current section. */
++ }
++
++ D->codesize = ofs; /* Total size of all code sections */
++ *szp = ofs;
++ return DASM_S_OK;
++}
++
++/* Pass 3 check macro. With DASM_CHECKS a failed check returns the status code
++** or'ed with the action list offset from the enclosing function. Without
++** DASM_CHECKS it expands to nothing.
++*/
++#ifdef DASM_CHECKS
++#define CK(x, st) \
++ do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
++#else
++#define CK(x, st) ((void)0)
++#endif
++
++/* Pass 3: Encode sections.
++** Writes the machine code for all sections consecutively into buffer.
++** Returns DASM_S_OK, or DASM_S_PHASE if the number of bytes written differs
++** from the size computed by dasm_link(). With DASM_CHECKS a failed range or
++** undefined-label check returns the matching status code instead.
++*/
++int dasm_encode(Dst_DECL, void *buffer)
++{
++ dasm_State *D = Dst_REF;
++ char *base = (char *)buffer;
++ unsigned int *cp = (unsigned int *)buffer;
++ int secnum;
++
++ /* Encode all code sections. No support for data sections (yet). */
++ for (secnum = 0; secnum < D->maxsection; secnum++) {
++ dasm_Section *sec = D->sections + secnum;
++ int *b = sec->buf;
++ int *endb = sec->rbuf + sec->pos;
++
++ while (b != endb) {
++ dasm_ActList p = D->actionlist + *b++;
++ while (1) {
++ unsigned int ins = *p++;
++ unsigned int action = (ins >> 16) - 0xff00;
++ /* Actions from DASM_ALIGN upwards stored one buffer entry in pass 1. */
++ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
++ switch (action) {
++ case DASM_STOP: case DASM_SECTION: goto stop;
++ case DASM_ESC: *cp++ = *p++; break;
++ case DASM_REL_EXT:
++ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1);
++ goto patchrel;
++ case DASM_ALIGN:
++ /* Pad with the filler word 0x60000000 until cp is aligned. */
++ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
++ break;
++ case DASM_REL_LG:
++ /* n < 0 is the marker dasm_link() left for an undefined global. */
++ if (n < 0) {
++ n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4);
++ goto patchrel;
++ }
++ /* fallthrough */
++ case DASM_REL_PC:
++ CK(n >= 0, UNDEF_PC);
++ n = *DASM_POS2PTR(D, n);
++ if (ins & 2048)
++ n = (n + (int)(size_t)base) & 0x0fffffff;
++ else
++ n = n - (int)((char *)cp - base) + 4;
++ patchrel: {
++ unsigned int e = 16 + ((ins >> 12) & 15);
++ CK((n & 3) == 0 &&
++ ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL);
++ /* The word offset n>>2 is merged into the instruction just emitted. */
++ if (!(ins & 0xf800)) { /* BEQ, BNE, BLT, BGE, BLTU, BGEU */
++ cp[-1] |= (((n >> 2) & 0xffff) << 10);
++ } else if ((ins & 0x5000)) { /* BEQZ, BNEZ, BCEQZ, BCNEZ */
++ cp[-1] |= (((n >> 2) & 0xffff) << 10) | (((n >> 2) & 0x1f0000) >> 16);
++ } else if ((ins & 0xa000)) { /* B, BL */
++ cp[-1] |= (((n >> 2) & 0xffff) << 10) | (((n >> 2) & 0x3ff0000) >> 16);
++ }
++ }
++ break;
++ case DASM_LABEL_LG:
++ /* Label numbers from 20 upwards are globals; record their address. */
++ ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
++ break;
++ case DASM_LABEL_PC: break;
++ case DASM_IMM2: {
++ //cp[-1] |= ((n>>3) & 4); n &= 0x1f;
++ /* NOTE(review): the low 16 bits are shifted right by 10 here, while the
++ ** branch patching above shifts them left by 10 -- confirm this
++ ** encoding is intended.
++ */
++ unsigned int imm2n = dasm_imm2((unsigned int)n);
++ cp[-1] |= ((imm2n&0x3ff0000) | ((imm2n&0xffff))>>10);
++ }
++ break;
++ /* No fallthrough here: DASM_IMM2 ends with the break above. */
++ case DASM_IMM:
++ cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
++ break;
++ default: *cp++ = ins; break;
++ }
++ }
++ stop: (void)0;
++ }
++ }
++
++ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
++ return DASM_S_PHASE;
++ return DASM_S_OK;
++}
++#undef CK
++
++/* Get PC label offset.
++** Returns the offset stored for PC label pc if the label is defined, -1 if it
++** is referenced but undefined, and -2 if it is unused or pc is out of range.
++*/
++int dasm_getpclabel(Dst_DECL, unsigned int pc)
++{
++ dasm_State *D = Dst_REF;
++ if (pc*sizeof(int) < D->pcsize) {
++ int pos = D->pclabels[pc];
++ /* A negative entry is the negated buffer position of a defined label. */
++ if (pos < 0) return *DASM_POS2PTR(D, -pos);
++ if (pos > 0) return -1; /* Undefined. */
++ }
++ return -2; /* Unused or out of range. */
++}
++
++#ifdef DASM_CHECKS
++/* Optional sanity checker to call between isolated encoding steps.
++** Reports DASM_S_UNDEF_LG for a local label (1..9) that is still referenced
++** but undefined and resets the local labels checked before that point. If
++** secmatch >= 0, it also reports DASM_S_MATCH_SEC when the active section is
++** not section secmatch. Returns the resulting status.
++*/
++int dasm_checkstep(Dst_DECL, int secmatch)
++{
++ dasm_State *D = Dst_REF;
++ if (D->status == DASM_S_OK) {
++ int i;
++ for (i = 1; i <= 9; i++) {
++ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
++ D->lglabels[i] = 0;
++ }
++ }
++ if (D->status == DASM_S_OK && secmatch >= 0 &&
++ D->section != &D->sections[secmatch])
++ D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
++ return D->status;
++}
++#endif
++
+Index: luajit-2.1.0+openresty20240815/dynasm/dasm_loongarch64.lua
+===================================================================
+--- /dev/null
++++ luajit-2.1.0+openresty20240815/dynasm/dasm_loongarch64.lua
+@@ -0,0 +1,979 @@
++------------------------------------------------------------------------------
++-- DynASM LoongArch module.
++--
++-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
++-- See dynasm.lua for full copyright notice.
++------------------------------------------------------------------------------
++
++-- Module information:
++local _info = {
++ arch = "loongarch64",
++ description = "DynASM LoongArch64 module",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
++ author = "Mike Pall",
++ license = "MIT",
++}
++
++-- Exported glue functions for the arch-specific module.
++local _M = { _info = _info }
++
++-- Cache library functions.
++local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
++local assert, setmetatable = assert, setmetatable
++local _s = string
++local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
++local match, gmatch = _s.match, _s.gmatch
++local concat, sort = table.concat, table.sort
++local bit = bit or require("bit")
++local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
++local tohex = bit.tohex
++
++-- Inherited tables and callbacks.
++local g_opt, g_arch
++local wline, werror, wfatal, wwarn
++
++-- Action name list.
++-- CHECK: Keep this in sync with the C code!
++local action_names = {
++ "STOP", "SECTION", "ESC", "REL_EXT",
++ "ALIGN", "REL_LG", "LABEL_LG",
++ "REL_PC", "LABEL_PC", "IMM", "IMM2",
++}
++
++-- Maximum number of section buffer positions for dasm_put().
++-- CHECK: Keep this in sync with the C code!
++local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
++
++-- Action name -> action number.
++local map_action = {}
++for n,name in ipairs(action_names) do
++ map_action[name] = n-1
++end
++
++-- Action list buffer.
++local actlist = {}
++
++-- Argument list for next dasm_put(). Start with offset 0 into action list.
++local actargs = { 0 }
++
++-- Current number of section buffer positions for dasm_put().
++local secpos = 1
++
++------------------------------------------------------------------------------
++
++-- Dump action names and numbers.
++-- Writes one line per action to `out`: the name followed by its number in
++-- hexadecimal and in decimal.
++local function dumpactions(out)
++ out:write("DynASM encoding engine action codes:\n")
++ for n,name in ipairs(action_names) do
++ local num = map_action[name]
++ out:write(format(" %-10s %02X %d\n", name, num, num))
++ end
++ out:write("\n")
++end
++
++-- Write action list buffer as a huge static C array.
++-- `out` is the output file, `name` the identifier of the generated array.
++-- NOTE(review): for an empty list the STOP action is stored at index 0, but
++-- the final write reads actlist[nn] with nn == 1 -- confirm this path works.
++local function writeactions(out, name)
++ local nn = #actlist
++ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
++ out:write("static const unsigned int ", name, "[", nn, "] = {\n")
++ for i = 1,nn-1 do
++ assert(out:write("0x", tohex(actlist[i]), ",\n"))
++ end
++ -- The last element is written without a trailing comma.
++ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
++end
++
++------------------------------------------------------------------------------
++
++-- Add word to action list.
++-- `n` must be an integer in the range 0..0xffffffff, otherwise the assertion
++-- fails with "word out of range".
++local function wputxw(n)
++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
++ actlist[#actlist+1] = n
++end
++
++-- Add action to list with optional arg. Advance buffer pos, too.
++-- `action` is a name from action_names, `val` is added into the action word.
++-- `a`, if given, is appended to the argument list of the next dasm_put().
++-- `num` is the number of buffer positions to account for (default 1).
++local function waction(action, val, a, num)
++ local w = assert(map_action[action], "bad action name `"..action.."'")
++ -- Action words have the form 0xffAAxxxx, with AA being the action number.
++ wputxw(0xff000000 + w * 0x10000 + (val or 0))
++ if a then actargs[#actargs+1] = a end
++ if a or num then secpos = secpos + (num or 1) end
++end
++
++-- Flush action list (intervening C code or buffer pos overflow).
++-- Emits a dasm_put() call with the collected arguments through wline and
++-- starts a new argument list. A STOP action is appended first unless `term`
++-- is true.
++local function wflush(term)
++ if #actlist == actargs[1] then return end -- Nothing to flush.
++ if not term then waction("STOP") end -- Terminate action list.
++ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
++ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
++ secpos = 1 -- The actionlist offset occupies a buffer position, too.
++end
++
++-- Put escaped word.
++-- Words from 0xff000000 upwards share the value range used for action words
++-- by waction, so an ESC action is put in front of them.
++local function wputw(n)
++ if n >= 0xff000000 then waction("ESC") end
++ wputxw(n)
++end
++
++-- Reserve position for word.
++-- Appends an empty-string placeholder to the action list and returns its
++-- index.
++local function wpos()
++ local pos = #actlist+1
++ actlist[pos] = ""
++ return pos
++end
++
++-- Store word to reserved position.
++-- `pos` is an action list index, `n` an integer in the range 0..0xffffffff.
++local function wputpos(pos, n)
++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
++ actlist[pos] = n
++end
++
++------------------------------------------------------------------------------
++
++-- Global label name -> global label number. With auto assignment on 1st use.
++-- Numbers are handed out sequentially starting at 20; more than 2047 labels
++-- or a name that is not a plain identifier is reported through werror.
++local next_global = 20
++local map_global = setmetatable({}, { __index = function(t, name)
++ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
++ local n = next_global
++ if n > 2047 then werror("too many global labels") end
++ next_global = n + 1
++ t[name] = n -- Cache the number so __index only runs on first use.
++ return n
++end})
++
++-- Dump global labels.
++-- Writes the label names to `out`, one per line, ordered by label number.
++-- `lvl` is not used by this function.
++local function dumpglobals(out, lvl)
++ local t = {}
++ -- Invert the name -> number map to list the names in number order.
++ for name, n in pairs(map_global) do t[n] = name end
++ out:write("Global labels:\n")
++ for i=20,next_global-1 do
++ out:write(format(" %s\n", t[i]))
++ end
++ out:write("\n")
++end
++
++-- Write global label enum.
++-- Writes a C enum to `out` with one `prefix`-prefixed constant per global
++-- label in label number order, followed by a final `prefix`_MAX entry.
++local function writeglobals(out, prefix)
++ local t = {}
++ for name, n in pairs(map_global) do t[n] = name end
++ out:write("enum {\n")
++ for i=20,next_global-1 do
++ out:write(" ", prefix, t[i], ",\n")
++ end
++ out:write(" ", prefix, "_MAX\n};\n")
++end
++
++-- Write global label names.
++-- Writes a C array of strings called `name` to `out`, holding the global
++-- label names in label number order and terminated by a null pointer.
++local function writeglobalnames(out, name)
++ local t = {}
++ -- The loop variable shadows the `name` parameter only inside this loop.
++ for name, n in pairs(map_global) do t[n] = name end
++ out:write("static const char *const ", name, "[] = {\n")
++ for i=20,next_global-1 do
++ out:write(" \"", t[i], "\",\n")
++ end
++ out:write(" (const char *)0\n};\n")
++end
++
++------------------------------------------------------------------------------
++
++-- Extern label name -> extern label number. With auto assignment on 1st use.
++-- Numbers are handed out sequentially starting at 0; map_extern_ keeps the
++-- reverse mapping from number to name.
++local next_extern = 0
++local map_extern_ = {}
++local map_extern = setmetatable({}, { __index = function(t, name)
++ -- No restrictions on the name for now.
++ local n = next_extern
++ if n > 2047 then werror("too many extern labels") end
++ next_extern = n + 1
++ t[name] = n
++ map_extern_[n] = name
++ return n
++end})
++
++-- Dump extern labels.
++-- Writes the extern label names to `out`, one per line, in label number
++-- order. `lvl` is not used by this function.
++local function dumpexterns(out, lvl)
++ out:write("Extern labels:\n")
++ for i=0,next_extern-1 do
++ out:write(format(" %s\n", map_extern_[i]))
++ end
++ out:write("\n")
++end
++
++-- Write extern label names.
++-- Writes a C array of strings called `name` to `out`, holding the extern
++-- label names in label number order and terminated by a null pointer.
++local function writeexternnames(out, name)
++ out:write("static const char *const ", name, "[] = {\n")
++ for i=0,next_extern-1 do
++ out:write(" \"", map_extern_[i], "\",\n")
++ end
++ out:write(" (const char *)0\n};\n")
++end
++
++------------------------------------------------------------------------------
++
++-- Arch-specific maps.
++local map_archdef = { sp="r3", ra="r1" } -- Ext. register name -> int. name.
++
++local map_type = {} -- Type name -> { ctype, reg }
++local ctypenum = 0 -- Type number (for Dt... macros).
++
++-- Reverse defines for registers.
++-- Maps the internal names "r3" and "r1" back to "sp" and "ra" (the inverse of
++-- map_archdef); any other string is returned unchanged.
++function _M.revdef(s)
++ if s == "r3" then return "sp"
++ elseif s == "r1" then return "ra" end
++ return s
++end
++
++------------------------------------------------------------------------------
++
++-- Template strings for LoongArch instructions.
++local map_op = {
++ ["clo.w_2"] = "00001000DJ",
++ ["clz.w_2"] = "00001400DJ",
++ ["cto.w_2"] = "00001800DJ",
++ ["ctz.w_2"] = "00001c00DJ",
++ ["clo.d_2"] = "00002000DJ",
++ ["clz.d_2"] = "00002400DJ",
++ ["cto.d_2"] = "00002800DJ",
++ ["ctz.d_2"] = "00002c00DJ",
++ ["revb.2h_2"] = "00003000DJ",
++ ["revb.4h_2"] = "00003400DJ",
++ ["revb.2w_2"] = "00003800DJ",
++ ["revb.d_2"] = "00003c00DJ",
++ ["revh.2w_2"] = "00004000DJ",
++ ["revh.d_2"] = "00004400DJ",
++ ["bitrev.4b_2"] = "00004800DJ",
++ ["bitrev.8b_2"] = "00004c00DJ",
++ ["bitrev.w_2"] = "00005000DJ",
++ ["bitrev.d_2"] = "00005400DJ",
++ ["ext.w.h_2"] = "00005800DJ",
++ ["ext.w.b_2"] = "00005c00DJ",
++
++ ["add.w_3"] = "00100000DJK",
++ ["add.d_3"] = "00108000DJK",
++ ["sub.w_3"] = "00110000DJK",
++ ["sub.d_3"] = "00118000DJK",
++ slt_3 = "00120000DJK",
++ sltu_3 = "00128000DJK",
++ maskeqz_3 = "00130000DJK",
++ masknez_3 = "00138000DJK",
++
++ nor_3 = "00140000DJK",
++ and_3 = "00148000DJK",
++ or_3 = "00150000DJK",
++ xor_3 = "00158000DJK",
++ orn_3 = "00160000DJK",
++ andn_3 = "00168000DJK",
++ ["sll.w_3"] = "00170000DJK",
++ ["srl.w_3"] = "00178000DJK",
++ ["sra.w_3"] = "00180000DJK",
++ ["sll.d_3"] = "00188000DJK",
++ ["srl.d_3"] = "00190000DJK",
++ ["sra.d_3"] = "00198000DJK",
++ ["rotr.w_3"] = "001b0000DJK",
++ ["rotr.d_3"] = "001b8000DJK",
++ ["mul.w_3"] = "001c0000DJK",
++ ["mulh.w_3"] = "001c8000DJK",
++ ["mulh.wu_3"] = "001d0000DJK",
++ ["mul.d_3"] = "001d8000DJK",
++ ["mulh.d_3"] = "001e0000DJK",
++ ["mulh.du_3"] = "001e8000DJK",
++ ["mulw.d.w_3"] = "001f0000DJK",
++ ["mulw.d.wu_3"] = "001f8000DJK",
++
++ ["fabs.h_2"] = "01140000FG",
++ ["fabs.s_2"] = "01140400FG",
++ ["fabs.d_2"] = "01140800FG",
++ ["fneg.h_2"] = "01141000FG",
++ ["fneg.s_2"] = "01141400FG",
++ ["fneg.d_2"] = "01141800FG",
++ ["flogb.h_2"] = "01142000FG",
++ ["flogb.s_2"] = "01142400FG",
++ ["flogb.d_2"] = "01142800FG",
++ ["fclass.h_2"] = "01143000FG",
++ ["fclass.s_2"] = "01143400FG",
++ ["fclass.d_2"] = "01143800FG",
++ ["fsqrt.h_2"] = "01144000FG",
++ ["fsqrt.s_2"] = "01144400FG",
++ ["fsqrt.d_2"] = "01144800FG",
++ ["frecip.h_2"] = "01145000FG",
++ ["frecip.s_2"] = "01145400FG",
++ ["frecip.d_2"] = "01145800FG",
++ ["frsqrt.h_2"] = "01146000FG",
++ ["frsqrt.s_2"] = "01146400FG",
++ ["frsqrt.d_2"] = "01146800FG",
++ ["frecipe.h_2"] = "01147000FG",
++ ["frecipe.s_2"] = "01147400FG",
++ ["frecipe.d_2"] = "01147800FG",
++ ["frsqrte.h_2"] = "01148000FG",
++ ["frsqrte.s_2"] = "01148400FG",
++ ["frsqrte.d_2"] = "01148800FG",
++
++ ["fmov.h_2"] = "01149000FG",
++ ["fmov.s_2"] = "01149400FG",
++ ["fmov.d_2"] = "01149800FG",
++ ["movgr2fr.h_2"] = "0114a000FJ",
++ ["movgr2fr.w_2"] = "0114a400FJ",
++ ["movgr2fr.d_2"] = "0114a800FJ",
++ ["movgr2frh.w_2"] = "0114ac00FJ",
++ ["movfr2gr.h_2"] = "0114b000DG",
++ ["movfr2gr.s_2"] = "0114b400DG",
++ ["movfr2gr.d_2"] = "0114b800DG",
++ ["movfrh2gr.s_2"] = "0114bc00DG",
++ movgr2fcsr_2 = "0114c000SG",
++ movfcsr2gr_2 = "0114c800FR",
++ movfr2cf_2 = "0114d000EG",
++ movcf2fr_2 = "0114d400FA",
++ movgr2cf_2 = "0114d800EG",
++ movcf2gr_2 = "0114dc00DA",
++ ["fcvt.ld.d_2"] = "0114e000FG",
++ ["fcvt.ud.d_2"] = "0114e400FG",
++ ["fcvt.s.d_2"] = "01191800FG",
++ ["fcvt.d.s_2"] = "01192400FG",
++ ["ftintrm.w.s_2"] = "011a0400FG",
++ ["ftintrm.w.d_2"] = "011a0800FG",
++ ["ftintrm.l.s_2"] = "011a2400FG",
++ ["ftintrm.l.d_2"] = "011a2800FG",
++ ["ftintrp.w.s_2"] = "011a4400FG",
++ ["ftintrp.w.d_2"] = "011a4800FG",
++ ["ftintrp.l.s_2"] = "011a6400FG",
++ ["ftintrp.l.d_2"] = "011a6800FG",
++ ["ftintrz.w.s_2"] = "011a8400FG",
++ ["ftintrz.w.d_2"] = "011a8800FG",
++ ["ftintrz.l.s_2"] = "011aa400FG",
++ ["ftintrz.l.d_2"] = "011aa800FG",
++ ["ftintrne.w.s_2"] = "011ac400FG",
++ ["ftintrne.w.d_2"] = "011ac800FG",
++ ["ftintrne.l.s_2"] = "011ae400FG",
++ ["ftintrne.l.d_2"] = "011ae800FG",
++ ["ftint.w.s_2"] = "011b0400FG",
++ ["ftint.w.d_2"] = "011b0800FG",
++ ["ftint.l.s_2"] = "011b2400FG",
++ ["ftint.l.d_2"] = "011b2800FG",
++ ["ffint.s.w_2"] = "011d1000FG",
++ ["ffint.s.l_2"] = "011d1800FG",
++ ["ffint.d.w_2"] = "011d2000FG",
++ ["ffint.d.l_2"] = "011d2800FG",
++ ["frint.s_2"] = "011e4400FG",
++ ["frint.d_2"] = "011e4800FG",
++
++ ["fadd.h_3"] = "01000000FGH",
++ ["fadd.s_3"] = "01008000FGH",
++ ["fadd.d_3"] = "01010000FGH",
++ ["fsub.h_3"] = "01020000FGH",
++ ["fsub.s_3"] = "01028000FGH",
++ ["fsub.d_3"] = "01030000FGH",
++ ["fmul.h_3"] = "01040000FGH",
++ ["fmul.s_3"] = "01048000FGH",
++ ["fmul.d_3"] = "01050000FGH",
++ ["fdiv.h_3"] = "01060000FGH",
++ ["fdiv.s_3"] = "01068000FGH",
++ ["fdiv.d_3"] = "01070000FGH",
++ ["fmax.h_3"] = "01080000FGH",
++ ["fmax.s_3"] = "01088000FGH",
++ ["fmax.d_3"] = "01090000FGH",
++ ["fmin.h_3"] = "010a0000FGH",
++ ["fmin.s_3"] = "010a8000FGH",
++ ["fmin.d_3"] = "010b0000FGH",
++ ["fmaxa.h_3"] = "010c0000FGH",
++ ["fmaxa.s_3"] = "010c8000FGH",
++ ["fmaxa.d_3"] = "010d0000FGH",
++ ["fmina.h_3"] = "010e0000FGH",
++ ["fmina.s_3"] = "010e8000FGH",
++ ["fmina.d_3"] = "010f0000FGH",
++ ["fscaleb.h_3"] = "01100000FGH",
++ ["fscaleb.s_3"] = "01108000FGH",
++ ["fscaleb.d_3"] = "01110000FGH",
++ ["fcopysign.h_3"] = "01120000FGH",
++ ["fcopysign.s_3"] = "01128000FGH",
++ ["fcopysign.d_3"] = "01130000FGH",
++
++ ["fmadd.s_4"] = "08100000FGHi",
++ ["fmadd.d_4"] = "08200000FGHi",
++ ["fnmadd.d_4"] = "08a00000FGHi",
++ ["fmsub.s_4"] = "08500000FGHi",
++ ["fmsub.d_4"] = "08600000FGHi",
++ ["fnmsub.d_4"] = "08e00000FGHi",
++
++ ["alsl.w_4"] = "00040000DJKQ",
++ ["alsl.wu_4"] = "00060000DJKQ",
++ ["alsl.d_4"] = "002c0000DJKQ",
++ ["bytepick.w_4"] = "00080000DJKQ",
++ ["bytepick.d_4"] = "000c0000DJKB",
++
++ ["div.w_3"] = "00200000DJK",
++ ["mod.w_3"] = "00208000DJK",
++ ["div.wu_3"] = "00210000DJK",
++ ["mod.wu_3"] = "00218000DJK",
++ ["div.d_3"] = "00220000DJK",
++ ["mod.d_3"] = "00228000DJK",
++ ["div.du_3"] = "00230000DJK",
++ ["mod.du_3"] = "00238000DJK",
++ ["crc.w.b.w_3"] = "00240000DJK",
++ ["crc.w.h.w_3"] = "00248000DJK",
++ ["crc.w.w.w_3"] = "00250000DJK",
++ ["crc.w.d.w_3"] = "00258000DJK",
++ ["crcc.w.b.w_3"] = "00260000DJK",
++ ["crcc.w.h.w_3"] = "00268000DJK",
++ ["crcc.w.w.w_3"] = "00270000DJK",
++ ["crcc.w.d.w_3"] = "00278000DJK",
++
++ break_1 = "002a0000C",
++ syscall_1 = "002b0000C",
++
++ ["slli.w_3"] = "00408000DJU",
++ ["slli.d_3"] = "00410000DJV",
++ ["srli.w_3"] = "00448000DJU",
++ ["srli.d_3"] = "00450000DJV",
++ ["srai.w_3"] = "00488000DJU",
++ ["srai.d_3"] = "00490000DJV",
++ ["rotri.w_3"] = "004c8000DJU",
++ ["rotri.d_3"] = "004d0000DJV",
++
++ ["bstrins.w_4"] = "00600000DJMU",
++ ["bstrpick.w_4"] = "00608000DJMU",
++ ["bstrins.d_4"] = "00800000DJNV",
++ ["bstrpick.d_4"] = "00c00000DJNV",
++ slti_3 = "02000000DJX",
++ sltui_3 = "02400000DJX",
++ ["addi.w_3"] = "02800000DJX",
++ ["addi.d_3"] = "02c00000DJX",
++ ["lu52i.d_3"] = "03000000DJX",
++ andi_3 = "03400000DJT",
++ ori_3 = "03800000DJT",
++ xori_3 = "03c00000DJT",
++ ["lu12i.w_2"] = "14000000DZ",
++ ["lu32i.d_2"] = "16000000DZ",
++ pcaddi_2 = "18000000DZ",
++ pcalau12i_2 = "1a000000DZ",
++ pcaddu12i_2 = "1c000000DZ",
++ pcaddu18i_2 = "1e000000DZ",
++
++ ["ldx.b_3"] = "38000000DJK",
++ ["ldx.h_3"] = "38040000DJK",
++ ["ldx.w_3"] = "38080000DJK",
++ ["ldx.d_3"] = "380c0000DJK",
++ ["stx.b_3"] = "38100000DJK",
++ ["stx.h_3"] = "38140000DJK",
++ ["stx.w_3"] = "38180000DJK",
++ ["stx.d_3"] = "381c0000DJK",
++ ["ldx.bu_3"] = "38200000DJK",
++ ["ldx.hu_3"] = "38240000DJK",
++ ["ldx.wu_3"] = "38280000DJK",
++ ["fldx.s_3"] = "38300000FJK",
++ ["fldx.d_3"] = "38340000FJK",
++ ["fstx.s_3"] = "38380000FJK",
++ ["fstx.d_3"] = "383c0000FJK",
++ ["fldgt.s_3"] = "38740000FJK",
++ ["fldgt.d_3"] = "38748000FJK",
++ ["fldle.s_3"] = "38750000FJK",
++ ["fldle.d_3"] = "38758000FJK",
++ ["fstgt.s_3"] = "38760000FJK",
++ ["fstgt.d_3"] = "38768000FJK",
++ ["fstle.s_3"] = "38770000FJK",
++ ["fstle.d_3"] = "38778000FJK",
++ ["ldgt.b_3"] = "38780000DJK",
++ ["ldgt.h_3"] = "38788000DJK",
++ ["ldgt.w_3"] = "38790000DJK",
++ ["ldgt.d_3"] = "38798000DJK",
++ ["ldle.b_3"] = "387a0000DJK",
++ ["ldle.h_3"] = "387a8000DJK",
++ ["ldle.w_3"] = "387b0000DJK",
++ ["ldle.d_3"] = "387b8000DJK",
++ ["stgt.b_3"] = "387c0000DJK",
++ ["stgt.h_3"] = "387c8000DJK",
++ ["stgt.w_3"] = "387d0000DJK",
++ ["stgt.d_3"] = "387d8000DJK",
++ ["stle.b_3"] = "387e0000DJK",
++ ["stle.h_3"] = "387e8000DJK",
++ ["stle.w_3"] = "387f0000DJK",
++ ["stle.d_3"] = "387f8000DJK",
++
++ ["ll.w_3"] = "20000000DJW",
++ ["sc.w_3"] = "21000000DJW",
++ ["ll.d_3"] = "22000000DJW",
++ ["sc.d_3"] = "23000000DJW",
++ ["ldptr.w_3"] = "24000000DJW",
++ ["stptr.w_3"] = "25000000DJW",
++ ["ldptr.d_3"] = "26000000DJW",
++ ["stptr.d_3"] = "27000000DJW",
++
++ ["ld.b_2"] = "28000000Do",
++ ["ld.h_2"] = "28400000Do",
++ ["ld.w_2"] = "28800000Do",
++ ["ld.d_2"] = "28c00000Do",
++ ["st.b_2"] = "29000000Do",
++ ["st.h_2"] = "29400000Do",
++ ["st.w_2"] = "29800000Do",
++ ["st.d_2"] = "29c00000Do",
++ ["ld.bu_2"] = "2a000000Do",
++ ["ld.hu_2"] = "2a400000Do",
++ ["ld.wu_2"] = "2a800000Do",
++ ["ldx.d_3"] = "380c0000DJK",
++ ["stx.d_3"] = "381c0000DJK",
++ ["fld.s_2"] = "2b000000Fo",
++ ["fst.s_2"] = "2b400000Fo",
++ ["fld.d_2"] = "2b800000Fo",
++ ["fst.d_2"] = "2bc00000Fo",
++
++ ["fcmp.caf.s_3"] = "0c100000EGH",
++ ["fcmp.saf.s_3"] = "0c108000EGH",
++ ["fcmp.clt.s_3"] = "0c110000EGH",
++ ["fcmp.slt.s_3"] = "0c118000EGH",
++ ["fcmp.ceq.s_3"] = "0c120000EGH",
++ ["fcmp.seq.s_3"] = "0c128000EGH",
++ ["fcmp.cle.s_3"] = "0c130000EGH",
++ ["fcmp.sle.s_3"] = "0c138000EGH",
++ ["fcmp.cun.s_3"] = "0c140000EGH",
++ ["fcmp.sun.s_3"] = "0c148000EGH",
++ ["fcmp.cult.s_3"] = "0c150000EGH",
++ ["fcmp.sult.s_3"] = "0c158000EGH",
++ ["fcmp.cueq.s_3"] = "0c160000EGH",
++ ["fcmp.sueq.s_3"] = "0c168000EGH",
++ ["fcmp.cule.s_3"] = "0c170000EGH",
++ ["fcmp.sule.s_3"] = "0c178000EGH",
++ ["fcmp.cne.s_3"] = "0c180000EGH",
++ ["fcmp.sne.s_3"] = "0c188000EGH",
++ ["fcmp.cor.s_3"] = "0c1a0000EGH",
++ ["fcmp.sor.s_3"] = "0c1a8000EGH",
++ ["fcmp.cune.s_3"] = "0c1c0000EGH",
++ ["fcmp.sune.s_3"] = "0c1c8000EGH",
++ ["fcmp.caf.d_3"] = "0c200000EGH",
++ ["fcmp.saf.d_3"] = "0c208000EGH",
++ ["fcmp.clt.d_3"] = "0c210000EGH",
++ ["fcmp.slt.d_3"] = "0c218000EGH",
++ ["fcmp.ceq.d_3"] = "0c220000EGH",
++ ["fcmp.seq.d_3"] = "0c228000EGH",
++ ["fcmp.cle.d_3"] = "0c230000EGH",
++ ["fcmp.sle.d_3"] = "0c238000EGH",
++ ["fcmp.cun.d_3"] = "0c240000EGH",
++ ["fcmp.sun.d_3"] = "0c248000EGH",
++ ["fcmp.cult.d_3"] = "0c250000EGH",
++ ["fcmp.sult.d_3"] = "0c258000EGH",
++ ["fcmp.cueq.d_3"] = "0c260000EGH",
++ ["fcmp.sueq.d_3"] = "0c268000EGH",
++ ["fcmp.cule.d_3"] = "0c270000EGH",
++ ["fcmp.sule.d_3"] = "0c278000EGH",
++ ["fcmp.cne.d_3"] = "0c280000EGH",
++ ["fcmp.sne.d_3"] = "0c288000EGH",
++ ["fcmp.cor.d_3"] = "0c2a0000EGH",
++ ["fcmp.sor.d_3"] = "0c2a8000EGH",
++ ["fcmp.cune.d_3"] = "0c2c0000EGH",
++ ["fcmp.sune.d_3"] = "0c2c8000EGH",
++
++ fsel_4 = "0d000000FGHI",
++
++ ["addu16i.d_3"] = "10000000DJY",
++ beqz_2 = "40000000JL",
++ bnez_2 = "44000000JL",
++ bceqz_2 = "48000000AL",
++ bcnez_2 = "48000100AL",
++ jirl_3 = "4c000000DJa",
++ b_1 = "50000000P",
++ bl_1 = "54000000P",
++ beq_3 = "58000000JDO",
++ bne_3 = "5c000000JDO",
++ blt_3 = "60000000JDO",
++ bge_3 = "64000000JDO",
++ bltu_3 = "68000000JDO",
++ bgeu_3 = "6c000000JDO",
++}
++
++------------------------------------------------------------------------------
++
++-- Parse a general-purpose register operand.
++-- Accepts `rN` (N = 0..31), a type name with a default register, or
++-- `type:rN` to override the register of a type.
++-- Returns the register number and the type entry (nil for plain registers).
++local function parse_gpr(expr)
++  local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
++  local key = tname or expr
++  local tp = map_type[key]
++  local regname = expr
++  if tp then
++    -- A typed operand resolves to the override or the type's own register.
++    regname = ovreg or tp.reg
++    if not regname then
++      werror("type `"..key.."' needs a register override")
++    end
++  end
++  local digits = match(regname, "^r([1-3]?[0-9])$")
++  local num = digits and tonumber(digits)
++  if num and num <= 31 then return num, tp end
++  werror("bad register name `"..regname.."'")
++end
++
++-- Parse a floating-point register operand `fN` (N = 0..31) and return N.
++local function parse_fpr(expr)
++  local digits = match(expr, "^f([1-3]?[0-9])$")
++  local num = digits and tonumber(digits)
++  if num and num <= 31 then return num end
++  werror("bad register name `"..expr.."'")
++end
++
++-- Parse an FP control/status register operand `fcsrN` (N = 0..3) and return N.
++local function parse_fcsr(expr)
++  local idx = match(expr, "^fcsr([0-3])$")
++  if idx then return tonumber(idx) end
++  werror("bad register name `"..expr.."'")
++end
++
++-- Parse an FP condition flag register operand `fccN` (N = 0..7) and return N.
++local function parse_fcc(expr)
++  local idx = match(expr, "^fcc([0-7])$")
++  if idx then return tonumber(idx) end
++  werror("bad register name `"..expr.."'")
++end
++
++-- Parse an immediate operand and return it positioned inside the opcode word.
++--   imm:    operand text.
++--   bits:   width of the immediate field.
++--   shift:  bit position of the field within the instruction word.
++--   scale:  number of low bits that must be zero (the value is stored >> scale).
++--   signed: true if negative values are accepted (encoded modulo 2^bits).
++--   action: action name used for non-constant operands (defaults to "IMM").
++-- A numeric literal is range-checked and encoded directly. Any other operand
++-- that does not look like a register is handed to waction() together with a
++-- packed field descriptor, and 0 is returned for the opcode bits.
++local function parse_imm(imm, bits, shift, scale, signed, action)
++ local n = tonumber(imm)
++ if n then
++ local m = sar(n, scale)
++ -- Only accept values that survive the scale round-trip unchanged.
++ if shl(m, scale) == n then
++ if signed then
++ local s = sar(m, bits-1)
++ -- NOTE(review): s == 1 additionally accepts [2^(bits-1), 2^bits) for signed
++ -- fields, i.e. the raw bit pattern; looks deliberate -- confirm.
++ if s == 0 or s == 1 then return shl(m, shift)
++ elseif s == -1 then return shl(m + shl(1, bits), shift) end
++ else
++ if sar(m, bits) == 0 then return shl(m, shift) end
++ end
++ end
++ werror("out of range immediate1 `"..imm.."'")
++ elseif match(imm, "^[rf]([1-3]?[0-9])$") or
++ match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
++ werror("expected immediate operand, got register")
++ else
++ -- Descriptor layout: signed flag (32768), scale << 10, bits << 5, shift.
++ waction(action or "IMM",
++ (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
++ return 0
++ end
++end
++
++-- Parse a split immediate of width `i` (per the name, 21 or 26 bits).
++-- Numeric literals are range-checked as signed values; any other operand is
++-- passed to waction("IMM2", ...) and 0 is returned.
++-- NOTE(review): `sub` is used as a string function elsewhere in this file
++-- (e.g. sub(label, 1, 2)), so sub(m, 1, 16) below would slice the decimal
++-- text of m rather than extract bits -- confirm and fix if this path is used.
++-- NOTE(review): the s == 0 and s == -1 branches are identical.
++local function parse_imm21or26(imm, i)
++ local n = tonumber(imm)
++ if n then
++ -- signed
++ local m = sar(n, 0)
++ if shl(m, 0) == n then
++ local s = sar(m, i-1)
++ if s == 0 then
++ return shl(sub(m, 1, 16), 10) + shl(sub(m, 17, i), 0)
++ elseif s == -1 then
++ return shl(sub(m, 1, 16), 10) + shl(sub(m, 17, i), 0)
++ end
++ end
++ werror("out of range immediate2 `"..imm.."'")
++ else
++ waction("IMM2", 0, imm)
++ return 0
++ end
++end
++
++-- Parse a memory displacement operand and return its opcode bits.
++-- Two forms are accepted:
++--   imm(reg)       base register at bit 5 plus a signed 12 bit immediate at
++--                  bit 10; `extern name(reg)` emits a REL_EXT action instead.
++--   type-reg tail  a typed register followed by more text; the text is wrapped
++--                  with the type's ctypefmt and emitted as an IMM action.
++local function parse_disp(disp)
++ local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
++ if imm then
++ local r = shl(parse_gpr(reg), 5)
++ local extname = match(imm, "^extern%s+(%S+)$")
++ if extname then
++ waction("REL_EXT", map_extern[extname], nil, 1)
++ return r
++ else
++ return r + parse_imm(imm, 12, 10, 0, true)
++ end
++ end
++ local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
++ if reg and tailr ~= "" then
++ local r, tp = parse_gpr(reg)
++ if tp then
++ -- Same descriptor layout as parse_imm: signed, 12 bits, shift 10.
++ waction("IMM", 32768+12*32+10, format(tp.ctypefmt, tailr))
++ return shl(r, 5)
++ end
++ end
++ werror("bad displacement `"..disp.."'")
++end
++
++-- Parse a label operand and return the action mode plus its arguments.
++--   label: operand text.
++--   def:   true when parsing a label definition, false for a reference.
++-- Returns "PC", 0, expr for `=>expr`; "LG", value for `->name` and for
++-- numeric local labels; "EXT", value for `extern name` (references only).
++-- Anything else is reported through werror().
++local function parse_label(label, def)
++ local prefix = sub(label, 1, 2)
++ -- =>label (pc label reference)
++ if prefix == "=>" then
++ return "PC", 0, sub(label, 3)
++ end
++ -- ->name (global label reference)
++ if prefix == "->" then
++ return "LG", map_global[sub(label, 3)]
++ end
++ if def then
++ -- [1-9] (local label definition)
++ if match(label, "^[1-9]$") then
++ return "LG", 10+tonumber(label)
++ end
++ else
++ -- [<>][1-9] (local label reference)
++ local dir, lnum = match(label, "^([<>])([1-9])$")
++ if dir then -- Fwd: 1-9, Bkwd: 11-19.
++ -- lnum is a string here; the addition coerces it to a number.
++ return "LG", lnum + (dir == ">" and 0 or 10)
++ end
++ -- extern label (extern label reference)
++ local extname = match(label, "^extern%s+(%S+)$")
++ if extname then
++ return "EXT", map_extern[extname]
++ end
++ end
++ werror("bad label `"..label.."'")
++end
++
++-- Classify a branch opcode by its major opcode (bits 26..31) and return the
++-- tag that the template handler adds to the label value of a REL_ action.
++local function branch_type(op)
++  local major = shr(op, 26)
++  if major >= 0x16 and major <= 0x1b then
++    return 0 -- BEQ, BNE, BLT, BGE, BLTU, BGEU
++  elseif major >= 0x10 and major <= 0x12 then
++    return 0x5000 -- BEQZ, BNEZ, BCEQZ, BCNEZ
++  elseif band(op, 0xf8000000) == 0x50000000 then
++    return 0xa000 --B, BL
++  else
++    assert(false, "unknown branch type")
++  end
++end
++
++------------------------------------------------------------------------------
++
++-- Handle opcodes defined with template strings.
++-- The first 8 characters of a template are the base opcode in hex. Every
++-- following character describes one operand: it selects how params[n] is
++-- parsed and at which bit position the result is added to the opcode.
++-- Called without params, returns the operand part of the template.
++map_op[".template__"] = function(params, template, nparams)
++  if not params then return sub(template, 9) end
++  local op = tonumber(sub(template, 1, 8), 16)
++  local n = 1
++
++  -- Limit number of section buffer positions used by a single dasm_put().
++  -- A single opcode needs a maximum of 2 positions (ins/ext).
++  if secpos+2 > maxsecpos then wflush() end
++  local pos = wpos()
++
++  -- Process each character.
++  for p in gmatch(sub(template, 9), ".") do
++    -- General-purpose registers at bit 0, 5 and 10.
++    if p == "D" then
++      op = op + shl(parse_gpr(params[n]), 0); n = n + 1
++    elseif p == "J" then
++      op = op + shl(parse_gpr(params[n]), 5); n = n + 1
++    elseif p == "K" then
++      op = op + shl(parse_gpr(params[n]), 10); n = n + 1
++    -- Floating-point registers at bit 0, 5, 10 and 15.
++    elseif p == "F" then
++      op = op + shl(parse_fpr(params[n]), 0); n = n + 1
++    elseif p == "G" then
++      op = op + shl(parse_fpr(params[n]), 5); n = n + 1
++    elseif p == "H" then
++      op = op + shl(parse_fpr(params[n]), 10); n = n + 1
++    elseif p == "i" then
++      op = op + shl(parse_fpr(params[n]), 15); n = n + 1
++    -- FP condition flag registers at bit 15, 5 and 0.
++    elseif p == "I" then
++      op = op + shl(parse_fcc(params[n]), 15); n = n + 1
++    elseif p == "A" then
++      op = op + shl(parse_fcc(params[n]), 5); n = n + 1
++    elseif p == "E" then
++      op = op + shl(parse_fcc(params[n]), 0); n = n + 1
++    -- FCSR registers at bit 0 and 5. These must dispatch on the template
++    -- character `p`; comparing the numeric opcode `op` can never match.
++    elseif p == "S" then
++      op = op + shl(parse_fcsr(params[n]), 0); n = n + 1
++    elseif p == "R" then
++      op = op + shl(parse_fcsr(params[n]), 5); n = n + 1
++    -- Immediates: parse_imm(text, bits, shift, scale, signed).
++    elseif p == "U" then
++      op = op + parse_imm(params[n], 5, 10, 0, false); n = n + 1
++    elseif p == "V" then
++      op = op + parse_imm(params[n], 6, 10, 0, false); n = n + 1
++    elseif p == "W" then
++      op = op + parse_imm(params[n], 14, 10, 0, true); n = n + 1
++    elseif p == "X" then
++      op = op + parse_imm(params[n], 12, 10, 0, true); n = n + 1
++    elseif p == "o" then
++      op = op + parse_disp(params[n]); n = n + 1
++    elseif p == "Y" then
++      op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1
++    elseif p == "Z" then
++      op = op + parse_imm(params[n], 20, 5, 0, true); n = n + 1
++    elseif p == "T" then
++      op = op + parse_imm(params[n], 12, 10, 0, false); n = n + 1
++    elseif p == "C" then
++      op = op + parse_imm(params[n], 15, 0, 0, false); n = n + 1
++    elseif p == "Q" then
++      op = op + parse_imm(params[n], 2, 15, 0, false); n = n + 1
++    elseif p == "B" then
++      op = op + parse_imm(params[n], 3, 15, 0, false); n = n + 1
++    elseif p == "M" then
++      op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1
++    elseif p == "N" then
++      op = op + parse_imm(params[n], 6, 16, 0, false); n = n + 1
++    -- Branch targets: emit a REL_ action tagged with the branch class.
++    elseif p == "O" or p == "L" or p == "P" then
++      local mode, m, s = parse_label(params[n], false)
++      local v = branch_type(op)
++      waction("REL_"..mode, m+v, s, 1)
++      n = n + 1
++    elseif p == "a" then
++      op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1
++    else
++      assert(false)
++    end
++  end
++  wputpos(pos, op)
++end
++
++------------------------------------------------------------------------------
++
++-- Pseudo-opcode to mark the position where the action list is to be emitted.
++-- Without params, returns the operand description.
++map_op[".actionlist_1"] = function(params)
++  if not params then return "cvar" end
++  local cvar = params[1] -- No syntax check. You get to keep the pieces.
++  local function emit(out) writeactions(out, cvar) end
++  wline(emit)
++end
++
++-- Pseudo-opcode to mark the position where the global enum is to be emitted.
++-- Without params, returns the operand description.
++map_op[".globals_1"] = function(params)
++  if not params then return "prefix" end
++  local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
++  local function emit(out) writeglobals(out, enumprefix) end
++  wline(emit)
++end
++
++-- Pseudo-opcode to mark the position where the global names are to be emitted.
++-- Without params, returns the operand description.
++map_op[".globalnames_1"] = function(params)
++  if not params then return "cvar" end
++  local cvar = params[1] -- No syntax check. You get to keep the pieces.
++  local function emit(out) writeglobalnames(out, cvar) end
++  wline(emit)
++end
++
++-- Pseudo-opcode to mark the position where the extern names are to be emitted.
++-- Without params, returns the operand description.
++map_op[".externnames_1"] = function(params)
++  if not params then return "cvar" end
++  local cvar = params[1] -- No syntax check. You get to keep the pieces.
++  local function emit(out) writeexternnames(out, cvar) end
++  wline(emit)
++end
++
++------------------------------------------------------------------------------
++
++-- Label pseudo-opcode (converted from trailing colon form).
++-- Without params, returns the operand description. Otherwise parses the
++-- label as a definition and emits the matching LABEL_<mode> action.
++map_op[".label_1"] = function(params)
++ if not params then return "[1-9] | ->global | =>pcexpr" end
++ if secpos+1 > maxsecpos then wflush() end
++ local mode, n, s = parse_label(params[1], true)
++ -- NOTE(review): parse_label() only returns "EXT" when def is false, so this
++ -- check looks purely defensive -- confirm.
++ if mode == "EXT" then werror("bad label definition") end
++ waction("LABEL_"..mode, n, s, 1)
++end
++
++------------------------------------------------------------------------------
++
++-- Pseudo-opcodes for data storage.
++-- Emits every operand as one word via wputw(). Operands must be numeric
++-- literals; negative values are wrapped by adding 2^32.
++map_op[".long_*"] = function(params)
++ if not params then return "imm..." end
++ for _,p in ipairs(params) do
++ local n = tonumber(p)
++ if not n then werror("bad immediate `"..p.."'") end
++ if n < 0 then n = n + 2^32 end
++ wputw(n)
++ -- Flush after each word if the section buffer is close to its limit.
++ if secpos+2 > maxsecpos then wflush() end
++ end
++end
++
++-- Alignment pseudo-opcode.
++-- Without params, returns the operand description. The operand must be a
++-- power of 2 in the range 2 ... 256; anything else is rejected.
++map_op[".align_1"] = function(params)
++  if not params then return "numpow2" end
++  if secpos+1 > maxsecpos then wflush() end
++  local align = tonumber(params[1])
++  if align then
++    -- Compare against 2^1 ... 2^8 in turn.
++    local pow = 2
++    for _ = 1, 8 do
++      if align == pow then
++        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
++        return
++      end
++      pow = pow * 2
++    end
++  end
++  werror("bad alignment")
++end
++
++------------------------------------------------------------------------------
++
++-- Pseudo-opcode for (primitive) type definitions (map to C types).
++-- Operands: name, ctype and an optional default register. Registers the type
++-- in map_type, adds a `#name` define for its size and emits a DtN() offset
++-- macro for it. Duplicate or malformed type names are rejected.
++map_op[".type_3"] = function(params, nparams)
++ if not params then
++ return nparams == 2 and "name, ctype" or "name, ctype, reg"
++ end
++ local name, ctype, reg = params[1], params[2], params[3]
++ if not match(name, "^[%a_][%w_]*$") then
++ werror("bad type name `"..name.."'")
++ end
++ local tp = map_type[name]
++ if tp then
++ werror("duplicate type `"..name.."'")
++ end
++ -- Add #type to defines. A bit unclean to put it in map_archdef.
++ map_archdef["#"..name] = "sizeof("..ctype..")"
++ -- Add new type and emit shortcut define.
++ local num = ctypenum + 1
++ map_type[name] = {
++ ctype = ctype,
++ ctypefmt = format("Dt%X(%%s)", num),
++ reg = reg,
++ }
++ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
++ ctypenum = num
++end
++-- The two-operand form (no default register) shares the same handler.
++map_op[".type_2"] = map_op[".type_3"]
++
++-- Dump type definitions.
++-- Writes one line per entry of map_type (name, C type, default register),
++-- sorted by type name.
++local function dumptypes(out, lvl)
++  local names = {}
++  for name in pairs(map_type) do names[#names+1] = name end
++  sort(names)
++  out:write("Type definitions:\n")
++  for i = 1, #names do
++    local name = names[i]
++    local tp = map_type[name]
++    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
++  end
++  out:write("\n")
++end
++
++------------------------------------------------------------------------------
++
++-- Set the current section.
++-- Emits a SECTION action for section number `num` and flushes immediately.
++function _M.section(num)
++ waction("SECTION", num)
++ wflush(true) -- SECTION is a terminal action.
++end
++
++------------------------------------------------------------------------------
++
++-- Dump architecture description.
++-- Writes a banner built from the _info fields, followed by the action list.
++function _M.dumparch(out)
++ out:write(format("DynASM %s version %s, released %s\n\n",
++ _info.arch, _info.version, _info.release))
++ dumpactions(out)
++end
++
++-- Dump all user defined elements.
++-- Order of output: types, then globals, then externs.
++function _M.dumpdef(out, lvl)
++ dumptypes(out, lvl)
++ dumpglobals(out, lvl)
++ dumpexterns(out, lvl)
++end
++
++------------------------------------------------------------------------------
++
++-- Pass callbacks from/to the DynASM core.
++-- Stores the four callbacks (line writer, error, fatal, warning) for use by
++-- this module and hands back this module's flush function.
++function _M.passcb(wl, we, wf, ww)
++ wline, werror, wfatal, wwarn = wl, we, wf, ww
++ return wflush
++end
++
++-- Setup the arch-specific module.
++-- Only records the architecture name and the options for later use.
++function _M.setup(arch, opt)
++ g_arch, g_opt = arch, opt
++end
++
++-- Merge the core maps and the arch-specific maps.
++-- Lookups that miss in map_op fall back to map_coreop, and lookups that miss
++-- in map_def fall back to map_archdef (via __index metatables).
++function _M.mergemaps(map_coreop, map_def)
++ setmetatable(map_op, { __index = map_coreop })
++ setmetatable(map_def, { __index = map_archdef })
++ return map_op, map_def
++end
++
++return _M
++
++------------------------------------------------------------------------------
++
+Index: luajit-2.1.0+openresty20240815/src/Makefile
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/Makefile
++++ luajit-2.1.0+openresty20240815/src/Makefile
+@@ -53,6 +53,7 @@
+ CCOPT_ppc=
+ CCOPT_mips=
+ CCOPT_riscv64=
++CCOPT_loongarch64= -fwrapv
+ #
+ #CCDEBUG=
+ # Uncomment the next line to generate debug information:
+@@ -247,6 +248,10 @@
+ ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= s390x
+ else
++ifneq (,$(findstring LJ_TARGET_LOONGARCH64 ,$(TARGET_TESTARCH)))
++ TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
++ TARGET_LJARCH= loongarch64
++else
+ ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+ ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
+ TARGET_ARCH= -D__AARCH64EB__=1
+@@ -283,6 +288,7 @@
+ endif
+ endif
+ endif
++endif
+
+ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
+ TARGET_SYS= PS3
+@@ -349,7 +355,9 @@
+ # Find out whether the target toolchain always generates unwind tables.
+ TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o)
+ ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
+- TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
++ ifeq (,$(findstring LJ_TARGET_LOONGARCH64 ,$(TARGET_TESTARCH)))
++ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
++ endif
+ endif
+ endif
+ ifneq (SunOS,$(TARGET_SYS))
+Index: luajit-2.1.0+openresty20240815/src/host/buildvm.c
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/host/buildvm.c
++++ luajit-2.1.0+openresty20240815/src/host/buildvm.c
+@@ -71,6 +71,8 @@
+ #include "../dynasm/dasm_riscv.h"
+ #elif LJ_TARGET_S390X
+ #include "../dynasm/dasm_s390x.h"
++#elif LJ_TARGET_LOONGARCH64
++#include "../dynasm/dasm_loongarch64.h"
+ #else
+ #error "No support for this architecture (yet)"
+ #endif
+Index: luajit-2.1.0+openresty20240815/src/host/buildvm_asm.c
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/host/buildvm_asm.c
++++ luajit-2.1.0+openresty20240815/src/host/buildvm_asm.c
+@@ -229,6 +229,15 @@
+ ins, sym);
+ exit(1);
+ }
++#elif LJ_TARGET_LOONGARCH64
++ if ((ins >> 26) == 21) {
++ fprintf(ctx->fp, "\tbl %s\n", sym);
++ } else {
++ fprintf(stderr,
++ "Error: unsupported opcode %08x for %s symbol relocation.\n",
++ ins, sym);
++ exit(1);
++ }
+ #else
+ #error "missing relocation support for this architecture"
+ #endif
+Index: luajit-2.1.0+openresty20240815/src/jit/bcsave.lua
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/jit/bcsave.lua
++++ luajit-2.1.0+openresty20240815/src/jit/bcsave.lua
+@@ -104,6 +104,7 @@
+ mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
+ riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, },
+ s390x = { e = "be", b = 64, m = 22, },
++ loongarch64 = { e = "le", b = 64, m = 258, f = 0x3},
+ }
+
+ local map_os = {
+Index: luajit-2.1.0+openresty20240815/src/jit/dis_loongarch64.lua
+===================================================================
+--- /dev/null
++++ luajit-2.1.0+openresty20240815/src/jit/dis_loongarch64.lua
+@@ -0,0 +1,697 @@
++----------------------------------------------------------------------------
++-- LuaJIT LoongArch64 disassembler module.
++--
++-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
++-- Released under the MIT/X license. See Copyright Notice in luajit.h
++----------------------------------------------------------------------------
++-- This is a helper module used by the LuaJIT machine code dumper module.
++--
++-- It disassembles most LoongArch instructions.
++-- NYI: SIMD instructions.
++------------------------------------------------------------------------------
++
++local type = type
++local byte, format = string.byte, string.format
++local match, gmatch = string.match, string.gmatch
++local concat = table.concat
++local bit = require("bit")
++local band, bor, bnot, tohex = bit.band, bit.bor, bit.bnot, bit.tohex
++local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
++
++------------------------------------------------------------------------------
++-- Opcode maps
++------------------------------------------------------------------------------
++
++-- Bit-count, byte/bit-reverse and sign-extend instructions.
++-- Presumably indexed by the decoder with (op >> shift) & mask -- the decoder
++-- is not part of this table; confirm against it.
++local map_18_0 = { -- 18-20:0, 10-17
++ shift = 10, mask = 255,
++ [4] = "clo.wDJ",
++ [5] = "clz.wDJ",
++ [6] = "cto.wDJ",
++ [7] = "ctz.wDJ",
++ [8] = "clo.dDJ",
++ [9] = "clz.dDJ",
++ [10] = "cto.dDJ",
++ [11] = "ctz.dDJ",
++ [12] = "revb.2hDJ",
++ [13] = "revb.4hDJ",
++ [14] = "revb.2wDJ",
++ [15] = "revb.dDJ",
++ [16] = "revh.2wDJ",
++ [17] = "revh.dDJ",
++ [18] = "bitrev.4bDJ",
++ [19] = "bitrev.8bDJ",
++ [20] = "bitrev.wDJ",
++ [21] = "bitrev.dDJ",
++ [22] = "ext.w.hDJ",
++ [23] = "ext.w.bDJ",
++}
++
++-- Three-register add/sub, set-less-than and mask instructions.
++local map_18_4 = { -- 18-20:4, 15-17
++ shift = 15, mask = 7,
++ [0] = "add.wDJK",
++ [1] = "add.dDJK",
++ [2] = "sub.wDJK",
++ [3] = "sub.dDJK",
++ [4] = "sltDJK",
++ [5] = "sltuDJK",
++ [6] = "maskeqzDJK",
++ [7] = "masknezDJK",
++}
++
++-- Three-register logical instructions and 32 bit left/right logical shifts.
++local map_18_5 = { -- 18-20:5, 15-17
++ shift = 15, mask = 7,
++ [0] = "norDJK",
++ [1] = "andDJK",
++ [2] = "orDJK",
++ [3] = "xorDJK",
++ [4] = "ornDJK",
++ [5] = "andnDJK",
++ [6] = "sll.wDJK",
++ [7] = "srl.wDJK",
++}
++
++-- Remaining register shifts and rotates. Indexes 4 and 5 have no entry.
++local map_18_6 = { -- 18-20:6, 15-17
++ shift = 15, mask = 7,
++ [0] = "sra.wDJK",
++ [1] = "sll.dDJK",
++ [2] = "srl.dDJK",
++ [3] = "sra.dDJK",
++ [6] = "rotr.wDJK",
++ [7] = "rotr.dDJK",
++}
++
++-- Three-register multiply instructions.
++local map_18_7 = { -- 18-20:7, 15-17
++ shift = 15, mask = 7,
++ [0] = "mul.wDJK",
++ [1] = "mulh.wDJK",
++ [2] = "mulh.wuDJK",
++ [3] = "mul.dDJK",
++ [4] = "mulh.dDJK",
++ [5] = "mulh.duDJK",
++ [6] = "mulw.d.wDJK",
++ [7] = "mulw.d.wuDJK",
++}
++
++-- Two-operand FP arithmetic, selected by bits 10-14 (map_farith slot 40).
++-- Only indexes 0..31 are reachable with mask = 31, so frsqrte.s/.d are not
++-- listed here: their encodings (0x01148400/0x01148800) have bits 15-21 == 41
++-- and bits 10-14 == 1/2, i.e. they belong to map_fmov below.
++local map_farith2 = {
++  shift = 10, mask = 31,
++  [1] = "fabs.sFG",
++  [2] = "fabs.dFG",
++  [5] = "fneg.sFG",
++  [6] = "fneg.dFG",
++  [9] = "flogb.sFG",
++  [10] = "flogb.dFG",
++  [13] = "fclass.sFG",
++  [14] = "fclass.dFG",
++  [17] = "fsqrt.sFG",
++  [18] = "fsqrt.dFG",
++  [21] = "frecip.sFG",
++  [22] = "frecip.dFG",
++  [25] = "frsqrt.sFG",
++  [26] = "frsqrt.dFG",
++  [29] = "frecipe.sFG",
++  [30] = "frecipe.dFG",
++}
++
++-- FP moves, GPR/FPR/FCSR/FCC transfers and frsqrte, selected by bits 10-14
++-- (map_farith slot 41).
++local map_fmov = {
++  shift = 10, mask = 31,
++  [1] = "frsqrte.sFG",
++  [2] = "frsqrte.dFG",
++  [5] = "fmov.sFG",
++  [6] = "fmov.dFG",
++  [9] = "movgr2fr.wFJ",
++  [10] = "movgr2fr.dFJ",
++  [11] = "movgr2frh.wFJ",
++  [13] = "movfr2gr.sDG",
++  [14] = "movfr2gr.dDG",
++  [15] = "movfrh2gr.sDG",
++  [16] = "movgr2fcsrSJ",
++  [18] = "movfcsr2grDR",
++  -- The nested tables require the unused bits next to the 3 bit fcc field
++  -- to be zero.
++  [20] = { shift = 3, mask = 3, [0] = "movfr2cfEG", },
++  [21] = { shift = 8, mask = 3, [0] = "movcf2frFA", },
++  [22] = { shift = 3, mask = 3, [0] = "movgr2cfEJ", },
++  [23] = { shift = 8, mask = 3, [0] = "movcf2grDA", },
++}
++
++-- FP conversion tables, one per value of bits 15-20; each is selected by
++-- bits 10-14. They are referenced from map_farith.
++
++-- fcvt: single <-> double precision.
++local map_fconvert = { -- 15-20: 110010
++ shift = 10, mask = 31,
++ [6] = "fcvt.s.dFG", [9] = "fcvt.d.sFG",
++}
++
++-- ftintrm / ftintrp: FP to integer conversions.
++local map_fconvert1 = { -- 15-20: 110100
++ shift = 10, mask = 31,
++ [1] = "ftintrm.w.sFG",
++ [2] = "ftintrm.w.dFG",
++ [9] = "ftintrm.l.sFG",
++ [10] = "ftintrm.l.dFG",
++ [17] = "ftintrp.w.sFG",
++ [18] = "ftintrp.w.dFG",
++ [25] = "ftintrp.l.sFG",
++ [26] = "ftintrp.l.dFG",
++}
++
++-- ftintrz / ftintrne: FP to integer conversions.
++local map_fconvert2 = { -- 15-20: 110101
++ shift = 10, mask = 31,
++ [1] = "ftintrz.w.sFG",
++ [2] = "ftintrz.w.dFG",
++ [9] = "ftintrz.l.sFG",
++ [10] = "ftintrz.l.dFG",
++ [17] = "ftintrne.w.sFG",
++ [18] = "ftintrne.w.dFG",
++ [25] = "ftintrne.l.sFG",
++ [26] = "ftintrne.l.dFG",
++}
++
++-- ftint: FP to integer conversions.
++local map_fconvert3 = { -- 15-20: 110110
++ shift = 10, mask = 31,
++ [1] = "ftint.w.sFG",
++ [2] = "ftint.w.dFG",
++ [9] = "ftint.l.sFG",
++ [10] = "ftint.l.dFG",
++}
++
++-- ffint: integer to FP conversions.
++local map_fconvert4 = { -- 15-20: 111010
++ shift = 10, mask = 31,
++ [4] = "ffint.s.wFG",
++ [6] = "ffint.s.lFG",
++ [8] = "ffint.d.wFG",
++ [10] = "ffint.d.lFG",
++}
++
++-- frint.
++local map_fconvert5 = { -- 15-20: 111100
++ shift = 10, mask = 31,
++ [17] = "frint.sFG",
++ [18] = "frint.dFG",
++}
++
++-- Three-operand FP arithmetic, selected by bits 15-21. Slots 40 and above
++-- delegate to the two-operand, move and conversion tables.
++local map_farith = { -- 22-25:4, 15-21
++ shift = 15, mask = 127,
++ [1] = "fadd.sFGH",
++ [2] = "fadd.dFGH",
++ [5] = "fsub.sFGH",
++ [6] = "fsub.dFGH",
++ [9] = "fmul.sFGH",
++ [10] = "fmul.dFGH",
++ [13] = "fdiv.sFGH",
++ [14] = "fdiv.dFGH",
++ [17] = "fmax.sFGH",
++ [18] = "fmax.dFGH",
++ [21] = "fmin.sFGH",
++ [22] = "fmin.dFGH",
++ [25] = "fmaxa.sFGH",
++ [26] = "fmaxa.dFGH",
++ [29] = "fmina.sFGH",
++ [30] = "fmina.dFGH",
++ [33] = "fscaleb.sFGH",
++ [34] = "fscaleb.dFGH",
++ [37] = "fcopysign.sFGH",
++ [38] = "fcopysign.dFGH",
++ [40] = map_farith2, [41] = map_fmov,
++ [50] = map_fconvert, [52] = map_fconvert1,
++ [53] = map_fconvert2, [54] = map_fconvert3,
++ [58] = map_fconvert4, [60] = map_fconvert5,
++}
++
++-- Integer ops with bit 21 clear, selected by bits 18-20.
++local map_21_0 = { --21st:0, 18-20
++ shift = 18, mask = 7,
++ [0] = map_18_0,
++ -- The positional second entry below lands at index 1 (alsl.wu).
++ [1] = { shift = 17, mask = 1, [0] = "alsl.wDJKQ", "alsl.wuDJKQ", },
++ [2] = {shift = 17, mask = 1, [0] = "bytepick.wDJKQ", },
++ [3] = "bytepick.dDJKB",
++ [4] = map_18_4,
++ [5] = map_18_5,
++ [6] = map_18_6,
++ [7] = map_18_7,
++}
++
++-- Integer ops with bit 21 set: div/mod, crc, break/syscall and alsl.d.
++-- The outer level re-checks bit 21, the next level selects on bits 18-20 and
++-- the innermost tables select on bits 15-17.
++local map_21_1 = { --21st:1, 22nd:0, 15-20
++  shift = 21, mask = 1,
++  [1] = {
++    shift = 18, mask = 7,
++    [0] = {
++      shift = 15, mask = 7,
++      [0] = "div.wDJK",
++      [1] = "mod.wDJK",
++      [2] = "div.wuDJK",
++      [3] = "mod.wuDJK",
++      [4] = "div.dDJK",
++      [5] = "mod.dDJK",
++      [6] = "div.duDJK",
++      [7] = "mod.duDJK",
++    },
++    [1] = {
++      -- The crc variants differ in bits 15-17 (0x00240000 ... 0x00278000).
++      -- Selecting on bits 18-20 again would always yield index 1.
++      shift = 15, mask = 7,
++      [0] = "crc.w.b.wDJK",
++      [1] = "crc.w.h.wDJK",
++      [2] = "crc.w.w.wDJK",
++      [3] = "crc.w.d.wDJK",
++      [4] = "crcc.w.b.wDJK",
++      [5] = "crcc.w.h.wDJK",
++      [6] = "crcc.w.w.wDJK",
++      [7] = "crcc.w.d.wDJK",
++    },
++    [2] = {
++      shift = 15, mask = 7,
++      -- Must be pattern strings; bare identifiers evaluate to nil globals.
++      [4] = "breakC", [6] = "syscallC",
++    },
++    [3] = { shift = 17, mask = 1, [0] = "alsl.dDJKQ", },
++  },
++}
++
++-- Selects on bit 21 between the two integer op groups.
++local map_22_0 = {
++ shift = 21, mask = 1,
++ [0] = map_21_0,
++ [1] = map_21_1,
++}
++
++-- Shift/rotate by immediate, selected by bits 16-20. The .w forms
++-- additionally require bit 15 to be set.
++local map_shift = { -- 22nd:1, 21st:0
++ shift = 16, mask = 31,
++ [0] = { shift = 15, mask = 1, [1] = "slli.wDJU", },
++ [1] = "slli.dDJV",
++ [4] = { shift = 15, mask = 1, [1] = "srli.wDJU", },
++ [5] = "srli.dDJV",
++ [8] = { shift = 15, mask = 1, [1] = "srai.wDJU", },
++ [9] = "srai.dDJV",
++ [12] = { shift = 15, mask = 1, [1] = "rotri.wDJU", },
++ [13] = "rotri.dDJV",
++}
++
++-- Bit 21 clear: immediate shifts. Bit 21 set: 32 bit bit-string ops, where
++-- bit 15 selects between bstrins.w and bstrpick.w.
++local map_22_1 = { -- 22nd:1
++ shift = 21, mask = 1,
++ [0] = map_shift,
++ [1] = { shift = 15, mask = 1, [0] = "bstrins.wDJMU", [1] = "bstrpick.wDJMU", },
++}
++
++-- Table used for index 0 of map_init; selected by bits 22-25.
++local map_26_0 = {
++ shift = 22, mask = 15,
++ [0] = map_22_0,
++ [1] = map_22_1,
++ [2] = "bstrins.dDJNV",
++ [3] = "bstrpick.dDJNV",
++ [4] = map_farith,
++ [8] = "sltiDJX",
++ [9] = "sltuiDJX",
++ [10] = "addi.wDJX",
++ [11] = "addi.dDJX",
++ [12] = "lu52i.dDJX",
++ [13] = "andiDJT",
++ [14] = "oriDJT",
++ [15] = "xoriDJT",
++}
++
++-- Long-immediate instructions; bit 25 selects between the two entries of
++-- each table.
++local map_long_i_5 = { -- Long immediate fixed-point arithmetic.
++ shift = 25, mask = 1,
++ [0] = "lu12i.wDZ",
++ [1] = "lu32i.dDZ",
++}
++
++-- PC-relative address computation (pcaddi, pcalau12i).
++local map_long_i_6 = {
++ shift = 25, mask = 1,
++ [0] = "pcaddiDZ",
++ [1] = "pcalau12iDZ",
++}
++
++-- PC-relative address computation (pcaddu12i, pcaddu18i).
++local map_long_i_7 = {
++ shift = 25, mask = 1,
++ [0] = "pcaddu12iDZ",
++ [1] = "pcaddu18iDZ",
++}
++
++-- Register-indexed and bound-checked loads/stores, selected by bits 15-25.
++local map_ldst0_14 = {
++ shift = 15, mask = 2047,
++ [0] = "ldx.bDJK", [8] = "ldx.hDJK", [16] = "ldx.wDJK",
++ [24] = "ldx.dDJK", [32] = "stx.bDJK", [40] = "stx.hDJK",
++ [48] = "stx.wDJK", [56] = "stx.dDJK", [64] = "ldx.buDJK",
++ [72] = "ldx.huDJK", [80] = "ldx.wuDJK", [96] = "fldx.sFJK",
++ [104] = "fldx.dFJK", [112] = "fstx.sFJK", [120] = "fstx.dFJK",
++ [232] = "fldgt.sFJK", [233] = "fldgt.dFJK", [234] = "fldle.sFJK",
++ [235] = "fldle.dFJK", [236] = "fstgt.sFJK", [237] = "fstgt.dFJK",
++ [238] = "fstle.sFJK", [239] = "fstle.dFJK", [240] = "ldgt.bDJK",
++ [241] = "ldgt.hDJK", [242] = "ldgt.wDJK", [243] = "ldgt.dDJK",
++ [244] = "ldle.bDJK", [245] = "ldle.hDJK", [246] = "ldle.wDJK",
++ [247] = "ldle.dDJK", [248] = "stgt.bDJK", [249] = "stgt.hDJK",
++ [250] = "stgt.wDJK", [251] = "stgt.dDJK", [252] = "stle.bDJK",
++ [253] = "stle.hDJK", [254] = "stle.wDJK", [255] = "stle.dDJK",
++}
++
++-- ll/sc instructions, selected by bits 24-25.
++local map_ldst1_8 = {
++ shift = 24, mask = 3,
++ [0] = "ll.wDJW",
++ [1] = "sc.wDJW",
++ [2] = "ll.dDJW",
++ [3] = "sc.dDJW",
++}
++
++-- ldptr/stptr instructions, selected by bits 24-25.
++local map_ldst1_9 = {
++ shift = 24, mask = 3,
++ [0] = "ldptr.wDJW",
++ [1] = "stptr.wDJW",
++ [2] = "ldptr.dDJW",
++ [3] = "stptr.dDJW",
++}
++
++-- Loads/stores with a 12 bit displacement, selected by bits 22-25.
++-- NOTE(review): ld.b, ld.h and ld.wu use the DJX operand pattern while every
++-- other entry uses the `o` displacement pattern -- confirm whether the
++-- difference in output format is intended.
++local map_ldst1_10 = {
++ shift = 22, mask = 15,
++ [0] = "ld.bDJX",
++ [1] = "ld.hDJX",
++ [2] = "ld.wDo",
++ [3] = "ld.dDo",
++ [4] = "st.bDo",
++ [5] = "st.hDo",
++ [6] = "st.wDo",
++ [7] = "st.dDo",
++ [8] = "ld.buDo",
++ [9] = "ld.huDo",
++ [10] = "ld.wuDJX",
++ [12] = "fld.sFo",
++ [13] = "fst.sFo",
++ [14] = "fld.dFo",
++ [15] = "fst.dFo",
++}
++
++local map_fcmp0 = { -- Single-precision compares; reached from map_fcmp[1].
++ shift = 15, mask = 31, -- Condition selector: bits 15..19. Operands: E = fcc (bits 0..2), G/H = FPRs.
++ [0] = "fcmp.caf.sEGH",
++ [1] = "fcmp.saf.sEGH",
++ [2] = "fcmp.clt.sEGH",
++ [3] = "fcmp.slt.sEGH",
++ [4] = "fcmp.ceq.sEGH",
++ [5] = "fcmp.seq.sEGH",
++ [6] = "fcmp.cle.sEGH",
++ [7] = "fcmp.sle.sEGH",
++ [8] = "fcmp.cun.sEGH",
++ [9] = "fcmp.sun.sEGH",
++ [10] = "fcmp.cult.sEGH",
++ [11] ="fcmp.sult.sEGH",
++ [12] = "fcmp.cueq.sEGH",
++ [13] = "fcmp.sueq.sEGH",
++ [14] = "fcmp.cule.sEGH",
++ [15] = "fcmp.sule.sEGH",
++ [16] = "fcmp.cne.sEGH",
++ [17] = "fcmp.sne.sEGH",
++ [20] = "fcmp.cor.sEGH",
++ [21] = "fcmp.sor.sEGH",
++ [24] = "fcmp.cune.sEGH",
++ [25] = "fcmp.sune.sEGH",
++}
++
++local map_fcmp1 = { -- Double-precision compares; reached from map_fcmp[2].
++ shift = 15, mask = 31, -- Condition selector: bits 15..19. Operands: E = fcc (bits 0..2), G/H = FPRs.
++ [0] = "fcmp.caf.dEGH",
++ [1] = "fcmp.saf.dEGH",
++ [2] = "fcmp.clt.dEGH",
++ [3] = "fcmp.slt.dEGH",
++ [4] = "fcmp.ceq.dEGH",
++ [5] = "fcmp.seq.dEGH",
++ [6] = "fcmp.cle.dEGH",
++ [7] = "fcmp.sle.dEGH",
++ [8] = "fcmp.cun.dEGH",
++ [9] = "fcmp.sun.dEGH",
++ [10] = "fcmp.cult.dEGH",
++ [11] = "fcmp.sult.dEGH",
++ [12] = "fcmp.cueq.dEGH",
++ [13] = "fcmp.sueq.dEGH",
++ [14] = "fcmp.cule.dEGH",
++ [15] = "fcmp.sule.dEGH",
++ [16] = "fcmp.cne.dEGH",
++ [17] = "fcmp.sne.dEGH",
++ [20] = "fcmp.cor.dEGH",
++ [21] = "fcmp.sor.dEGH",
++ [24] = "fcmp.cune.dEGH",
++ [25] = "fcmp.sune.dEGH",
++}
++
++local map_fcmp = { -- Primary opcode 3 in map_init: FP compares and fsel.
++ shift = 20, mask = 63, -- Sub-selector: bits 20..25 of the instruction word.
++ [1] = { shift = 3, mask = 3, [0] = map_fcmp0, }, -- Only decoded when bits 3..4 are zero.
++ [2] = { shift = 3, mask = 3, [0] = map_fcmp1, }, -- Only decoded when bits 3..4 are zero.
++ [16] = { shift = 18, mask = 3, [0] = "fselFGHI", }, -- Only decoded when bits 18..19 are zero.
++}
++
++local map_fp = { -- Primary opcode 2 in map_init: fused multiply-add family (fd, fj, fk, fa).
++ shift = 20, mask = 15, -- Sub-selector: bits 20..23; .s/.d pairs sit at (1,2) (5,6) (9,10) (13,14).
++ [1] = "fmadd.sFGHi",
++ [2] = "fmadd.dFGHi",
++ [5] = "fmsub.sFGHi",
++ [6] = "fmsub.dFGHi",
++ [9] = "fnmadd.sFGHi", [10] = "fnmadd.dFGHi",
++ [13] = "fnmsub.sFGHi", [14] = "fnmsub.dFGHi",
++}
++
++local map_init = { -- Root decode table; installed as ctx.map_pri by create().
++ shift = 26, mask = 63, -- Primary opcode: top 6 bits of the instruction word.
++ [0] = map_26_0,
++ [2] = map_fp,
++ [3] = map_fcmp,
++ [4] = "addu16i.dDJY",
++ [5] = map_long_i_5,
++ [6] = map_long_i_6,
++ [7] = map_long_i_7,
++ [8] = map_ldst1_8,
++ [9] = map_ldst1_9,
++ [10] = map_ldst1_10,
++ [14] = map_ldst0_14,
++ [16] = "beqzJL",
++ [17] = "bnezJL",
++ [18] = { shift = 8, mask = 3, [0] = "bceqzAL", "bcnezAL", }, -- Bits 8..9: 0 = bceqz, 1 = bcnez.
++ [19] = "jirlDJa",
++ [20] = "bP",
++ [21] = "blP",
++ [22] = "beqJDO",
++ [23] = "bneJDO",
++ [24] = "bltJDO",
++ [25] = "bgeJDO",
++ [26] = "bltuJDO",
++ [27] = "bgeuJDO",
++}
++
++------------------------------------------------------------------------------
++
++local map_gpr = { -- Printable GPR names indexed by register number 0..31 (1 = "ra", 3 = "sp").
++ [0] = "r0", "ra", "r2", "sp", "r4", "r5", "r6", "r7",
++ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
++ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
++ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
++}
++
++------------------------------------------------------------------------------
++
++-- Output a nicely formatted line with an opcode and operands, then advance ctx.pos by 4.
++local function putop(ctx, text, operands)
++ local pos = ctx.pos
++ local extra = "" -- Optional "->symbol" suffix.
++ if ctx.rel then -- ctx.rel is the last immediate decoded by disass_ins; used as a symtab key.
++ local sym = ctx.symtab[ctx.rel]
++ if sym then extra = "\t->"..sym end
++ end
++ if ctx.hexdump > 0 then -- Include the raw instruction word in hex.
++ ctx.out(format("%08x %s %-7s %s%s\n",
++ ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra))
++ else
++ ctx.out(format("%08x %-7s %s%s\n",
++ ctx.addr+pos, text, concat(operands, ", "), extra))
++ end
++ ctx.pos = pos + 4 -- Every instruction is 4 bytes.
++end
++
++-- Fallback for undecodable words: print the raw instruction as a ".long" directive.
++local function unknown(ctx)
++ local raw = "0x"..tohex(ctx.op); return putop(ctx, ".long", { raw })
++end
++
++local function get_le(ctx) -- Fetch the 32-bit little-endian word at byte offset ctx.pos.
++ local first = ctx.pos + 1 -- Lua string indices are 1-based.
++ local lo, mid1, mid2, hi = byte(ctx.code, first, first+3)
++ return bor(lshift(hi, 24), lshift(mid2, 16), lshift(mid1, 8), lo)
++end
++
++-- Decode an immediate field.
++--   imm    raw field value (anything tonumber() accepts)
++--   bits   field width; bit (bits-1) is the sign bit when signed
++--   scale  number of low bits that must be zero and are shifted out
++--   signed true: interpret as two's complement; false: require an unsigned fit
++--   mask   mask applied to the field before sign handling (callers pass 2^bits-1)
++-- Returns the decoded number, or nothing when imm is not numeric, is not a
++-- multiple of 2^scale, or does not fit.
++local function decode_si_imm(imm, bits, scale, signed, mask)
++ local raw = tonumber(imm)
++ if not raw then return end
++ local val = arshift(raw, scale)
++ if lshift(val, scale) ~= raw then return end
++ if not signed then
++ if arshift(val, bits) == 0 then return val end
++ return
++ end
++ local sign = arshift(band(val, mask), bits-1)
++ if sign == 0 then return val end
++ if sign == 1 then return -(band(bnot(val), mask)+1) end
++end
++
++-- Disassemble a single instruction: fetch, walk the decode tables, format operands, print.
++local function disass_ins(ctx)
++ local op = ctx:get()
++ local operands = {}
++ local last = nil
++ ctx.op = op
++ ctx.rel = nil
++
++ local opat = ctx.map_pri[rshift(op, 26)] -- Start with the primary opcode (top 6 bits).
++ while type(opat) ~= "string" do -- Descend through sub-tables until a pattern string is found.
++ if not opat then return unknown(ctx) end
++ opat = opat[band(rshift(op, opat.shift), opat.mask)]
++ end
++ local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") -- Split mnemonic from operand letters.
++ local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
++ if altname then pat = pat2 end
++
++ for p in gmatch(pat, ".") do -- One operand letter at a time.
++ local x = nil
++ if p == "D" then
++ x = map_gpr[band(rshift(op, 0), 31)]
++ elseif p == "J" then
++ x = map_gpr[band(rshift(op, 5), 31)]
++ elseif p == "K" then
++ x = map_gpr[band(rshift(op, 10), 31)]
++ elseif p == "F" then
++ x = "f"..band(rshift(op, 0), 31)
++ elseif p == "G" then
++ x = "f"..band(rshift(op, 5), 31)
++ elseif p == "H" then
++ x = "f"..band(rshift(op, 10), 31)
++ elseif p == "i" then
++ x = "f"..band(rshift(op, 15), 31)
++ elseif p == "S" then
++ x = "fcsr"..band(rshift(op, 0), 31)
++ elseif p == "R" then
++ x = "fcsr"..band(rshift(op, 5), 31)
++ elseif p == "E" then
++ x = "fcc"..band(rshift(op, 0), 7)
++ elseif p == "A" then
++ x = "fcc"..band(rshift(op, 5), 7)
++ elseif p == "I" then
++ x = "fcc"..band(rshift(op, 15), 7)
++ elseif p == "Q" then -- sa2
++ x = band(rshift(op, 15), 3)
++ ctx.rel = x
++ x = format("%d", x)
++ elseif p == "B" then -- sa3
++ x = band(rshift(op, 15), 7)
++ ctx.rel = x
++ x = format("%d", x)
++ elseif p == "M" then -- msbw
++ x = band(rshift(op, 16), 31)
++ ctx.rel = x
++ x = format("%d(0x%x)", x, x)
++ elseif p == "N" then -- msbd
++ x = band(rshift(op, 16), 63)
++ ctx.rel = x
++ x = format("%d(0x%x)", x, x)
++ elseif p == "U" then -- ui5
++ x = band(rshift(op, 10), 31)
++ ctx.rel = x
++ x = format("%d(0x%x)", x, x)
++ elseif p == "V" then -- ui6
++ x = band(rshift(op, 10), 63)
++ ctx.rel = x
++ x = format("%d(0x%x)", x, x)
++ elseif p == "T" then -- ui12
++ x = band(rshift(op, 10), 4095)
++ ctx.rel = x
++ x = format("%d(0x%x)", x, x)
++ elseif p == "W" then -- si14
++ x = band(rshift(op, 10), 16383)
++ x = decode_si_imm(x, 14, 0, true, 0x3fff)
++ ctx.rel = x
++ x = format("%d(0x%04x)", x, band(x, 0x3fff))
++ elseif p == "X" then -- si12
++ x = band(rshift(op, 10), 4095)
++ x = decode_si_imm(x, 12, 0, true, 0xfff)
++ ctx.rel = x
++ x = format("%d(0x%03x)", x, band(x, 0xfff))
++ elseif p == "o" then -- NOTE(review): rewrites the last operand as "<last>, <disp>"; disp is unsigned, rj is not printed.
++ local disp = band((rshift(op, 10)), 0xfff)
++ operands[#operands] = format("%s, %d", last, disp)
++ elseif p == "Y" then -- si16
++ x = band(rshift(op, 10), 65535)
++ x = decode_si_imm(x, 16, 0, true, 0xffff)
++ ctx.rel = x
++ x = format("%d(0x%04x)", x, band(x, 0xffff))
++ elseif p == "Z" then -- si20
++ x = band(rshift(op, 5), 1048575) -- 1RI20 format: si20 occupies bits 5..24, directly above rd.
++ x = decode_si_imm(x, 20, 0, true, 0xfffff)
++ ctx.rel = x
++ x = format("%d(0x%05x)", x, band(x, 0xfffff))
++ elseif p == "C" then -- code
++ x = band(rshift(op, 0), 32767)
++ elseif p == "O" then -- offs[15:0]
++ x = band(rshift(op, 10), 65535)
++ x = decode_si_imm(x, 16, 0, true, 0xffff)
++ ctx.rel = x
++ x = format("%d(0x%04x)", x, band(x, 0xffff))
++ elseif p == "L" then -- offs[15:0] + offs[20:16]
++ x = lshift(band(op, 31), 16) + band(rshift(op, 10), 65535)
++ x = decode_si_imm(x, 21, 0, true, 0x1fffff)
++ ctx.rel = x
++ x = format("%d(0x%06x)", x, band(x, 0x1fffff))
++ elseif p == "P" then -- offs[15:0] + offs[25:16]
++ x = lshift(band(op, 1023), 16) + band(rshift(op, 10), 65535)
++ x = decode_si_imm(x, 26, 0, true, 0x3ffffff)
++ ctx.rel = x
++ x = format("%d(0x%07x)", x, band(x, 0x3ffffff))
++ elseif p == "a" then -- 16-bit signed offset in bits 10..25 (used by jirl)
++ x = band(rshift(op, 10), 65535)
++ x = decode_si_imm(x, 16, 0, true, 0xffff)
++ ctx.rel = x
++ x = format("%d(0x%04x)", x, band(x, 0xffff))
++ else
++ assert(false) -- Unknown operand letter in a pattern string: table bug.
++ end
++ if x then operands[#operands+1] = x; last = x end
++ end
++
++ return putop(ctx, name, operands)
++end
++
++------------------------------------------------------------------------------
++
++-- Disassemble the words from ofs up to ofs+len (default: whole string); bounds round down to 4.
++local function disass_block(ctx, ofs, len)
++ ofs = ofs or 0
++ local limit
++ if len then limit = ofs + len else limit = #ctx.code end
++ limit = limit - limit % 4
++ ctx.pos = ofs - ofs % 4; ctx.rel = nil
++ while ctx.pos < limit do disass_ins(ctx) end
++end
++
++-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
++local function create(code, addr, out)
++ return {
++ code = code, -- Machine code to decode (a string).
++ addr = addr or 0, -- Address printed for offset 0.
++ out = out or io.write, -- Output sink, called with each formatted line.
++ symtab = {}, -- Optional symbol names keyed by decoded immediate.
++ disass = disass_block,
++ hexdump = 8, -- Values > 0 make putop print the raw word.
++ get = get_le,
++ map_pri = map_init,
++ }
++end
++
++-- Simple API: build a throwaway context for code at addr and disassemble all of it via out.
++local function disass(code, addr, out)
++ local ctx = create(code, addr, out); ctx:disass()
++end
++
++-- Return register name for RID: IDs 0..31 are GPRs, IDs from 32 up print as f0, f1, ...
++local function regname(r)
++ if r >= 32 then return "f"..(r-32) end
++ return map_gpr[r]
++end
++
++-- Public module functions.
++return {
++ create = create, -- Extended API: returns a context with a :disass(ofs, len) method.
++ disass = disass, -- Simple API: disassemble a whole code string.
++ regname = regname -- Map a register ID to its printable name.
++}
++
+Index: luajit-2.1.0+openresty20240815/src/lib_jit.c
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lib_jit.c
++++ luajit-2.1.0+openresty20240815/src/lib_jit.c
+@@ -859,6 +859,8 @@
+ #endif
+ #elif LJ_TARGET_S390X
+ /* No optional CPU features to detect (for now). */
++#elif LJ_TARGET_LOONGARCH64
++ /* No optional CPU features to detect (for now). */
+ #else
+ #error "Missing CPU detection for this architecture"
+ #endif
+Index: luajit-2.1.0+openresty20240815/src/lj_arch.h
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_arch.h
++++ luajit-2.1.0+openresty20240815/src/lj_arch.h
+@@ -35,6 +35,8 @@
+ #define LUAJIT_ARCH_s390x 8
+ #define LUAJIT_ARCH_riscv64 9
+ #define LUAJIT_ARCH_RISCV64 9
++#define LUAJIT_ARCH_LOONGARCH64 10
++#define LUAJIT_ARCH_loongarch64 10
+
+
+ /* Target OS. */
+@@ -74,6 +76,8 @@
+ #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
+ #elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
+ #define LUAJIT_TARGET LUAJIT_ARCH_RISCV64
++#elif defined(__loongarch64)
++#define LUAJIT_TARGET LUAJIT_ARCH_LOONGARCH64
+ #else
+ #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
+ #endif
+@@ -495,6 +499,20 @@
+ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR, no ROLI */
+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+
++#elif LUAJIT_TARGET == LUAJIT_ARCH_LOONGARCH64
++#define LJ_ARCH_NAME "loongarch64"
++#define LJ_ARCH_BITS 64
++#define LJ_ARCH_ENDIAN LUAJIT_LE
++#define LJ_TARGET_LOONGARCH64 1
++#define LJ_TARGET_GC64 1
++#define LJ_TARGET_EHRETREG 4
++#define LJ_TARGET_EHRAREG 1
++#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
++#define LJ_TARGET_MASKSHIFT 1
++#define LJ_TARGET_MASKROT 1
++#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
++
+ #else
+ #error "No support for RISC-V 64 Soft-float/Single-float"
+ #endif
+@@ -529,6 +547,16 @@
+ #error "Need at least GCC 4.8 or newer"
+ #endif
+ #endif
++#elif LJ_TARGET_LOONGARCH64
++#if __clang__
++#if ((__clang_major__ < 8) || ((__clang_major__ == 8) && __clang_minor__ < 0)) && !defined(__NX_TOOLCHAIN_MAJOR__)
++#error "Need at least Clang 8.0 or newer"
++#endif
++#else
++#if (__GNUC__ < 8) || ((__GNUC__ == 8) && __GNUC_MINOR__ < 3)
++#error "Need at least GCC 8.3 or newer"
++#endif
++#endif
+ #elif !LJ_TARGET_PS3
+ #if __clang__
+ #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5))
+@@ -586,6 +614,10 @@
+ #if !defined(__riscv_float_abi_double)
+ #error "Only RISC-V 64 double float supported for now"
+ #endif
++#elif LJ_TARGET_LOONGARCH64
++#if !(defined(_ABILP64) && _LOONGARCH_SIM == _ABILP64)
++#error "Only LOONGARCH lp64d ABI is supported"
++#endif
+ #endif
+ #endif
+
+Index: luajit-2.1.0+openresty20240815/src/lj_asm.c
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_asm.c
++++ luajit-2.1.0+openresty20240815/src/lj_asm.c
+@@ -229,6 +229,8 @@
+ #include "lj_emit_mips.h"
+ #elif LJ_TARGET_RISCV64
+ #include "lj_emit_riscv.h"
++#elif LJ_TARGET_LOONGARCH64
++#include "lj_emit_loongarch64.h"
+ #else
+ #error "Missing instruction emitter for target CPU"
+ #endif
+@@ -1714,6 +1716,8 @@
+ #include "lj_asm_riscv64.h"
+ #elif LJ_TARGET_S390X
+ #include "lj_asm_s390x.h"
++#elif LJ_TARGET_LOONGARCH64
++#include "lj_asm_loongarch64.h"
+ #else
+ #error "Missing assembler for target CPU"
+ #endif
+Index: luajit-2.1.0+openresty20240815/src/lj_asm_loongarch64.h
+===================================================================
+--- /dev/null
++++ luajit-2.1.0+openresty20240815/src/lj_asm_loongarch64.h
+@@ -0,0 +1,1990 @@
++/*
++** LoongArch IR assembler (SSA IR -> machine code).
++** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++/* -- Register allocator extensions --------------------------------------- */
++
++/* Allocate a register with a hint. Returns the register now holding ref. */
++static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
++{
++ Reg r = IR(ref)->r;
++ if (ra_noreg(r)) { /* Only allocate if ref has no register yet. */
++ if (!ra_hashint(r) && !iscrossref(as, ref))
++ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
++ r = ra_allocref(as, ref, allow);
++ }
++ ra_noweak(as, r);
++ return r;
++}
++
++/* Allocate two source registers for three-operand instructions. Returns left | (right << 8). */
++static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
++{
++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
++ Reg left = irl->r, right = irr->r;
++ if (ra_hasreg(left)) { /* Left already has a register. */
++ ra_noweak(as, left);
++ if (ra_noreg(right))
++ right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
++ else
++ ra_noweak(as, right);
++ } else if (ra_hasreg(right)) { /* Only right has a register. */
++ ra_noweak(as, right);
++ left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
++ } else if (ra_hashint(right)) { /* Neither has one, right is hinted: allocate right first. */
++ right = ra_allocref(as, ir->op2, allow);
++ left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
++ } else { /* Neither has one: allocate left first. */
++ left = ra_allocref(as, ir->op1, allow);
++ right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
++ }
++ return left | (right << 8); /* Callers unpack with (v >> 8) and (v & 255). */
++}
++
++/* -- Guard handling ------------------------------------------------------ */
++
++/* Setup exit stub after the end of each trace. The stub is written downwards from as->mctop. */
++static void asm_exitstub_setup(ASMState *as)
++{
++ MCode *mxp = as->mctop;
++ if (as->mcp == mxp)
++ --as->mcp;
++ /* st.w TMP, sp, 0; li TMP, traceno; jirl ->vm_exit_handler;*/
++ *--mxp = LOONGI_JIRL | RID_R0 | LOONGF_J(RID_R20) | 0<<10; /* Last word of the stub: jump via R20. */
++ emit_dj32i(as, RID_TMP, RID_ZERO, as->T->traceno); /* Presumably emits the trace number load at as->mcp. */
++ *--mxp = *as->mcp; /* Copy the word at as->mcp into the stub. */
++ *--mxp = LOONGI_LU52I_D | RID_R20 | LOONGF_J(RID_R20)
++ | LOONGF_I((((uintptr_t)(void *)lj_vm_exit_handler)>>52)&0xfff); /* Handler address bits 52..63. */
++ *--mxp = LOONGI_LU32I_D | RID_R20
++ | LOONGF_I20((((uintptr_t)(void *)lj_vm_exit_handler)>>32)&0xfffff); /* Handler address bits 32..51. */
++ *--mxp = LOONGI_ORI | RID_R20 | LOONGF_J(RID_R20)
++ | LOONGF_I(((uintptr_t)(void *)lj_vm_exit_handler)&0xfff); /* Handler address bits 0..11. */
++ *--mxp = LOONGI_LU12I_W | RID_R20
++ | LOONGF_I20((((uintptr_t)(void *)lj_vm_exit_handler)&0xfffff000)>>12); /* Handler address bits 12..31. */
++ *--mxp = LOONGI_ST_W | LOONGF_D(RID_TMP) | LOONGF_J(RID_SP); /* First word of the stub: store TMP at sp+0. */
++ as->mctop = mxp; /* The stub start becomes the new top. */
++}
++
++/* Branch target used by asm_guard()/asm_guard21(). Keep this in-sync with exitstub_trace_addr(). */
++#define asm_exitstub_addr(as) ((as)->mctop)
++
++/* Emit conditional branch to exit for guard, preceded by a load of the snapshot number into TMP. */
++static void asm_guard(ASMState *as, LOONGIns loongi, Reg rj, Reg rd)
++{
++ MCode *target = asm_exitstub_addr(as);
++ MCode *p = as->mcp;
++ if (LJ_UNLIKELY(p == as->invmcp)) { /* This guard is the one marked for loop inversion. */
++ as->invmcp = NULL;
++ as->loopinv = 1;
++ as->mcp = p;
++ loongi = loongi ^ ((loongi>>28) == 4 ? 0x00000100u : 0x04000000u); /* Invert cond: flip bit 8 (BCEQZ<->BCNEZ) or bit 26 (BEQ<->BNE, BLT<->BGE). */
++ target = p - 1; /* Patch target later in asm_loop_fixup. */
++ }
++ emit_branch(as, loongi, rj, rd, target);
++ emit_dji(as, LOONGI_ADDI_D, RID_TMP, RID_ZERO, as->snapno); /* Emitted backwards, so this runs before the branch. */
++}
++
++static void asm_guard21(ASMState *as, LOONGIns loongi, Reg rj) /* Guard variant for one-operand branches (emit_branch21). */
++{
++ MCode *target = asm_exitstub_addr(as);
++ MCode *p = as->mcp;
++ if (LJ_UNLIKELY(p == as->invmcp)) { /* This guard is the one marked for loop inversion. */
++ as->invmcp = NULL;
++ as->loopinv = 1;
++ as->mcp = p;
++ loongi = loongi ^ ((loongi>>28) == 4 ? 0x00000100u : 0x04000000u); /* Invert cond. BCEQZ BCNEZ*/
++ target = p - 1; /* Patch target later in asm_loop_fixup. */
++ }
++ emit_branch21(as, loongi, rj, target);
++ emit_dji(as, LOONGI_ADDI_D, RID_TMP, RID_ZERO, as->snapno); /* Emitted backwards, so this runs before the branch. */
++}
++
++/* -- Operand fusion ------------------------------------------------------ */
++
++/* Limit linear search to this distance. Avoids O(n^2) behavior. */
++#define CONFLICT_SEARCH_LIM 31
++
++/* Return 1 if no instruction with opcode `conflict` lies strictly between ref and curins. */
++static int noconflict(ASMState *as, IRRef ref, IROp conflict)
++{
++ const IRIns *irbase = as->ir;
++ IRRef cur = as->curins;
++ if (cur > ref + CONFLICT_SEARCH_LIM)
++ return 0; /* Too far apart: report a conflict rather than scan. */
++ for (cur--; cur > ref; cur--)
++ if (irbase[cur].o == conflict)
++ return 0; /* Found a conflicting instruction. */
++ return 1; /* Scanned the whole gap without a hit. */
++}
++
++/* Fuse the array base of colocated arrays. */
++static int32_t asm_fuseabase(ASMState *as, IRRef ref)
++{
++ IRIns *irt = IR(ref);
++ /* Returns sizeof(GCtab) for a fusable small TNEW, else 0. */
++ if (irt->o != IR_TNEW || irt->op1 > LJ_MAX_COLOSIZE) return 0;
++ if (neverfuse(as) || !noconflict(as, ref, IR_NEWREF)) return 0;
++ return (int32_t)sizeof(GCtab);
++}
++
++/* Fuse array/hash/upvalue reference into register+offset operand. Returns the base register; *ofsp gets the offset. */
++static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
++{
++ IRIns *ir = IR(ref);
++ if (ra_noreg(ir->r)) { /* Only try to fuse refs that have no register. */
++ if (ir->o == IR_AREF) {
++ if (mayfuse(as, ref)) {
++ if (irref_isk(ir->op2)) { /* Constant index only. */
++ IRRef tab = IR(ir->op1)->op1;
++ int32_t ofs = asm_fuseabase(as, tab);
++ IRRef refa = ofs ? tab : ir->op1; /* Base is the table itself if the array base was fused. */
++ ofs += 8*IR(ir->op2)->i; /* Constant index scaled by 8. */
++ if (checki16(ofs)) {
++ *ofsp = ofs;
++ return ra_alloc1(as, refa, allow);
++ }
++ }
++ }
++ } else if (ir->o == IR_HREFK) {
++ if (mayfuse(as, ref)) {
++ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); /* Slot number scaled by the node size. */
++ if (checki16(ofs)) {
++ *ofsp = ofs;
++ return ra_alloc1(as, ir->op1, allow);
++ }
++ }
++ } else if (ir->o == IR_UREFC) {
++ if (irref_isk(ir->op1)) { /* Constant function: the upvalue address is known now. */
++ GCfunc *fn = ir_kfunc(IR(ir->op1));
++ intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv;
++ intptr_t jgl = (intptr_t)J2G(as->J);
++ if ((uintptr_t)(ofs-jgl) < 65536) { /* Less than 64KB above J2G(as->J): address via RID_JGL. */
++ *ofsp = ofs-jgl-32768; /* Offset is biased by -32768. */
++ return RID_JGL;
++ } else { /* Otherwise: constant base register plus the sign-extended low 16 bits. */
++ *ofsp = (int16_t)ofs;
++ return ra_allock(as, ofs-(int16_t)ofs, allow);
++ }
++ }
++ } else if (ir->o == IR_TMPREF) {
++ *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); /* Same -32768 bias as the RID_JGL case above. */
++ return RID_JGL;
++ }
++ }
++ *ofsp = 0; /* Nothing fused: plain register operand. */
++ return ra_alloc1(as, ref, allow);
++}
++
++/* Fuse XLOAD/XSTORE reference into load/store operand and emit the load/store `loongi` for rd. */
++static void asm_fusexref(ASMState *as, LOONGIns loongi, Reg rd, IRRef ref,
++ RegSet allow, int32_t ofs)
++{
++ IRIns *ir = IR(ref);
++ Reg base;
++ if (ra_noreg(ir->r) && canfuse(as, ir)) {
++ intptr_t ofs2;
++ if (ir->o == IR_ADD) { /* ADD with a constant: fold it into the offset if it fits in 12 bits. */
++ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2),
++ checki12(ofs2))) {
++ ref = ir->op1;
++ ofs = (int32_t)ofs2;
++ }
++ } else if (ir->o == IR_STRREF) {
++ ofs2 = 4096; /* Out of 12-bit range: forces the ADD path below unless an operand is constant. */
++ lj_assertA(ofs == 0, "bad usage");
++ ofs = (int32_t)sizeof(GCstr);
++ if (irref_isk(ir->op2)) {
++ ofs2 = ofs + get_kval(as, ir->op2);
++ ref = ir->op1;
++ } else if (irref_isk(ir->op1)) {
++ ofs2 = ofs + get_kval(as, ir->op1);
++ ref = ir->op2;
++ }
++ if (!checki12(ofs2)) {
++ /* NYI: Fuse ADD with constant. */
++ Reg right, left = ra_alloc2(as, ir, allow);
++ right = (left >> 8); left &= 255; /* Unpack the ra_alloc2() pair. */
++ emit_dji(as, loongi, rd, RID_TMP, ofs&0xfff);
++ emit_djk(as, LOONGI_ADD_D, RID_TMP, left, right); /* Emitted backwards: the add runs first. */
++ return;
++ }
++ ofs = ofs2;
++ }
++ }
++ base = ra_alloc1(as, ref, allow);
++ emit_dji(as, loongi, rd, base, ofs&0xfff); /* Offset is truncated to the 12-bit immediate field. */
++}
++
++/* Fuse FP multiply-add/sub. Returns 1 if a fused instruction was emitted, else 0. */
++
++static int asm_fusemadd(ASMState *as, IRIns *ir, LOONGIns loongi, LOONGIns loongir)
++{
++ IRRef lref = ir->op1, rref = ir->op2;
++ IRIns *irm;
++ if (lref != rref &&
++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
++ ra_noreg(irm->r)) ||
++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
++ (rref = lref, loongi = loongir, ra_noreg(irm->r))))) { /* MUL on the right: swap operands, use loongir. */
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); /* After the test above, rref is the non-MUL operand. */
++ Reg left = ra_alloc2(as, irm, rset_exclude(rset_exclude(RSET_FPR, dest), add));
++ Reg right = (left >> 8); left &= 255; /* Unpack the ra_alloc2() pair. */
++ emit_djka(as, loongi, (dest & 0x1f), (left & 0x1f), (right & 0x1f), (add & 0x1f)); /* Register IDs masked to 5 bits. */
++ return 1;
++ }
++ return 0;
++}
++/* -- Calls --------------------------------------------------------------- */
++
++/* Generate a call to a C function: emit the call, then assign each argument to a register or stack slot. */
++static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
++{
++ uint32_t n, nargs = CCI_XNARGS(ci);
++ int32_t ofs = 0; /* Next stack offset for arguments that do not fit in registers. */
++ Reg gpr, fpr = REGARG_FIRSTFPR;
++ if ((void *)ci->func) /* A NULL func means the caller emitted the call itself (see asm_callx). */
++ emit_call(as, (void *)ci->func);
++ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
++ as->cost[gpr] = REGCOST(~0u, ASMREF_L);
++ gpr = REGARG_FIRSTGPR;
++ for (n = 0; n < nargs; n++) { /* Setup args. */
++ IRRef ref = args[n];
++ if (ref) {
++ IRIns *ir = IR(ref);
++ if (irt_isfp(ir->t) && (n == 0 || !(ci->flags & CCI_VARARG))) { /* FP arg; for varargs only the first takes this path. */
++ if (fpr <= REGARG_LASTFPR) {
++ lj_assertA(rset_test(as->freeset, fpr),
++ "reg %d not free", fpr); /* Must have been evicted. */
++ ra_leftov(as, fpr, ref);
++ fpr++;
++ } else if (gpr <= REGARG_LASTGPR) { /* Out of FP arg registers: fall back to a GPR. */
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Must have been evicted. */
++ ra_leftov(as, gpr, ref);
++ gpr++;
++ } else { /* Out of both: pass on the stack. */
++ Reg r = ra_alloc1(as, ref, RSET_FPR);
++ emit_spstore(as, ir, r, ofs);
++ ofs += 8;
++ }
++ } else {
++ if (gpr <= REGARG_LASTGPR) {
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Must have been evicted. */
++ ra_leftov(as, gpr, ref);
++ gpr++;
++ } else { /* Out of GPR arg registers: pass on the stack. */
++ Reg r = ra_alloc1(as, ref, RSET_GPR);
++ emit_spstore(as, ir, r, ofs);
++ ofs += 8;
++ }
++ }
++ }
++ }
++}
++
++/* Setup result reg/sp for call. Evict scratch regs. */
++static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
++{
++ RegSet drop = RSET_SCRATCH;
++ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); /* Result is followed by a used HIOP. */
++ if (ra_hasreg(ir->r))
++ rset_clear(drop, ir->r); /* Dest reg handled below. */
++ if (hiop && ra_hasreg((ir+1)->r))
++ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
++ ra_evictset(as, drop); /* Evictions must be performed first. */
++ if (ra_used(ir)) {
++ lj_assertA(!irt_ispri(ir->t), "PRI dest");
++ if (irt_isfp(ir->t)) {
++ if ((ci->flags & CCI_CASTU64)) { /* FP result arrives in RID_RET: move it to an FPR. */
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ emit_dj(as, irt_isnum(ir->t) ? LOONGI_MOVGR2FR_D : LOONGI_MOVGR2FR_W,
++ dest, RID_RET);
++ } else {
++ ra_destreg(as, ir, RID_FPRET);
++ }
++ } else if (hiop) {
++ ra_destpair(as, ir);
++ } else {
++ ra_destreg(as, ir, RID_RET);
++ }
++ }
++}
++
++static void asm_callx(ASMState *as, IRIns *ir) /* Call to a constant address or through a register. */
++{
++ IRRef args[CCI_NARGS_MAX*2];
++ CCallInfo ci;
++ IRRef func;
++ IRIns *irf;
++ ci.flags = asm_callx_flags(as, ir);
++ asm_collectargs(as, ir, &ci, args);
++ asm_setupresult(as, ir, &ci);
++ func = ir->op2; irf = IR(func);
++ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } /* Unwrap CARG to get the function ref. */
++ if (irref_isk(func)) { /* Call to constant address. */
++ ci.func = (ASMFunction)(void *)get_kval(as, func);
++ } else { /* Need specific register for indirect calls. */
++ Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R12, RID_MAX_GPR)-RSET_FIXED);
++ *--as->mcp = LOONGI_JIRL | LOONGF_D(RID_RA) | LOONGF_J(freg); /* Emit the indirect call here. */
++ ci.func = (ASMFunction)(void *)0; /* NULL tells asm_gencall() not to emit another call. */
++ }
++ asm_gencall(as, &ci, args);
++}
++
++static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) /* Call helper `id` with ir->op1 as the FP argument. */
++{
++ /* The modified regs must match with the *.dasc implementation. */
++ RegSet drop = RID2RSET(RID_R12)|RID2RSET(RID_R13)|RID2RSET(RID_F0)|
++ RID2RSET(RID_F4)|RID2RSET(RID_F9)|RID2RSET(RID_F22)
++ |RID2RSET(RID_F23);
++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Keep the destination register. */
++ ra_evictset(as, drop);
++ ra_destreg(as, ir, RID_FPRET); /* Result comes back in RID_FPRET. */
++ emit_call(as, (void *)lj_ir_callinfo[id].func);
++ ra_leftov(as, REGARG_FIRSTFPR, ir->op1); /* Argument goes in the first FP argument register. */
++}
++
++/* -- Returns ------------------------------------------------------------- */
++
++/* Return to lower frame. Guard that it goes to the right spot. */
++static void asm_retf(ASMState *as, IRIns *ir)
++{
++ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
++ void *pc = ir_kptr(IR(ir->op2));
++ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); /* Slot count derived from the A operand of the bytecode before pc. */
++ as->topslot -= (BCReg)delta;
++ if ((int32_t)as->topslot < 0) as->topslot = 0; /* Clamp at zero. */
++ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
++ emit_setgl(as, base, jit_base);
++ emit_addptr(as, base, -8*delta);
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
++ asm_guard(as, LOONGI_BNE, tmp,
++ ra_allock(as, igcptr(pc), rset_exclude(rset_exclude(RSET_GPR, base), tmp))); /* Exit unless the loaded value equals pc. */
++ emit_dji(as, LOONGI_LD_D, tmp, base, -8&0xfff); /* Emitted backwards: load from base-8 runs first. */
++}
++
++/* -- Buffer operations --------------------------------------------------- */
++
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb) /* Rewrite the SBuf L field of the buffer in sb. */
++{
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
++ IRIns irgc;
++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); /* Emitted backwards: this store runs last. */
++ emit_djml(as, LOONGI_BSTRINS_D, RID_TMP, tmp,
++ lj_fls(SBUF_MASK_FLAG), 0); /* Insert bits 0..lj_fls(SBUF_MASK_FLAG) of tmp into TMP. */
++ emit_getgl(as, RID_TMP, cur_L);
++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); /* Runs first: load the old field value. */
++}
++#endif
++
++/* -- Type conversions ---------------------------------------------------- */
++
++static void asm_tointg(ASMState *as, IRIns *ir, Reg left) /* Guarded number to int conversion. */
++{
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ asm_guard21(as, LOONGI_BCEQZ, 0); /* Emitted backwards: exit if the compare below left fcc0 clear. */
++ emit_djk(as, LOONGI_FCMP_CEQ_D, 0, tmp, left); /* Compare the round-tripped value with the input. */
++ emit_dj(as, LOONGI_FFINT_D_W, tmp, tmp); /* Convert the integer back to double. */
++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp); /* Move the integer result to dest. */
++ emit_dj(as, LOONGI_FTINT_W_D, tmp, left); /* Runs first: convert the double to a 32 bit integer. */
++}
++
++static void asm_tobit(ASMState *as, IRIns *ir) /* dest = low 32 bits of the FP sum op1 + op2. */
++{
++ RegSet allow = RSET_FPR;
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_alloc1(as, ir->op1, allow);
++ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); /* rset_clear() presumably updates allow in place. */
++ Reg tmp = ra_scratch(as, rset_clear(allow, right));
++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp); /* Emitted backwards: this move runs after the add. */
++ emit_djk(as, LOONGI_FADD_D, tmp, left, right);
++}
++
++static void asm_conv(ASMState *as, IRIns *ir)
++{
++ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); // source type
++ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
++ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
++ IRRef lref = ir->op1;
++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
++ /* Use GPR to pass floating-point arguments */
++ if (irt_isfp(ir->t) && ir->r >= RID_R4 && ir->r <= RID_R11) {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg ftmp = ra_scratch(as, RSET_FPR);
++ if (stfp) { /* FP to FP conversion. */
++ emit_dj(as, st == IRT_NUM ? LOONGI_MOVFR2GR_S : LOONGI_MOVFR2GR_D, dest, ftmp);
++ emit_dj(as, st == IRT_NUM ? LOONGI_FCVT_S_D : LOONGI_FCVT_D_S,
++ ftmp, ra_alloc1(as, lref, RSET_FPR));
++ } else if (st == IRT_U32) { /* U32 to FP conversion. */
++ /* y = (x ^ 0x80000000) + 2147483648.0 */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, ftmp));
++ if (irt_isfloat(ir->t)) {
++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, ftmp);
++ emit_dj(as, LOONGI_FCVT_S_D, ftmp, ftmp);
++ } else {
++ emit_dj(as, LOONGI_MOVFR2GR_D, dest, ftmp);
++ }
++ /* Must perform arithmetic with doubles to keep the precision. */
++ emit_djk(as, LOONGI_FADD_D, ftmp, ftmp, tmp);
++ emit_dj(as, LOONGI_FFINT_D_W, ftmp, ftmp);
++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f),
++ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
++ emit_dj(as, LOONGI_MOVGR2FR_W, ftmp, RID_TMP);
++ emit_djk(as, LOONGI_XOR, RID_TMP, RID_TMP, left);
++ emit_dji(as, LOONGI_ADDU16I_D, RID_TMP, RID_R0, 0x8000);
++ } else if(st == IRT_U64) { /* U64 to FP conversion. */
++ /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, ftmp));
++ MCLabel l_end = emit_label(as);
++ if (irt_isfloat(ir->t)) {
++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, ftmp);
++ emit_djk(as, LOONGI_FADD_S, ftmp, ftmp, tmp);
++ emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f), (void *)&as->J->k32[LJ_K32_2P63],
++ rset_exclude(RSET_GPR, left));
++ emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end);
++ emit_dj(as, LOONGI_FFINT_S_L, ftmp, ftmp);
++ } else {
++ emit_dj(as, LOONGI_MOVFR2GR_D, dest, ftmp);
++ emit_djk(as, LOONGI_FADD_D, ftmp, ftmp, tmp);
++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), (void *)&as->J->k64[LJ_K64_2P63],
++ rset_exclude(RSET_GPR, left));
++ emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end);
++ emit_dj(as, LOONGI_FFINT_D_L, ftmp, ftmp);
++ }
++ emit_dj(as, LOONGI_MOVGR2FR_D, ftmp, RID_TMP);
++ emit_djml(as, LOONGI_BSTRPICK_D, RID_TMP, left, 62, 0);
++ } else { /* Integer to FP conversion. */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ LOONGIns loongi = irt_isfloat(ir->t) ?
++ (st64 ? LOONGI_FFINT_S_L : LOONGI_FFINT_S_W) :
++ (st64 ? LOONGI_FFINT_D_L : LOONGI_FFINT_D_W);
++ emit_dj(as, st64 ? LOONGI_MOVFR2GR_D : LOONGI_MOVFR2GR_S, dest, ftmp);
++ emit_dj(as, loongi, ftmp, ftmp);
++ emit_dj(as, st64 ? LOONGI_MOVGR2FR_D : LOONGI_MOVGR2FR_W, ftmp, left);
++ }
++ } else if (irt_isfp(ir->t)) {
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ if (stfp) { /* FP to FP conversion. */
++ emit_dj(as, st == IRT_NUM ? LOONGI_FCVT_S_D : LOONGI_FCVT_D_S,
++ dest, ra_alloc1(as, lref, RSET_FPR));
++ } else if (st == IRT_U32) { /* U32 to FP conversion. */
++ /* y = (x ^ 0x80000000) + 2147483648.0 */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
++ if (irt_isfloat(ir->t))
++ emit_dj(as, LOONGI_FCVT_S_D, dest, dest);
++ /* Must perform arithmetic with doubles to keep the precision. */
++ emit_djk(as, LOONGI_FADD_D, dest, dest, tmp);
++ emit_dj(as, LOONGI_FFINT_D_W, dest, dest);
++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f),
++ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
++ emit_dj(as, LOONGI_MOVGR2FR_W, dest, RID_TMP);
++ emit_djk(as, LOONGI_XOR, RID_TMP, RID_TMP, left);
++ emit_dji(as, LOONGI_ADDU16I_D, RID_TMP, RID_R0, 0x8000);
++ } else if(st == IRT_U64) { /* U64 to FP conversion. */
++ /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
++ MCLabel l_end = emit_label(as);
++ if (irt_isfloat(ir->t)) {
++ emit_djk(as, LOONGI_FADD_S, dest, dest, tmp);
++ emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f), (void *)&as->J->k32[LJ_K32_2P63],
++ rset_exclude(RSET_GPR, left));
++ emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end);
++ emit_dj(as, LOONGI_FFINT_S_L, dest, dest);
++ } else {
++ emit_djk(as, LOONGI_FADD_D, dest, dest, tmp);
++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), (void *)&as->J->k64[LJ_K64_2P63],
++ rset_exclude(RSET_GPR, left));
++ emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end);
++ emit_dj(as, LOONGI_FFINT_D_L, dest, dest);
++ }
++ emit_dj(as, LOONGI_MOVGR2FR_D, dest, RID_TMP);
++ emit_djml(as, LOONGI_BSTRPICK_D, RID_TMP, left, 62, 0);
++ } else { /* Integer to FP conversion. */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ LOONGIns loongi = irt_isfloat(ir->t) ?
++ (st64 ? LOONGI_FFINT_S_L : LOONGI_FFINT_S_W) :
++ (st64 ? LOONGI_FFINT_D_L : LOONGI_FFINT_D_W);
++ emit_dj(as, loongi, dest, dest);
++ emit_dj(as, st64 ? LOONGI_MOVGR2FR_D : LOONGI_MOVGR2FR_W, dest, left);
++ }
++ } else if (stfp) { /* FP to integer conversion. */
++ if (irt_isguard(ir->t)) {
++ /* Checked conversions are only supported from number to int. */
++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
++ "bad type for checked CONV");
++ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
++ } else {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_alloc1(as, lref, RSET_FPR);
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
++ if (irt_isu32(ir->t)) { /* FP to U32 conversion. */
++ /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
++ emit_djk(as, LOONGI_XOR, dest, dest, RID_TMP);
++ emit_dji(as, LOONGI_ADDU16I_D, RID_TMP, RID_R0, 0x8000);
++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp);
++ emit_dj(as, st == IRT_FLOAT ? LOONGI_FTINTRM_W_S : LOONGI_FTINTRM_W_D,
++ tmp, tmp);
++ emit_djk(as, st == IRT_FLOAT ? LOONGI_FSUB_S : LOONGI_FSUB_D,
++ tmp, left, tmp);
++ if (st == IRT_FLOAT)
++ emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f),
++ (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
++ else
++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f),
++ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
++ } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
++ MCLabel l_end;
++ emit_dj(as, LOONGI_MOVFR2GR_D, dest, tmp);
++ l_end = emit_label(as);
++ /* For inputs >= 2^63 add -2^64 and convert again. */
++ if (st == IRT_NUM) {
++ emit_dj(as, LOONGI_FTINTRZ_L_D, tmp, tmp);
++ emit_djk(as, LOONGI_FADD_D, tmp, left, tmp);
++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f),
++ (void *)&as->J->k64[LJ_K64_M2P64],
++ rset_exclude(RSET_GPR, dest));
++ emit_branch21(as, LOONGI_BCNEZ, 0, l_end);
++ emit_dj(as, LOONGI_FTINTRZ_L_D, tmp, left);
++ emit_djk(as, LOONGI_FCMP_CLT_D, 0, left, tmp);
++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f),
++ (void *)&as->J->k64[LJ_K64_2P63],
++ rset_exclude(RSET_GPR, dest));
++ } else {
++ emit_dj(as, LOONGI_FTINTRZ_L_S, tmp, tmp);
++ emit_djk(as, LOONGI_FADD_S, tmp, left, tmp);
++ emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f),
++ (void *)&as->J->k32[LJ_K32_M2P64],
++ rset_exclude(RSET_GPR, dest));
++ emit_branch21(as, LOONGI_BCNEZ, 0, l_end);
++ emit_dj(as, LOONGI_FTINTRZ_L_S, tmp, left);
++ emit_djk(as, LOONGI_FCMP_CLT_S, 0, left, tmp);
++ emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f),
++ (void *)&as->J->k32[LJ_K32_2P63],
++ rset_exclude(RSET_GPR, dest));
++ }
++ } else {
++ LOONGIns loongi = irt_is64(ir->t) ?
++ (st == IRT_NUM ? LOONGI_FTINTRZ_L_D : LOONGI_FTINTRZ_L_S) :
++ (st == IRT_NUM ? LOONGI_FTINTRZ_W_D : LOONGI_FTINTRZ_W_S);
++ emit_dj(as, irt_is64(ir->t) ? LOONGI_MOVFR2GR_D : LOONGI_MOVFR2GR_S, dest, left);
++ emit_dj(as, loongi, left, left);
++ }
++ }
++ } else {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
++ if ((ir->op2 & IRCONV_SEXT)) { // sign-extend
++ emit_dj(as, st == IRT_I8 ? LOONGI_EXT_W_B : LOONGI_EXT_W_H, dest, left);
++ } else { // zero-extend
++ int msbd = st == IRT_U8 ? 7 : 15;
++ emit_djml(as, LOONGI_BSTRPICK_D, dest, left, msbd, 0);
++ }
++ } else { /* 32/64 bit integer conversions. */
++ if (irt_is64(ir->t)) {
++ if (st64) {
++ /* 64/64 bit no-op (cast)*/
++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
++ } else {
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */
++ emit_dju(as, LOONGI_SLLI_W, dest, left, 0);
++ } else { /* 32 to 64 bit zero extension. */
++ emit_djml(as, LOONGI_BSTRPICK_D, dest, left, 31, 0);
++ }
++ }
++ } else {
++ if (st64 && !(ir->op2 & IRCONV_NONE)) {
++ /* This is either a 32 bit reg/reg mov which zeroes the hiword
++ ** or a load of the loword from a 64 bit address.
++ */
++ Reg left = ra_alloc1(as, lref, RSET_GPR);
++ emit_djml(as, LOONGI_BSTRPICK_D, dest, left, 31, 0);
++ } else { /* 32/32 bit no-op (cast). */
++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
++ }
++ }
++ }
++ }
++}
++
++static void asm_strto(ASMState *as, IRIns *ir) /* Emits a call to lj_strscan_num(str, n) plus a guard on its return register. */
++{
++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
++ IRRef args[2];
++ int32_t ofs = SPOFS_TMP; /* Default result slot offset; replaced below if ir has a spill slot. */
++ RegSet drop = RSET_SCRATCH;
++ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
++ ra_evictset(as, drop);
++ if (ir->s) ofs = sps_scale(ir->s); /* Use the instruction's own spill slot when it has one. */
++ asm_guard(as, LOONGI_BEQ, RID_RET, RID_ZERO); /* Test return status. */
++ args[0] = ir->op1; /* GCstr *str */
++ args[1] = ASMREF_TMP1; /* TValue *n */
++ asm_gencall(as, ci, args);
++ /* The callee writes its result through args[1]: point it at the spill slot or temp slots. */
++ Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
++ emit_addk(as, tmp, RID_SP, ofs, RSET_GPR); /* tmp = RID_SP + ofs (presumably; emit_addk is defined elsewhere). */
++}
++
++/* -- Memory references --------------------------------------------------- */
++
++/* Store tagged value for ref at base+ofs. */
++static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) /* emit_* calls below appear in reverse of execution order. */
++{
++ RegSet allow = rset_exclude(RSET_GPR, base); /* Never hand out base as a temporary. */
++ IRIns *ir = IR(ref);
++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
++ "store of IR type %d", irt_type(ir->t));
++ if (irref_isk(ref)) { /* Constant: materialize the complete 64-bit TValue and store it. */
++ TValue k;
++ lj_ir_kvalue(as->J->L, &k, ir);
++ Reg ku64 = ra_allock(as, (int64_t)k.u64, allow);
++ rset_clear(allow, ku64);
++ if (checki12(ofs)) { /* Offset fits the 12-bit immediate of ST_D. */
++ emit_dji(as, LOONGI_ST_D, ku64, base, ofs&0xfff);
++ } else { /* Otherwise use the indexed store with ofs held in a register. */
++ emit_djk(as, LOONGI_STX_D, ku64, base, ra_allock(as, ofs, allow));
++ }
++ } else { /* Non-constant: combine the value with its type tag in RID_TMP, then store. */
++ Reg src = ra_alloc1(as, ref, allow);
++ rset_clear(allow, src);
++ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); /* Type tag shifted to bit 47 and up. */
++ emit_dji(as, LOONGI_ST_D, RID_TMP, base, ofs&0xfff); /* NOTE(review): no checki12(ofs) fallback here, unlike the constant path -- confirm callers keep ofs in range. */
++ if (irt_isinteger(ir->t)) {
++ emit_djk(as, LOONGI_ADD_D, RID_TMP, RID_TMP, type); /* RID_TMP += type tag */
++ emit_djml(as, LOONGI_BSTRPICK_D, RID_TMP, src, 31, 0); /* RID_TMP = bits 31..0 of src */
++ } else {
++ emit_djk(as, LOONGI_ADD_D, RID_TMP, src, type); /* RID_TMP = src + type tag */
++ }
++ }
++}
++
++/* Get pointer to TValue. */
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) // todo-new
++{
++ int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); /* Offset of g->tmptv as used with RID_JGL; the -32768 bias presumably matches how RID_JGL is set up -- confirm. */
++ RegSet allow = RSET_GPR;
++ if ((mode & IRTMPREF_IN1)) { /* ref supplies an input value. */
++ IRIns *ir = IR(ref);
++ if (irt_isnum(ir->t)) {
++ if ((mode & IRTMPREF_OUT1)) { /* Number that is also an output: stage it in g->tmptv. */
++ Reg src = ra_alloc1(as, ref, RSET_FPR);
++ emit_addk(as, dest, RID_JGL, tmpofs, allow);
++ emit_lso(as, LOONGI_ST_D, src, RID_JGL, tmpofs, allow); /* NOTE(review): src is allocated from RSET_FPR but stored with ST_D rather than FST_D -- confirm emit_lso handles this. */
++ } else if (irref_isk(ref)) {
++ /* Use the number constant itself as a TValue. */
++ ra_allockreg(as, igcptr(ir_knum(ir)), dest);
++ } else { /* Non-constant number: dest = address of its spill slot on the stack. */
++ emit_dji(as, LOONGI_ADDI_D, dest, RID_SP, ra_spill(as, ir)&0xfff);
++ }
++ } else {
++ /* Otherwise use g->tmptv to hold the TValue. */
++ asm_tvstore64(as, dest, 0, ref);
++ emit_addk(as, dest, RID_JGL, tmpofs, RSET_GPR);
++ }
++ } else { /* No input value: just produce the address of g->tmptv. */
++ emit_addk(as, dest, RID_JGL, tmpofs, RSET_GPR);
++ }
++}
++
++static void asm_aref(ASMState *as, IRIns *ir) /* Emits dest = op1 + 8*op2 (address of an 8-byte array slot). */
++{
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg idx, base;
++ if (irref_isk(ir->op2)) { /* Constant index: try to fold everything into one ADDI_D immediate. */
++ IRRef tab = IR(ir->op1)->op1;
++ int32_t ofs = asm_fuseabase(as, tab);
++ IRRef refa = ofs ? tab : ir->op1; /* Non-zero ofs: address relative to tab instead of op1. */
++ ofs += 8*IR(ir->op2)->i;
++ if (checki12(ofs)) { /* Only usable when the combined offset fits 12 bits. */
++ base = ra_alloc1(as, refa, RSET_GPR);
++ emit_dji(as, LOONGI_ADDI_D, dest, base, ofs&0xfff);
++ return;
++ }
++ }
++ base = ra_alloc1(as, ir->op1, RSET_GPR);
++ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
++ emit_djk(as, LOONGI_ADD_D, dest, RID_TMP, base); /* dest = RID_TMP + base */
++ emit_dju(as, LOONGI_SLLI_D, RID_TMP, idx, 3); /* RID_TMP = idx << 3 (emitted after, so presumably executed before the add) */
++}
++
++/* Inlined hash lookup. Specialized for key type and for const keys.
++** The equivalent C code is:
++** Node *n = hashkey(t, key);
++** do {
++** if (lj_obj_equal(&n->key, key)) return &n->val;
++** } while ((n = nextnode(n)));
++** return niltv(L);
++*/
++static void asm_href(ASMState *as, IRIns *ir, IROp merge) /* merge is compared against IR_NE / IR_EQ below to pick guard handling. */
++{
++ RegSet allow = RSET_GPR;
++ int destused = ra_used(ir);
++ Reg dest = ra_dest(as, ir, allow);
++ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
++ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2;
++ Reg cmp64 = RID_NONE;
++ IRRef refkey = ir->op2;
++ IRIns *irkey = IR(refkey);
++ int isk = irref_isk(refkey);
++ IRType1 kt = irkey->t;
++ uint32_t khash;
++ MCLabel l_end, l_loop, l_next;
++ rset_clear(allow, tab);
++ tmp1 = ra_scratch(as, allow); /* Two GPR scratch registers, distinct from dest and tab. */
++ rset_clear(allow, tmp1);
++ tmp2 = ra_scratch(as, allow);
++ rset_clear(allow, tmp2);
++
++ if (irt_isnum(kt)) { /* Number key: compared through FP registers. */
++ key = ra_alloc1(as, refkey, RSET_FPR);
++ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
++ } else {
++ /* Allocate cmp64 register used for 64-bit comparisons */
++ if (!isk && irt_isaddr(kt)) {
++ cmp64 = tmp2; /* Filled at runtime with key + type tag (see the ADD_D into tmp2 below). */
++ } else {
++ int64_t k;
++ if (isk && irt_isaddr(kt)) {
++ k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
++ } else {
++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
++ k = ~((int64_t)~irt_toitype(kt) << 47);
++ }
++ cmp64 = ra_allock(as, k, allow);
++ rset_clear(allow, cmp64);
++ }
++ if (!irt_ispri(kt)) {
++ key = ra_alloc1(as, refkey, allow);
++ rset_clear(allow, key);
++ }
++ }
++
++ /* Key not found in chain: jump to exit (if merged) or load niltv. */
++ l_end = emit_label(as);
++ as->invmcp = NULL;
++ if (merge == IR_NE)
++ asm_guard(as, LOONGI_BEQ, RID_ZERO, RID_ZERO); /* Compares RID_ZERO with itself, i.e. an always-true condition. */
++ else if (destused)
++ emit_loada(as, dest, niltvg(J2G(as->J)));
++
++ /* Follow hash chain until the end. */
++ l_loop = --as->mcp; /* Reserve one instruction slot; it is filled with a BNE further down. */
++ emit_move(as, dest, tmp1);
++ emit_dji(as, LOONGI_LD_D, tmp1, dest, (int32_t)offsetof(Node, next)&0xfff);
++ l_next = emit_label(as);
++
++ /* Type and value comparison. */
++ if (merge == IR_EQ) { /* Must match asm_guard(). */
++ l_end = asm_exitstub_addr(as);
++ }
++ if (irt_isnum(kt)) {
++ emit_branch21(as, LOONGI_BCNEZ, 0, l_end);
++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno);
++ emit_djk(as, LOONGI_FCMP_CEQ_D, 0, tmpnum, key);
++ emit_branch(as, LOONGI_BEQ, tmp1, RID_ZERO, l_next);
++ emit_dju(as, LOONGI_SLTUI, tmp1, tmp1, ((int32_t)LJ_TISNUM)&0xfff);
++ emit_dju(as, LOONGI_SRAI_D, tmp1, tmp1, 47);
++ emit_dj(as, LOONGI_MOVGR2FR_D, tmpnum, tmp1);
++ } else {
++ emit_branch(as, LOONGI_BEQ, tmp1, cmp64, l_end);
++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno);
++ }
++ emit_dji(as, LOONGI_LD_D, tmp1, dest, (int32_t)offsetof(Node, key.u64)&0xfff);
++ *l_loop = LOONGI_BNE | LOONGF_J(tmp1) | LOONGF_D(RID_ZERO) | LOONGF_I(((as->mcp-l_loop) & 0xffffu));
++ if (!isk && irt_isaddr(kt)) {
++ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow);
++ emit_djk(as, LOONGI_ADD_D, tmp2, key, type); /* tmp2 (== cmp64) = key + type tag */
++ rset_clear(allow, type);
++ }
++
++ /* Load main position relative to tab->node into dest. */
++ khash = isk ? ir_khash(as, irkey) : 1; /* Non-constant keys use 1 only to select the general path below. */
++ if (khash == 0) {
++ emit_dji(as, LOONGI_LD_D, dest, tab, (int32_t)offsetof(GCtab, node)&0xfff);
++ } else {
++ Reg tmphash = tmp1;
++ if (isk)
++ tmphash = ra_allock(as, khash, allow);
++ /* node = tab->node + (idx*32-idx*8) */
++ emit_djk(as, LOONGI_ADD_D, dest, dest, tmp1);
++ lj_assertA(sizeof(Node) == 24, "bad Node size");
++ emit_djk(as, LOONGI_SUB_W, tmp1, tmp2, tmp1);
++ emit_dju(as, LOONGI_SLLI_W, tmp1, tmp1, 3);
++ emit_dju(as, LOONGI_SLLI_W, tmp2, tmp1, 5);
++ emit_djk(as, LOONGI_AND, tmp1, tmp2, tmphash); // idx = hi & tab->hmask
++ emit_dji(as, LOONGI_LD_D, dest, tab, ((int32_t)offsetof(GCtab, node))&0xfff);
++ emit_dji(as, LOONGI_LD_W, tmp2, tab, ((int32_t)offsetof(GCtab, hmask))&0xfff);
++ if (isk) {
++ /* Nothing to do. */
++ } else if (irt_isstr(kt)) {
++ emit_dji(as, LOONGI_LD_W, tmp1, key, ((int32_t)offsetof(GCstr, sid))&0xfff);
++ } else { /* Must match with hash*() in lj_tab.c. */
++ emit_djk(as, LOONGI_SUB_W, tmp1, tmp1, tmp2);
++ emit_dju(as, LOONGI_ROTRI_W, tmp2, tmp2, (-HASH_ROT3)&0x1f);
++ emit_djk(as, LOONGI_XOR, tmp1, tmp2, tmp1);
++ emit_dju(as, LOONGI_ROTRI_W, tmp1, tmp1, (-HASH_ROT2-HASH_ROT1)&0x1f);
++ emit_djk(as, LOONGI_SUB_W, tmp2, tmp2, dest);
++ emit_djk(as, LOONGI_XOR, tmp2, tmp2, tmp1);
++ emit_dju(as, LOONGI_ROTRI_W, dest, tmp1, (-HASH_ROT1)&0x1f);
++ if (irt_isnum(kt)) {
++ emit_dju(as, LOONGI_SLLI_W, tmp1, tmp1, 1);
++ emit_dju(as, LOONGI_SRAI_D, tmp1, tmp1, 32); // hi
++ emit_dju(as, LOONGI_SLLI_W, tmp2, tmp1, 0); // lo
++ emit_dj(as, LOONGI_MOVFR2GR_D, tmp1, key);
++ } else {
++ checkmclim(as);
++ emit_dju(as, LOONGI_SRAI_D, tmp1, tmp1, 32); // hi
++ emit_dju(as, LOONGI_SLLI_W, tmp2, key, 0); // lo
++ emit_djk(as, LOONGI_ADD_D, tmp1, key, type);
++ }
++ }
++ }
++}
++
++static void asm_hrefk(ASMState *as, IRIns *ir) /* Guards that the key stored at a fixed node slot equals the expected constant key. */
++{
++ IRIns *kslot = IR(ir->op2);
++ IRIns *irkey = IR(kslot->op1);
++ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); /* Byte offset of the slot from the node base. */
++ int32_t kofs = ofs + (int32_t)offsetof(Node, key);
++ Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; /* NOTE(review): 32736 threshold -- relation to LoongArch immediate ranges is not evident here; confirm. */
++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
++ RegSet allow = rset_exclude(RSET_GPR, node);
++ Reg idx = node;
++ Reg key = ra_scratch(as, allow);
++ int64_t k;
++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
++ if (ofs > 32736) { /* Large offset: address the key through dest, which is set to node+ofs below. */
++ idx = dest;
++ rset_clear(allow, dest);
++ kofs = (int32_t)offsetof(Node, key);
++ } else if (ra_hasreg(dest)) {
++ emit_addk(as, dest, node, ofs, allow);
++ }
++ if (irt_ispri(irkey->t)) { /* Build the 64-bit constant the stored key is compared with. */
++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
++ k = ~((int64_t)~irt_toitype(irkey->t) << 47);
++ } else if (irt_isnum(irkey->t)) {
++ k = (int64_t)ir_knum(irkey)->u64;
++ } else {
++ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey);
++ }
++ asm_guard(as, LOONGI_BNE, key, ra_allock(as, k, allow)); /* Guard compares the loaded key with k. */
++ emit_lso(as, LOONGI_LD_D, key, idx, kofs, allow); /* key = 64-bit load from idx + kofs */
++ if (ofs > 32736)
++ emit_djk(as, LOONGI_ADD_D, dest, node, ra_allock(as, ofs, allow)); /* dest = node + ofs */
++}
++
++static void asm_uref(ASMState *as, IRIns *ir) /* Emits code producing an upvalue's value pointer in dest; handles the IR_UREFC variant too. */
++{
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ if (irref_isk(ir->op1)) { /* Constant function: the address of uv.v is known now, load through it. */
++ GCfunc *fn = ir_kfunc(IR(ir->op1));
++ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
++ emit_lsptr(as, LOONGI_LD_D, dest, v, RSET_GPR);
++ } else {
++ Reg uv = ra_scratch(as, RSET_GPR);
++ Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
++ if (ir->o == IR_UREFC) {
++ Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), uv));
++ asm_guard(as, LOONGI_BEQ, tmp, RID_ZERO); /* Guard on the loaded 'closed' byte compared with zero. */
++ emit_dji(as, LOONGI_ADDI_D, dest, uv, ((int32_t)offsetof(GCupval, tv))&0xfff); /* dest = &uv->tv */
++ emit_dji(as, LOONGI_LD_BU, tmp, uv, ((int32_t)offsetof(GCupval, closed))&0xfff); /* tmp = uv->closed */
++ } else {
++ emit_dji(as, LOONGI_LD_D, dest, uv, ((int32_t)offsetof(GCupval, v))&0xfff); /* dest = uv->v */
++ }
++ emit_lso(as, LOONGI_LD_D, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
++ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8), RSET_GPR); /* uv = func->uvptr[op2 >> 8] */
++ }
++}
++
++static void asm_fref(ASMState *as, IRIns *ir) /* Emits nothing; only asserts that the FREF result is unused. */
++{
++ UNUSED(as); UNUSED(ir);
++ lj_assertA(!ra_used(ir), "unfused FREF");
++}
++
++static void asm_strref(ASMState *as, IRIns *ir) /* Emits dest = op1 + op2 + sizeof(GCstr). */
++{
++ RegSet allow = RSET_GPR;
++ Reg dest = ra_dest(as, ir, allow);
++ Reg base = ra_alloc1(as, ir->op1, allow);
++ IRIns *irr = IR(ir->op2);
++ int32_t ofs = sizeof(GCstr); /* Offsets are relative to the end of the GCstr header. */
++ rset_clear(allow, base);
++ if (irref_isk(ir->op2) && checki12(ofs + irr->i)) { /* Constant offset that fits 12 bits: one ADDI_D. */
++ emit_dji(as, LOONGI_ADDI_D, dest, base, (ofs + irr->i)&0xfff);
++ } else { /* General case: add the offset register, then the header size. */
++ emit_dji(as, LOONGI_ADDI_D, dest, dest, ofs&0xfff); /* dest += sizeof(GCstr) */
++ emit_djk(as, LOONGI_ADD_D, dest, base, ra_alloc1(as, ir->op2, allow)); /* dest = base + op2 */
++ }
++}
++
++/* -- Loads and stores ---------------------------------------------------- */
++
++static LOONGIns asm_fxloadins(ASMState *as, IRIns *ir) /* Returns the load opcode matching the IR result type of ir. */
++{
++ UNUSED(as);
++ switch (irt_type(ir->t)) {
++ case IRT_I8:
++ return LOONGI_LD_B;
++ case IRT_U8:
++ return LOONGI_LD_BU;
++ case IRT_I16:
++ return LOONGI_LD_H;
++ case IRT_U16:
++ return LOONGI_LD_HU;
++ case IRT_NUM:
++ lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
++ return LOONGI_FLD_D;
++ /* not reached: the IRT_NUM case always returns above */
++ case IRT_FLOAT:
++ return LOONGI_FLD_S;
++ /* not reached: the IRT_FLOAT case always returns above */
++ default: /* All remaining types: 64-bit or 32-bit integer load. */
++ return irt_is64(ir->t) ? LOONGI_LD_D : LOONGI_LD_W;
++ }
++}
++
++static LOONGIns asm_fxstoreins(ASMState *as, IRIns *ir) /* Returns the store opcode matching the IR type of ir. */
++{
++ UNUSED(as);
++ switch (irt_type(ir->t)) {
++ case IRT_I8: case IRT_U8: return LOONGI_ST_B; /* Stores ignore signedness. */
++ case IRT_I16: case IRT_U16: return LOONGI_ST_H;
++ case IRT_NUM:
++ lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
++ if (!LJ_SOFTFP) return LOONGI_FST_D;
++ /* fallthrough (only when LJ_SOFTFP is set) */
++ case IRT_FLOAT: return LOONGI_FST_S;
++ /* not reached: the IRT_FLOAT case always returns above */
++ default: return (LJ_64 && irt_is64(ir->t)) ? LOONGI_ST_D : LOONGI_ST_W;
++ }
++}
++
++static void asm_fload(ASMState *as, IRIns *ir) /* Emits a field load into dest, from an object or from GG_State. */
++{
++ RegSet allow = RSET_GPR;
++ Reg idx, dest = ra_dest(as, ir, allow);
++ rset_clear(allow, dest);
++ LOONGIns loongi = asm_fxloadins(as, ir); /* Load opcode chosen from the result type. */
++ int32_t ofs;
++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
++ idx = ra_allock(as, (int64_t)J2GG(as->J), allow);
++ ofs = (int32_t)(ir->op2<<2); /* op2 is scaled by 4 to form the byte offset. */
++ } else {
++ idx = ra_alloc1(as, ir->op1, allow);
++ if (ir->op2 == IRFL_TAB_ARRAY) {
++ ofs = asm_fuseabase(as, ir->op1);
++ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
++ emit_dji(as, LOONGI_ADDI_D, dest, idx, ofs); /* NOTE(review): ofs is not masked with 0xfff here, unlike other ADDI_D uses -- presumably always small and positive; confirm. */
++ return;
++ }
++ }
++ ofs = field_ofs[ir->op2]; /* Byte offset looked up by field id. */
++ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
++ }
++ rset_clear(allow, idx);
++ emit_lso(as, loongi, dest, idx, ofs, allow);
++}
++
++static void asm_fstore(ASMState *as, IRIns *ir) /* Emits a store of op2 to the object field described by the IR instruction at op1. */
++{
++ if (ir->r == RID_SINK) /* Sunk store: emit nothing. */
++ return;
++ Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
++ IRIns *irf = IR(ir->op1); /* Field reference: op1 = object, op2 = field id. */
++ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
++ int32_t ofs = field_ofs[irf->op2];
++ lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE");
++ emit_dji(as, asm_fxstoreins(as, ir), src, idx, ofs&0xfff); /* Offset is used directly as a 12-bit immediate. */
++}
++
++static void asm_xload(ASMState *as, IRIns *ir) /* Emits a load from the address expression op1 into an FPR or GPR, chosen by result type. */
++{
++ Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
++ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED),
++ "unaligned XLOAD");
++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); /* Address computation is delegated to asm_fusexref, extra offset 0. */
++}
++
++static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) /* Emits a store of op2 to the address expression op1, with extra byte offset ofs. */
++{
++ if (ir->r == RID_SINK) /* Sunk store: emit nothing. */
++ return;
++ Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
++ rset_exclude(RSET_GPR, src), ofs); /* src is excluded from the registers usable for the address. */
++}
++
++#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) /* XSTORE with no extra offset. */
++
++static void asm_ahuvload(ASMState *as, IRIns *ir) /* Emits a type-checked load of a tagged 64-bit value; emit_* calls appear in reverse of execution order. */
++{
++ Reg dest = RID_NONE, type, idx;
++ RegSet allow = RSET_GPR;
++ int32_t ofs = 0;
++ IRType1 t = ir->t;
++
++ type = ra_scratch(as, allow); /* Receives the type tag (loaded word shifted right by 47). */
++ rset_clear(allow, type);
++
++ if (ra_used(ir)) {
++ lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t),
++ "bad load type %d", irt_type(ir->t));
++ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
++ rset_clear(allow, dest);
++ if (irt_isaddr(t))
++ emit_djml(as, LOONGI_BSTRPICK_D, dest, dest, 46, 0); /* Keep bits 46..0, dropping the type tag. */
++ else if (irt_isint(t))
++ emit_dju(as, LOONGI_SLLI_W, dest, dest, 0); /* 32-bit shift by 0. */
++ }
++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; /* VLOAD: op2 is an additional slot index, 8 bytes per slot. */
++ rset_clear(allow, idx);
++ if (irt_isnum(t)) {
++ Reg tmp2 = ra_scratch(as, allow);
++ asm_guard(as, LOONGI_BEQ, tmp2, RID_ZERO);
++ emit_dju(as, LOONGI_SLTUI, tmp2, type, ((int32_t)LJ_TISNUM)&0xfff); /* tmp2 = (type < LJ_TISNUM), unsigned compare. */
++ } else {
++ asm_guard(as, LOONGI_BNE, type,
++ ra_allock(as, (int32_t)irt_toitype(t), allow)); /* Guard compares the tag with the expected internal type. */
++ }
++ if (ra_hasreg(dest)) {
++ if (irt_isnum(t)) { /* FP result: load it separately and reuse 'type' for the raw word. */
++ emit_lso(as, LOONGI_FLD_D, dest, idx, ofs, allow);
++ dest = type;
++ }
++ } else {
++ dest = type; /* Result unused: load only for the type check. */
++ }
++ emit_dju(as, LOONGI_SRAI_D, type, dest, 47); /* type = loaded word >> 47 */
++ emit_lso(as, LOONGI_LD_D, dest, idx, ofs, allow); /* 64-bit load of the tagged value. */
++}
++
++static void asm_ahustore(ASMState *as, IRIns *ir) /* Emits a store of a tagged value; emit_* calls appear in reverse of execution order. */
++{
++ RegSet allow = RSET_GPR;
++ Reg idx, src = RID_NONE, type = RID_NONE;
++ int32_t ofs = 0;
++ if (ir->r == RID_SINK) /* Sunk store: emit nothing. */
++ return;
++ if (irt_isnum(ir->t)) { /* Numbers are stored directly from an FPR. */
++ src = ra_alloc1(as, ir->op2, RSET_FPR);
++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
++ emit_lso(as, LOONGI_FST_D, src, idx, ofs, allow);
++ } else {
++ Reg tmp = RID_TMP;
++ if (irt_ispri(ir->t)) { /* Primitive: the whole 64-bit value is a constant derived from the type. */
++ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
++ rset_clear(allow, tmp);
++ } else { /* Otherwise the value and its type tag are combined into RID_TMP below. */
++ src = ra_alloc1(as, ir->op2, allow);
++ rset_clear(allow, src);
++ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
++ rset_clear(allow, type);
++ }
++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
++ emit_lso(as, LOONGI_ST_D, tmp, idx, ofs, allow);
++ if (ra_hasreg(src)) {
++ if (irt_isinteger(ir->t)) {
++ emit_djk(as, LOONGI_ADD_D, tmp, tmp, type); /* tmp += type tag */
++ emit_djml(as, LOONGI_BSTRPICK_D, tmp, src, 31, 0); /* tmp = bits 31..0 of src */
++ } else {
++ emit_djk(as, LOONGI_ADD_D, tmp, src, type); /* tmp = src + type tag */
++ }
++ }
++ }
++}
++
++static void asm_sload(ASMState *as, IRIns *ir) /* Emits a stack slot load with optional type check and conversion; emit_* calls appear in reverse of execution order. */
++{
++ Reg dest = RID_NONE, type = RID_NONE, base;
++ RegSet allow = RSET_GPR;
++ IRType1 t = ir->t;
++ int32_t ofs = 8*((int32_t)ir->op1-2); /* Byte offset of slot op1 from the base register. */
++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
++ "bad parent SLOAD"); /* Handled by asm_head_side(). */
++ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
++ "inconsistent SLOAD variant");
++ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { /* Guarded number-to-int conversion goes through asm_tointg. */
++ dest = ra_scratch(as, RSET_FPR);
++ asm_tointg(as, ir, dest);
++ t.irt = IRT_NUM; /* Continue with a regular number type check. */
++ } else if (ra_used(ir)) {
++ lj_assertA((irt_isnum(ir->t)) ||
++ irt_isint(ir->t) || irt_isaddr(ir->t),
++ "bad SLOAD type %d", irt_type(ir->t));
++ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
++ rset_clear(allow, dest);
++ base = ra_alloc1(as, REF_BASE, allow);
++ rset_clear(allow, base);
++ if (ir->op2 & IRSLOAD_CONVERT) {
++ if (irt_isint(t)) { /* Number slot, int result: load into an FPR and convert. */
++ Reg tmp = ra_scratch(as, RSET_FPR);
++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp);
++ emit_dj(as, LOONGI_FTINTRZ_W_D, tmp, tmp);
++ dest = tmp; /* The load below now targets the FPR temporary. */
++ t.irt = IRT_NUM; /* Check for original type. */
++ } else { /* Int slot, number result: load into a GPR and convert. */
++ Reg tmp = ra_scratch(as, RSET_GPR);
++ emit_dj(as, LOONGI_FFINT_D_W, dest, dest);
++ emit_dj(as, LOONGI_MOVGR2FR_W, dest, tmp);
++ dest = tmp; /* The load below now targets the GPR temporary. */
++ t.irt = IRT_INT; /* Check for original type. */
++ }
++ } else if (irt_isaddr(t)) {
++ /* Clear type from pointers. */
++ emit_djml(as, LOONGI_BSTRPICK_D, dest, dest, 46, 0);
++ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
++ /* Sign-extend integers. */
++ emit_dju(as, LOONGI_SLLI_W, dest, dest, 0);
++ }
++ goto dotypecheck; /* base is already allocated on this path. */
++ }
++ base = ra_alloc1(as, REF_BASE, allow);
++ rset_clear(allow, base);
++dotypecheck:
++ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
++ if (dest < RID_MAX_GPR) { /* GPR dest doubles as the register holding the raw tagged word. */
++ type = dest;
++ } else {
++ type = ra_scratch(as, allow);
++ }
++ rset_clear(allow, type);
++ Reg tmp1 = ra_scratch(as, allow);
++ if (irt_ispri(t)) { /* Primitive: compare the whole word with the type's constant. */
++ asm_guard(as, LOONGI_BNE, type,
++ ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
++ } else if ((ir->op2 & IRSLOAD_KEYINDEX)) { /* Compare the upper 32 bits with LJ_KEYINDEX. */
++ asm_guard(as, LOONGI_BNE, tmp1,
++ ra_allock(as, (int32_t)LJ_KEYINDEX, allow));
++ emit_dju(as, LOONGI_SRAI_D, tmp1, type, 32);
++ } else {
++ if (irt_isnum(t)) {
++ asm_guard(as, LOONGI_BEQ, tmp1, RID_ZERO);
++ emit_dji(as, LOONGI_SLTUI, tmp1, tmp1, LJ_TISNUM&0xfff); /* tmp1 = (tag < LJ_TISNUM), unsigned compare. */
++ if (ra_hasreg(dest)) {
++ emit_lso(as, LOONGI_FLD_D, dest, base, ofs, allow);
++ }
++ } else {
++ asm_guard(as, LOONGI_BNE, tmp1,
++ ra_allock(as, (int32_t)irt_toitype(t), allow));
++ }
++ emit_dju(as, LOONGI_SRAI_D, tmp1, type, 47); /* tmp1 = raw word >> 47 (the type tag). */
++ }
++ emit_lso(as, LOONGI_LD_D, type, base, ofs, allow); /* 64-bit load of the raw tagged slot. */
++ } else if (ra_hasreg(dest)) { /* No type check: plain load sized by the type. */
++ if (irt_isnum(t)) {
++ emit_lso(as, LOONGI_FLD_D, dest, base, ofs, allow);
++ } else {
++ emit_lso(as, irt_isint(t) ? LOONGI_LD_W : LOONGI_LD_D, dest, base, ofs, allow);
++ }
++ }
++}
++
++/* -- Allocations --------------------------------------------------------- */
++
++#if LJ_HASFFI
++static void asm_cnew(ASMState *as, IRIns *ir) /* Emits allocation (and for CNEWI, initialization) of a cdata object; emit_* calls appear in reverse of execution order. */
++{
++ CTState *cts = ctype_ctsG(J2G(as->J));
++ CTypeID id = (CTypeID)IR(ir->op1)->i;
++ CTSize sz;
++ CTInfo info = lj_ctype_info(cts, id, &sz);
++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; /* Default allocator; replaced for the VLA/VLS/aligned case. */
++ IRRef args[4];
++ RegSet drop = RSET_SCRATCH;
++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
++ "bad CNEW/CNEWI operands");
++
++ as->gcsteps++;
++ if (ra_hasreg(ir->r))
++ rset_clear(drop, ir->r); /* Dest reg handled below. */
++ ra_evictset(as, drop);
++ if (ra_used(ir))
++ ra_destreg(as, ir, RID_RET); /* GCcdata * */
++
++ /* Initialize immutable cdata object. */
++ if (ir->o == IR_CNEWI) {
++ RegSet allow = (RSET_GPR & ~RSET_SCRATCH); /* Only GPRs outside RSET_SCRATCH may hold the initializer. */
++ emit_dji(as, sz == 8 ? LOONGI_ST_D : LOONGI_ST_W, ra_alloc1(as, ir->op2, allow),
++ RID_RET, (sizeof(GCcdata))&0xfff); /* Payload is stored right after the GCcdata header. */
++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
++ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
++ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
++ args[0] = ASMREF_L; /* lua_State *L */
++ args[1] = ir->op1; /* CTypeID id */
++ args[2] = ir->op2; /* CTSize sz */
++ args[3] = ASMREF_TMP1; /* CTSize align */
++ asm_gencall(as, ci, args);
++ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
++ return;
++ }
++
++ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
++ emit_dji(as, LOONGI_ST_B, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))&0xfff);
++ emit_dji(as, LOONGI_ST_H, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))&0xfff);
++ emit_dji(as, LOONGI_ADDI_D, RID_RET+1, RID_ZERO, ~LJ_TCDATA&0xfff); /* RID_RET+1 = ~LJ_TCDATA, the gct byte stored above. */
++ emit_dj32i(as, RID_TMP, RID_ZERO, id); /* RID_TMP = ctype id, stored as ctypeid above. */
++ args[0] = ASMREF_L; /* lua_State *L */
++ args[1] = ASMREF_TMP1; /* MSize size */
++ asm_gencall(as, ci, args);
++ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); /* size argument = payload + header */
++}
++#endif
++
++/* -- Write barriers ------------------------------------------------------ */
++
++static void asm_tbar(ASMState *as, IRIns *ir) /* Emits the table write barrier inline; emit_* calls appear in reverse of execution order. */
++{
++ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
++ Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
++ Reg link = RID_TMP; /* RID_TMP is reused for the old grayagain head once the branch has consumed it. */
++ MCLabel l_end = emit_label(as);
++ emit_dji(as, LOONGI_ST_D, link, tab, ((int32_t)offsetof(GCtab, gclist))&0xfff); /* tab->gclist = old gc.grayagain */
++ emit_dji(as, LOONGI_ST_B, mark, tab, ((int32_t)offsetof(GCtab, marked))&0xfff); /* tab->marked = mark (black bit cleared) */
++ emit_setgl(as, tab, gc.grayagain); // make tab gray again
++ emit_getgl(as, link, gc.grayagain);
++ emit_branch(as, LOONGI_BEQ, RID_TMP, RID_ZERO, l_end); // black bit not set (RID_TMP == 0): skip to l_end
++ emit_djk(as, LOONGI_XOR, mark, mark, RID_TMP); // clear the black bit in mark
++ emit_dju(as, LOONGI_ANDI, RID_TMP, mark, LJ_GC_BLACK);
++ emit_dji(as, LOONGI_LD_BU, mark, tab, ((int32_t)offsetof(GCtab, marked))&0xfff);
++}
++
++static void asm_obar(ASMState *as, IRIns *ir) /* Emits a conditional call to lj_gc_barrieruv(); emit_* calls appear in reverse of execution order. */
++{
++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
++ IRRef args[2];
++ MCLabel l_end;
++ Reg obj, val, tmp;
++ /* No need for other object barriers (yet). */
++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); // Closed upvalue
++ ra_evictset(as, RSET_SCRATCH);
++ l_end = emit_label(as);
++ args[0] = ASMREF_TMP1; /* global_State *g */
++ args[1] = ir->op1; /* TValue *tv */
++ asm_gencall(as, ci, args);
++ obj = IR(ir->op1)->r;
++ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
++ emit_branch(as, LOONGI_BEQ, tmp, RID_ZERO, l_end); /* upvalue black bit not set (tmp == 0): skip the call */
++ emit_addk(as, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768, RSET_GPR); /* args[0] = RID_JGL - 32768, presumably the global_State address. */
++ emit_branch(as, LOONGI_BEQ, RID_TMP, RID_ZERO, l_end); // value has no white bit (RID_TMP == 0): skip the call
++ emit_dju(as, LOONGI_ANDI, tmp, tmp, LJ_GC_BLACK);
++ emit_dju(as, LOONGI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES);
++ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
++ emit_dji(as, LOONGI_LD_BU, tmp, obj,
++ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))&0xfff); /* obj points at uv->tv, so step back to uv->marked. */
++ emit_dji(as, LOONGI_LD_BU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))&0xfff); /* RID_TMP = val->marked */
++}
++
++/* -- Arithmetic and logic operations ------------------------------------- */
++
++static void asm_fparith(ASMState *as, IRIns *ir, LOONGIns loongi) /* Emits the two-operand FP instruction loongi: dest = left op right. */
++{
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
++ right = (left >> 8); left &= 255; /* ra_alloc2 result is unpacked as: left in bits 0-7, right in bits 8 and up. */
++ emit_djk(as, loongi, dest, left, right);
++}
++
++static void asm_fpunary(ASMState *as, IRIns *ir, LOONGIns loongi) /* Emits the one-operand FP instruction loongi: dest = op(op1). */
++{
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); /* dest is passed as the allocation hint for the operand. */
++ emit_dj(as, loongi, dest, left);
++}
++
++static void asm_fpmath(ASMState *as, IRIns *ir) /* Dispatches an FPMATH op (selected by op2) to an inline instruction or a helper call. */
++{
++ IRFPMathOp fpm = (IRFPMathOp)ir->op2;
++ if (fpm <= IRFPM_TRUNC) /* Ops up to and including IRFPM_TRUNC go through asm_callround. */
++ asm_callround(as, ir, IRCALL_lj_vm_floor + fpm);
++ else if (fpm == IRFPM_SQRT) /* Square root is a single FSQRT_D instruction. */
++ asm_fpunary(as, ir, LOONGI_FSQRT_D);
++ else /* Everything else is a call, indexed relative to IRCALL_lj_vm_floor. */
++ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
++}
++
++static void asm_add(ASMState *as, IRIns *ir) /* Emits an addition: FP (optionally fused multiply-add) or 32/64-bit integer. */
++{
++ IRType1 t = ir->t;
++ if (irt_isnum(t)) {
++ if (!asm_fusemadd(as, ir, LOONGI_FMADD_D, LOONGI_FMADD_D)) /* Try FMADD_D first; plain FADD_D if fusing fails. */
++ asm_fparith(as, ir, LOONGI_FADD_D);
++ return;
++ } else {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++ if (irref_isk(ir->op2)) { /* Constant right operand: use the immediate form when it fits. */
++ intptr_t k = get_kval(as, ir->op2);
++ if (LOONGF_S_OK(k, 12)) { // si12
++ if (irt_is64(t)) {
++ emit_dji(as, LOONGI_ADDI_D, dest, left, k&0xfff);
++ } else {
++ emit_dji(as, LOONGI_ADDI_W, dest, left, k&0xfff);
++ }
++ return;
++ }
++ }
++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); /* Register form otherwise. */
++ emit_djk(as, irt_is64(t) ? LOONGI_ADD_D : LOONGI_ADD_W, dest,
++ left, right);
++ }
++}
++
++static void asm_sub(ASMState *as, IRIns *ir) /* Emits a subtraction: FP (optionally fused) or 32/64-bit integer. */
++{
++ if (irt_isnum(ir->t)) {
++ if (!asm_fusemadd(as, ir, LOONGI_FMSUB_D, LOONGI_FNMSUB_D)) /* Try a fused multiply-subtract first; plain FSUB_D if fusing fails. */
++ asm_fparith(as, ir, LOONGI_FSUB_D);
++ return;
++ } else {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
++ right = (left >> 8); left &= 255; /* Unpack the register pair returned by ra_alloc2. */
++ emit_djk(as, irt_is64(ir->t) ? LOONGI_SUB_D : LOONGI_SUB_W, dest,
++ left, right);
++ }
++}
++
++static void asm_mul(ASMState *as, IRIns *ir) /* Emits a multiplication: FMUL_D for numbers, MUL_D/MUL_W for integers. */
++{
++ if (irt_isnum(ir->t)) {
++ asm_fparith(as, ir, LOONGI_FMUL_D);
++ } else
++ {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
++ right = (left >> 8); left &= 255; /* Unpack the register pair returned by ra_alloc2. */
++ if (irt_is64(ir->t)) {
++ emit_djk(as, LOONGI_MUL_D, dest, left, right);
++ } else {
++ emit_djk(as, LOONGI_MUL_W, dest, left, right);
++ }
++ }
++}
++
++static void asm_fpdiv(ASMState *as, IRIns *ir) /* Emits a double-precision FP division (FDIV_D). */
++{
++ asm_fparith(as, ir, LOONGI_FDIV_D);
++}
++
++static void asm_neg(ASMState *as, IRIns *ir) /* Emits a negation: FNEG_D for numbers, 0 - x for integers. */
++{
++ if (irt_isnum(ir->t)) {
++ asm_fpunary(as, ir, LOONGI_FNEG_D);
++ } else {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++ emit_djk(as, irt_is64(ir->t) ? LOONGI_SUB_D : LOONGI_SUB_W, dest,
++ RID_ZERO, left); /* dest = RID_ZERO - left */
++ }
++}
++
++#define asm_abs(as, ir) asm_fpunary(as, ir, LOONGI_FABS_D) /* ABS is emitted as a single FABS_D. */
++
++static void asm_arithov(ASMState *as, IRIns *ir) /* Emits a 32-bit ADDOV/SUBOV plus its overflow guard; emit_* calls appear in reverse of execution order. */
++{
++ RegSet allow = RSET_GPR;
++ Reg right, left, tmp, tmp2, dest = ra_dest(as, ir, allow);
++ rset_clear(allow, dest);
++ lj_assertA(!irt_is64(ir->t), "bad usage"); /* Only 32-bit overflow checks are handled here. */
++ tmp2 = ra_scratch(as, allow);
++ rset_clear(allow, tmp2);
++ if (irref_isk(ir->op2)) { /* Constant right operand: immediate add plus one signed compare. */
++ int k = IR(ir->op2)->i;
++ if (ir->o == IR_SUBOV) k = -k; /* x - k is emitted as x + (-k). */
++ if (LOONGF_S_OK(k, 12)) { /* (dest < left) == (k >= 0 ? 1 : 0) */
++ left = ra_alloc1(as, ir->op1, allow);
++ asm_guard(as, k >= 0 ? LOONGI_BNE : LOONGI_BEQ, tmp2, RID_ZERO);
++ emit_djk(as, LOONGI_SLT, tmp2, dest, dest == left ? tmp2 : left); /* tmp2 = (dest < original left), signed. */
++ emit_dji(as, LOONGI_ADDI_W, dest, left, k&0xfff); /* Must be the 32-bit add: a 64-bit add of sign-extended ints never wraps at bit 31, so the compare above could not see the overflow. */
++ if (dest == left) emit_move(as, tmp2, left); /* Keep a copy of left when the add overwrites it. */
++ return;
++ }
++ }
++ left = ra_alloc2(as, ir, allow);
++ right = (left >> 8); left &= 255; /* Unpack the register pair returned by ra_alloc2. */
++ rset_clear(allow, right);
++ rset_clear(allow, left);
++ tmp = ra_scratch(as, allow);
++ asm_guard(as, LOONGI_BLT, tmp2, RID_ZERO); /* Guard on the sign of the combined test value. */
++ emit_djk(as, LOONGI_AND, tmp2, RID_TMP, tmp);
++ if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */
++ emit_djk(as, LOONGI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
++ } else { /* ((dest^left) & (dest^~right)) < 0 */
++ emit_djk(as, LOONGI_XOR, RID_TMP, RID_TMP, dest);
++ emit_djk(as, LOONGI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO); /* RID_TMP = ~right */
++ }
++ emit_djk(as, LOONGI_XOR, tmp, dest, dest == left ? RID_TMP : left);
++ emit_djk(as, ir->o == IR_ADDOV ? LOONGI_ADD_W : LOONGI_SUB_W, dest, left, right);
++ if (dest == left || dest == right)
++ emit_move(as, RID_TMP, dest == left ? left : right); /* Save the operand that dest overwrites. */
++}
++
++#define asm_addov(as, ir) asm_arithov(as, ir) /* asm_arithov distinguishes ADDOV from SUBOV via ir->o. */
++#define asm_subov(as, ir) asm_arithov(as, ir) /* Same entry point as asm_addov. */
++
++static void asm_mulov(ASMState *as, IRIns *ir) /* Emits a 32-bit multiply plus overflow guard; emit_* calls appear in reverse of execution order. */
++{
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg tmp, tmp2, right, left = ra_alloc2(as, ir, RSET_GPR);
++ right = (left >> 8); left &= 255; /* Unpack the register pair returned by ra_alloc2. */
++ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
++ right), dest)); /* tmp must differ from left, right and dest. */
++ tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
++ right), dest), tmp)); /* tmp2 must additionally differ from tmp. */
++ asm_guard(as, LOONGI_BNE, tmp2, tmp); /* Guard compares the sign fill of the low word with the high word. */
++ emit_dju(as, LOONGI_SRAI_W, tmp2, dest, 31);
++ emit_djk(as, LOONGI_MUL_W, dest, left, right); // dest: [31:0]+signextend
++ emit_djk(as, LOONGI_MULH_W, tmp, left, right); // tmp: [63:32]
++}
++
++static void asm_bnot(ASMState *as, IRIns *ir) /* Emit BNOT as a single NOR. */
++{
++ Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
++ IRIns *irl = IR(ir->op1);
++ if (mayfuse(as, ir->op1) && irl->o == IR_BOR) { /* Fuse BNOT(BOR(a, b)) into NOR a, b. */
++ left = ra_alloc2(as, irl, RSET_GPR);
++ right = (left >> 8); left &= 255;
++ } else {
++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++ right = RID_ZERO; /* NOR with the zero register. */
++ }
++ emit_djk(as, LOONGI_NOR, dest, left, right);
++}
++
++static void asm_bswap(ASMState *as, IRIns *ir) /* Emit a 32 or 64 bit byte swap as a two instruction sequence via RID_TMP. */
++{
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
++ if (irt_is64(ir->t)) { /* Emitted backwards: REVB_4H executes first, then REVH_D. */
++ emit_dj(as, LOONGI_REVH_D, dest, RID_TMP);
++ emit_dj(as, LOONGI_REVB_4H, RID_TMP, left);
++ } else { /* Emitted backwards: REVB_2H executes first, then the rotate by 16. */
++ emit_dju(as, LOONGI_ROTRI_W, dest, RID_TMP, 16);
++ emit_dj(as, LOONGI_REVB_2H, RID_TMP, left);
++ }
++}
++
++static void asm_bitop(ASMState *as, IRIns *ir, LOONGIns loongi, LOONGIns loongik) /* loongi: register form, loongik: immediate form. */
++{
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++ if (irref_isk(ir->op2)) {
++ intptr_t k = get_kval(as, ir->op2);
++ if (checku12(k)) { /* Constant fits the unsigned 12 bit immediate. */
++ emit_dji(as, loongik, dest, left, k&0xfff);
++ return;
++ }
++ }
++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); /* Otherwise take the right operand from a register. */
++ emit_djk(as, loongi, dest, left, right);
++}
++
++#define asm_band(as, ir) asm_bitop(as, ir, LOONGI_AND, LOONGI_ANDI) /* Each pair is (register form, immediate form). */
++#define asm_bor(as, ir) asm_bitop(as, ir, LOONGI_OR, LOONGI_ORI)
++#define asm_bxor(as, ir) asm_bitop(as, ir, LOONGI_XOR, LOONGI_XORI)
++
++static void asm_bitshift(ASMState *as, IRIns *ir, LOONGIns loongi, LOONGIns loongik) /* loongi: register form, loongik: immediate form. */
++{
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
++ uint32_t shmask = irt_is64(ir->t) ? 63 : 31; /* Mask applied to constant shift counts. */
++ if (irref_isk(ir->op2)) { /* Constant shifts. */
++ uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmask);
++ emit_dju(as, loongik, dest, left, shift);
++ } else {
++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
++ emit_djk(as, loongi, dest, left, right); /* Shift amount is in rk. */
++ }
++}
++
++#define asm_bshl(as, ir) (irt_is64(ir->t) ? /* Pick the _D or _W opcodes by result width. */ \
++ asm_bitshift(as, ir, LOONGI_SLL_D, LOONGI_SLLI_D) : \
++ asm_bitshift(as, ir, LOONGI_SLL_W, LOONGI_SLLI_W))
++#define asm_bshr(as, ir) (irt_is64(ir->t) ? \
++ asm_bitshift(as, ir, LOONGI_SRL_D, LOONGI_SRLI_D) : \
++ asm_bitshift(as, ir, LOONGI_SRL_W, LOONGI_SRLI_W))
++#define asm_bsar(as, ir) (irt_is64(ir->t) ? \
++ asm_bitshift(as, ir, LOONGI_SRA_D, LOONGI_SRAI_D) : \
++ asm_bitshift(as, ir, LOONGI_SRA_W, LOONGI_SRAI_W))
++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") /* BROL is not expected by this backend; only BROR is emitted. */
++#define asm_bror(as, ir) (irt_is64(ir->t) ? \
++ asm_bitshift(as, ir, LOONGI_ROTR_D, LOONGI_ROTRI_D) : \
++ asm_bitshift(as, ir, LOONGI_ROTR_W, LOONGI_ROTRI_W))
++
++static void asm_min_max(ASMState *as, IRIns *ir, int ismax) /* Emit MIN (ismax == 0) or MAX (ismax != 0). */
++{
++ if (irt_isnum(ir->t)) { /* FP: a single FMAX_D/FMIN_D. */
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
++ right = (left >> 8); left &= 255;
++ emit_djk(as, ismax ? LOONGI_FMAX_D : LOONGI_FMIN_D, dest, left, right);
++ } else { /* Integer: branchless select. Emitted backwards, so SLT executes first and OR last. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
++ emit_djk(as, LOONGI_OR, dest, dest, RID_TMP); /* Merge the two masked halves. */
++ if (dest != right) {
++ emit_djk(as, LOONGI_MASKEQZ, RID_TMP, right, RID_TMP);
++ emit_djk(as, LOONGI_MASKNEZ, dest, left, RID_TMP);
++ } else { /* dest aliases right: mask right into dest first, before RID_TMP is overwritten. */
++ emit_djk(as, LOONGI_MASKNEZ, RID_TMP, left, RID_TMP);
++ emit_djk(as, LOONGI_MASKEQZ, dest, right, RID_TMP);
++ }
++ emit_djk(as, LOONGI_SLT, RID_TMP, /* RID_TMP != 0 selects right, RID_TMP == 0 selects left. */
++ ismax ? left : right, ismax ? right : left);
++ }
++}
++
++#define asm_min(as, ir) asm_min_max(as, ir, 0) /* ismax = 0 */
++#define asm_max(as, ir) asm_min_max(as, ir, 1) /* ismax = 1 */
++
++/* -- Comparisons --------------------------------------------------------- */
++
++/* FP comparisons. Emits an FCMP into condition flag 0 plus a guard branching on that flag. */
++static void asm_fpcomp(ASMState *as, IRIns *ir)
++{
++ IROp op = ir->o;
++ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
++ right = (left >> 8); left &= 255;
++ asm_guard21(as, (op&1) ? LOONGI_BCNEZ : LOONGI_BCEQZ, 0); /* Odd opcodes guard with BCNEZ, even ones with BCEQZ. */
++ switch (op) { /* Each opcode pair shares one compare; only the guard branch differs. */
++ case IR_LT: case IR_UGE:
++ emit_djk(as, LOONGI_FCMP_CLT_D, 0, left, right);
++ break;
++ case IR_GE: case IR_ULT:
++ emit_djk(as, LOONGI_FCMP_CULT_D, 0, left, right);
++ break;
++ case IR_LE: case IR_UGT: case IR_ABC:
++ emit_djk(as, LOONGI_FCMP_CLE_D, 0, left, right);
++ break;
++ case IR_ULE: case IR_GT:
++ emit_djk(as, LOONGI_FCMP_CULE_D, 0, left, right);
++ break;
++ case IR_EQ: case IR_NE:
++ emit_djk(as, LOONGI_FCMP_CEQ_D, 0, left, right);
++ break;
++ default: /* No compare is emitted for any other opcode. */
++ break;
++ }
++}
++
++/* Integer comparisons. Emits SLT/SLTU (or the immediate forms) plus a guard branch. */
++static void asm_intcomp(ASMState *as, IRIns *ir)
++{
++ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
++ /* 00 01 10 11 100 101 110 111 */
++ IROp op = ir->o;
++ RegSet allow = RSET_GPR;
++ Reg tmp, right, left = ra_alloc1(as, ir->op1, allow);
++ rset_clear(allow, left);
++ if (op == IR_ABC) op = IR_UGT; /* ABC is handled as an unsigned greater-than. */
++ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { /* Signed compare against 0: one branch against RID_ZERO. */
++ switch (op) {
++ case IR_GT: asm_guard(as, LOONGI_BGE, RID_ZERO, left); break;
++ case IR_LE: asm_guard(as, LOONGI_BLT, RID_ZERO, left); break;
++ case IR_GE: asm_guard(as, LOONGI_BLT, left, RID_ZERO); break;
++ case IR_LT: asm_guard(as, LOONGI_BGE, left, RID_ZERO); break;
++ default: break;
++ }
++ return;
++ }
++ tmp = ra_scratch(as, allow);
++ rset_clear(allow, tmp);
++ if (irref_isk(ir->op2)) {
++ intptr_t k = get_kval(as, ir->op2);
++ if ((op&2)) k++; /* LE/GT variants: compare against k+1 with a less-than. */
++ if (checki12(k)) { /* Constant fits the signed 12 bit immediate. */
++ asm_guard(as, (op&1) ? LOONGI_BNE : LOONGI_BEQ, tmp, RID_ZERO);
++ emit_dji(as, (op&4) ? LOONGI_SLTUI : LOONGI_SLTI, tmp, left, k&0xfff);
++ return;
++ }
++ }
++ right = ra_alloc1(as, ir->op2, allow);
++ asm_guard(as, ((op^(op>>1))&1) ? LOONGI_BNE : LOONGI_BEQ, tmp, RID_ZERO);
++ emit_djk(as, (op&4) ? LOONGI_SLTU : LOONGI_SLT, /* Operands are swapped for the LE/GT variants. */
++ tmp, (op&2) ? right : left, (op&2) ? left : right);
++}
++
++static void asm_comp(ASMState *as, IRIns *ir) /* Dispatch an ordered comparison by operand type. */
++{
++ if (irt_isnum(ir->t))
++ asm_fpcomp(as, ir);
++ else
++ asm_intcomp(as, ir);
++}
++
++static void asm_equal(ASMState *as, IRIns *ir) /* Emit an equality/inequality guard. */
++{
++ if (irt_isnum(ir->t)) {
++ asm_fpcomp(as, ir);
++ } else { /* Non-number operands: compare the two registers directly in the guard branch. */
++ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
++ right = (left >> 8); left &= 255;
++ asm_guard(as, (ir->o & 1) ? LOONGI_BEQ : LOONGI_BNE, left, right); /* Odd opcode guards with BEQ, even with BNE. */
++ }
++}
++
++/* -- Split register ops -------------------------------------------------- */
++
++/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
++static void asm_hiop(ASMState *as, IRIns *ir)
++{
++ /* HIOP is marked as a store because it needs its own DCE logic. */
++ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
++ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; /* Without DCE both halves count as used. */
++ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
++ switch ((ir-1)->o) { /* Only call ops are accepted as the loword op here. */
++ case IR_CALLN:
++ case IR_CALLL:
++ case IR_CALLS:
++ case IR_CALLXS:
++ if (!uselo)
++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
++ break;
++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
++ }
++}
++
++/* -- Profiling ----------------------------------------------------------- */
++
++static void asm_prof(ASMState *as, IRIns *ir) /* Guard: leave the trace when HOOK_PROFILE is set in g->hookmask. */
++{
++ UNUSED(ir);
++ Reg tmp = ra_scratch(as, RSET_GPR);
++ asm_guard(as, LOONGI_BNE, tmp, RID_ZERO);
++ emit_dju(as, LOONGI_ANDI, tmp, tmp, HOOK_PROFILE);
++ emit_lsglptr2(as, LOONGI_LDX_BU, tmp, /* Must be the indexed load: emit_lsglptr2() passes the offset in register RID_R20. */
++ (int32_t)offsetof(global_State, hookmask));
++}
++
++/* -- Stack handling ------------------------------------------------------ */
++
++/* Check Lua stack size for overflow. Use exit handler as fallback. */
++static void asm_stack_check(ASMState *as, BCReg topslot,
++ IRIns *irp, RegSet allow, ExitNo exitno)
++{
++ /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */
++ Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; /* With irp: its register, or RID_TMP (loaded from jit_base below). */
++ ExitNo oldsnap = as->snapno;
++ rset_clear(allow, pbase);
++ as->snapno = exitno; /* The guard below exits through exitno; the previous snapno is restored after it. */
++ asm_guard(as, LOONGI_BNE, RID_R20, RID_ZERO);
++ as->snapno = oldsnap;
++ if (allow) {
++ tmp = rset_pickbot(allow);
++ ra_modified(as, tmp);
++ } else { // allow == RSET_EMPTY
++ tmp = RID_RET;
++ emit_dji(as, LOONGI_LD_D, tmp, RID_SP, 0); /* Restore tmp1 register. */
++ }
++ lj_assertA(checki12(8*topslot), "slot offset %d does not fit in si12", 8*topslot);
++ emit_dji(as, LOONGI_SLTUI, RID_R20, RID_R20, (int32_t)(8*topslot)&0xfff); /* R20 = (R20 < 8*topslot), unsigned. */
++ emit_djk(as, LOONGI_SUB_D, RID_R20, tmp, pbase); /* R20 = maxstack - base. */
++ emit_dji(as, LOONGI_LD_D, tmp, tmp, offsetof(lua_State, maxstack)); /* tmp = L->maxstack. */
++ if (pbase == RID_TMP)
++ emit_getgl(as, RID_TMP, jit_base);
++ emit_getgl(as, tmp, cur_L); /* tmp = g->cur_L. */
++ if (allow == RSET_EMPTY) /* Spill temp register. */
++ emit_dji(as, LOONGI_ST_D, tmp, RID_SP, 0);
++}
++
++/* Restore Lua stack from on-trace state. */
++static void asm_stack_restore(ASMState *as, SnapShot *snap)
++{
++ SnapEntry *map = &as->T->snapmap[snap->mapofs];
++#ifdef LUA_USE_ASSERT
++ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
++#endif
++ MSize n, nent = snap->nent;
++ /* Store the value of all modified slots to the Lua stack. */
++ for (n = 0; n < nent; n++) {
++ SnapEntry sn = map[n];
++ BCReg s = snap_slot(sn);
++ int32_t ofs = 8*((int32_t)s-1-LJ_FR2); /* Byte offset of the slot relative to RID_BASE. */
++ IRRef ref = snap_ref(sn);
++ IRIns *ir = IR(ref);
++ if ((sn & SNAP_NORESTORE)) /* Slots flagged SNAP_NORESTORE are not written back. */
++ continue;
++ if (irt_isnum(ir->t)) { /* Numbers are stored directly from an FPR. */
++ Reg src = ra_alloc1(as, ref, RSET_FPR);
++ emit_dji(as, LOONGI_FST_D, src, RID_BASE, ofs&0xfff);
++ } else {
++ if ((sn & SNAP_KEYINDEX)) { /* Key index slots: LJ_KEYINDEX goes in the upper 32 bits. */
++ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
++ int64_t kki = (int64_t)LJ_KEYINDEX << 32;
++ if (irref_isk(ref)) { /* Constant: combine into one 64 bit constant at assembly time. */
++ emit_djk(as, LOONGI_STX_D,
++ ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow),
++ RID_BASE, RID_R20);
++ emit_d16i(as, RID_R20, ofs); /* R20 = ofs, the index register for STX_D. */
++ } else { /* Variable: add the tag constant at runtime into RID_TMP. */
++ Reg src = ra_alloc1(as, ref, allow);
++ Reg rki = ra_allock(as, kki, rset_exclude(allow, src));
++ emit_djk(as, LOONGI_STX_D, RID_TMP, RID_BASE, RID_R20);
++ emit_d16i(as, RID_R20, ofs);
++ emit_djk(as, LOONGI_ADD_D, RID_TMP, src, rki);
++ }
++ } else {
++ asm_tvstore64(as, RID_BASE, ofs, ref);
++ }
++ }
++ checkmclim(as);
++ }
++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
++}
++
++/* -- GC handling --------------------------------------------------------- */
++
++/* Marker to prevent patching the GC check exit. Written before the guard in asm_gc_check(), tested as p[-1] in lj_asm_patchexit(). */
++#define LOONG_NOPATCH_GC_CHECK LOONGI_OR
++
++/* Check GC threshold and do one or more GC steps. */
++static void asm_gc_check(ASMState *as)
++{
++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
++ IRRef args[2];
++ MCLabel l_end;
++ Reg tmp1, tmp2;
++ ra_evictset(as, RSET_SCRATCH);
++ l_end = emit_label(as); /* Target of the skip branch below. */
++ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
++ asm_guard(as, LOONGI_BNE, RID_RET, RID_ZERO); /* Assumes asm_snap_prep() already done. */
++ *--as->mcp = LOONG_NOPATCH_GC_CHECK; /* Placed directly before the guard so lj_asm_patchexit() leaves it alone. */
++ args[0] = ASMREF_TMP1; /* global_State *g */
++ args[1] = ASMREF_TMP2; /* MSize steps */
++ asm_gencall(as, ci, args);
++ tmp1 = ra_releasetmp(as, ASMREF_TMP1);
++ tmp2 = ra_releasetmp(as, ASMREF_TMP2);
++ ra_allockreg(as, (int64_t)(J2G(as->J)), tmp1);
++ emit_loadi(as, tmp2, as->gcsteps);
++ /* Jump around GC step if GC total < GC threshold. */
++ emit_branch(as, LOONGI_BLTU, RID_TMP, tmp2, l_end);
++ emit_getgl(as, tmp2, gc.threshold);
++ emit_getgl(as, RID_TMP, gc.total);
++ as->gcsteps = 0; /* Reset the step counter once the check is emitted. */
++ checkmclim(as);
++}
++
++/* -- Loop handling ------------------------------------------------------- */
++
++/* Fixup the loop branch: fill in the offset from the branch back to as->mcp. */
++static void asm_loop_fixup(ASMState *as)
++{
++ MCode *p = as->mctop;
++ MCode *target = as->mcp;
++ if (as->loopinv) { /* Inverted loop branch? */
++ /* asm_guard* already inverted the bceqz/bcnez/beq/bne/blt/bge, and patched the final b. */
++ uint32_t mask = (p[-2] & 0xfc000000) == 0x48000000 ? 0x1fffffu : 0xffffu; /* Opcode 0x48000000 has a 21 bit offset, the others 16 bit. */
++ ptrdiff_t delta = target - (p - 2); /* Offset in instructions, relative to the branch at p[-2]. */
++ if (mask == 0x1fffffu) { /* BCEQZ BCNEZ: offset bits 15:0 go in the I field, bits 20:16 in the low 5 bits. */
++ p[-2] = p[-2] | LOONGF_I((uint32_t)delta & 0xffffu) | (((uint32_t)delta & 0x1f0000u) >> 16);
++ } else { /* BEQ BNE BLT BGE BLTU BGEU */
++ p[-2] |= LOONGF_I(delta & 0xffffu);
++ }
++ if (p[-1] == 0) /* Fill a still-empty last slot with a NOP. */
++ p[-1] = LOONGI_NOP;
++ } else {
++ /* b: offset bits 15:0 go in the I field, bits 25:16 in the low 10 bits. */
++ ptrdiff_t delta = target - (p - 1);
++ p[-1] = LOONGI_B | LOONGF_I(delta & 0xffffu) | ((delta & 0x3ff0000) >> 16);
++ }
++}
++
++/* Fixup the tail of the loop. No-op on this target. */
++static void asm_loop_tail_fixup(ASMState *as)
++{
++ UNUSED(as); /* Nothing to do. */
++}
++
++/* -- Head of trace ------------------------------------------------------- */
++
++/* Coalesce BASE register for a root trace. */
++static void asm_head_root_base(ASMState *as)
++{
++ IRIns *ir = IR(REF_BASE);
++ Reg r = ir->r;
++ if (ra_hasreg(r)) { /* Only needed if BASE was assigned a register. */
++ ra_free(as, r);
++ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */
++ if (r != RID_BASE)
++ emit_move(as, r, RID_BASE); /* Copy RID_BASE into the allocated register. */
++ }
++}
++
++/* Coalesce BASE register for a side trace. Returns the parent register BASE was coalesced with, or RID_NONE. */
++static Reg asm_head_side_base(ASMState *as, IRIns *irp)
++{
++ IRIns *ir = IR(REF_BASE);
++ Reg r = ir->r;
++ if (ra_hasreg(r)) {
++ ra_free(as, r);
++ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */
++ if (irp->r == r) {
++ return r; /* Same BASE register already coalesced. */
++ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
++ emit_move(as, r, irp->r); /* Move from coalesced parent reg. */
++ return irp->r;
++ } else {
++ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
++ }
++ }
++ return RID_NONE; /* BASE has no register, or it was reloaded from jit_base. */
++}
++
++/* -- Tail of trace ------------------------------------------------------- */
++
++/* Fixup the tail code: write the stack adjustment and the final branch to the linked trace or the interpreter exit. */
++static void asm_tail_fixup(ASMState *as, TraceNo lnk)
++{
++ MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
++ int32_t spadj = as->T->spadjust;
++ MCode *p = as->mctop - 1; /* Last instruction slot: the branch. p[-1] is the stack adjustment slot. */
++ if (spadj == 0) {
++ p[-1] = LOONGI_NOP;
++ } else {
++ p[-1] = LOONGI_ADDI_D|LOONGF_D(RID_SP)|LOONGF_J(RID_SP)|LOONGF_I(spadj);
++ }
++
++ MCode *tmp = p;
++ *p = LOONGI_B | LOONGF_I((uintptr_t)(target-tmp)&0xffffu) | (((uintptr_t)(target-tmp)&0x3ff0000u) >> 16); /* Offset bits 15:0 in the I field, bits 25:16 in the low 10 bits. */
++}
++
++/* Prepare tail of code: reserve the trailing instruction slots before assembling backwards. */
++static void asm_tail_prep(ASMState *as)
++{
++ MCode *p = as->mctop - 1; /* Leave room for exit branch. */
++ if (as->loopref) {
++ as->invmcp = as->mcp = p; /* Loop: only the branch slot is reserved. */
++ } else {
++ as->mcp = p-1; /* Leave room for stack pointer adjustment. */
++ as->invmcp = NULL;
++ }
++ *p = LOONGI_NOP; /* Prevent load/store merging. */
++}
++
++/* -- Trace setup --------------------------------------------------------- */
++
++/* Ensure there are enough stack slots for call arguments. */
++static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
++{
++ IRRef args[CCI_NARGS_MAX*2];
++ uint32_t i, nargs = CCI_XNARGS(ci);
++ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
++ asm_collectargs(as, ir, ci, args);
++ for (i = 0; i < nargs; i++) {
++ if (args[i] && irt_isfp(IR(args[i])->t)) { /* FP arg: FPR first, then GPR, then stack. */
++ if (nfpr > 0)
++ nfpr--;
++ else if (ngpr > 0)
++ ngpr--;
++ else
++ nslots += 2; /* Each stack argument is counted as 2 slots. */
++ } else { /* Other arg: GPR, then stack. */
++ if (ngpr > 0)
++ ngpr--;
++ else
++ nslots += 2;
++ }
++ }
++ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
++ as->evenspill = nslots;
++ return REGSP_HINT(RID_RET); /* The result hint is always RID_RET. */
++}
++
++static void asm_sparejump_setup(ASMState *as) /* Lower mctop by 4 MCode units when it sits at the very end of the mcode area. */
++{
++ MCode *mxp = as->mctop;
++ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { /* mctop is exactly at the end of the area. */
++ mxp -= 4*1;
++ as->mctop = mxp;
++ }
++}
++
++static void asm_setup_target(ASMState *as) /* Target-specific setup: spare jump area, then exit stubs. */
++{
++ asm_sparejump_setup(as);
++ asm_exitstub_setup(as);
++}
++
++/* -- Trace patching ------------------------------------------------------ */
++
++/* Patch exit jumps of existing machine code to a new target. Scans T's mcode for the load of exitno followed by a branch to its exit stub. */
++void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
++{
++ MCode *p = T->mcode;
++ MCode *pe = (MCode *)((char *)p + T->szmcode);
++ MCode *px = exitstub_trace_addr(T, exitno);
++ MCode *cstart = NULL; /* First patched instruction, for the final cache sync. */
++ MCode *mcarea = lj_mcode_patch(J, p, 0);
++
++ MCode exitload = LOONGI_ADDI_D | LOONGF_D(RID_TMP) | LOONGF_J(RID_ZERO) | LOONGF_I(exitno&0xfff);
++
++ for (; p < pe; p++) {
++ if (*p == exitload) {
++ /* Look for exitstub branch, replace with branch to target. */
++ ptrdiff_t delta = target - p - 1; /* New offset relative to the branch at p[1]. */
++ MCode ins = p[1];
++ if (((ins ^ ((px-p-1)<<10)) & 0x3fffc00) == 0 &&
++ ((ins & 0xfc000000u) == LOONGI_BEQ ||
++ (ins & 0xfc000000u) == LOONGI_BNE ||
++ (ins & 0xfc000000u) == LOONGI_BLT ||
++ (ins & 0xfc000000u) == LOONGI_BGE ||
++ (ins & 0xfc000000u) == LOONGI_BLTU)) {
++ /* Patch beq/bne/blt/bge, if within range. */
++ if (p[-1] == LOONG_NOPATCH_GC_CHECK) {
++ /* nothing */
++ } else if (LOONGF_S_OK(delta, 16)) {
++ p[1] = (ins & 0xfc0003ffu) | LOONGF_I(delta & 0xffff);
++ *p = LOONGI_NOP;
++ if (!cstart) cstart = p + 1;
++ }
++ } else if (((ins ^ ((((px-p-1)&0xffff)<<10) + (((px-p-1)>>16)&0x1f))) & 0x3fffc1f) == 0 && /* Offset bits 20:16 live in the low 5 bits. */
++ ((ins & 0xfc000000u) == LOONGI_BCEQZ ||
++ (ins & 0xfc000100u) == LOONGI_BCNEZ)) {
++ /* Patch bceqz/bcnez, if within range. */
++ if (p[-1] == LOONG_NOPATCH_GC_CHECK) {
++ /* nothing */
++ } else if (LOONGF_S_OK(delta, 21)) {
++ p[1] = (ins & 0xfc0003e0u) | LOONGF_I(delta & 0xffff) | ((delta & 0x1f0000) >> 16);
++ *p = LOONGI_NOP;
++ if (!cstart) cstart = p + 1;
++ }
++ } else if (((ins ^ ((((px-p-1)&0xffff)<<10) + (((px-p-1)>>16)&0x3ff))) & 0x3ffffff) == 0 && /* Offset bits 25:16 live in the low 10 bits. */
++ ((ins & 0xfc000000u) == LOONGI_B)) {
++ /* Patch b. */
++ lj_assertJ(LOONGF_S_OK(delta, 26), "branch target out of range");
++ p[1] = (ins & 0xfc000000u) | LOONGF_I(delta & 0xffff) | ((delta & 0x3ff0000) >> 16);
++ *p = LOONGI_NOP;
++ if (!cstart) cstart = p + 1;
++ } else if (p+2 == pe){ /* Exit load is the second to last instruction of the trace. */
++ if (p[2] == LOONGI_NOP) {
++ ptrdiff_t delta = target - &p[2];
++ lj_assertJ(LOONGF_S_OK(delta, 26), "branch target out of range");
++ p[2] = LOONGI_B | LOONGF_I(delta & 0xffff) | ((delta & 0x3ff0000) >> 16);
++ *p = LOONGI_NOP;
++ if (!cstart) cstart = p + 2;
++ }
++ }
++ }
++ }
++ if (cstart) lj_mcode_sync(cstart, px+1); /* Sync caches only if something was patched. */
++ lj_mcode_patch(J, mcarea, 1);
++}
+Index: luajit-2.1.0+openresty20240815/src/lj_ccall.c
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_ccall.c
++++ luajit-2.1.0+openresty20240815/src/lj_ccall.c
+@@ -778,6 +778,95 @@
+ } \
+ }
+
++#elif LJ_TARGET_LOONGARCH64
++/* -- LoongArch lp64 calling conventions ---------------------------------------- */
++
++#define CCALL_HANDLE_STRUCTRET \
++ /* Return structs of size > 16 by reference. */ \
++ cc->retref = !(sz <= 16); \
++ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
++
++#define CCALL_HANDLE_STRUCTRET2 \
++ unsigned int cl = ccall_classify_struct(cts, ctr); \
++ if ((cl & 4) && (cl >> 8) <= 2) { /* At most two float members: copy each from its own FPR. */ \
++ CTSize i = (cl >> 8) - 1; \
++ do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \
++ } else { \
++ if (cl > 1) { /* Other FP aggregates: copy from the FPRs, or from the GPRs with more than two members. */ \
++ sp = (uint8_t *)&cc->fpr[0]; \
++ if ((cl >> 8) > 2) \
++ sp = (uint8_t *)&cc->gpr[0]; \
++ } \
++ memcpy(dp, sp, ctr->size); \
++ } \
++
++#define CCALL_HANDLE_COMPLEXRET \
++ /* Complex values are returned in 1 or 2 FPRs. */ \
++ cc->retref = 0;
++
++#define CCALL_HANDLE_COMPLEXRET2 \
++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
++ ((float *)dp)[0] = cc->fpr[0].f; \
++ ((float *)dp)[1] = cc->fpr[1].f; \
++ } else { /* Copy complex double from FPRs. */ \
++ ((double *)dp)[0] = cc->fpr[0].d; \
++ ((double *)dp)[1] = cc->fpr[1].d; \
++ }
++
++#define CCALL_HANDLE_COMPLEXARG \
++ /* Pass complex values of 4*sizeof(double) bytes by reference. */ \
++ if (sz == 4*sizeof(double)) { \
++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
++ sz = CTSIZE_PTR; \
++ } else if (sz == 2*sizeof(float)) { /* Complex float: isfp = 2. */ \
++ isfp = 2; \
++ sz = 2*CTSIZE_PTR; \
++ } else { /* Remaining complex sizes: isfp = 1. */ \
++ isfp = 1; \
++ sz = 2*CTSIZE_PTR; \
++ }
++
++#define CCALL_HANDLE_RET \
++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) /* A float result is read from the .f member of fpr[0]. */ \
++ sp = (uint8_t *)&cc->fpr[0].f;
++
++#define CCALL_HANDLE_STRUCTARG \
++ /* Pass structs of size >16 by reference. */ \
++ unsigned int cl = ccall_classify_struct(cts, d); \
++ nff = cl >> 8; /* Number of FP members reported by the classifier. */ \
++ if (sz > 16) { \
++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
++ sz = CTSIZE_PTR; \
++ } \
++ /* Pass struct in FPRs. isfp is 2 for float members (cl & 4), else 1. */ \
++ if (cl > 1) { \
++ isfp = (cl & 4) ? 2 : 1; \
++ }
++
++
++#define CCALL_HANDLE_REGARG \
++ if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \
++ int n2 = ctype_isvector(d->info) ? 1 : /* n2: number of registers the argument takes. */ \
++ isfp == 1 ? n : 2; \
++ if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
++ dp = &cc->fpr[nfpr]; \
++ nfpr += n2; \
++ goto done; \
++ } else { /* No room in the FPRs (or more than two FP members): try the GPRs. */ \
++ if (ngpr + n2 <= maxgpr) { \
++ dp = &cc->gpr[ngpr]; \
++ ngpr += n2; \
++ goto done; \
++ } \
++ } \
++ } else { /* Try to pass argument in GPRs. */ \
++ if (ngpr + n <= maxgpr) { \
++ dp = &cc->gpr[ngpr]; \
++ ngpr += n; \
++ goto done; \
++ } \
++ }
++
+ #else
+ #error "Missing calling convention definitions for this architecture"
+ #endif
+@@ -1183,6 +1272,53 @@
+
+ #endif
+
++/* -- LoongArch64 ABI struct classification ---------------------------- */
++
++#if LJ_TARGET_LOONGARCH64
++
++static unsigned int ccall_classify_struct(CTState *cts, CType *ct) /* Returns FP member size (4 or 8) + (member count << 8), or (size <= 16) as 0/1 otherwise. */
++{
++ CTSize sz = ct->size;
++ unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); /* r: OR of FP member sizes, n: FP member count. */
++ while (ct->sib) { /* Walk the sibling chain of the aggregate. */
++ CType *sct;
++ ct = ctype_get(cts, ct->sib);
++ if (ctype_isfield(ct->info)) {
++ sct = ctype_rawchild(cts, ct);
++ if (ctype_isfp(sct->info)) {
++ r |= sct->size;
++ if (!isu) n++; else if (n == 0) n = 1; /* Union members overlap: keep the maximum count instead of summing. */
++ } else if (ctype_iscomplex(sct->info)) { /* Complex counts as two FP members of half its size. */
++ r |= (sct->size >> 1);
++ if (!isu) n += 2; else if (n < 2) n = 2;
++ } else if (ctype_isstruct(sct->info)) {
++ goto substruct;
++ } else {
++ goto noth;
++ }
++ } else if (ctype_isbitfield(ct->info)) {
++ goto noth;
++ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
++ sct = ctype_rawchild(cts, ct);
++ substruct:
++ if (sct->size > 0) { /* Classify the nested aggregate recursively and merge its result. */
++ unsigned int s = ccall_classify_struct(cts, sct);
++ if (s <= 1) goto noth;
++ r |= (s & 255);
++ if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
++ }
++ }
++ }
++ if ((r == 4 || r == 8) && n <= 4) /* All FP members have the same size (mixed sizes give r == 12). */
++ return r + (n << 8);
++noth: /* Not a homogeneous float/double aggregate. */
++ return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
++}
++
++
++#endif
++
++
+ /* -- Common C call handling ---------------------------------------------- */
+
+ /* Infer the destination CTypeID for a vararg argument. */
+@@ -1232,7 +1368,9 @@
+ #if LJ_TARGET_RISCV64
+ int nff = 0;
+ #endif
+-
++#if LJ_TARGET_LOONGARCH64
++ int nff = 0;
++#endif
+ /* Clear unused regs to get some determinism in case of misdeclaration. */
+ memset(cc->gpr, 0, sizeof(cc->gpr));
+ #if CCALL_NUM_FPR
+@@ -1426,7 +1564,7 @@
+ if (isfp && d->size == sizeof(float))
+ ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */
+ #endif
+-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
+ #if LJ_TARGET_MIPS64
+ || (isfp && nsp == 0)
+@@ -1474,6 +1612,14 @@
+ ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i];
+ } while (i--);
+ }
++#elif LJ_TARGET_LOONGARCH64
++ if (isfp == 2 && nff <= 2) {
++ /* Split complex float into separate registers. */
++ CTSize i = (sz >> 2) - 1;
++ do {
++ ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i];
++ } while (i--);
++ }
+ #else
+ UNUSED(isfp);
+ #endif
+@@ -1483,7 +1629,7 @@
+ if ((int32_t)nsp < 0) nsp = 0;
+ #endif
+
+-#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64
++#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
+ cc->nfpr = nfpr; /* Required for vararg functions. */
+ #endif
+ cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
+Index: luajit-2.1.0+openresty20240815/src/lj_ccall.h
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_ccall.h
++++ luajit-2.1.0+openresty20240815/src/lj_ccall.h
+@@ -172,6 +172,21 @@
+ struct { LJ_ENDIAN_LOHI(float f; , float g;) };
+ } FPRArg;
+
++#elif LJ_TARGET_LOONGARCH64
++
++#define CCALL_NARG_GPR 8
++#define CCALL_NARG_FPR 8 /* Upper bound for nfpr in CCALL_HANDLE_REGARG / CALLBACK_HANDLE_REGARG. */
++#define CCALL_NRET_GPR 2
++#define CCALL_NRET_FPR 2
++#define CCALL_SPS_EXTRA 3
++#define CCALL_SPS_FREE 1
++
++typedef intptr_t GPRArg;
++typedef union FPRArg { /* One FPR slot, viewed as a double or as a pair of floats. */
++ double d;
++ struct { LJ_ENDIAN_LOHI(float f; , float g;) };
++} FPRArg;
++
+ #else
+ #error "Missing calling convention definitions for this architecture"
+ #endif
+@@ -219,7 +234,7 @@
+ uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
+ #elif LJ_TARGET_ARM64
+ void *retp; /* Aggregate return pointer in x8. */
+-#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64
++#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
+ uint8_t nfpr; /* Number of arguments in FPRs. */
+ #endif
+ #if LJ_32
+Index: luajit-2.1.0+openresty20240815/src/lj_ccallback.c
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_ccallback.c
++++ luajit-2.1.0+openresty20240815/src/lj_ccallback.c
+@@ -95,6 +95,10 @@
+
+ #define CALLBACK_MCODE_HEAD 68
+
++#elif LJ_TARGET_LOONGARCH64
++
++#define CALLBACK_MCODE_HEAD 52
++
+ #else
+
+ /* Missing support for this architecture. */
+@@ -330,6 +334,33 @@
+ }
+ return p;
+ }
++#elif LJ_TARGET_LOONGARCH64
++static void *callback_mcode_init(global_State *g, uint32_t *page) /* Fill the callback page: a 13 instruction head, then 2 instructions per slot. Returns the end pointer. */
++{
++ uint32_t *p = page;
++ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
++ uintptr_t ug = (uintptr_t)(void *)g;
++ MSize slot;
++ *p++ = LOONGI_LU12I_W | LOONGF_D(RID_R18) | LOONGF_I20((target >> 12) & 0xfffff); /* R18 = target, R17 = g, each built from bits 31:12, 11:0, 51:32, 63:52. */
++ *p++ = LOONGI_LU12I_W | LOONGF_D(RID_R17) | LOONGF_I20((ug >> 12) & 0xfffff);
++ *p++ = LOONGI_ORI | LOONGF_D(RID_R18) | LOONGF_J(RID_R18) | LOONGF_I(target & 0xfff);
++ *p++ = LOONGI_ORI | LOONGF_D(RID_R17) | LOONGF_J(RID_R17) | LOONGF_I(ug & 0xfff);
++ *p++ = LOONGI_LU32I_D | LOONGF_D(RID_R18) | LOONGF_I20((target >> 32) & 0xfffff);
++ *p++ = LOONGI_LU32I_D | LOONGF_D(RID_R17) | LOONGF_I20((ug >> 32) & 0xfffff);
++ *p++ = LOONGI_LU52I_D | LOONGF_D(RID_R18) | LOONGF_J(RID_R18) | LOONGF_I((target >> 52) & 0xfff);
++ *p++ = LOONGI_LU52I_D | LOONGF_D(RID_R17) | LOONGF_J(RID_R17) | LOONGF_I((ug >> 52) & 0xfff);
++ *p++ = LOONGI_NOP; /* Four NOPs; with them the head is 13 instructions = 52 bytes (CALLBACK_MCODE_HEAD). */
++ *p++ = LOONGI_NOP;
++ *p++ = LOONGI_NOP;
++ *p++ = LOONGI_NOP;
++ *p++ = LOONGI_JIRL | LOONGF_D(RID_R0) | LOONGF_J(RID_R18) | LOONGF_I(0); /* Jump to the address in R18. */
++ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { /* Per slot: R19 = slot number, then branch back to the page start. */
++ *p++ = LOONGI_ORI | LOONGF_D(RID_R19) | LOONGF_J(RID_R0) | LOONGF_I(slot & 0xfff);
++ *p = LOONGI_B | LOONGF_I((page-p) & 0xffff) | (((page-p) >> 16) & 0x3ff);
++ p++;
++ }
++ return p;
++}
+ #else
+ /* Missing support for this architecture. */
+ #define callback_mcode_init(g, p) (p)
+@@ -608,6 +639,31 @@
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ ((float *)dp)[1] = *(float *)dp;
+
++#elif LJ_TARGET_LOONGARCH64
++
++#define CALLBACK_HANDLE_REGARG \
++ if (isfp) { /* FP argument: read from the saved FPRs, else from the saved GPRs. */ \
++ if (nfpr + n <= CCALL_NARG_FPR) { \
++ sp = &cts->cb.fpr[nfpr]; \
++ nfpr += n; \
++ goto done; \
++ } else if (ngpr + n <= maxgpr) { \
++ sp = &cts->cb.gpr[ngpr]; \
++ ngpr += n; \
++ goto done; \
++ } \
++ } else { /* Other argument: read from the saved GPRs. */ \
++ if (ngpr + n <= maxgpr) { \
++ sp = &cts->cb.gpr[ngpr]; \
++ ngpr += n; \
++ goto done; \
++ } \
++ }
++
++#define CALLBACK_HANDLE_RET \
++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) /* Float result: duplicate it into the second float of the slot. */ \
++ ((float *)dp)[1] = *(float *)dp;
++
+ #elif LJ_TARGET_RISCV64
+
+ #define CALLBACK_HANDLE_REGARG \
+@@ -797,7 +853,7 @@
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+ }
+ #endif
+-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
+ /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
+ if (ctr->size <= 4 &&
+ (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
+Index: luajit-2.1.0+openresty20240815/src/lj_emit_loongarch64.h
+===================================================================
+--- /dev/null
++++ luajit-2.1.0+openresty20240815/src/lj_emit_loongarch64.h
+@@ -0,0 +1,306 @@
++/*
++** LoongArch instruction emitter.
++** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++static intptr_t get_k64val(ASMState *as, IRRef ref) /* Return the value of a constant IR ref as a 64 bit integer. */
++{
++ IRIns *ir = IR(ref);
++ if (ir->o == IR_KINT64) {
++ return (intptr_t)ir_kint64(ir)->u64;
++ } else if (ir->o == IR_KGC) { /* GC object constant: its address. */
++ return (intptr_t)ir_kgc(ir);
++ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { /* Pointer constant: its address. */
++ return (intptr_t)ir_kptr(ir);
++ } else {
++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
++ "bad 64 bit const IR op %d", ir->o);
++ return ir->i; /* Sign-extended. */
++ }
++}
++
++#define get_kval(as, ref) get_k64val(as, ref)
++
++/* -- Emit basic instructions --------------------------------------------- */
++
++static void emit_djk(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, Reg rk) /* Three-register form. Code is emitted backwards: as->mcp is decremented first. */
++{
++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_K(rk & 0x1f);
++}
++
++#define emit_dj(as, loongi, rd, rj) emit_djk(as, loongi, rd, rj, 0)
++
++static void emit_di(ASMState *as, LOONGIns loongi, Reg rd, int32_t i) /* One register plus a 20 bit immediate (i is masked to 20 bits). */
++{
++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_I20(i & 0xfffff);
++}
++
++static void emit_dji(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, int32_t i) /* Two registers plus a signed immediate. i is passed to LOONGF_I unmasked, so callers mask it. */
++{
++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_I(i);
++}
++
++static void emit_dju(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, uint32_t u) /* Two registers plus an unsigned immediate. u is passed to LOONGF_I unmasked. */
++{
++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_I(u);
++}
++
++#define checki12(x) LOONGF_S_OK(x, 12)
++#define checku12(x) ((x) == ((x) & 0xfff))
++
++static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
++static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
++static Reg ra_scratch(ASMState *as, RegSet allow);
++
++static void emit_dj32i(ASMState *as, Reg rd, Reg rj, int32_t i) /* Emit rd = rj + i for a 32 bit constant i. */
++{
++ if (checki12(i)) { /* Fits the signed 12 bit immediate: a single ADDI_D. */
++ *--as->mcp = LOONGI_ADDI_D | LOONGF_D(rd) | LOONGF_J(rj) | LOONGF_I(i&0xfff);
++ } else { /* Otherwise build i in RID_R20 (LU12I_W, then ORI) and add it. Emitted backwards. */
++ emit_djk(as, LOONGI_ADD_D, rd, RID_R20, rj);
++ emit_dju(as, LOONGI_ORI, RID_R20, RID_R20, i&0xfff);
++ emit_di(as, LOONGI_LU12I_W, RID_R20, (i>>12)&0xfffff);
++ }
++}
++
++static void emit_d16i(ASMState *as, Reg rd, int32_t i) /* Load the low 16 bits of i into rd, sign-extended. */
++{
++ emit_dji(as, LOONGI_SRAI_D, rd, rd, 16); /* Executes second: arithmetic shift right by 16. */
++ emit_dji(as, LOONGI_ADDU16I_D, rd, RID_ZERO, (i&0xffff)); /* Executes first: ADDU16I_D from the zero register. */
++}
++
++static void emit_djml(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, uint32_t m, uint32_t l) /* Two registers plus two 6 bit fields m and l. */
++{
++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_I(l & 0x3f) | LOONGF_M(m & 0x3f);
++}
++
++static void emit_djka(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, Reg rk, Reg ra) /* Four-register form. */
++{
++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_K(rk & 0x1f) | LOONGF_A(ra & 0x1f);
++}
++
++static void emit_b_bl(ASMState *as, LOONGIns loongi, uint32_t i) /* 26 bit offset: bits 15:0 in the I field, bits 25:16 in the low 10 bits. */
++{
++ *--as->mcp = loongi | LOONGF_I(i & 0xffff) | ((i >> 16) & 0x3ff);
++}
++
++
++/* -- Emit loads/stores --------------------------------------------------- */
++
++/* Prefer rematerialization of BASE/L from global_State over spills. */
++#define emit_canremat(ref) ((ref) <= REF_BASE)
++
++
++/* Load a 32 bit constant into a GPR, as RID_ZERO + i via emit_dj32i(). */
++static void emit_loadi(ASMState *as, Reg r, int32_t i)
++{
++ emit_dj32i(as, r, RID_ZERO, i);
++}
++
++/* Load a 64 bit constant into a GPR. */
++static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
++{
++ if (checki32((int64_t)u64)) { /* Fits a signed 32 bit value: use the 32 bit path. */
++ emit_dj32i(as, r, RID_ZERO, (int32_t)u64);
++ } else { /* Four-part build. Emitted backwards: LU12I_W executes first, LU52I_D last. */
++ *--as->mcp = LOONGI_LU52I_D | LOONGF_D(r) | LOONGF_J(r) | LOONGF_I((u64>>52)&0xfff);
++ *--as->mcp = LOONGI_LU32I_D | LOONGF_D(r) | LOONGF_I20((u64>>32)&0xfffff);
++ *--as->mcp = LOONGI_ORI | LOONGF_D(r) | LOONGF_J(r) | LOONGF_I(u64&0xfff);
++ *--as->mcp = LOONGI_LU12I_W | LOONGF_D(r) | LOONGF_I20((u64>>12)&0xfffff);
++ }
++}
++
++#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
++
++/* Get/set from constant pointer. */
++static void emit_lsptr(ASMState *as, LOONGIns loongi, Reg r, void *p, RegSet allow)
++{
++ intptr_t jgl = (intptr_t)(J2G(as->J));
++ intptr_t i = (intptr_t)(p);
++ Reg base;
++ if ((uint32_t)(i-jgl) < 65536) { /* p lies within 64K above global_State. */
++ i = i-jgl-32768; /* Rebase onto RID_JGL, which holds global_State + 32768. */
++ base = RID_JGL;
++ } else {
++ base = ra_allock(as, i-(int16_t)i, allow); /* Base constant = p minus its sign-extended low 16 bits. */
++ }
++ if (checki12(i)) { /* Offset fits the 12 bit immediate form. */
++ emit_dji(as, loongi, r, base, i&0xfff);
++ }
++ else {
++ /* ld.d->ldx.d, fld.d->fldx.d, fld.s->fldx.s. NOTE(review): other opcodes are passed through unconverted. */
++ if (loongi == LOONGI_LD_D)
++ loongi = LOONGI_LDX_D;
++ else if (loongi == LOONGI_FLD_D)
++ loongi = LOONGI_FLDX_D;
++ else if (loongi == LOONGI_FLD_S)
++ loongi = LOONGI_FLDX_S;
++ emit_djk(as, loongi, r, base, RID_R20);
++
++ /* Move the sign-extended low 16 bits of i into the temporary R20. */
++ emit_d16i(as, RID_R20, i); /* i&0xffff */
++ }
++}
++
++/* Load 64 bit IR constant into register. */
++static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
++{
++ const uint64_t *k = &ir_k64(ir)->u64;
++ Reg r64 = r;
++ if (rset_test(RSET_FPR, r)) { /* FPR target: build the value in RID_TMP first. */
++ r64 = RID_TMP;
++ emit_dj(as, LOONGI_MOVGR2FR_D, r, r64); /* Copy the doubleword from the GPR to the FPR. */
++ }
++ if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) /* Constant is stored within 64K of global_State. */
++ emit_lsptr(as, LOONGI_LD_D, r64, (void *)k, 0); /* Loaded relative to RID_JGL; the allow set is unused on that path. */
++ else
++ emit_loadu64(as, r64, *k); /* Otherwise synthesize the value inline. */
++}
++
++/* Get/set global_State fields with an indexed access relative to RID_JGL. */
++static void emit_lsglptr2(ASMState *as, LOONGIns loongi, Reg r, int32_t ofs)
++{
++ emit_djk(as, loongi, r, RID_JGL, RID_R20); /* Runs 2nd: access at JGL + R20. */
++ emit_loadi(as, RID_R20, (ofs-32768)); /* Runs 1st: R20 = ofs minus the 32768 bias of JGL. */
++}
++
++#define emit_getgl(as, r, field) \
++ emit_lsglptr2(as, LOONGI_LDX_D, (r), (int32_t)offsetof(global_State, field))
++#define emit_setgl(as, r, field) \
++ emit_lsglptr2(as, LOONGI_STX_D, (r), (int32_t)offsetof(global_State, field))
++
++/* Trace number is determined from per-trace exit stubs. */
++#define emit_setvmstate(as, i) UNUSED(i)
++
++/* -- Emit control-flow instructions -------------------------------------- */
++
++/* Label for internal jumps. */
++typedef MCode *MCLabel;
++
++/* Return label pointing to current PC. */
++#define emit_label(as) ((as)->mcp)
++
++static void emit_branch(ASMState *as, LOONGIns loongi, Reg rj, Reg rd, MCode *target)
++{
++ MCode *p = as->mcp;
++ ptrdiff_t delta = target - (p - 1); /* Distance in MCode units from the slot the branch will occupy. */
++ lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range");
++ /* Two-register compare-and-branch with a 16 bit offset field, e.g. BEQ BNE BLT BGE. */
++ *--p = loongi | LOONGF_D(rd) | LOONGF_J(rj) | LOONGF_I(((uint32_t)delta & 0xffffu));
++ as->mcp = p;
++}
++
++static void emit_branch21(ASMState *as, LOONGIns loongi, Reg rj, MCode *target)
++{
++ MCode *p = as->mcp;
++ ptrdiff_t delta = target - (p - 1); /* Distance in MCode units from the slot the branch will occupy. */
++ lj_assertA(((delta + 0x100000) >> 21) == 0, "branch target out of range");
++ *--p = loongi | LOONGF_J(rj) | LOONGF_I(((uint32_t)delta & 0xffffu)) /* Offset bits 15..0 at bit 10. */
++ | (((uint32_t)delta & 0x1f0000u)>>16); /* Offset bits 20..16 at bit 0. Used for BEQZ BNEZ BCEQZ BCNEZ. */
++ as->mcp = p;
++}
++
++static void emit_jmp(ASMState *as, MCode *target)
++{
++ MCode *p = as->mcp;
++ ptrdiff_t delta = target - (p - 1); /* Distance in MCode units from the slot the jump will occupy. */
++ emit_b_bl(as, LOONGI_B, (delta&0x3ffffff)); /* 26 bit offset. NOTE(review): no range assertion, unlike emit_branch. */
++}
++
++#define emit_move(as, dst, src) \
++ emit_djk(as, LOONGI_OR, (dst), (src), RID_ZERO)
++
++static void emit_call(ASMState *as, void *target)
++{
++ MCode *p = --as->mcp; /* Reserve the slot for the call instruction first. */
++ ptrdiff_t delta = (char *)target - (char *)p; /* Byte distance from the call instruction. */
++ if (LOONGF_S_OK(delta>>2, 26)) { /* Word offset fits a signed 26 bit field: direct BL. */
++ *p = LOONGI_BL | LOONGF_I((delta>>2) & 0xffff) | (((delta>>2) >> 16) & 0x3ff);
++ } else { /* Target out of range: need indirect call. */
++ Reg r = ra_allock(as, (intptr_t)target, RSET_RANGE(RID_R12, RID_R19+1)); /* Target address held in one of R12..R19. */
++ *p = LOONGI_JIRL | LOONGF_D(RID_RA) | LOONGF_J(r) | LOONGF_I(0); /* jirl ra, r, 0 */
++ }
++}
++
++/* -- Emit generic operations --------------------------------------------- */
++
++/* Generic move between two regs. The IR type picks the double or single FP variant. */
++static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
++{
++ if (dst < RID_MAX_GPR && src >= RID_MIN_FPR) /* FPR -> GPR. */
++ emit_dj(as, irt_isnum(ir->t) ? LOONGI_MOVFR2GR_D : LOONGI_MOVFR2GR_S, dst, src);
++ else if (dst < RID_MAX_GPR) /* GPR -> GPR. */
++ emit_move(as, dst, src);
++ else /* FPR destination. NOTE(review): src is presumably an FPR too; a GPR source is not handled. */
++ emit_dj(as, irt_isnum(ir->t) ? LOONGI_FMOV_D : LOONGI_FMOV_S, dst, src);
++}
++
++/* Emit dest = src + i (64 bit add) with a constant operand i. */
++static void emit_addk(ASMState *as, Reg dest, Reg src, int32_t i, RegSet allow)
++{
++ if (checki12(i)) { /* Constant fits the 12 bit immediate of addi.d. */
++ emit_dji(as, LOONGI_ADDI_D, dest, src, i&0xfff);
++ } else { /* Otherwise the constant is held in a register taken from allow. */
++ Reg src2 = ra_allock(as, i, allow);
++ emit_djk(as, LOONGI_ADD_D, dest, src, src2);
++ }
++}
++
++static void emit_lso(ASMState *as, LOONGIns loongi, Reg dest, Reg src, int64_t i, RegSet allow)
++{ /* Load/store dest at src + i, using the immediate form when possible. */
++ if (checki12(i)) { /* Offset fits the 12 bit immediate form. */
++ emit_dji(as, loongi, dest, src, i&0xfff);
++ } else { /* Otherwise switch to the register-indexed form of the opcode. */
++ LOONGIns loongk = LOONGI_NOP;
++ switch (loongi) {
++ case LOONGI_LD_D: loongk = LOONGI_LDX_D; break;
++ case LOONGI_LD_W: loongk = LOONGI_LDX_W; break;
++ case LOONGI_ST_D: loongk = LOONGI_STX_D; break;
++ case LOONGI_FLD_D: loongk = LOONGI_FLDX_D; break;
++ case LOONGI_FST_D: loongk = LOONGI_FSTX_D; break;
++ case LOONGI_LD_B: loongk = LOONGI_LDX_B; break;
++ case LOONGI_LD_BU: loongk = LOONGI_LDX_BU; break;
++ case LOONGI_LD_H: loongk = LOONGI_LDX_H; break;
++ case LOONGI_LD_HU: loongk = LOONGI_LDX_HU; break;
++ case LOONGI_FLD_S: loongk = LOONGI_FLDX_S; break;
++ default: lj_assertA(0, "load/store op has no indexed form mapping"); break; /* Would emit LOONGI_NOP bits. */
++ }
++ /* The offset goes into a scratch GPR; emit_d16i keeps only its sign-extended low 16 bits. */
++ Reg src2 = ra_scratch(as, allow);
++ emit_djk(as, loongk, dest, src, src2);
++ emit_d16i(as, src2, i);
++ }
++}
++
++/* Generic load of register with base and (small) offset address. Only the low 16 bits of ofs are used. */
++static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
++{
++ if (r < RID_MAX_GPR) { /* GPR: 64 or 32 bit indexed load, chosen by the IR type. */
++ emit_djk(as, irt_is64(ir->t) ? LOONGI_LDX_D : LOONGI_LDX_W, r, base, RID_R20);
++ } else { /* FPR: double or single indexed load, chosen by the IR type. */
++ emit_djk(as, irt_isnum(ir->t) ? LOONGI_FLDX_D : LOONGI_FLDX_S, r, base, RID_R20);
++ }
++ emit_d16i(as, RID_R20, ofs); /* Emitted last, so it runs first: R20 = (int16_t)ofs. */
++}
++
++/* Generic store of register with base and (small) offset address. Only the low 16 bits of ofs are used. */
++static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
++{
++ if (r < RID_MAX_GPR) { /* GPR: 64 or 32 bit indexed store, chosen by the IR type. */
++ emit_djk(as, irt_is64(ir->t) ? LOONGI_STX_D : LOONGI_STX_W, r, base, RID_R20);
++ } else { /* FPR: double or single indexed store; r is reduced to its 5 bit register number. */
++ emit_djk(as, irt_isnum(ir->t) ? LOONGI_FSTX_D : LOONGI_FSTX_S, (r&31), base, RID_R20);
++ }
++ emit_d16i(as, RID_R20, ofs); /* Emitted last, so it runs first: R20 = (int16_t)ofs. */
++}
++
++/* Add offset to pointer (in place: r += ofs). */
++static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
++{
++ if (ofs) { /* Nothing is emitted for a zero offset. */
++ emit_addk(as, r, r, ofs, rset_exclude(RSET_GPR, r)); /* A register for the constant must differ from r. */
++ }
++}
++
++
++#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
+Index: luajit-2.1.0+openresty20240815/src/lj_frame.h
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_frame.h
++++ luajit-2.1.0+openresty20240815/src/lj_frame.h
+@@ -296,6 +296,15 @@
+ ** need to change to 3.
+ */
+ #define CFRAME_SHIFT_MULTRES 0
++#elif LJ_TARGET_LOONGARCH64
++#define CFRAME_OFS_ERRF 196
++#define CFRAME_OFS_NRES 192
++#define CFRAME_OFS_PREV 184
++#define CFRAME_OFS_L 176
++#define CFRAME_OFS_PC 168
++#define CFRAME_SIZE 200
++#define CFRAME_OFS_MULTRES 0
++#define CFRAME_SHIFT_MULTRES 3
+ #else
+ #error "Missing CFRAME_* definitions for this architecture"
+ #endif
+Index: luajit-2.1.0+openresty20240815/src/lj_gdbjit.c
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_gdbjit.c
++++ luajit-2.1.0+openresty20240815/src/lj_gdbjit.c
+@@ -309,6 +309,9 @@
+ #elif LJ_TARGET_RISCV64
+ DW_REG_SP = 2,
+ DW_REG_RA = 1,
++#elif LJ_TARGET_LOONGARCH64
++ DW_REG_SP = 3,
++ DW_REG_RA = 1,
+ #else
+ #error "Unsupported target architecture"
+ #endif
+@@ -388,6 +391,8 @@
+ .machine = 8,
+ #elif LJ_TARGET_RISCV64
+ .machine = 243,
++#elif LJ_TARGET_LOONGARCH64
++ .machine = 258,
+ #else
+ #error "Unsupported target architecture"
+ #endif
+@@ -606,6 +611,13 @@
+ DB(DW_CFA_offset|32|9); DUV(29);
+ DB(DW_CFA_offset|32|8); DUV(30);
+ }
++#elif LJ_TARGET_LOONGARCH64
++ {
++ int i;
++ DB(DW_CFA_offset|30); DUV(2);
++ for (i = 31; i >= 23; i--) { DB(DW_CFA_offset|i); DUV(3+(31-i)); }
++ for (i = 31; i >= 24; i--) { DB(DW_CFA_offset|32|i); DUV(43-i); }
++ }
+ #else
+ #error "Unsupported target architecture"
+ #endif
+Index: luajit-2.1.0+openresty20240815/src/lj_jit.h
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_jit.h
++++ luajit-2.1.0+openresty20240815/src/lj_jit.h
+@@ -77,6 +77,10 @@
+
+ #define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006XThead"
+
++//#elif LJ_TARGET_LOONGARCH64
++//#define JIT_F_GS464V (JIT_F_CPU << 0)
++//#define JIT_F_CPUSTRING "\6GS464V"
++
+ #else
+
+ #define JIT_F_CPUSTRING ""
+@@ -378,7 +382,7 @@
+ LJ_K64_M2P64_31 = LJ_K64_M2P64,
+ #endif
+ #endif
+-#if LJ_TARGET_MIPS
++#if LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64
+ LJ_K64_2P31, /* 2^31 */
+ #if LJ_64
+ LJ_K64_2P63, /* 2^63 */
+@@ -387,7 +391,7 @@
+ #endif
+ LJ_K64__MAX,
+ };
+-#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS)
++#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64)
+
+ enum {
+ #if LJ_TARGET_X86ORX64
+@@ -397,16 +401,17 @@
+ LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
+ LJ_K32_2P52, /* 2^52 */
+ #endif
+-#if LJ_TARGET_PPC || LJ_TARGET_MIPS
++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64
+ LJ_K32_2P31, /* 2^31 */
+ #endif
+-#if LJ_TARGET_MIPS64
++#if LJ_TARGET_MIPS64 || LJ_TARGET_LOONGARCH64
+ LJ_K32_2P63, /* 2^63 */
+ LJ_K32_M2P64, /* -2^64 */
+ #endif
+ LJ_K32__MAX
+ };
+-#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS)
++#define LJ_K32__USED \
++ (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64)
+
+ /* Get 16 byte aligned pointer to SIMD constant. */
+ #define LJ_KSIMD(J, n) \
+Index: luajit-2.1.0+openresty20240815/src/lj_target.h
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_target.h
++++ luajit-2.1.0+openresty20240815/src/lj_target.h
+@@ -55,7 +55,7 @@
+ /* Bitset for registers. 32 registers suffice for most architectures.
+ ** Note that one set holds bits for both GPRs and FPRs.
+ */
+-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64
++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
+ typedef uint64_t RegSet;
+ #define RSET_BITS 6
+ #define rset_picktop_(rs) ((Reg)lj_fls64(rs))
+@@ -147,6 +147,8 @@
+ #include "lj_target_riscv.h"
+ #elif LJ_TARGET_S390X
+ #include "lj_target_s390x.h"
++#elif LJ_TARGET_LOONGARCH64
++#include "lj_target_loongarch64.h"
+ #else
+ #error "Missing include for target CPU"
+ #endif
+Index: luajit-2.1.0+openresty20240815/src/lj_target_loongarch64.h
+===================================================================
+--- /dev/null
++++ luajit-2.1.0+openresty20240815/src/lj_target_loongarch64.h
+@@ -0,0 +1,313 @@
++/*
++** Definitions for LoongArch CPUs.
++** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++#ifndef _LJ_TARGET_LOONGARCH_H
++#define _LJ_TARGET_LOONGARCH_H
++
++/* -- Registers IDs ------------------------------------------------------- */
++
++#define GPRDEF(_) \
++ _(R0) _(RA) _(R2) _(SP) _(R4) _(R5) _(R6) _(R7) \
++ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \
++ _(R16) _(R17) _(R18) _(R19) _(R20) _(X) _(R22) _(R23) \
++ _(R24) _(R25) _(R26) _(R27) _(R28) _(R29) _(R30) _(R31)
++#define FPRDEF(_) \
++ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
++ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
++ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
++ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
++#define VRIDDEF(_)
++
++#define RIDENUM(name) RID_##name,
++
++enum {
++ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
++ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
++ RID_MAX,
++ RID_ZERO = RID_R0,
++ RID_TMP = RID_RA,
++
++ /* Calling conventions. */
++ RID_RET = RID_R4,
++
++ RID_RETHI = RID_R5,
++ RID_RETLO = RID_R4,
++
++ RID_FPRET = RID_F0,
++
++ /* These definitions must match with the *.dasc file(s): */
++ RID_BASE = RID_R23, /* Interpreter BASE. */
++ RID_LPC = RID_R25, /* Interpreter PC. */
++ RID_DISPATCH = RID_R26, /* Interpreter DISPATCH table. */
++ RID_LREG = RID_R27, /* Interpreter L. */
++ RID_JGL = RID_R22, /* On-trace: global_State + 32768. */
++
++ /* Register ranges [min, max) and number of registers. */
++ RID_MIN_GPR = RID_R0,
++ RID_MAX_GPR = RID_R31+1,
++ RID_MIN_FPR = RID_MAX_GPR,
++ RID_MAX_FPR = RID_F31+1,
++ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
++};
++
++#define RID_NUM_KREF RID_NUM_GPR
++#define RID_MIN_KREF RID_R0
++
++/* -- Register sets ------------------------------------------------------- */
++
++/* Make use of all registers, except ZERO, TMP, R2, SP, JGL, R20 and X. */
++#define RSET_FIXED \
++ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_R2)|\
++ RID2RSET(RID_SP)|RID2RSET(RID_JGL)|RID2RSET(RID_R20)|\
++ RID2RSET(RID_X))
++#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
++#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
++#define RSET_ALL (RSET_GPR|RSET_FPR)
++#define RSET_INIT RSET_ALL
++
++/* scratch register. */
++#define RSET_SCRATCH_GPR RSET_RANGE(RID_R4, RID_R19+1)
++#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F23+1)
++#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
++#define REGARG_FIRSTGPR RID_R4
++#define REGARG_LASTGPR RID_R11
++#define REGARG_NUMGPR 8
++#define REGARG_FIRSTFPR RID_F0
++#define REGARG_LASTFPR RID_F7
++#define REGARG_NUMFPR 8
++
++/* -- Spill slots --------------------------------------------------------- */
++
++/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
++**
++** SPS_FIXED: Available fixed spill slots in interpreter frame.
++** This definition must match with the *.dasc file(s).
++**
++** SPS_FIRST: First spill slot for general use.
++*/
++#define SPS_FIXED 4
++#define SPS_FIRST 4
++
++#define SPOFS_TMP 0
++
++#define sps_scale(slot) (4 * (int32_t)(slot))
++#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
++
++/* -- Exit state ---------------------------------------------------------- */
++
++/* This definition must match with the *.dasc file(s). */
++typedef struct {
++ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
++ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
++ int32_t spill[256]; /* Spill slots. */
++} ExitState;
++
++/* Highest exit + 1 indicates stack check. */
++#define EXITSTATE_CHECKEXIT 1
++
++/* Return the address of a per-trace exit stub. */
++static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
++{
++ while (*p == 0x03400000) p++; /* Skip LOONGI_NOP. */
++ return p;
++}
++/* Avoid dependence on lj_jit.h if only including lj_target.h. */
++#define exitstub_trace_addr(T, exitno) \
++ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode))
++
++/* -- Instructions -------------------------------------------------------- */
++
++/* Instruction fields. */
++#define LOONGF_D(r) (r)
++#define LOONGF_J(r) ((r) << 5)
++#define LOONGF_K(r) ((r) << 10)
++#define LOONGF_A(r) ((r) << 15)
++#define LOONGF_I(n) ((n) << 10)
++#define LOONGF_I20(n) ((n) << 5)
++#define LOONGF_M(n) ((n) << 16)
++
++/* Check for valid field range. */
++#define LOONGF_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0)
++
++typedef enum LOONGIns {
++/* Integer instructions. */
++ LOONGI_MOVE = 0x00150000,
++ LOONGI_NOP = 0x03400000,
++
++ LOONGI_AND = 0x00148000,
++ LOONGI_ANDI = 0x03400000,
++ LOONGI_OR = 0x00150000,
++ LOONGI_ORI = 0x03800000,
++ LOONGI_XOR = 0x00158000,
++ LOONGI_XORI = 0x03c00000,
++ LOONGI_NOR = 0x00140000,
++
++ LOONGI_SLT = 0x00120000,
++ LOONGI_SLTU = 0x00128000,
++ LOONGI_SLTI = 0x02000000,
++ LOONGI_SLTUI = 0x02400000,
++
++ LOONGI_ADD_W = 0x00100000,
++ LOONGI_ADDI_W = 0x02800000,
++ LOONGI_SUB_W = 0x00110000,
++ LOONGI_MUL_W = 0x001c0000,
++ LOONGI_MULH_W = 0x001c8000,
++ LOONGI_DIV_W = 0x00200000,
++ LOONGI_DIV_WU = 0x00210000,
++
++ LOONGI_SLLI_W = 0x00408000,
++ LOONGI_SRLI_W = 0x00448000,
++ LOONGI_SRAI_W = 0x00488000,
++ LOONGI_ROTRI_W = 0x004c8000,
++ LOONGI_ROTRI_D = 0x004d0000,
++ LOONGI_SLL_W = 0x00170000,
++ LOONGI_SRL_W = 0x00178000,
++ LOONGI_SRA_W = 0x00180000,
++ LOONGI_ROTR_W = 0x001b0000,
++ LOONGI_ROTR_D = 0x001b8000,
++
++ LOONGI_EXT_W_B = 0x00005c00,
++ LOONGI_EXT_W_H = 0x00005800,
++ LOONGI_REVB_2H = 0x00003000,
++ LOONGI_REVB_4H = 0x00003400,
++
++ LOONGI_ALSL_W = 0x00040000,
++ LOONGI_ALSL_D = 0x002c0000,
++
++ LOONGI_B = 0x50000000,
++ LOONGI_BL = 0x54000000,
++ LOONGI_JIRL = 0x4c000000,
++
++ LOONGI_BEQ = 0x58000000,
++ LOONGI_BNE = 0x5c000000,
++ LOONGI_BLT = 0x60000000,
++ LOONGI_BGE = 0x64000000,
++ LOONGI_BGEU = 0x6c000000,
++ LOONGI_BLTU = 0x68000000,
++ LOONGI_BCEQZ = 0x48000000,
++ LOONGI_BCNEZ = 0x48000100,
++
++ /* Load/store instructions. */
++ LOONGI_LD_W = 0x28800000,
++ LOONGI_LD_D = 0x28c00000,
++ LOONGI_ST_W = 0x29800000,
++ LOONGI_ST_D = 0x29c00000,
++ LOONGI_LD_B = 0x28000000,
++ LOONGI_ST_B = 0x29000000,
++ LOONGI_LD_H = 0x28400000,
++ LOONGI_ST_H = 0x29400000,
++ LOONGI_LD_BU = 0x2a000000,
++ LOONGI_LD_HU = 0x2a400000,
++ LOONGI_LDX_B = 0x38000000,
++ LOONGI_LDX_BU = 0x38200000,
++ LOONGI_LDX_H = 0x38040000,
++ LOONGI_LDX_HU = 0x38240000,
++ LOONGI_LDX_D = 0x380c0000,
++ LOONGI_STX_D = 0x381c0000,
++ LOONGI_LDX_W = 0x38080000,
++ LOONGI_STX_W = 0x38180000,
++ LOONGI_STX_B = 0x38100000,
++ LOONGI_STX_H = 0x38140000,
++ LOONGI_FLD_S = 0x2b000000,
++ LOONGI_FST_S = 0x2b400000,
++ LOONGI_FLD_D = 0x2b800000,
++ LOONGI_FST_D = 0x2bc00000,
++ LOONGI_FLDX_D = 0x38340000,
++ LOONGI_FLDX_S = 0x38300000,
++ LOONGI_FSTX_D = 0x383c0000,
++ LOONGI_FSTX_S = 0x38380000,
++
++ LOONGI_ADD_D = 0x00108000,
++ LOONGI_ADDI_D = 0x02c00000,
++ LOONGI_ADDU16I_D = 0x10000000,
++ LOONGI_LU12I_W = 0x14000000,
++ LOONGI_LU32I_D = 0x16000000,
++ LOONGI_LU52I_D = 0x3000000,
++ LOONGI_SUB_D = 0x00118000,
++ LOONGI_DIV_D = 0x00220000,
++ LOONGI_DIV_DU = 0x00230000,
++ LOONGI_MUL_D = 0x001d8000,
++
++ LOONGI_SLLI_D = 0x00410000,
++ LOONGI_SRLI_D = 0x00450000,
++ LOONGI_SLL_D = 0x00188000,
++ LOONGI_SRL_D = 0x00190000,
++ LOONGI_SRAI_D = 0x00490000,
++ LOONGI_SRA_D = 0x00198000,
++ LOONGI_REVH_D = 0x00004400,
++
++ /* Extract/insert instructions. */
++ LOONGI_BSTRPICK_D = 0x00c00000,
++ LOONGI_BSTRINS_D = 0x00800000,
++
++ LOONGI_MASKEQZ = 0x00130000,
++ LOONGI_MASKNEZ = 0x00138000,
++
++ /* FP instructions. */
++ LOONGI_FRINT_S = 0x011e4400,
++ LOONGI_FRINT_D = 0x011e4800,
++ LOONGI_FTINTRM_L_D = 0x011a2800,
++ LOONGI_FTINTRP_L_D = 0x011a6800,
++ LOONGI_FTINTRNE_L_D = 0x011ae800,
++
++ LOONGI_FMOV_S = 0x01149400,
++ LOONGI_FMOV_D = 0x01149800,
++
++ LOONGI_FABS_D = 0x01140800,
++ LOONGI_FNEG_D = 0x01141800,
++
++ LOONGI_FADD_D = 0x01010000,
++ LOONGI_FSUB_D = 0x01030000,
++ LOONGI_FMUL_D = 0x01050000,
++ LOONGI_FDIV_D = 0x01070000,
++ LOONGI_FSQRT_D = 0x01144800,
++
++ LOONGI_FMIN_D = 0x010b0000,
++ LOONGI_FMAX_D = 0x01090000,
++
++ LOONGI_FADD_S = 0x01008000,
++ LOONGI_FSUB_S = 0x01028000,
++
++ LOONGI_FMADD_S = 0x08100000,
++ LOONGI_FMADD_D = 0x08200000,
++ LOONGI_FNMADD_D = 0x08a00000,
++ LOONGI_FMSUB_S = 0x08500000,
++ LOONGI_FMSUB_D = 0x08600000,
++ LOONGI_FNMSUB_D = 0x08e00000,
++
++ LOONGI_FCVT_D_S = 0x01192400,
++ LOONGI_FTINT_W_S = 0x011b0400,
++ LOONGI_FCVT_S_D = 0x01191800,
++ LOONGI_FTINT_W_D = 0x011b0800,
++ LOONGI_FFINT_S_W = 0x011d1000,
++ LOONGI_FFINT_D_W = 0x011d2000,
++ LOONGI_FFINT_S_L = 0x011d1800,
++ LOONGI_FFINT_D_L = 0x011d2800,
++
++ LOONGI_FTINTRZ_W_S = 0x011a8400,
++ LOONGI_FTINTRZ_W_D = 0x011a8800,
++ LOONGI_FTINTRZ_L_S = 0x011aa400,
++ LOONGI_FTINTRZ_L_D = 0x011aa800,
++ LOONGI_FTINTRM_W_S = 0x011a0400,
++ LOONGI_FTINTRM_W_D = 0x011a0800,
++
++ LOONGI_MOVFR2GR_S = 0x0114b400,
++ LOONGI_MOVGR2FR_W = 0x0114a400,
++ LOONGI_MOVGR2FR_D = 0x0114a800,
++ LOONGI_MOVFR2GR_D = 0x0114b800,
++
++ LOONGI_FCMP_CEQ_D = 0x0c220000,
++ LOONGI_FCMP_CLT_S = 0x0c110000,
++ LOONGI_FCMP_CLT_D = 0x0c210000,
++ LOONGI_FCMP_CLE_D = 0x0c230000,
++ LOONGI_FCMP_CULE_D = 0x0c270000,
++ LOONGI_FCMP_CULT_D = 0x0c250000,
++ LOONGI_FCMP_CNE_D = 0x0c280000,
++ LOONGI_FSEL = 0x0d000000,
++} LOONGIns;
++
++#endif
++
+Index: luajit-2.1.0+openresty20240815/src/lj_trace.c
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_trace.c
++++ luajit-2.1.0+openresty20240815/src/lj_trace.c
+@@ -334,17 +334,17 @@
+ J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
+ J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
+ #endif
+-#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
++#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 || LJ_TARGET_LOONGARCH64
+ J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
+ #endif
+ #if LJ_TARGET_PPC
+ J->k32[LJ_K32_2P52_2P31] = 0x59800004;
+ J->k32[LJ_K32_2P52] = 0x59800000;
+ #endif
+-#if LJ_TARGET_PPC || LJ_TARGET_MIPS
++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64
+ J->k32[LJ_K32_2P31] = 0x4f000000;
+ #endif
+-#if LJ_TARGET_MIPS
++#if LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64
+ J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
+ #if LJ_64
+ J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
+Index: luajit-2.1.0+openresty20240815/src/lj_vmmath.c
+===================================================================
+--- luajit-2.1.0+openresty20240815.orig/src/lj_vmmath.c
++++ luajit-2.1.0+openresty20240815/src/lj_vmmath.c
+@@ -70,7 +70,7 @@
+ /* -- Helper functions for generated machine code ------------------------- */
+
+ #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \
+- || LJ_TARGET_RISCV64
++ || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
+ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
+ {
+ uint32_t y, ua, ub;
+Index: luajit-2.1.0+openresty20240815/src/vm_loongarch64.dasc
+===================================================================
+--- /dev/null
++++ luajit-2.1.0+openresty20240815/src/vm_loongarch64.dasc
+@@ -0,0 +1,4625 @@
++|// Low-level VM code for LoongArch CPUs.
++|// Bytecode interpreter, fast functions and helper functions.
++|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
++|
++|.arch loongarch64
++|.section code_op, code_sub
++|
++|.actionlist build_actionlist
++|.globals GLOB_
++|.globalnames globnames
++|.externnames extnames
++|
++|//-----------------------------------------------------------------------
++|
++|// Fixed register assignments for the interpreter.
++|// Don't use: r0 = 0, r1 = ra, r2 = tp, r3 = sp, r21 = reserved
++|
++|
++|// The following must be C callee-save (but BASE is often refetched).
++|.define BASE, r23 // Base of current Lua stack frame.
++|.define KBASE, r24 // Constants of current Lua function.
++|.define PC, r25 // Next PC.
++|.define DISPATCH, r26 // Opcode dispatch table.
++|.define LREG, r27 // Register holding lua_State (also in SAVE_L).
++|.define MULTRES, r28 // Size of multi-result: (nresults+1)*8.
++|
++|.define JGL, r22 // On-trace: global_State + 32768.
++|
++|// Constants for type-comparisons, stores and conversions. C callee-save.
++|.define TISNIL, r22
++|.define TISNUM, r29
++|.define TOBIT, f30 // 2^52 + 2^51.
++|
++|// The following temporaries are not saved across C calls, except for RA.
++|.define RA, r30 // Callee-save.
++|.define RB, r8
++|.define RC, r9
++|.define RD, r10
++|.define INS, r11
++|
++|.define TMP0, r12
++|.define TMP1, r13
++|.define TMP2, r14
++|.define TMP3, r15
++|.define TMP4, r17
++|
++|// LoongArch LP64 calling convention.
++|.define CARG1, r4
++|.define CARG2, r5
++|.define CARG3, r6
++|.define CARG4, r7
++|.define CARG5, r8
++|.define CARG6, r9
++|.define CARG7, r10
++|.define CARG8, r11
++|
++|.define CRET1, r4
++|.define CRET2, r5
++|
++|.define FARG1, f0
++|.define FARG2, f1
++|.define FARG3, f2
++|.define FARG4, f3
++|.define FARG5, f4
++|.define FARG6, f5
++|.define FARG7, f6
++|.define FARG8, f7
++|
++|.define FRET1, f0
++|.define FRET2, f1
++|
++|.define FTMP0, f8
++|.define FTMP1, f9
++|.define FTMP2, f10
++|.define FTMP3, f22
++|.define FTMP4, f23
++|
++|.define FCC0, fcc0
++|.define FCC1, fcc1
++|
++|// Stack layout while in interpreter. Must match with lj_frame.h.
++|// LoongArch64 hard-float.
++|
++|.define CFRAME_SPACE, 200 // Delta for sp.
++|
++|//----- 16 byte aligned, <-- sp entering interpreter
++|.define SAVE_ERRF, 196 // 32 bit values.
++|.define SAVE_NRES, 192
++|.define SAVE_CFRAME, 184 // 64 bit values.
++|.define SAVE_L, 176
++|.define SAVE_PC, 168
++|//----- 16 byte aligned
++|.define SAVE_GPR_, 80 // .. 80+11*8: 64 bit GPR saves.
++|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves.
++|
++|
++|.define TMPD, 0
++|//----- 16 byte aligned
++|
++|.define TMPD_OFS, 0
++|
++|//-----------------------------------------------------------------------
++|
++|.macro saveregs
++| addi.d sp, sp, -CFRAME_SPACE
++| st.d ra, SAVE_GPR_+10*8(sp)
++| st.d r22, SAVE_GPR_+9*8(sp)
++| st.d r31, SAVE_GPR_+8*8(sp)
++| fst.d f31, SAVE_FPR_+7*8(sp)
++| st.d r30, SAVE_GPR_+7*8(sp)
++| fst.d f30, SAVE_FPR_+6*8(sp)
++| st.d r29, SAVE_GPR_+6*8(sp)
++| fst.d f29, SAVE_FPR_+5*8(sp)
++| st.d r28, SAVE_GPR_+5*8(sp)
++| fst.d f28, SAVE_FPR_+4*8(sp)
++| st.d r27, SAVE_GPR_+4*8(sp)
++| fst.d f27, SAVE_FPR_+3*8(sp)
++| st.d r26, SAVE_GPR_+3*8(sp)
++| fst.d f26, SAVE_FPR_+2*8(sp)
++| st.d r25, SAVE_GPR_+2*8(sp)
++| fst.d f25, SAVE_FPR_+1*8(sp)
++| st.d r24, SAVE_GPR_+1*8(sp)
++| fst.d f24, SAVE_FPR_+0*8(sp)
++| st.d r23, SAVE_GPR_+0*8(sp)
++|.endmacro
++|
++|.macro restoreregs_ret
++| ld.d ra, SAVE_GPR_+10*8(sp)
++| ld.d r22, SAVE_GPR_+9*8(sp)
++| ld.d r31, SAVE_GPR_+8*8(sp)
++| ld.d r30, SAVE_GPR_+7*8(sp)
++| fld.d f31, SAVE_FPR_+7*8(sp)
++| ld.d r29, SAVE_GPR_+6*8(sp)
++| fld.d f30, SAVE_FPR_+6*8(sp)
++| ld.d r28, SAVE_GPR_+5*8(sp)
++| fld.d f29, SAVE_FPR_+5*8(sp)
++| ld.d r27, SAVE_GPR_+4*8(sp)
++| fld.d f28, SAVE_FPR_+4*8(sp)
++| ld.d r26, SAVE_GPR_+3*8(sp)
++| fld.d f27, SAVE_FPR_+3*8(sp)
++| ld.d r25, SAVE_GPR_+2*8(sp)
++| fld.d f26, SAVE_FPR_+2*8(sp)
++| ld.d r24, SAVE_GPR_+1*8(sp)
++| fld.d f25, SAVE_FPR_+1*8(sp)
++| ld.d r23, SAVE_GPR_+0*8(sp)
++| fld.d f24, SAVE_FPR_+0*8(sp)
++| addi.d sp, sp, CFRAME_SPACE
++| jirl r0, ra, 0
++|.endmacro
++|
++|//-----------------------------------------------------------------------
++|
++|.macro .STXW, a, b, c
++| addu16i.d r20, r0, c
++| srai.d r20, r20, 16
++| stx.w a, b, r20
++|.endmacro
++|
++|.macro .STXD, a, b, c
++| addu16i.d r20, r0, c
++| srai.d r20, r20, 16
++| stx.d a, b, r20
++|.endmacro
++|
++|.macro .LDXW, a, b, c
++| addu16i.d r20, r0, c
++| srai.d r20, r20, 16
++| ldx.w a, b, r20
++|.endmacro
++|
++|.macro .LDXD, a, b, c
++| addu16i.d r20, r0, c
++| srai.d r20, r20, 16
++| ldx.d a, b, r20
++|.endmacro
++|
++|.macro .LDXBU, a, b, c
++| addu16i.d r20, r0, c
++| srai.d r20, r20, 16
++| ldx.bu a, b, r20
++|.endmacro
++|
++|.macro .ADD16I, a, b, c
++| addu16i.d r20, r0, c
++| srai.d r20, r20, 16
++| add.d a, b, r20
++|.endmacro
++|
++|// Type definitions. Some of these are only used for documentation.
++|.type L, lua_State, LREG
++|.type GL, global_State
++|.type TVALUE, TValue
++|.type GCOBJ, GCobj
++|.type STR, GCstr
++|.type TAB, GCtab
++|.type LFUNC, GCfuncL
++|.type CFUNC, GCfuncC
++|.type PROTO, GCproto
++|.type UPVAL, GCupval
++|.type NODE, Node
++|.type NARGS8, int
++|.type TRACE, GCtrace
++|.type SBUF, SBuf
++|
++|//-----------------------------------------------------------------------
++|
++|// Trap for not-yet-implemented parts.
++|.macro NYI; break 0; .endmacro
++|
++|//-----------------------------------------------------------------------
++|
++|// Access to frame relative to BASE.
++|.define FRAME_PC, -8
++|.define FRAME_FUNC, -16
++|
++|//-----------------------------------------------------------------------
++|
++|// Endian-specific defines. LoongArch is little endian.
++|.define OFS_RD, 2
++|.define OFS_RA, 1
++|.define OFS_OP, 0
++|
++|// Instruction decode.
++|.macro decode_BC4b, dst; slli.w dst, dst, 2; .endmacro
++|.macro decode_BC8b, dst; slli.w dst, dst, 3; .endmacro
++|.macro decode_OP, dst, ins; andi dst, ins, 0xff; .endmacro
++|.macro decode_RA, dst, ins; bstrpick.d dst, ins, 15, 8; decode_BC8b dst; .endmacro
++|.macro decode_RB, dst, ins; bstrpick.d dst, ins, 31, 24; decode_BC8b dst; .endmacro
++|.macro decode_RC, dst, ins; bstrpick.d dst, ins, 23, 16; decode_BC8b dst; .endmacro
++|.macro decode_RD, dst, ins; bstrpick.d dst, ins, 31, 16; decode_BC8b dst; .endmacro
++|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro
++|
++|// Instruction fetch.
++|.macro ins_NEXT1
++| ld.w INS, 0(PC)
++| addi.d PC, PC, 4
++|.endmacro
++|// Instruction decode+dispatch.
++|.macro ins_NEXT2
++| decode_OP TMP1, INS
++| decode_BC8b TMP1
++| add.d TMP0, DISPATCH, TMP1
++| ld.d TMP4, 0(TMP0)
++| decode_RD RD, INS
++| decode_RA RA, INS
++| jirl r0, TMP4, 0
++|.endmacro
++|.macro ins_NEXT
++| ins_NEXT1
++| ins_NEXT2
++|.endmacro
++|
++|// Instruction footer.
++|.if 1
++| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
++| .define ins_next, ins_NEXT
++| .define ins_next_, ins_NEXT
++| .define ins_next1, ins_NEXT1
++| .define ins_next2, ins_NEXT2
++|.else
++| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
++| // Affects only certain kinds of benchmarks (and only with -j off).
++| .macro ins_next
++| b ->ins_next
++| .endmacro
++| .macro ins_next1
++| .endmacro
++| .macro ins_next2
++| b ->ins_next
++| .endmacro
++| .macro ins_next_
++| ->ins_next:
++| ins_NEXT
++| .endmacro
++|.endif
++|
++|// Call decode and dispatch.
++|.macro ins_callt
++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
++| ld.d PC, LFUNC:RB->pc
++| ld.w INS, 0(PC)
++| addi.d PC, PC, 4
++| decode_OP TMP1, INS
++| decode_RA RA, INS
++| decode_BC8b TMP1
++| add.d TMP0, DISPATCH, TMP1
++| ld.d TMP0, 0(TMP0)
++| add.d RA, RA, BASE
++| jirl r0, TMP0, 0
++|.endmacro
++|
++|.macro ins_call
++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
++| st.d PC, FRAME_PC(BASE)
++| ins_callt
++|.endmacro
++|
++|//-----------------------------------------------------------------------
++|
++|.macro branch_RD
++| srli.w TMP0, RD, 1
++| addu16i.d TMP4, r0, -0x2 // -BCBIAS_J*4
++| add.w TMP0, TMP0, TMP4 // (jump - 0x8000)<<2
++| add.d PC, PC, TMP0
++|.endmacro
++|
++|// Assumes DISPATCH is relative to GL.
++#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
++#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
++|
++#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
++|
++|.macro hotcheck, delta, target
++| srli.d TMP1, PC, 1
++| andi TMP1, TMP1, 126
++| add.d TMP1, TMP1, DISPATCH
++| ld.hu TMP2, GG_DISP2HOT(TMP1)
++| addi.w TMP2, TMP2, -delta
++| st.h TMP2, GG_DISP2HOT(TMP1)
++| blt TMP2, r0, target
++|.endmacro
++|
++|.macro hotloop
++| hotcheck HOTCOUNT_LOOP, ->vm_hotloop
++|.endmacro
++|
++|.macro hotcall
++| hotcheck HOTCOUNT_CALL, ->vm_hotcall
++|.endmacro
++|
++|// Set current VM state. Uses TMP0.
++|.macro li_vmstate, st; addi.w TMP0, r0, ~LJ_VMST_..st; .endmacro
++|.macro st_vmstate; .STXW TMP0, DISPATCH, DISPATCH_GL(vmstate); .endmacro
++|
++|// Move table write barrier back. Overwrites mark and tmp.
++|.macro barrierback, tab, mark, tmp, target
++| .LDXD tmp, DISPATCH, DISPATCH_GL(gc.grayagain)
++| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab)
++| .STXD tab, DISPATCH, DISPATCH_GL(gc.grayagain)
++| st.b mark, tab->marked
++| st.d tmp, tab->gclist
++| b target
++|.endmacro
++|
++|// Clear type tag. Isolate lowest 47 bits of reg.
++|.macro cleartp, reg; bstrpick.d reg, reg, 46, 0; .endmacro
++|.macro cleartp, dst, reg; bstrpick.d dst, reg, 46, 0; .endmacro
++|
++|// Set type tag: Merge 17 type bits into bits [47, 63] of dst.
++|.macro settp, dst, tp; bstrins.d dst, tp, 63, 47; .endmacro
++|
++|// Extract (negative) type tag.
++|.macro gettp, dst, src; srai.d dst, src, 47; .endmacro
++|
++|// Macros to check the TValue type and extract the GCobj. Branch to target on failure. All of them clobber TMP4.
++|.macro checktp, reg, tp, target
++| gettp TMP4, reg
++| addi.d TMP4, TMP4, tp // tp is passed as the negated tag, so the sum is zero on a match.
++| cleartp reg // The tag is stripped from reg even when the check fails.
++| bnez TMP4, target
++|.endmacro
++|.macro checktp, dst, reg, tp, target
++| gettp TMP4, reg
++| addi.d TMP4, TMP4, tp // tp is passed as the negated tag, so the sum is zero on a match.
++| cleartp dst, reg // dst receives the untagged value even when the check fails.
++| bnez TMP4, target
++|.endmacro
++|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro
++|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro
++|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro
++|.macro checkint, reg, target
++| gettp TMP4, reg
++| bne TMP4, TISNUM, target // Tag must equal the TISNUM register. reg itself is left unchanged.
++|.endmacro
++|.macro checknum, reg, target
++| gettp TMP4, reg
++| sltui TMP4, TMP4, LJ_TISNUM // TMP4 = 1 if the tag is below LJ_TISNUM in an unsigned compare.
++| beqz TMP4, target // Otherwise fail. reg itself is left unchanged.
++|.endmacro
++|// Load the 64 bit constants for the boolean primitives: all bits set except bit 47 (false) or bit 48 (true).
++|.macro mov_false, reg
++| addi.d reg, r0, 0x0001
++| slli.d reg, reg, 47
++| nor reg, reg, r0 // reg = ~(1 << 47).
++|.endmacro
++|.macro mov_true, reg
++| addi.d reg, r0, 0x0001
++| slli.d reg, reg, 48
++| nor reg, reg, r0 // reg = ~(1 << 48).
++|.endmacro
++|
++|//-----------------------------------------------------------------------
++
++/* Generate subroutines used by opcodes and other parts of the VM. */
++/* The .code_sub section should be last to help static branch prediction. */
++static void build_subroutines(BuildCtx *ctx)
++{
++ |.code_sub
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Return handling ----------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_returnp:
++ | // See vm_return. Also: TMP2 = previous base.
++ | andi TMP0, PC, FRAME_P
++ |
++ | // Return from pcall or xpcall fast func.
++ | mov_true TMP1
++ | beqz TMP0, ->cont_dispatch
++ | ld.d PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
++ | or BASE, TMP2, r0 // Restore caller base.
++ | // Prepending may overwrite the pcall frame, so do it at the end.
++ | st.d TMP1, -8(RA) // Prepend true to results.
++ | addi.d RA, RA, -8
++ |
++ |->vm_returnc:
++ | addi.w RD, RD, 8 // RD = (nresults+1)*8.
++ | andi TMP0, PC, FRAME_TYPE
++ | addi.w CRET1, r0, LUA_YIELD
++ | beqz RD, ->vm_unwind_c_eh
++ | or MULTRES, RD, r0
++ | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua.
++ |
++ |->vm_return:
++ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
++ | // TMP0 = PC & FRAME_TYPE
++ | addi.w TMP2, r0, -8 // TMP2 = 0xfffffff8
++ | xori TMP0, TMP0, FRAME_C
++ | and TMP2, PC, TMP2
++ | sub.d TMP2, BASE, TMP2 // TMP2 = previous base.
++ | bnez TMP0, ->vm_returnp
++ |
++ | addi.w TMP1, RD, -8
++ | st.d TMP2, L->base
++ | li_vmstate C
++ | ld.w TMP2, SAVE_NRES(sp)
++ | addi.d BASE, BASE, -16
++ | st_vmstate
++ | slli.w TMP2, TMP2, 3
++ | beqz TMP1, >2
++ |1:
++ | addi.w TMP1, TMP1, -8
++ | ld.d CRET1, 0(RA)
++ | addi.d RA, RA, 8
++ | st.d CRET1, 0(BASE)
++ | addi.d BASE, BASE, 8
++ | bnez TMP1, <1
++ |
++ |2:
++ | bne TMP2, RD, >6
++ |3:
++ | st.d BASE, L->top // Store new top.
++ |
++ |->vm_leave_cp:
++ | ld.d TMP0, SAVE_CFRAME(sp) // Restore previous C frame.
++ | or CRET1, r0, r0 // Ok return status for vm_pcall.
++ | st.d TMP0, L->cframe
++ |
++ |->vm_leave_unw:
++ | restoreregs_ret
++ |
++ |6:
++ | ld.d TMP1, L->maxstack
++ | slt TMP0, TMP2, RD
++ | // More results wanted. Check stack size and fill up results with nil.
++ | slt TMP1, BASE, TMP1
++ | bnez TMP0, >7
++ | beqz TMP1, >8
++ | st.d TISNIL, 0(BASE)
++ | addi.w RD, RD, 8
++ | addi.d BASE, BASE, 8
++ | b <2
++ |
++ |7: // Less results wanted.
++ | sub.w TMP0, RD, TMP2
++ | sub.d TMP0, BASE, TMP0 // Either keep top or shrink it.
++ | maskeqz TMP0, TMP0, TMP2 // LUA_MULTRET+1 case?
++ | masknez BASE, BASE, TMP2
++ | or BASE, BASE, TMP0
++ | b <3
++ |
++ |8: // Corner case: need to grow stack for filling up results.
++ | // This can happen if:
++ | // - A C function grows the stack (a lot).
++ | // - The GC shrinks the stack in between.
++ | // - A return back from a lua_call() with (high) nresults adjustment.
++ |
++ | st.d BASE, L->top // Save current top held in BASE (yes).
++ | or MULTRES, RD, r0
++ | srli.w CARG2, TMP2, 3
++ | or CARG1, L, r0
++ | bl extern lj_state_growstack // (lua_State *L, int n)
++ | ld.w TMP2, SAVE_NRES(sp)
++ | ld.d BASE, L->top // Need the (realloced) L->top in BASE.
++ | or RD, MULTRES, r0
++ | slli.w TMP2, TMP2, 3
++ | b <2
++ |
++ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
++ | // (void *cframe, int errcode)
++ | or sp, CARG1, r0
++ | or CRET1, CARG2, r0
++ |->vm_unwind_c_eh: // Landing pad for external unwinder.
++ | ld.d L, SAVE_L(sp)
++ | addi.w TMP0, r0, ~LJ_VMST_C
++ | ld.d GL:TMP1, L->glref
++ | st.w TMP0, GL:TMP1->vmstate
++ | b ->vm_leave_unw
++ |
++ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
++ | // (void *cframe)
++ | addi.d TMP3, r0, CFRAME_RAWMASK
++ | and sp, CARG1, TMP3
++ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
++ | ld.d L, SAVE_L(sp)
++ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
++ | addi.d TISNIL, r0, LJ_TNIL
++ | addi.d TISNUM, r0, LJ_TISNUM
++ | ld.d BASE, L->base
++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table.
++ | movgr2fr.w TOBIT, TMP3
++ | mov_false TMP1
++ | li_vmstate INTERP
++ | ld.d PC, FRAME_PC(BASE) // Fetch PC of previous frame.
++ | fcvt.d.s TOBIT, TOBIT
++ | addi.d RA, BASE, -8 // Results start at BASE-8.
++ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP
++ | st.d TMP1, 0(RA) // Prepend false to error message.
++ | st_vmstate
++ | addi.d RD, r0, 16 // 2 results: false + error message.
++ | b ->vm_returnc
++ |
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Grow stack for calls -----------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_growstack_c: // Grow stack for C function.
++ | addi.d CARG2, r0, LUA_MINSTACK
++ | b >2
++ |
++ |->vm_growstack_l: // Grow stack for Lua function.
++ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
++ | add.d RC, BASE, RC
++ | sub.d RA, RA, BASE
++ | st.d BASE, L->base
++ | addi.d PC, PC, 4 // Must point after first instruction.
++ | st.d RC, L->top
++ | srli.w CARG2, RA, 3
++ |2:
++ | // L->base = new base, L->top = top
++ | st.d PC, SAVE_PC(sp)
++ | or CARG1, L, r0
++ | bl extern lj_state_growstack // (lua_State *L, int n)
++ | ld.d BASE, L->base
++ | ld.d RC, L->top
++ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
++ | sub.d RC, RC, BASE
++ | cleartp LFUNC:RB
++ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
++ | ins_callt // Just retry the call.
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Entry points into the assembler VM ---------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_resume: // Setup C frame and resume thread.
++ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
++ | saveregs
++ | or L, CARG1, r0
++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table.
++ | or BASE, CARG2, r0
++ | ld.bu TMP1, L->status
++ | st.d L, SAVE_L(sp)
++ | addi.d PC, r0, FRAME_CP
++ | addi.d TMP0, sp, CFRAME_RESUME
++ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP
++ | st.w r0, SAVE_NRES(sp)
++ | st.w r0, SAVE_ERRF(sp)
++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
++ | st.d r0, SAVE_CFRAME(sp)
++ | st.d TMP0, L->cframe
++ | beqz TMP1, >3
++ |
++ | // Resume after yield (like a return).
++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L)
++ | or RA, BASE, r0
++ | ld.d BASE, L->base
++ | ld.d TMP1, L->top
++ | ld.d PC, FRAME_PC(BASE)
++ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
++ | sub.d RD, TMP1, BASE
++ | movgr2fr.w TOBIT, TMP3
++ | st.b r0, L->status
++ | fcvt.d.s TOBIT, TOBIT
++ | li_vmstate INTERP
++ | addi.d RD, RD, 8
++ | st_vmstate
++ | or MULTRES, RD, r0
++ | andi TMP0, PC, FRAME_TYPE
++ | addi.d TISNIL, r0, LJ_TNIL
++ | addi.d TISNUM, r0, LJ_TISNUM
++ | beqz TMP0, ->BC_RET_Z
++ | b ->vm_return
++ |
++ |->vm_pcall: // Setup protected C frame and enter VM.
++ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
++ | saveregs
++ | st.w CARG4, SAVE_ERRF(sp)
++ | addi.d PC, r0, FRAME_CP
++ | b >1
++ |
++ |->vm_call: // Setup C frame and enter VM.
++ | // (lua_State *L, TValue *base, int nres1)
++ | saveregs
++ | addi.d PC, r0, FRAME_C
++ |
++ |1: // Entry point for vm_pcall above (PC = ftype).
++ | ld.d TMP1, L:CARG1->cframe
++ | or L, CARG1, r0
++ | st.w CARG3, SAVE_NRES(sp)
++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table.
++ | st.d CARG1, SAVE_L(sp)
++ | or BASE, CARG2, r0
++ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP
++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
++ | st.d TMP1, SAVE_CFRAME(sp)
++ | st.d sp, L->cframe // Add our C frame to cframe chain.
++ |
++ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L)
++ | ld.d TMP2, L->base // TMP2 = old base (used in vmeta_call).
++ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
++ | ld.d TMP1, L->top
++ | movgr2fr.w TOBIT, TMP3
++ | add.d PC, PC, BASE
++ | sub.d NARGS8:RC, TMP1, BASE
++ | addi.d TISNUM, r0, LJ_TISNUM
++ | sub.d PC, PC, TMP2 // PC = frame delta + frame type
++ | fcvt.d.s TOBIT, TOBIT
++ | li_vmstate INTERP
++ | addi.d TISNIL, r0, LJ_TNIL
++ | st_vmstate
++ |
++ |->vm_call_dispatch:
++ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
++ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
++ | checkfunc LFUNC:RB, ->vmeta_call
++ |
++ |->vm_call_dispatch_f:
++ | ins_call
++ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
++ |
++ |->vm_cpcall: // Setup protected C frame, call C.
++ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
++ | saveregs
++ | or L, CARG1, r0
++ | ld.d TMP0, L:CARG1->stack
++ | st.d CARG1, SAVE_L(sp)
++ | ld.d TMP1, L->top
++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table.
++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
++ | sub.d TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
++ | ld.d TMP1, L->cframe
++ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP
++ | st.w TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame.
++ | st.w r0, SAVE_ERRF(sp) // No error function.
++ | st.d TMP1, SAVE_CFRAME(sp)
++ | st.d sp, L->cframe // Add our C frame to cframe chain.
++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L)
++ | jirl r1, CARG4, 0 // (lua_State *L, lua_CFunction func, void *ud)
++ | or BASE, CRET1, r0
++ | addi.d PC, r0, FRAME_CP
++ | bnez CRET1, <3 // Else continue with the call.
++ | b ->vm_leave_cp // No base? Just remove C frame.
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Metamethod handling ------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |//-- Continuation dispatch ----------------------------------------------
++ |
++ |->cont_dispatch:
++ | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
++ | ld.d TMP0, -32(BASE) // Continuation.
++ | or RB, BASE, r0
++ | or BASE, TMP2, r0 // Restore caller BASE.
++ | ld.d LFUNC:TMP1, FRAME_FUNC(TMP2)
++ |.if FFI
++ | sltui TMP3, TMP0, 2
++ |.endif
++ | ld.d PC, -24(RB) // Restore PC from [cont|PC].
++ | cleartp LFUNC:TMP1
++ | add.d TMP2, RA, RD
++ | ld.d TMP1, LFUNC:TMP1->pc
++ | st.d TISNIL, -8(TMP2) // Ensure one valid arg.
++ |.if FFI
++ | bnez TMP3, >1
++ |.endif
++ | // BASE = base, RA = resultptr, RB = meta base
++ | ld.d KBASE, PC2PROTO(k)(TMP1)
++ | jirl r0, TMP0, 0 // Jump to continuation.
++ |
++ |.if FFI
++ |1:
++ | addi.d TMP1, RB, -32
++ | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback.
++ | // cont = 0: tailcall from C function.
++ | sub.d RC, TMP1, BASE
++ | b ->vm_call_tail
++ |.endif
++ |
++ |->cont_cat: // RA = resultptr, RB = meta base
++ | ld.w INS, -4(PC)
++ | addi.d CARG2, RB, -32
++ | ld.d TMP0, 0(RA)
++ | decode_RB MULTRES, INS
++ | decode_RA RA, INS
++ | add.d TMP1, BASE, MULTRES
++ | st.d BASE, L->base
++ | sub.d CARG3, CARG2, TMP1
++ | st.d TMP0, 0(CARG2)
++ | bne TMP1, CARG2, ->BC_CAT_Z
++ | add.d RA, BASE, RA
++ | st.d TMP0, 0(RA)
++ | b ->cont_nop
++ |
++ |//-- Table indexing metamethods -----------------------------------------
++ |
++ |->vmeta_tgets1:
++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv)
++ | addi.d TMP0, r0, LJ_TSTR
++ | settp STR:RC, TMP0
++ | st.d STR:RC, 0(CARG3)
++ | b >1
++ |
++ |->vmeta_tgets:
++ | .ADD16I CARG2, DISPATCH, DISPATCH_GL(tmptv)
++ | addi.d TMP0, r0, LJ_TTAB
++ | addi.d TMP1, r0, LJ_TSTR
++ | settp TAB:RB, TMP0
++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv2)
++ | st.d TAB:RB, 0(CARG2)
++ | settp STR:RC, TMP1
++ | st.d STR:RC, 0(CARG3)
++ | b >1
++ |
++ |->vmeta_tgetb: // TMP0 = index
++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv)
++ | settp TMP0, TISNUM
++ | st.d TMP0, 0(CARG3)
++ |
++ |->vmeta_tgetv:
++ |1:
++ | st.d BASE, L->base
++ | or CARG1, L, r0
++ | st.d PC, SAVE_PC(sp)
++ | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
++ | // Returns TValue * (finished) or NULL (metamethod).
++ | beqz CRET1, >3
++ | ld.d TMP0, 0(CRET1)
++ | st.d TMP0, 0(RA)
++ | ins_next
++ |
++ |3: // Call __index metamethod.
++ | // BASE = base, L->top = new base, stack = cont/func/t/k
++ | addi.d TMP1, BASE, -FRAME_CONT
++ | addi.d NARGS8:RC, r0, 16 // 2 args for func(t, k).
++ | ld.d BASE, L->top
++ | st.d PC, -24(BASE) // [cont|PC]
++ | sub.d PC, BASE, TMP1
++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
++ | cleartp LFUNC:RB
++ | b ->vm_call_dispatch_f
++ |
++ |->vmeta_tgetr:
++ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
++ | // Returns cTValue * or NULL.
++ | or TMP1, TISNIL, r0
++ | beqz CRET1, ->BC_TGETR_Z
++ | ld.d TMP1, 0(CRET1)
++ | b ->BC_TGETR_Z
++ |
++ |//-----------------------------------------------------------------------
++ |
++ |->vmeta_tsets1:
++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv)
++ | addi.d TMP0, r0, LJ_TSTR
++ | settp STR:RC, TMP0
++ | st.d STR:RC, 0(CARG3)
++ | b >1
++ |
++ |->vmeta_tsets:
++ | .ADD16I CARG2, DISPATCH, DISPATCH_GL(tmptv)
++ | addi.d TMP0, r0, LJ_TTAB
++ | addi.d TMP1, r0, LJ_TSTR
++ | settp TAB:RB, TMP0
++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv2)
++ | st.d TAB:RB, 0(CARG2)
++ | settp STR:RC, TMP1
++ | st.d STR:RC, 0(CARG3)
++ | b >1
++ |
++ |->vmeta_tsetb: // TMP0 = index
++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv)
++ | settp TMP0, TISNUM
++ | st.d TMP0, 0(CARG3)
++ |
++ |->vmeta_tsetv:
++ |1:
++ | st.d BASE, L->base
++ | or CARG1, L, r0
++ | st.d PC, SAVE_PC(sp)
++ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
++ | // Returns TValue * (finished) or NULL (metamethod).
++ | ld.d TMP2, 0(RA)
++ | beqz CRET1, >3
++ | // NOBARRIER: lj_meta_tset ensures the table is not black.
++ | st.d TMP2, 0(CRET1)
++ | ins_next
++ |
++ |3: // Call __newindex metamethod.
++ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
++ | addi.d TMP1, BASE, -FRAME_CONT
++ | ld.d BASE, L->top
++ | st.d PC, -24(BASE) // [cont|PC]
++ | sub.d PC, BASE, TMP1
++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
++ | addi.d NARGS8:RC, r0, 24 // 3 args for func(t, k, v)
++ | cleartp LFUNC:RB
++ | st.d TMP2, 16(BASE) // Copy value to third argument.
++ | b ->vm_call_dispatch_f
++ |
++ |->vmeta_tsetr:
++ | st.d BASE, L->base
++ | or CARG1, L, r0
++ | st.d PC, SAVE_PC(sp)
++ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
++ | // Returns TValue *.
++ | b ->BC_TSETR_Z
++ |
++ |//-- Comparison metamethods ---------------------------------------------
++ |
++ |->vmeta_comp:
++ | // RA/RD point to o1/o2.
++ | or CARG2, RA, r0
++ | or CARG3, RD, r0
++ | addi.d PC, PC, -4
++ | st.d BASE, L->base
++ | or CARG1, L, r0
++ | decode_OP CARG4, INS
++ | st.d PC, SAVE_PC(sp)
++ | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
++ | // Returns 0/1 or TValue * (metamethod).
++ |3:
++ | sltui TMP1, CRET1, 2
++ | beqz TMP1, ->vmeta_binop
++ | sub.w TMP2, r0, CRET1
++ |4:
++ | ld.hu RD, OFS_RD(PC)
++ | addi.d PC, PC, 4
++ | addu16i.d TMP1, r0, -0x2 // -BCBIAS_J*4
++ | slli.w RD, RD, 2
++ | add.w RD, RD, TMP1
++ | and RD, RD, TMP2
++ | add.d PC, PC, RD
++ |->cont_nop:
++ | ins_next
++ |
++ |->cont_ra: // RA = resultptr
++ | ld.bu TMP1, -4+OFS_RA(PC)
++ | ld.d TMP2, 0(RA)
++ | slli.w TMP1, TMP1, 3
++ | add.d TMP1, BASE, TMP1
++ | st.d TMP2, 0(TMP1)
++ | b ->cont_nop
++ |
++ |->cont_condt: // RA = resultptr
++ | ld.d TMP0, 0(RA)
++ | gettp TMP0, TMP0
++ | sltui TMP1, TMP0, LJ_TISTRUECOND
++ | sub.w TMP2, r0, TMP1 // Branch if result is true.
++ | b <4
++ |
++ |->cont_condf: // RA = resultptr
++ | ld.d TMP0, 0(RA)
++ | gettp TMP0, TMP0
++ | sltui TMP1, TMP0, LJ_TISTRUECOND
++ | addi.w TMP2, TMP1, -1 // Branch if result is false.
++ | b <4
++ |
++ |->vmeta_equal:
++ | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1.
++ | cleartp LFUNC:CARG3, CARG2
++ | cleartp LFUNC:CARG2, CARG1
++ | or CARG4, TMP0, r0
++ | addi.d PC, PC, -4
++ | st.d BASE, L->base
++ | or CARG1, L, r0
++ | st.d PC, SAVE_PC(sp)
++ | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
++ | // Returns 0/1 or TValue * (metamethod).
++ | b <3
++ |
++ |->vmeta_equal_cd:
++ |.if FFI
++ | or CARG2, INS, r0
++ | addi.d PC, PC, -4
++ | st.d BASE, L->base
++ | or CARG1, L, r0
++ | st.d PC, SAVE_PC(sp)
++ | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
++ | // Returns 0/1 or TValue * (metamethod).
++ | b <3
++ |.endif
++ |
++ |->vmeta_istype:
++ | addi.d PC, PC, -4
++ | st.d BASE, L->base
++ | or CARG1, L, r0
++ | srli.w CARG2, RA, 3
++ | srli.w CARG3, RD, 3
++ | st.d PC, SAVE_PC(sp)
++ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
++ | b ->cont_nop
++ |
++ |//-- Arithmetic metamethods ---------------------------------------------
++ |
++ |->vmeta_unm:
++ | or RC, RB, r0
++ |
++ |->vmeta_arith:
++ | st.d BASE, L->base
++ | or CARG2, RA, r0
++ | st.d PC, SAVE_PC(sp)
++ | or CARG3, RB, r0
++ | or CARG4, RC, r0
++ | decode_OP CARG5, INS
++ | or CARG1, L, r0
++ | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
++ | // Returns NULL (finished) or TValue * (metamethod).
++ | beqz CRET1, ->cont_nop
++ |
++ | // Call metamethod for binary op.
++ |->vmeta_binop:
++ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
++ | sub.d TMP1, CRET1, BASE
++ | st.d PC, -24(CRET1) // [cont|PC]
++ | or TMP2, BASE, r0
++ | addi.d PC, TMP1, FRAME_CONT
++ | or BASE, CRET1, r0
++ | addi.d NARGS8:RC, r0, 16 // 2 args for func(o1, o2).
++ | b ->vm_call_dispatch
++ |
++ |->vmeta_len:
++ | // CARG2 already set by BC_LEN.
++#if LJ_52
++ | or MULTRES, CARG1, r0
++#endif
++ | st.d BASE, L->base
++ | or CARG1, L, r0
++ | st.d PC, SAVE_PC(sp)
++ | bl extern lj_meta_len // (lua_State *L, TValue *o)
++ | // Returns NULL (retry) or TValue * (metamethod base).
++#if LJ_52
++ | bnez CRET1, ->vmeta_binop // Binop call for compatibility.
++ | or CARG1, MULTRES, r0
++ | b ->BC_LEN_Z
++#else
++ | b ->vmeta_binop // Binop call for compatibility.
++#endif
++ |
++ |//-- Call metamethod ----------------------------------------------------
++ |
++ |->vmeta_call: // Resolve and call __call metamethod.
++ | // TMP2 = old base, BASE = new base, RC = nargs*8
++ | st.d TMP2, L->base // This is the callers base!
++ | addi.d CARG2, BASE, -16
++ | st.d PC, SAVE_PC(sp)
++ | add.d CARG3, BASE, RC
++ | or CARG1, L, r0
++ | or MULTRES, NARGS8:RC, r0
++ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
++ | addi.d NARGS8:RC, MULTRES, 8 // Got one more argument now.
++ | cleartp LFUNC:RB
++ | ins_call
++ |
++ |->vmeta_callt: // Resolve __call for BC_CALLT.
++ | // BASE = old base, RA = new base, RC = nargs*8
++ | st.d BASE, L->base
++ | addi.d CARG2, RA, -16
++ | st.d PC, SAVE_PC(sp)
++ | add.d CARG3, RA, RC
++ | or CARG1, L, r0
++ | or MULTRES, NARGS8:RC, r0
++ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
++ | ld.d RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
++ | ld.d TMP1, FRAME_PC(BASE)
++ | addi.d NARGS8:RC, MULTRES, 8 // Got one more argument now.
++ | cleartp LFUNC:CARG3, RB
++ | b ->BC_CALLT_Z
++ |
++ |//-- Argument coercion for 'for' statement ------------------------------
++ |
++ |->vmeta_for:
++ | st.d BASE, L->base
++ | or CARG2, RA, r0
++ | st.d PC, SAVE_PC(sp)
++ | or MULTRES, INS, r0
++ | or CARG1, L, r0
++ | bl extern lj_meta_for // (lua_State *L, TValue *base)
++ |.if JIT
++ | decode_OP TMP0, MULTRES
++ | addi.d TMP1, r0, BC_JFORI
++ |.endif
++ | decode_RA RA, MULTRES
++ | decode_RD RD, MULTRES
++ |.if JIT
++ | beq TMP0, TMP1, =>BC_JFORI
++ |.endif
++ | b =>BC_FORI
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Fast functions -----------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |.macro .ffunc, name
++ |->ff_ .. name:
++ |.endmacro
++ |
++ |.macro .ffunc_1, name
++ |->ff_ .. name:
++ | ld.d CARG1, 0(BASE)
++ | beqz NARGS8:RC, ->fff_fallback
++ |.endmacro
++ |
++ |.macro .ffunc_2, name
++ |->ff_ .. name:
++ | sltui TMP0, NARGS8:RC, 16
++ | ld.d CARG1, 0(BASE)
++ | ld.d CARG2, 8(BASE)
++ | bnez TMP0, ->fff_fallback
++ |.endmacro
++ |
++ |.macro .ffunc_n, name
++ |->ff_ .. name:
++ | ld.d CARG1, 0(BASE)
++ | fld.d FARG1, 0(BASE)
++ | beqz NARGS8:RC, ->fff_fallback
++ | checknum CARG1, ->fff_fallback
++ |.endmacro
++ |
++ |.macro .ffunc_nn, name
++ |->ff_ .. name:
++ | ld.d CARG1, 0(BASE)
++ | ld.d CARG2, 8(BASE)
++ | sltui TMP0, NARGS8:RC, 16
++ | gettp TMP1, CARG1
++ | bnez TMP0, ->fff_fallback
++ | gettp TMP2, CARG2
++ | sltui TMP1, TMP1, LJ_TISNUM
++ | sltui TMP2, TMP2, LJ_TISNUM
++ | fld.d FARG1, 0(BASE)
++ | and TMP1, TMP1, TMP2
++ | fld.d FARG2, 8(BASE)
++ | beqz TMP1, ->fff_fallback
++ |.endmacro
++ |
++ |// Inlined GC threshold check.
++ |.macro ffgccheck
++ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total)
++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold)
++ | bltu TMP0, TMP1, >1
++ | bl ->fff_gcstep
++ |1:
++ |.endmacro
++ |
++ |//-- Base library: checks -----------------------------------------------
++ |.ffunc_1 assert
++ | gettp TMP1, CARG1
++ |// ld.d PC, FRAME_PC(BASE)
++ | sltui TMP1, TMP1, LJ_TISTRUECOND
++ | addi.d RA, BASE, -16
++ | beqz TMP1, ->fff_fallback
++ | ld.d PC, FRAME_PC(BASE)
++ | addi.w RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
++ | addi.d TMP1, BASE, 8
++ | add.d TMP2, RA, RD
++ | st.d CARG1, 0(RA)
++ | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
++ |1:
++ | ld.d TMP0, 0(TMP1)
++ | st.d TMP0, -16(TMP1)
++ | or TMP3, TMP1, r0
++ | addi.d TMP1, TMP1, 8
++ | bne TMP3, TMP2, <1
++ | b ->fff_res
++ |
++ |.ffunc_1 type
++ | gettp TMP0, CARG1
++ | addi.w TMP1, r0, ~LJ_TISNUM
++ | sltu TMP2, TISNUM, TMP0
++ | nor TMP3, TMP0, r0
++ | masknez TMP1, TMP1, TMP2
++ | maskeqz TMP3, TMP3, TMP2
++ | or TMP3, TMP3, TMP1
++ | slli.d TMP3, TMP3, 3
++ | add.d TMP3, CFUNC:RB, TMP3
++ | ld.d CARG1, CFUNC:TMP3->upvalue
++ | b ->fff_restv
++ |
++ |//-- Base library: getters and setters ---------------------------------
++ |
++ |.ffunc_1 getmetatable
++ | gettp TMP2, CARG1
++ | addi.d TMP0, TMP2, -LJ_TTAB
++ | addi.d TMP1, TMP2, -LJ_TUDATA
++ | maskeqz TMP0, TMP1, TMP0
++ | cleartp TAB:CARG1
++ | bnez TMP0, >6
++ |1: // Field metatable must be at same offset for GCtab and GCudata!
++ | ld.d TAB:RB, TAB:CARG1->metatable
++ |2:
++ | .LDXD STR:RC, DISPATCH, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])
++ | addi.d CARG1, r0, LJ_TNIL
++ | beqz TAB:RB, ->fff_restv
++ | ld.w TMP0, TAB:RB->hmask
++ | ld.w TMP1, STR:RC->sid
++ | ld.d NODE:TMP2, TAB:RB->node
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
++ | slli.d TMP0, TMP1, 5
++ | slli.d TMP1, TMP1, 3
++ | sub.d TMP1, TMP0, TMP1
++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
++ | addi.w CARG4, r0, LJ_TSTR
++ | settp STR:RC, CARG4 // Tagged key to look for.
++ |3: // Rearranged logic, because we expect _not_ to find the key.
++ | ld.d TMP0, NODE:TMP2->key
++ | ld.d CARG1, NODE:TMP2->val
++ | ld.d NODE:TMP2, NODE:TMP2->next
++ | addi.d TMP3, r0, LJ_TTAB
++ | beq RC, TMP0, >5
++ | bnez NODE:TMP2, <3
++ |4:
++ | or CARG1, RB, r0
++ | settp CARG1, TMP3
++ | b ->fff_restv // Not found, keep default result.
++ |5:
++ | bne CARG1, TISNIL, ->fff_restv
++ | b <4 // Ditto for nil value.
++ |
++ |6:
++ | sltui TMP3, TMP2, LJ_TISNUM
++ | maskeqz TMP0, TISNUM, TMP3
++ | masknez TMP3, TMP2, TMP3
++ | or TMP2, TMP0, TMP3
++ | slli.d TMP2, TMP2, 3
++ | sub.d TMP0, DISPATCH, TMP2
++ | .LDXD TAB:RB, TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8
++ | b <2
++ |
++ |.ffunc_2 setmetatable
++ | // Fast path: no mt for table yet and not clearing the mt.
++ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback
++ | gettp TMP3, CARG2
++ | ld.d TAB:TMP0, TAB:TMP1->metatable
++ | ld.bu TMP2, TAB:TMP1->marked
++ | addi.d TMP3, TMP3, -LJ_TTAB
++ | cleartp TAB:CARG2
++ | or TMP3, TMP3, TAB:TMP0
++ | bnez TMP3, ->fff_fallback
++ | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table)
++ | st.d TAB:CARG2, TAB:TMP1->metatable
++ | beqz TMP3, ->fff_restv
++ | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv
++ |
++ |.ffunc rawget
++ | ld.d CARG2, 0(BASE)
++ | sltui TMP0, NARGS8:RC, 16
++ | gettp TMP1, CARG2
++ | cleartp CARG2
++ | addi.d TMP1, TMP1, -LJ_TTAB
++ | or TMP0, TMP0, TMP1
++ | addi.d CARG3, BASE, 8
++ | bnez TMP0, ->fff_fallback
++ | or CARG1, L, r0
++ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
++ | // Returns cTValue *.
++ | ld.d CARG1, 0(CRET1)
++ | b ->fff_restv
++ |
++ |//-- Base library: conversions ------------------------------------------
++ |
++ |.ffunc tonumber
++ | // Only handles the number case inline (without a base argument).
++ | ld.d CARG1, 0(BASE)
++ | xori TMP0, NARGS8:RC, 8 // Exactly one number argument.
++ | gettp TMP1, CARG1
++ | sltu TMP1, TISNUM, TMP1
++ | or TMP0, TMP0, TMP1
++ | bnez TMP0, ->fff_fallback // No args or CARG1 is not number
++ | b ->fff_restv
++ |
++ |.ffunc_1 tostring
++ | // Only handles the string or number case inline.
++ | gettp TMP0, CARG1
++ | addi.d TMP1, TMP0, -LJ_TSTR
++ | // A __tostring method in the string base metatable is ignored.
++ | beqz TMP1, ->fff_restv // String key?
++ | // Handle numbers inline, unless a number base metatable is present.
++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])
++ | sltu TMP0, TISNUM, TMP0
++ | st.d BASE, L->base // Add frame since C call can throw.
++ | or TMP0, TMP0, TMP1
++ | bnez TMP0, ->fff_fallback
++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value).
++ | ffgccheck
++ | or CARG1, L, r0
++ | or CARG2, BASE, r0
++ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
++ | // Returns GCstr *.
++ | addi.d TMP1, r0, LJ_TSTR
++ |// ld.d BASE, L->base
++ | settp CARG1, TMP1
++ | b ->fff_restv
++ |
++ |//-- Base library: iterators -------------------------------------------
++ |
++ |.ffunc_1 next
++ | checktp CARG1, -LJ_TTAB, ->fff_fallback
++ | add.d TMP0, BASE, NARGS8:RC
++ | ld.d PC, FRAME_PC(BASE)
++ | st.d TISNIL, 0(TMP0) // Set missing 2nd arg to nil.
++ | addi.d CARG2, BASE, 8
++ | addi.d CARG3, BASE, -16
++ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
++ | // Returns 1=found, 0=end, -1=error.
++ |// addi.d RA, BASE, -16
++ | addi.d RD, r0, (2+1)*8
++ | blt r0, CRET1, ->fff_res // Found key/value.
++ | or TMP1, CRET1, r0
++ | or CARG1, TISNIL, r0
++ | beqz TMP1, ->fff_restv // End of traversal: return nil.
++ | ld.d CFUNC:RB, FRAME_FUNC(BASE)
++ | addi.w RC, r0, 2*8
++ | cleartp CFUNC:RB
++ | b ->fff_fallback // Invalid key.
++ |
++ |.ffunc_1 pairs
++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
++ | ld.d PC, FRAME_PC(BASE)
++#if LJ_52
++ | ld.d TAB:TMP2, TAB:TMP1->metatable
++ | ld.d TMP0, CFUNC:RB->upvalue[0]
++ | addi.d RA, BASE, -16
++ | bnez TAB:TMP2, ->fff_fallback
++#else
++ | ld.d TMP0, CFUNC:RB->upvalue[0]
++ | addi.d RA, BASE, -16
++#endif
++ | st.d TISNIL, 0(BASE)
++ | st.d CARG1, -8(BASE)
++ | st.d TMP0, 0(RA)
++ | addi.d RD, r0, (3+1)*8
++ | b ->fff_res
++ |
++ |.ffunc_2 ipairs_aux
++ | checktab CARG1, ->fff_fallback
++ | checkint CARG2, ->fff_fallback
++ | ld.w TMP0, TAB:CARG1->asize
++ | ld.d TMP1, TAB:CARG1->array
++ | ld.d PC, FRAME_PC(BASE)
++ | slli.w TMP2, CARG2, 0
++ | addi.w TMP2, TMP2, 1
++ | sltu TMP3, TMP2, TMP0
++ | addi.d RA, BASE, -16
++ | bstrpick.d TMP0, TMP2, 31, 0
++ | settp TMP0, TISNUM
++ | st.d TMP0, 0(RA)
++ | beqz TMP3, >2 // Not in array part?
++ | slli.d TMP3, TMP2, 3
++ | add.d TMP3, TMP1, TMP3
++ | ld.d TMP1, 0(TMP3)
++ |1:
++ | addi.d RD, r0, (0+1)*8
++ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
++ | st.d TMP1, -8(BASE)
++ | addi.d RD, r0, (2+1)*8
++ | b ->fff_res
++ |2: // Check for empty hash part first. Otherwise call C function.
++ | ld.w TMP0, TAB:CARG1->hmask
++ | addi.d RD, r0, (0+1)*8
++ | beqz TMP0, ->fff_res
++ | or CARG2, TMP2, r0
++ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
++ | // Returns cTValue * or NULL.
++ | addi.d RD, r0, (0+1)*8
++ | beqz CRET1, ->fff_res
++ | ld.d TMP1, 0(CRET1)
++ | b <1
++ |
++ |.ffunc_1 ipairs
++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
++ | ld.d PC, FRAME_PC(BASE)
++#if LJ_52
++ | ld.d TAB:TMP2, TAB:TMP1->metatable
++#endif
++ | ld.d CFUNC:TMP0, CFUNC:RB->upvalue[0]
++ | addi.d RA, BASE, -16
++#if LJ_52
++ | bnez TAB:TMP2, ->fff_fallback
++#endif
++ | slli.d TMP1, TISNUM, 47
++ | st.d CARG1, -8(BASE)
++ | st.d TMP1, 0(BASE)
++ | st.d CFUNC:TMP0, 0(RA)
++ | addi.d RD, r0, (3+1)*8
++ | b ->fff_res
++ |
++ |//-- Base library: catch errors ----------------------------------------
++ |
++ |.ffunc pcall
++ | addi.d NARGS8:RC, NARGS8:RC, -8
++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask)
++ | or TMP2, BASE, r0
++ | blt NARGS8:RC, r0, ->fff_fallback
++ | addi.d BASE, BASE, 16
++ | // Remember active hook before pcall.
++ | srli.w TMP3, TMP3, HOOK_ACTIVE_SHIFT
++ | andi TMP3, TMP3, 1
++ | addi.d PC, TMP3, 16+FRAME_PCALL
++ | beqz NARGS8:RC, ->vm_call_dispatch
++ |1:
++ | add.d TMP0, BASE, NARGS8:RC
++ |2:
++ | ld.d TMP1, -16(TMP0)
++ | st.d TMP1, -8(TMP0)
++ | addi.d TMP0, TMP0, -8
++ | bne TMP0, BASE, <2
++ | b ->vm_call_dispatch
++ |
++|.ffunc xpcall
++| addi.d NARGS8:TMP0, NARGS8:RC, -16 // TMP0 = nargs*8 minus the two fixed args (function, traceback).
++| ld.d CARG1, 0(BASE)
++| ld.d CARG2, 8(BASE)
++| .LDXBU TMP1, DISPATCH, DISPATCH_GL(hookmask) // TMP1 = hookmask, consumed by the shift below.
++| blt NARGS8:TMP0, r0, ->fff_fallback // Fewer than two arguments.
++| gettp TMP2, CARG2
++| addi.d TMP2, TMP2, -LJ_TFUNC
++| bnez TMP2, ->fff_fallback // Traceback must be a function.
++| or TMP2, BASE, r0 // TMP2 = old base, as expected by vm_call_dispatch.
++| or NARGS8:RC, NARGS8:TMP0, r0
++| addi.d BASE, BASE, 24
++| // Remember active hook before pcall.
++| srli.w TMP3, TMP1, HOOK_ACTIVE_SHIFT // Shift the loaded hookmask (TMP1); TMP3 is not set before this point.
++| st.d CARG2, 0(TMP2) // Swap function and traceback.
++| andi TMP3, TMP3, 1
++| st.d CARG1, 8(TMP2)
++| addi.d PC, TMP3, 24+FRAME_PCALL // PC = frame delta 24 + FRAME_PCALL + hook-active bit.
++| beqz NARGS8:RC, ->vm_call_dispatch
++| b <1 // Reuse the argument shifting loop at label 1 of ff_pcall above.
++ | b <1
++ |
++ |//-- Coroutine library --------------------------------------------------
++ |
++ |.macro coroutine_resume_wrap, resume
++ |.if resume
++ |.ffunc_1 coroutine_resume
++ | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
++ |.else
++ |.ffunc coroutine_wrap_aux
++ | ld.d L:CARG1, CFUNC:RB->upvalue[0].gcr
++ | cleartp L:CARG1
++ |.endif
++ | ld.bu TMP0, L:CARG1->status
++ | ld.d TMP1, L:CARG1->cframe
++ | ld.d CARG2, L:CARG1->top
++ | ld.d TMP2, L:CARG1->base
++ | addi.w CARG4, TMP0, -LUA_YIELD
++ | add.d CARG3, CARG2, TMP0
++ | addi.d TMP3, CARG2, 8
++ | masknez CARG2, CARG2, CARG4
++ | maskeqz TMP3, TMP3, CARG4
++ | or CARG2, TMP3, CARG2
++ | blt r0, CARG4, ->fff_fallback // st > LUA_YIELD?
++ | xor TMP2, TMP2, CARG3
++ | or CARG4, TMP2, TMP0
++ | bnez TMP1, ->fff_fallback // cframe != 0?
++ | ld.d TMP0, L:CARG1->maxstack
++ | ld.d PC, FRAME_PC(BASE)
++ | beqz CARG4, ->fff_fallback // base == top && st == 0?
++ | add.d TMP2, CARG2, NARGS8:RC
++ | sltu CARG4, TMP0, TMP2
++ | st.d BASE, L->base
++ | st.d PC, SAVE_PC(sp)
++ | bnez CARG4, ->fff_fallback // Stack overflow?
++ |1:
++ |.if resume
++ | addi.d BASE, BASE, 8 // Keep resumed thread in stack for GC.
++ | addi.d NARGS8:RC, NARGS8:RC, -8
++ | addi.d TMP2, TMP2, -8
++ |.endif
++ | st.d TMP2, L:CARG1->top
++ | st.d BASE, L->top
++ | add.d TMP1, BASE, NARGS8:RC
++ | or CARG3, CARG2, r0
++ |2: // Move args to coroutine.
++ | ld.d TMP0, 0(BASE)
++ | sltu TMP3, BASE, TMP1
++ | addi.d BASE, BASE, 8
++ | beqz TMP3, >3
++ | st.d TMP0, 0(CARG3)
++ | addi.d CARG3, CARG3, 8
++ | b <2
++ |3:
++ | or L:RA, L:CARG1, r0
++ | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
++ | // Returns thread status.
++ |4:
++ | ld.d TMP2, L:RA->base
++ | sltui TMP1, CRET1, LUA_YIELD+1
++ | ld.d TMP3, L:RA->top
++ | li_vmstate INTERP
++ | ld.d BASE, L->base
++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L)
++ | st_vmstate
++ | sub.d RD, TMP3, TMP2
++ | beqz TMP1, >8
++ | ld.d TMP0, L->maxstack
++ | add.d TMP1, BASE, RD
++ | beqz RD, >6 // No results?
++ | add.d TMP3, TMP2, RD
++ | bltu TMP0, TMP1, >9 // Need to grow stack?
++ | st.d TMP2, L:RA->top // Clear coroutine stack.
++ | or TMP1, BASE, r0
++ |5: // Move results from coroutine.
++ | ld.d TMP0, 0(TMP2)
++ | addi.d TMP2, TMP2, 8
++ | st.d TMP0, 0(TMP1)
++ | addi.d TMP1, TMP1, 8
++ | bltu TMP2, TMP3, <5
++ |6:
++ |.if resume
++ | mov_true TMP1
++ | addi.d RD, RD, 16
++ |7:
++ | st.d TMP1, -8(BASE) // Prepend true/false to results.
++ | addi.d RA, BASE, -8
++ |.else
++ | or RA, BASE, r0
++ | addi.d RD, RD, 8
++ |.endif
++ | andi TMP0, PC, FRAME_TYPE
++ | st.d PC, SAVE_PC(sp)
++ | or MULTRES, RD, r0
++ | beqz TMP0, ->BC_RET_Z
++ | b ->vm_return
++ |
++ |8: // Coroutine returned with error (at co->top-1).
++ |.if resume
++ | addi.d TMP3, TMP3, -8
++ | mov_false TMP1
++ | addi.w RD, r0, (2+1)*8
++ | ld.d TMP0, 0(TMP3)
++ | st.d TMP3, L:RA->top // Remove error from coroutine stack.
++ | st.d TMP0, 0(BASE) // Copy error message.
++ | b <7
++ |.else
++ | or CARG1, L, r0
++ | or CARG2, L:RA, r0
++ | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
++ |.endif
++ |
++ |9: // Handle stack expansion on return from yield.
++ | or CARG1, L, r0
++ | srli.w CARG2, RD, 3
++ | bl extern lj_state_growstack // (lua_State *L, int n)
++ | addi.d CRET1, r0, 0
++ | b <4
++ |.endmacro
++ |
++ | coroutine_resume_wrap 1 // coroutine.resume
++ | coroutine_resume_wrap 0 // coroutine.wrap
++ |
++ |.ffunc coroutine_yield // Fast path for coroutine.yield; falls back unless CFRAME_RESUME is set in L->cframe.
++ | ld.d TMP0, L->cframe
++ | add.d TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8, stored as L->top below.
++ | addi.w CRET1, r0, LUA_YIELD // Value handed to vm_leave_unw and stored in L->status.
++ | st.d BASE, L->base
++ | andi TMP0, TMP0, CFRAME_RESUME
++ | st.d TMP1, L->top
++ | beqz TMP0, ->fff_fallback // CFRAME_RESUME bit clear: let the fallback handler deal with it.
++ | st.d r0, L->cframe // Clear L->cframe.
++ | st.b CRET1, L->status // L->status = LUA_YIELD.
++ | b ->vm_leave_unw
++ |
++ |//-- Math library -------------------------------------------------------
++ |
++ |.macro math_round, func // Emits ->ff_math_<func>, which calls ->vm_<func> for number arguments.
++ |->ff_math_ .. func:
++ | ld.d CARG1, 0(BASE)
++ | gettp TMP0, CARG1
++ | beqz NARGS8:RC, ->fff_fallback // No arguments.
++ | beq TMP0, TISNUM, ->fff_restv // Integer-tagged argument: return CARG1 unchanged.
++ | fld.d FARG1, 0(BASE)
++ | bgeu TMP0, TISNUM, ->fff_fallback // Tag >= TISNUM (unsigned): not a number.
++ | bl ->vm_ .. func
++ | b ->fff_resn // Store FRET1 as the single result.
++ |.endmacro
++ |
++ | math_round floor
++ | math_round ceil
++ |
++ |.ffunc_1 math_abs
++ | gettp CARG2, CARG1
++ | addi.d TMP2, CARG2, -LJ_TISNUM
++ | slli.w TMP1, CARG1, 0
++ | bnez TMP2, >1
++ | srai.w TMP0, TMP1, 31 // Extract sign. int
++ | xor TMP1, TMP1, TMP0
++ | sub.d CARG1, TMP1, TMP0
++ | slli.d TMP3, CARG1, 32
++ | settp CARG1, TISNUM
++ | bge TMP3, r0, ->fff_restv
++ | ori CARG1, r0, 0x41e // 2^31 as a double.
++ | slli.w CARG1, CARG1, 4 // 0x41e0
++ | slli.d CARG1, CARG1, 48
++ | b ->fff_restv
++ |1:
++ | sltui TMP2, CARG2, LJ_TISNUM
++ | bstrpick.d CARG1, CARG1, 62, 0
++ | beqz TMP2, ->fff_fallback // int
++ |// fallthrough
++ |
++ |->fff_restv:
++ | // CARG1 = TValue result.
++ | ld.d PC, FRAME_PC(BASE)
++ | st.d CARG1, -16(BASE)
++ |->fff_res1:
++ | // RA = results, PC = return.
++ | addi.d RD, r0, (1+1)*8
++ |->fff_res:
++ | // RA = results, RD = (nresults+1)*8, PC = return.
++ | andi TMP0, PC, FRAME_TYPE
++ | or MULTRES, RD, r0
++ | addi.d RA, BASE, -16
++ | bnez TMP0, ->vm_return
++ | ld.w INS, -4(PC)
++ | decode_RB RB, INS
++ |5:
++ | sltu TMP2, RD, RB
++ | decode_RA TMP0, INS
++ | bnez TMP2, >6 // More results expected?
++ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
++ | sub.d BASE, RA, TMP0
++ | ins_next
++ |
++ |6: // Fill up results with nil.
++ | add.d TMP1, RA, RD
++ | addi.d RD, RD, 8
++ | st.d TISNIL, -8(TMP1)
++ | b <5
++ |
++ |.macro math_extern, func
++ | .ffunc_n math_ .. func
++ | bl extern func
++ | b ->fff_resn
++ |.endmacro
++ |
++ |.macro math_extern2, func
++ | .ffunc_nn math_ .. func
++ | bl extern func
++ | b ->fff_resn
++ |.endmacro
++ |
++ |.ffunc_n math_sqrt
++ | fsqrt.d FRET1, FARG1
++ |->fff_resn:
++ | ld.d PC, FRAME_PC(BASE)
++ | fst.d FRET1, -16(BASE)
++ | b ->fff_res1
++ |
++ |.ffunc math_log
++ | addi.d TMP1, r0, 8
++ | ld.d CARG1, 0(BASE)
++ | fld.d FARG1, 0(BASE)
++ | bne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument.
++ | checknum CARG1, ->fff_fallback
++ | bl extern log
++ | b ->fff_resn
++ |
++ | math_extern log10
++ | math_extern exp
++ | math_extern sin
++ | math_extern cos
++ | math_extern tan
++ | math_extern asin
++ | math_extern acos
++ | math_extern atan
++ | math_extern sinh
++ | math_extern cosh
++ | math_extern tanh
++ | math_extern2 pow
++ | math_extern2 atan2
++ | math_extern2 fmod
++ |
++ |.ffunc_2 math_ldexp
++ | checknum CARG1, ->fff_fallback
++ | checkint CARG2, ->fff_fallback
++ | fld.d FARG1, 0(BASE)
++ | ld.w CARG1, 8(BASE)
++ | bl extern ldexp // (double x, int exp)
++ | b ->fff_resn
++ |
++ |.ffunc_n math_frexp
++ | ld.d PC, FRAME_PC(BASE)
++ | .ADD16I CARG1, DISPATCH, DISPATCH_GL(tmptv)
++ | bl extern frexp
++ | .LDXW TMP1, DISPATCH, DISPATCH_GL(tmptv)
++ | movgr2fr.w FARG2, TMP1
++ | fst.d FRET1, -16(BASE)
++ | ffint.d.w FARG2, FARG2
++ | fst.d FARG2, -8(BASE)
++ | addi.d RD, r0, (2+1)*8
++ | b ->fff_res
++ |
++ |.ffunc_n math_modf
++ | addi.d CARG1, BASE, -16
++ | ld.d PC, FRAME_PC(BASE)
++ | bl extern modf
++ | fst.d FRET1, -8(BASE)
++ | addi.d RD, r0, (2+1)*8
++ | b ->fff_res
++ |
++ |.macro math_minmax, name, intins, intinsc, fpins // intins/intinsc: complementary mask ops for the integer select; fpins: FP min/max op.
++ | .ffunc_1 name
++ | add.d TMP3, BASE, NARGS8:RC // TMP3 = end of arguments.
++ | addi.d TMP2, BASE, 8 // TMP2 = pointer to the next argument.
++ | checkint CARG1, >4
++ |1: // Handle integers.
++ | ld.d CARG2, 0(TMP2)
++ | beq TMP2, TMP3, ->fff_restv
++ | slli.w CARG1, CARG1, 0
++ | checkint CARG2, >3
++ | slli.w CARG2, CARG2, 0
++ | slt TMP0, CARG1, CARG2
++ | intins TMP1, CARG2, TMP0 // One of TMP1/CARG1 is zeroed depending on TMP0 ...
++ | intinsc CARG1, CARG1, TMP0
++ | or CARG1, CARG1, TMP1 // ... so the OR yields the selected operand.
++ | addi.d TMP2, TMP2, 8
++ | bstrpick.d CARG1, CARG1, 31, 0
++ | settp CARG1, TISNUM
++ | b <1
++ |
++ |3: // Convert intermediate result to number and continue with number loop.
++ | movgr2fr.w FTMP3, CARG1
++ | checknum CARG2, ->fff_fallback
++ | ffint.d.w FTMP3, FTMP3
++ | fld.d FARG1, 0(TMP2)
++ | fmov.d FTMP4, FARG1
++ | b >6
++ |
++ |4:
++ | fld.d FTMP3, 0(BASE) // FTMP3 = running result (accumulator) of the number loop.
++ |5: // Handle numbers.
++ | ld.d CARG2, 0(TMP2)
++ | checknum CARG1, ->fff_fallback
++ | fld.d FTMP4, 0(TMP2)
++ | fmov.d FRET1, FTMP3 // Publish accumulator before a possible exit; NOTE(review): assumes .ffunc_1 does not preload FRET1.
++ | beq TMP2, TMP3, ->fff_resn
++ | checknum CARG2, >7
++ |6:
++ | fpins FTMP3, FTMP3, FTMP4 // Accumulate min/max of accumulator and current argument.
++ | addi.d TMP2, TMP2, 8
++ | b <5
++ |
++ |7: // Convert integer to number and continue with number loop.
++ | fld.s FTMP4, 0(TMP2) // Convert into FTMP4: label 6 reads its second operand from there.
++ | checkint CARG2, ->fff_fallback
++ | ffint.d.w FTMP4, FTMP4
++ | b <6
++ |.endmacro
++ |
++ | math_minmax math_min, masknez, maskeqz, fmin.d
++ | math_minmax math_max, maskeqz, masknez, fmax.d
++ |
++ |//-- String library -----------------------------------------------------
++ |
++ |.ffunc string_byte // Only handle the 1-arg case here.
++ | ld.d CARG1, 0(BASE)
++ | gettp TMP0, CARG1
++ | xori TMP1, NARGS8:RC, 8 // Zero iff nargs*8 == 8.
++ | addi.d TMP0, TMP0, -LJ_TSTR // Zero iff the type tag is LJ_TSTR.
++ | or TMP1, TMP1, TMP0
++ | cleartp STR:CARG1
++ | bnez TMP1, ->fff_fallback // Need exactly 1 string argument.
++ | ld.w TMP0, STR:CARG1->len
++ | ld.d PC, FRAME_PC(BASE)
++ | sltu RD, r0, TMP0 // RD = (len != 0).
++ | ld.bu TMP2, STR:CARG1[1] // Access is always ok (NUL at end).
++ | addi.w RD, RD, 1
++ | slli.w RD, RD, 3 // RD = ((str->len != 0)+1)*8
++ | settp TMP2, TISNUM // Tag the loaded byte as an integer.
++ | st.d TMP2, -16(BASE)
++ | b ->fff_res // RD selects zero results (empty string) or one.
++ |
++ |.ffunc string_char // Only handle the 1-arg case here.
++ | ffgccheck
++ | ld.d CARG1, 0(BASE)
++ | gettp TMP0, CARG1
++ | xori TMP1, NARGS8:RC, 8 // Need exactly 1 argument.
++ | addi.d TMP0, TMP0, -LJ_TISNUM // Integer.
++ | addi.d TMP2, r0, 255
++ | slli.w CARG1, CARG1, 0
++ | or TMP1, TMP1, TMP0
++ | sltu TMP2, TMP2, CARG1 // !(255 < n).
++ | or TMP1, TMP1, TMP2
++ | addi.d CARG3, r0, 1
++ | bnez TMP1, ->fff_fallback
++ | addi.d CARG2, sp, TMPD_OFS
++ | st.b CARG1, TMPD(sp)
++ |->fff_newstr:
++ | st.d BASE, L->base
++ | st.d PC, SAVE_PC(sp)
++ | or CARG1, L, r0
++ | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
++ | // Returns GCstr *.
++ | ld.d BASE, L->base
++ |->fff_resstr:
++ | addi.d TMP1, r0, LJ_TSTR
++ | settp CRET1, TMP1
++ | b ->fff_restv
++ |
++ |.ffunc string_sub
++ | ffgccheck
++ | ld.d CARG1, 0(BASE)
++ | ld.d CARG2, 8(BASE)
++ | ld.d CARG3, 16(BASE)
++ | addi.d TMP0, NARGS8:RC, -16
++ | gettp TMP1, CARG1
++ | blt TMP0, r0, ->fff_fallback
++ | cleartp STR:CARG1, CARG1
++ | addi.w CARG4, r0, -1
++ | beqz TMP0, >1
++ | slli.w CARG4, CARG3, 0
++ | checkint CARG3, ->fff_fallback
++ |1:
++ | checkint CARG2, ->fff_fallback
++ | addi.d TMP0, TMP1, -LJ_TSTR
++ | slli.w CARG3, CARG2, 0
++ | bnez TMP0, ->fff_fallback
++ | ld.w CARG2, STR:CARG1->len
++ | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
++ | addi.w TMP0, CARG2, 1
++ | slt TMP3, CARG4, r0
++ | add.w TMP2, CARG4, TMP0
++ | slt TMP1, CARG3, r0
++ | maskeqz TMP2, TMP2, TMP3
++ | masknez CARG4, CARG4, TMP3
++ | or CARG4, TMP2, CARG4 // if (end < 0) end += len+1
++ | add.w TMP2, CARG3, TMP0
++ | maskeqz TMP2, TMP2, TMP1
++ | masknez CARG3, CARG3, TMP1
++ | or CARG3, TMP2, CARG3 // if (start < 0) start += len+1
++ | addi.d TMP3, r0, 1
++ | slt TMP2, CARG4, r0
++ | slt TMP1, r0, CARG3
++ | masknez CARG4, CARG4, TMP2 // if (end < 0) end = 0
++ | maskeqz CARG3, CARG3, TMP1
++ | masknez TMP3, TMP3, TMP1
++ | slt TMP2, CARG2, CARG4
++ | or CARG3, TMP3, CARG3 // if (start < 1) start = 1
++ | masknez CARG4, CARG4, TMP2
++ | maskeqz CARG2, CARG2, TMP2
++ | or CARG4, CARG2, CARG4 // if (end > len) end = len
++ | add.d CARG2, STR:CARG1, CARG3
++ | sub.d CARG3, CARG4, CARG3 // len = end - start
++ | addi.d CARG2, CARG2, sizeof(GCstr)-1
++ | addi.w CARG3, CARG3, 1 // len += 1
++ | bge CARG3, r0, ->fff_newstr
++ |->fff_emptystr: // Return empty string.
++ | addi.d TMP1, r0, LJ_TSTR
++ | .ADD16I STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
++ | settp CARG1, TMP1
++ | b ->fff_restv
++ |
++ |.macro ffstring_op, name
++ | .ffunc string_ .. name
++ | ffgccheck
++ | ld.d CARG2, 0(BASE)
++ | beqz NARGS8:RC, ->fff_fallback
++ | checkstr STR:CARG2, ->fff_fallback
++ | .ADD16I SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
++ | ld.d TMP0, SBUF:CARG1->b
++ | st.d L, SBUF:CARG1->L
++ | st.d BASE, L->base
++ | st.d TMP0, SBUF:CARG1->w
++ | st.d PC, SAVE_PC(sp)
++ | bl extern lj_buf_putstr_ .. name
++ |// or SBUF:CARG1, SBUF:CRET1, r0
++ | bl extern lj_buf_tostr
++ | ld.d BASE, L->base
++ | b ->fff_resstr
++ |.endmacro
++ |
++ |ffstring_op reverse
++ |ffstring_op lower
++ |ffstring_op upper
++ |
++ |//-- Bit library --------------------------------------------------------
++ |
++ |->vm_tobit_fb: // Helper (called with bl): convert the number at 0(BASE) to a 32 bit value in CRET1.
++ | fld.d FARG1, 0(BASE)
++ | beqz TMP1, ->fff_fallback // Callers set TMP1 = (tag < LJ_TISNUM); zero means not a number.
++ | fadd.d FARG1, FARG1, TOBIT // Add the TOBIT bias constant.
++ | movfr2gr.s CRET1, FARG1 // Take the low 32 bits of the biased sum.
++ | bstrpick.d CRET1, CRET1, 31, 0 // Zero-extend to 64 bits.
++ | jirl r0, ra, 0
++ |
++ |.macro .ffunc_bit, name // Prologue for bit.* functions: leaves the first argument as a zero-extended 32 bit value in CRET1.
++ | .ffunc_1 bit_..name
++ | gettp TMP0, CARG1
++ | bstrpick.d CRET1, CARG1, 31, 0
++ | beq TMP0, TISNUM, >1 // Integer-tagged argument: done.
++ | sltui TMP1, TMP0, LJ_TISNUM // TMP1 = 1 if the tag is below LJ_TISNUM (a number).
++ | bl ->vm_tobit_fb
++ |1:
++ |.endmacro
++ |
++ |.macro .ffunc_bit_op, name, bins
++ | .ffunc_bit name
++ | addi.d TMP2, BASE, 8
++ | add.d TMP3, BASE, NARGS8:RC
++ |1:
++ | ld.d TMP1, 0(TMP2)
++ | beq TMP2, TMP3, ->fff_resi
++ | gettp TMP0, TMP1
++ | addi.d TMP2, TMP2, 8
++ | bne TMP0, TISNUM, >2
++ | bstrpick.d TMP1, TMP1, 31, 0
++ | bins CRET1, CRET1, TMP1
++ | b <1
++ |2:
++ | fld.d FARG1, -8(TMP2)
++ | sltui TMP0, TMP0, LJ_TISNUM
++ | fadd.d FARG1, FARG1, TOBIT
++ | beqz TMP0, ->fff_fallback
++ | movfr2gr.s TMP1, FARG1
++ | bstrpick.d TMP1, TMP1, 31, 0
++ | bins CRET1, CRET1, TMP1
++ | b <1
++ |.endmacro
++ |
++ |.ffunc_bit_op band, and
++ |.ffunc_bit_op bor, or
++ |.ffunc_bit_op bxor, xor
++ |
++ |.ffunc_bit bswap // Byte-swap the 32 bit value in CRET1 (bytes a,b,c,d from the top -> d,c,b,a).
++ | srli.d TMP0, CRET1, 8 // TMP0 = 0abc
++ | srli.d TMP1, CRET1, 24 // TMP1 = 000a
++ | srli.d TMP2,TMP0, 8 // TMP2 = 00ab
++ | andi TMP3, TMP2, 0xff // TMP3 = 000b
++ | slli.d TMP3, TMP3, 8 // TMP3 = 00b0
++ | bstrins.d TMP1, CRET1, 31, 24 // TMP1 = d00a (low byte of CRET1 into bits 31..24).
++ | bstrins.d TMP3, TMP0, 23, 16 // TMP3 = 0cb0 (low byte of TMP0 into bits 23..16).
++ | or CRET1, TMP1, TMP3 // CRET1 = dcba
++ | b ->fff_resi
++ |
++ |.ffunc_bit tobit
++ |->fff_resi:
++ | settp CARG1, TISNUM // CARG1 = CRET1
++ | b ->fff_restv
++ |
++ |.ffunc_bit bnot
++ | nor CRET1, CRET1, r0
++ | bstrpick.d CRET1, CRET1, 31, 0
++ | b ->fff_resi
++ |
++ |.macro .ffunc_bit_sh, name, shins, shmod // shins: shift/rotate instruction; shmod == 1 negates the count first.
++ | .ffunc_2 bit_..name
++ | gettp TMP0, CARG1
++ | beq TMP0, TISNUM, >1 // First argument already integer-tagged.
++ | sltui TMP1, TMP0, LJ_TISNUM // TMP1 = 1 if the tag is below LJ_TISNUM (a number).
++ | bl ->vm_tobit_fb
++ |// or CARG1, CRET1, r0 // CARG1 = CRET1
++ |1:
++ | gettp TMP0, CARG2
++ | bstrpick.d CARG2, CARG2, 31, 0
++ | bne TMP0, TISNUM, ->fff_fallback // Shift count must be integer-tagged.
++ | slli.w CARG1, CARG1, 0 // Keep the low 32 bits of the value, sign-extended.
++ |.if shmod == 1
++ | sub.w CARG2, r0, CARG2 // Negate the count (used below to build rol from rotr.w).
++ |.endif
++ | shins CRET1, CARG1, CARG2
++ | bstrpick.d CRET1, CRET1, 31, 0 // Zero-extend the 32 bit result.
++ | b ->fff_resi
++ |.endmacro
++ |
++ |.ffunc_bit_sh lshift, sll.w, 0
++ |.ffunc_bit_sh rshift, srl.w, 0
++ |.ffunc_bit_sh arshift, sra.w, 0
++ |.ffunc_bit_sh rol, rotr.w, 1
++ |.ffunc_bit_sh ror, rotr.w, 0
++ |
++ |//-----------------------------------------------------------------------
++ |
++ |->fff_fallback: // Call fast function fallback handler.
++ | // BASE = new base, RB = CFUNC, RC = nargs*8
++ | ld.d PC, FRAME_PC(BASE) // Fallback may overwrite PC.
++ | ld.d CARG3, CFUNC:RB->f
++ | add.d TMP1, BASE, NARGS8:RC
++ | st.d BASE, L->base
++ | addi.d TMP0, TMP1, 8*LUA_MINSTACK
++ | ld.d TMP2, L->maxstack
++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value).
++ | st.d TMP1, L->top
++ | or CARG1, L, r0
++ | bltu TMP2, TMP0, >5 // Need to grow stack.
++ | jirl r1, CARG3, 0 // (lua_State *L)
++ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
++ | ld.d BASE, L->base
++ | slli.w RD, CRET1, 3
++ | blt r0, CRET1, ->fff_res // Returned nresults+1?
++ |1: // Returned 0 or -1: retry fast path.
++ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
++ | ld.d TMP0, L->top
++ | sub.d NARGS8:RC, TMP0, BASE
++ | cleartp LFUNC:RB
++ | bnez CRET1, ->vm_call_tail // Returned -1?
++ | ins_callt // Returned 0: retry fast path.
++ |
++ |// Reconstruct previous base for vmeta_call during tailcall.
++ |->vm_call_tail:
++ | andi TMP0, PC, FRAME_TYPE
++ | addi.d TMP2, r0, ~FRAME_TYPEP // TODO
++ | and TMP1, PC, TMP2
++ | bnez TMP0, >3
++ | ld.bu TMP1, OFS_RA(PC)
++ | slli.w TMP1, TMP1, 3
++ | addi.w TMP1, TMP1, 16
++ |3:
++ | sub.d TMP2, BASE, TMP1
++ | b ->vm_call_dispatch // Resolve again for tailcall.
++ |
++ |5: // Grow stack for fallback handler.
++ | addi.d CARG2, r0, LUA_MINSTACK
++ | or CARG1, L, r0
++ | bl extern lj_state_growstack // (lua_State *L, int n)
++ | ld.d BASE, L->base
++ | addi.d CRET1, r0, 0 // Set zero-flag to force retry.
++ | b <1
++ |
++ |->fff_gcstep: // Call GC step function.
++ | // BASE = new base, RC = nargs*8
++ | or MULTRES, ra, r0
++ | add.d TMP0, BASE, NARGS8:RC // Calculate L->top.
++ | st.d BASE, L->base
++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value).
++ | or CARG1, L, r0
++ | st.d TMP0, L->top
++ | bl extern lj_gc_step // (lua_State *L)
++ | ld.d BASE, L->base
++ |// or ra, MULTRES, r0
++ | ld.d TMP0, L->top
++ | ld.d CFUNC:RB, FRAME_FUNC(BASE)
++ | cleartp CFUNC:RB
++ | sub.d NARGS8:RC, TMP0, BASE
++ | jirl r0, MULTRES, 0
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Special dispatch targets -------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_record: // Dispatch target for recording phase.
++ |.if JIT
++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask)
++ | andi TMP1, TMP3, HOOK_VMEVENT // No recording while in vmevent.
++ | bnez TMP1, >5
++ | // Decrement the hookcount for consistency, but always do the call.
++ | .LDXW TMP2, DISPATCH, DISPATCH_GL(hookcount)
++ | andi TMP1, TMP3, HOOK_ACTIVE
++ | bnez TMP1, >1
++ | addi.w TMP2, TMP2, -1
++ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
++ | beqz TMP1, >1
++ | .STXW TMP2, DISPATCH, DISPATCH_GL(hookcount)
++ | b >1
++ |.endif
++ |
++ |->vm_rethook: // Dispatch target for return hooks.
++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask)
++ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active?
++ | beqz TMP1, >1
++ |5: // Re-dispatch to static ins.
++ | ld.d TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4.
++ | jirl r0, TMP1, 0
++ |
++ |->vm_inshook: // Dispatch target for instr/line hooks.
++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask)
++ | .LDXW TMP2, DISPATCH, DISPATCH_GL(hookcount)
++ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active?
++ | bnez TMP1, <5
++ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
++ | addi.w TMP2, TMP2, -1
++ | beqz TMP1, <5
++ | .STXW TMP2, DISPATCH, DISPATCH_GL(hookcount)
++ | beqz TMP2, >1
++ | andi TMP1, TMP3, LUA_MASKLINE
++ | beqz TMP1, <5
++ |1:
++ | st.w MULTRES, TMPD(sp)
++ | or CARG2, PC, r0
++ | st.d BASE, L->base
++ | or CARG1, L, r0
++ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
++ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
++ |3:
++ | ld.d BASE, L->base
++ |4: // Re-dispatch to static ins.
++ | ld.w INS, -4(PC)
++ | decode_OP TMP1, INS
++ | decode_BC8b TMP1
++ | add.d TMP0, DISPATCH, TMP1
++ | decode_RD RD, INS
++ | ld.d TMP1, GG_DISP2STATIC(TMP0)
++ | decode_RA RA, INS
++ | jirl r0, TMP1, 0
++ |
++ |->cont_hook: // Continue from hook yield.
++ | addi.d PC, PC, 4
++ | ld.w MULTRES, -24(RB) // Restore MULTRES for *M ins.
++ | b <4
++ |
++ |->vm_hotloop: // Hot loop counter underflow.
++ |.if JIT
++ | ld.d LFUNC:TMP1, FRAME_FUNC(BASE)
++ | .ADD16I CARG1, DISPATCH, GG_DISP2J
++ | cleartp LFUNC:TMP1
++ | st.d PC, SAVE_PC(sp)
++ | ld.d TMP1, LFUNC:TMP1->pc
++ | or CARG2, PC, r0
++ | .STXD L, DISPATCH, DISPATCH_J(L)
++ | ld.bu TMP1, PC2PROTO(framesize)(TMP1)
++ | st.d BASE, L->base
++ | slli.d TMP1, TMP1, 3
++ | add.d TMP1, BASE, TMP1
++ | st.d TMP1, L->top
++ | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc)
++ | b <3
++ |.endif
++ |
++ |
++ |->vm_callhook: // Dispatch target for call hooks.
++ | or CARG2, PC, r0
++ |.if JIT
++ | b >1
++ |.endif
++ |
++ |->vm_hotcall: // Hot call counter underflow.
++ |.if JIT
++ | ori CARG2, PC, 1
++ |1:
++ |.endif
++ | add.d TMP0, BASE, RC
++ | st.d PC, SAVE_PC(sp)
++ | st.d BASE, L->base
++ | sub.d RA, RA, BASE
++ | st.d TMP0, L->top
++ | or CARG1, L, r0
++ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
++ | // Returns ASMFunction.
++ | ld.d BASE, L->base
++ | ld.d TMP0, L->top
++ | st.d r0, SAVE_PC(sp) // Invalidate for subsequent line hook.
++ | add.d RA, BASE, RA
++ | sub.d NARGS8:RC, TMP0, BASE
++ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
++ | cleartp LFUNC:RB
++ | ld.w INS, -4(PC)
++ | jirl r0, CRET1, 0
++ |
++ |->cont_stitch: // Trace stitching.
++ |.if JIT
++ | // RA = resultptr, RB = meta base
++ | ld.w INS, -4(PC)
++ | ld.d TRACE:TMP2, -40(RB) // Save previous trace.
++ | decode_RA RC, INS
++ | addi.d TMP1, MULTRES, -8
++ | cleartp TRACE:TMP2
++ | add.d RC, BASE, RC // Call base.
++ | beqz TMP1, >2
++ |1: // Move results down.
++ | ld.d CARG1, 0(RA)
++ | addi.d TMP1, TMP1, -8
++ | addi.d RA, RA, 8
++ | st.d CARG1, 0(RC)
++ | addi.d RC, RC, 8
++ | bnez TMP1, <1
++ |2:
++ | decode_RA RA, INS
++ | decode_RB RB, INS
++ | add.d RA, RA, RB
++ | add.d RA, BASE, RA
++ |3:
++ | sltu TMP1, RC, RA
++ | bnez TMP1, >9 // More results wanted?
++ |
++ | ld.hu TMP3, TRACE:TMP2->traceno
++ | ld.hu RD, TRACE:TMP2->link
++ | beq RD, TMP3, ->cont_nop // Blacklisted.
++ | slli.w RD, RD, 3
++ | bnez RD, =>BC_JLOOP // Jump to stitched trace.
++ |
++ | // Stitch a new trace to the previous trace.
++ | st.w TMP3, DISPATCH_J(exitno)(DISPATCH)
++ | .STXD L, DISPATCH, DISPATCH_J(L)
++ | st.d BASE, L->base
++ | .ADD16I CARG1, DISPATCH, GG_DISP2J
++ | or CARG2, PC, r0
++ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
++ | ld.d BASE, L->base
++ | b ->cont_nop
++ |
++ |9:
++ | st.d TISNIL, 0(RC)
++ | addi.d RC, RC, 8
++ | b <3
++ |.endif
++ |
++ |->vm_profhook: // Dispatch target for profiler hook.
++#if LJ_HASPROFILE
++ | or CARG1, L, r0
++ | or CARG2, PC, r0
++ | st.d BASE, L->base
++ | st.w MULTRES, TMPD(sp)
++ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
++ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
++ | addi.d PC, PC, -4
++ | ld.d BASE, L->base
++ | b ->cont_nop
++#endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Trace exit handler -------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |.macro savex_, a, b
++ | fst.d f..a, a*8(sp)
++ | fst.d f..b, b*8(sp)
++ | st.d r..a, 32*8+a*8(sp)
++ | st.d r..b, 32*8+b*8(sp)
++ |.endmacro
++ |
++ |->vm_exit_handler:
++ |.if JIT
++ | addi.d sp, sp, -(32*8+32*8)
++ | savex_ 0, 2
++ | savex_ 4, 5
++ | savex_ 6, 7
++ | savex_ 8, 9
++ | savex_ 10, 11
++ | savex_ 12, 13
++ | savex_ 14, 15
++ | savex_ 16, 17
++ | savex_ 18, 19
++ | savex_ 20, 21
++ | savex_ 22, 23
++ | savex_ 24, 25
++ | savex_ 26, 27
++ | savex_ 28, 29
++ | savex_ 30, 31
++ | fst.d f1, 1*8(sp)
++ | fst.d f3, 3*8(sp)
++ | st.d r0, 32*8+1*8(sp) // Clear RID_TMP.
++ | addi.d TMP2, sp, 32*8+32*8 // Recompute original value of sp.
++ | st.d TMP2, 32*8+3*8(sp) // Store sp in RID_SP
++ | li_vmstate EXIT
++ | .ADD16I DISPATCH, JGL, -GG_DISP2G-32768
++ | ld.w TMP1, 0(TMP2) // Load exit number.
++ | st_vmstate
++ | .LDXD L, DISPATCH, DISPATCH_GL(cur_L)
++ | .LDXD BASE, DISPATCH, DISPATCH_GL(jit_base)
++ | .STXD L, DISPATCH, DISPATCH_J(L)
++ | st.w ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
++ | st.d BASE, L->base
++ | st.w TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
++ | .ADD16I CARG1, DISPATCH, GG_DISP2J
++ | .STXD r0, DISPATCH, DISPATCH_GL(jit_base)
++ | or CARG2, sp, r0
++ | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
++ | // Returns MULTRES (unscaled) or negated error code.
++ | ld.d TMP1, L->cframe
++ | addi.d TMP2, r0, -4
++ | ld.d BASE, L->base
++ | and sp, TMP1, TMP2
++ | ld.d PC, SAVE_PC(sp) // Get SAVE_PC.
++ | st.d L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield).
++ | b >1
++ |.endif
++ |
++ |->vm_exit_interp:
++ |.if JIT
++ | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
++ | ld.d L, SAVE_L(sp)
++ | .ADD16I DISPATCH, JGL, -GG_DISP2G-32768
++ | st.d BASE, L->base
++ |1:
++ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
++ | sltui TMP0, CRET1, -LUA_ERRERR
++ | beqz TMP0, >9
++ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
++ | slli.d MULTRES, CRET1, 3
++ | cleartp LFUNC:RB
++ | st.w MULTRES, TMPD(sp)
++ | addi.d TISNIL, r0, LJ_TNIL
++ | addi.d TISNUM, r0, LJ_TISNUM // Setup type comparison constants.
++ | movgr2fr.w TOBIT, TMP3
++ | ld.d TMP1, LFUNC:RB->pc
++ | .STXD r0, DISPATCH, DISPATCH_GL(jit_base)
++ | ld.d KBASE, PC2PROTO(k)(TMP1)
++ | fcvt.d.s TOBIT, TOBIT
++ | // Modified copy of ins_next which handles function header dispatch, too.
++ | ld.w INS, 0(PC)
++ | addi.d PC, PC, 4
++ | addi.d CRET1, CRET1, 17
++ | // Assumes TISNIL == ~LJ_VMST_INTERP == -1
++ | .STXW TISNIL, DISPATCH, DISPATCH_GL(vmstate)
++ | decode_RD RD, INS
++ | beqz CRET1, >5
++ | decode_OP TMP1, INS
++ | decode_BC8b TMP1
++ | add.d TMP0, DISPATCH, TMP1
++ | sltui TMP2, TMP1, BC_FUNCF*8
++ | ld.d TMP3, 0(TMP0)
++ | decode_RA RA, INS
++ | beqz TMP2, >2
++ | jirl r0, TMP3, 0
++ |2:
++ | sltui TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function?
++ | ld.d TMP1, FRAME_PC(BASE)
++ | bnez TMP2, >3
++ | // Check frame below fast function.
++ | andi TMP0, TMP1, FRAME_TYPE
++ | bnez TMP0, >3 // Trace stitching continuation?
++ | // Otherwise set KBASE for Lua function below fast function.
++ | ld.w TMP2, -4(TMP1)
++ | decode_RA TMP0, TMP2
++ | sub.d TMP1, BASE, TMP0
++ | ld.d LFUNC:TMP2, -32(TMP1)
++ | cleartp LFUNC:TMP2
++ | ld.d TMP1, LFUNC:TMP2->pc
++ | ld.d KBASE, PC2PROTO(k)(TMP1)
++ |3:
++ | addi.d RC, MULTRES, -8
++ | add.d RA, RA, BASE
++ | jirl r0, TMP3, 0
++ |
++ |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
++ | .LDXD TMP0, DISPATCH, DISPATCH_J(trace)
++ | add.d TMP0, TMP0, RD
++ | ld.d TRACE:TMP2, 0(TMP0)
++ | ld.w INS, TRACE:TMP2->startins
++ | decode_OP TMP1, INS
++ | decode_BC8b TMP1
++ | add.d TMP0, DISPATCH, TMP1
++ | decode_RD RD, INS
++ | ld.d TMP4, GG_DISP2STATIC(TMP0)
++ | decode_RA RA, INS
++ | jirl r0, TMP4, 0
++ |
++ |9: // Rethrow error from the right C frame.
++ | sub.w CARG2, r0, CRET1 //TODO LA: sub.w no trap
++ | or CARG1, L, r0
++ | bl extern lj_err_trace // (lua_State *L, int errcode)
++ |.endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Math helper functions ----------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |// Hard-float round to integer. Input in FARG1; func is floor, ceil or trunc.
++ |// Modifies TMP0, TMP1, FARG1, FARG5, FTMP1, FTMP3, FTMP4
++ |.macro vm_round_hf, func
++ | addu16i.d TMP0, r0, 0x4330 // Hiword of 2^52 (double).
++ | slli.d TMP0, TMP0, 32
++ | movgr2fr.d FARG5, TMP0
++ | fabs.d FTMP4, FARG1 // |x|
++ | movfr2gr.d TMP1, FARG1
++ | fcmp.clt.d FCC0, FTMP4, FARG5
++ | fadd.d FTMP3, FTMP4, FARG5 // (|x| + 2^52) - 2^52
++ | fsub.d FTMP3, FTMP3, FARG5
++ | bceqz FCC0, >1 // Truncate only if |x| < 2^52.
++ | slt TMP1, TMP1, r0 // TMP1 = 1 if the sign bit of x is set.
++ |.if "func" == "ceil"
++ | addu16i.d TMP0, r0, 0xbff0
++ |.else
++ | addu16i.d TMP0, r0, 0x3ff0 // Hiword of +1 (double).
++ |.endif
++ |.if "func" == "trunc"
++ | slli.d TMP0, TMP0, 32
++ | movgr2fr.d FARG5, TMP0
++ | fcmp.clt.d FCC0, FTMP4, FTMP3 // |x| < result? The rounded magnitude is in FTMP3.
++ | fsub.d FTMP4, FTMP3, FARG5
++ | fsel FTMP1, FTMP3, FTMP4, FCC0 // If yes, take result - 1.
++ | movgr2fr.d FTMP3, TMP1
++ | fneg.d FTMP4, FTMP1
++ | movfr2cf FCC0, FTMP3
++ | fsel FTMP3, FTMP1, FTMP4, FCC0 // Merge sign back in. NOTE(review): result is left in FTMP3 only; FARG1 is not updated on this path - confirm callers.
++ | jirl r0, ra, 0
++ |.else
++ | fneg.d FTMP4, FTMP3
++ | slli.d TMP0, TMP0, 32
++ | movgr2fr.d FARG5, TMP0
++ | movgr2fr.d FTMP1, TMP1
++ | movfr2cf FCC0, FTMP1
++ | fsel FTMP1, FTMP3, FTMP4, FCC0 // Merge sign back in.
++ |.if "func" == "ceil"
++ | fcmp.clt.d FCC0, FTMP1, FARG1 // x > result?
++ |.else
++ | fcmp.clt.d FCC0, FARG1, FTMP1 // x < result?
++ |.endif
++ | fsub.d FTMP4, FTMP1, FARG5 // If yes, subtract +-1.
++ | fsel FTMP3, FTMP1, FTMP4, FCC0
++ | fmov.d FARG1, FTMP3
++ | jirl r0, ra, 0
++ |.endif
++ |1:
++ | fmov.d FTMP3, FARG1 // |x| >= 2^52 (or NaN): return x unchanged.
++ | jirl r0, ra, 0
++ |.endmacro
++ |
++ |
++ |->vm_floor:
++ | vm_round_hf floor
++ |->vm_ceil:
++ | vm_round_hf ceil
++ |->vm_trunc:
++ |.if JIT
++ | vm_round_hf trunc
++ |.endif
++ |
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Miscellaneous functions --------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |.define NEXT_TAB, TAB:CARG1
++ |.define NEXT_IDX, CARG2
++ |.define NEXT_ASIZE, CARG3
++ |.define NEXT_NIL, CARG4
++ |.define NEXT_TMP0, TMP0
++ |.define NEXT_TMP1, TMP1
++ |.define NEXT_TMP2, TMP2
++ |.define NEXT_RES_VK, CRET1
++ |.define NEXT_RES_IDX, CRET2
++ |.define NEXT_RES_PTR, sp
++ |.define NEXT_RES_VAL, 0(sp)
++ |.define NEXT_RES_KEY, 8(sp)
++ |
++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
++ |// Next idx returned in CRET2.
++ |->vm_next:
++ |.if JIT
++ | ld.w NEXT_ASIZE, NEXT_TAB->asize
++ | ld.d NEXT_TMP0, NEXT_TAB->array
++ | addi.d NEXT_NIL, r0, LJ_TNIL
++ |1: // Traverse array part.
++ | sltu TMP3, NEXT_IDX, NEXT_ASIZE // idx < asize?
++ | slli.w NEXT_TMP1, NEXT_IDX, 3
++ | add.d NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 // Address of array slot idx (8 bytes per slot).
++ | beqz TMP3, >5
++ | addi.d TMP3, r0, LJ_TISNUM
++ | ld.d NEXT_TMP2, 0(NEXT_TMP1)
++ | slli.d TMP3, TMP3, 47
++ | or NEXT_TMP1, NEXT_IDX, TMP3 // Key = idx | (LJ_TISNUM << 47).
++ | addi.w NEXT_IDX, NEXT_IDX, 1
++ | beq NEXT_TMP2, NEXT_NIL, <1 // Skip nil slots in array part.
++ | st.d NEXT_TMP2, NEXT_RES_VAL
++ | st.d NEXT_TMP1, NEXT_RES_KEY
++ | or NEXT_RES_VK, NEXT_RES_PTR, r0 // Return pointer to the value/key pair just stored.
++ | or NEXT_RES_IDX, NEXT_IDX, r0
++ | jirl r0, ra, 0
++ |
++ |5: // Traverse hash part.
++ | sub.w NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE // Hash index = idx - asize.
++ | ld.w NEXT_TMP0, NEXT_TAB->hmask
++ | ld.d NODE:NEXT_RES_VK, NEXT_TAB->node
++ | slli.w NEXT_TMP2, NEXT_RES_IDX, 5
++ | slli.w TMP3, NEXT_RES_IDX, 3
++ | sub.w TMP3, NEXT_TMP2, TMP3 // idx*32 - idx*8 = idx*24; presumably sizeof(Node) - TODO confirm.
++ | add.d NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, TMP3
++ |6:
++ | sltu TMP3, NEXT_TMP0, NEXT_RES_IDX // Hash index > hmask?
++ | bnez TMP3, >8
++ | ld.d NEXT_TMP2, NODE:NEXT_RES_VK->val
++ | addi.w NEXT_RES_IDX, NEXT_RES_IDX, 1
++ | bne NEXT_TMP2, NEXT_NIL, >9 // Non-nil value: return this node.
++ | // Skip holes in hash part.
++ | addi.d NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
++ | b <6
++ |
++ |8: // End of iteration. Set the key to nil (not the value).
++ | st.d NEXT_NIL, NEXT_RES_KEY
++ | or NEXT_RES_VK, NEXT_RES_PTR, r0
++ |9:
++ | add.w NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE // Convert hash index back to the combined index.
++ | jirl r0, ra, 0
++ |.endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- FFI helper functions -----------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |// Handler for callback functions. Callback slot number in r19, g in r17.
++ |->vm_ffi_callback:
++ |.if FFI
++ |.type CTSTATE, CTState, PC
++ | saveregs
++ | ld.d CTSTATE, GL:r17->ctype_state
++ | .ADD16I DISPATCH, r17, GG_G2DISP
++ | st.w r19, CTSTATE->cb.slot
++ | st.d CARG1, CTSTATE->cb.gpr[0]
++ | fst.d FARG1, CTSTATE->cb.fpr[0]
++ | st.d CARG2, CTSTATE->cb.gpr[1]
++ | fst.d FARG2, CTSTATE->cb.fpr[1]
++ | st.d CARG3, CTSTATE->cb.gpr[2]
++ | fst.d FARG3, CTSTATE->cb.fpr[2]
++ | st.d CARG4, CTSTATE->cb.gpr[3]
++ | fst.d FARG4, CTSTATE->cb.fpr[3]
++ | st.d CARG5, CTSTATE->cb.gpr[4]
++ | fst.d FARG5, CTSTATE->cb.fpr[4]
++ | st.d CARG6, CTSTATE->cb.gpr[5]
++ | fst.d FARG6, CTSTATE->cb.fpr[5]
++ | st.d CARG7, CTSTATE->cb.gpr[6]
++ | fst.d FARG7, CTSTATE->cb.fpr[6]
++ | st.d CARG8, CTSTATE->cb.gpr[7]
++ | fst.d FARG8, CTSTATE->cb.fpr[7]
++ | addi.d TMP0, sp, CFRAME_SPACE
++ | st.d TMP0, CTSTATE->cb.stack
++ | st.d r0, SAVE_PC(sp) // Any value outside of bytecode is ok.
++ | or CARG1, CTSTATE, r0
++ | or CARG2, sp, r0
++ | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
++ | // Returns lua_State *.
++ | ld.d BASE, L:CRET1->base
++ | ld.d RC, L:CRET1->top
++ | or L, CRET1, r0
++ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
++ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
++ | movgr2fr.w TOBIT, TMP3
++ | addi.d TISNIL, r0, LJ_TNIL
++ | addi.d TISNUM, r0, LJ_TISNUM
++ | li_vmstate INTERP
++ | sub.w RC, RC, BASE
++ | cleartp LFUNC:RB
++ | st_vmstate
++ | fcvt.d.s TOBIT, TOBIT
++ | ins_callt
++ |.endif
++ |
++ |->cont_ffi_callback: // Return from FFI callback.
++ |.if FFI
++ | .LDXD CTSTATE, DISPATCH, DISPATCH_GL(ctype_state)
++ | st.d BASE, L->base
++ | st.d RB, L->top
++ | st.d L, CTSTATE->L
++ | or CARG1, CTSTATE, r0
++ | or CARG2, RA, r0
++ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
++ | fld.d FRET1, CTSTATE->cb.fpr[0]
++ | ld.d CRET1, CTSTATE->cb.gpr[0]
++ | fld.d FRET2, CTSTATE->cb.fpr[1]
++ | ld.d CRET2, CTSTATE->cb.gpr[1]
++ | b ->vm_leave_unw
++ |.endif
++ |
++ |->vm_ffi_call: // Call C function via FFI.
++ | // Caveat: needs special frame unwinding, see below.
++ |.if FFI
++ | .type CCSTATE, CCallState, CARG1
++ | ld.w TMP1, CCSTATE->spadj
++ | ld.bu CARG2, CCSTATE->nsp
++ | ld.bu CARG3, CCSTATE->nfpr
++ | or TMP2, sp, r0
++ | sub.d sp, sp, TMP1
++ | st.d ra, -8(TMP2)
++ | slli.w CARG2, CARG2, 3
++ | st.d r23, -16(TMP2)
++ | st.d CCSTATE, -24(TMP2)
++ | or r23, TMP2, r0
++ | addi.d TMP1, CCSTATE, offsetof(CCallState, stack)
++ | or TMP2, sp, r0
++ | add.d TMP3, TMP1, CARG2
++ | beqz CARG2, >2
++ |1:
++ | ld.d TMP0, 0(TMP1)
++ | addi.d TMP1, TMP1, 8
++ | sltu TMP4, TMP1, TMP3
++ | st.d TMP0, 0(TMP2)
++ | addi.d TMP2, TMP2, 8
++ | bnez TMP4, <1
++ |2:
++ | beqz CARG3, >3
++ | fld.d FARG1, CCSTATE->fpr[0]
++ | fld.d FARG2, CCSTATE->fpr[1]
++ | fld.d FARG3, CCSTATE->fpr[2]
++ | fld.d FARG4, CCSTATE->fpr[3]
++ | fld.d FARG5, CCSTATE->fpr[4]
++ | fld.d FARG6, CCSTATE->fpr[5]
++ | fld.d FARG7, CCSTATE->fpr[6]
++ | fld.d FARG8, CCSTATE->fpr[7]
++ |3:
++ | ld.d TMP3, CCSTATE->func
++ | ld.d CARG2, CCSTATE->gpr[1]
++ | ld.d CARG3, CCSTATE->gpr[2]
++ | ld.d CARG4, CCSTATE->gpr[3]
++ | ld.d CARG5, CCSTATE->gpr[4]
++ | ld.d CARG6, CCSTATE->gpr[5]
++ | ld.d CARG7, CCSTATE->gpr[6]
++ | ld.d CARG8, CCSTATE->gpr[7]
++ | ld.d CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
++ | jirl r1, TMP3, 0
++ | ld.d CCSTATE:TMP1, -24(r23)
++ | ld.d TMP2, -16(r23)
++ | ld.d ra, -8(r23)
++ | st.d CRET1, CCSTATE:TMP1->gpr[0]
++ | st.d CRET2, CCSTATE:TMP1->gpr[1]
++ | fst.d FRET1, CCSTATE:TMP1->fpr[0]
++ | fst.d FRET2, CCSTATE:TMP1->fpr[1]
++ | or sp, r23, r0
++ | or r23, TMP2, r0
++ | jirl r0, ra, 0
++ |.endif
++ |// Note: vm_ffi_call must be the last function in this object file!
++ |
++ |//-----------------------------------------------------------------------
++}
++
++/* Generate the code for a single instruction. */
++static void build_ins(BuildCtx *ctx, BCOp op, int defop)
++{
++ int vk = 0;
++ |=>defop:
++
++ switch (op) {
++
++ /* -- Comparison ops ---------------------------------------------------- */
++
++ /* Remember: all ops branch for a true comparison, fall through otherwise. */
++
++ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
++ | // RA = src1*8, RD = src2*8, JMP with RD = target
++ | add.d RA, BASE, RA
++ | add.d RD, BASE, RD
++ if (op == BC_ISLT || op == BC_ISGE) {
++ | ld.d CARG1, 0(RA)
++ | ld.d CARG2, 0(RD)
++ | gettp CARG3, CARG1
++ | gettp CARG4, CARG2
++ } else {
++ | ld.d CARG2, 0(RA)
++ | ld.d CARG1, 0(RD)
++ | gettp CARG3, CARG2
++ | gettp CARG4, CARG1
++ }
++ | ld.hu TMP2, OFS_RD(PC) // TMP2=jump
++ | addi.d PC, PC, 4
++ | bne CARG3, TISNUM, >2
++ | decode_BC4b TMP2
++ | bne CARG4, TISNUM, >5
++ | slli.w CARG1, CARG1, 0
++ | slli.w CARG2, CARG2, 0
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ | slt TMP1, CARG1, CARG2
++ | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
++ if (op == BC_ISLT || op == BC_ISGT) {
++ | maskeqz TMP2, TMP2, TMP1
++ } else {
++ | masknez TMP2, TMP2,TMP1
++ }
++ |1:
++ | add.d PC, PC, TMP2
++ | ins_next
++ |
++ |2: // RA is not an integer.
++ | sltui TMP1, CARG3, LJ_TISNUM
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ | beqz TMP1, ->vmeta_comp
++ | sltui TMP1, CARG4, LJ_TISNUM
++ | decode_BC4b TMP2
++ | beqz TMP1, >4
++ | movgr2fr.d FTMP0, CARG1
++ | movgr2fr.d FTMP2, CARG2
++ |3: // RA and RD are both numbers.
++ if (op == BC_ISLT || op == BC_ISGE) {
++ | fcmp.clt.d FCC0, FTMP0, FTMP2
++ } else {
++ | fcmp.cult.d FCC0, FTMP0, FTMP2
++ }
++ | add.w TMP2, TMP2, TMP3
++ | movcf2gr TMP3, FCC0
++ if (op == BC_ISLT || op == BC_ISGT) {
++ | maskeqz TMP2, TMP2, TMP3
++ } else {
++ | masknez TMP2, TMP2, TMP3
++ }
++ | b <1
++ |
++ |4: // RA is a number, RD is not a number.
++ | // RA is a number, RD is an integer. Convert RD to a number.
++ | bne CARG4, TISNUM, ->vmeta_comp
++ if (op == BC_ISLT || op == BC_ISGE) {
++ | movgr2fr.w FTMP2, CARG2
++ | movgr2fr.d FTMP0, CARG1
++ | ffint.d.w FTMP2, FTMP2
++ } else {
++ | movgr2fr.w FTMP0, CARG1
++ | movgr2fr.d FTMP2, CARG2
++ | ffint.d.w FTMP0, FTMP0
++ }
++ | b <3
++ |
++ |5: // RA is an integer, RD is not an integer
++ | sltui TMP1, CARG4, LJ_TISNUM
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ | beqz TMP1, ->vmeta_comp
++ | // RA is an integer, RD is a number. Convert RA to a number.
++ if (op == BC_ISLT || op == BC_ISGE) {
++ | movgr2fr.w FTMP0, CARG1
++ | movgr2fr.d FTMP2, CARG2
++ | ffint.d.w FTMP0, FTMP0
++ } else {
++ | movgr2fr.w FTMP2, CARG2
++ | movgr2fr.d FTMP0, CARG1
++ | ffint.d.w FTMP2, FTMP2
++ }
++ | b <3
++ break;
++
++ case BC_ISEQV: case BC_ISNEV:
++ vk = op == BC_ISEQV;
++ | // RA = src1*8, RD = src2*8, JMP with RD = target
++ | add.d RA, BASE, RA
++ | add.d RD, BASE, RD
++ | addi.d PC, PC, 4
++ | ld.d CARG1, 0(RA)
++ | ld.d CARG2, 0(RD)
++ | ld.hu TMP2, -4+OFS_RD(PC) // PC was already advanced by 4, hence the -4.
++ | gettp CARG3, CARG1
++ | gettp CARG4, CARG2
++ | sltu TMP0, TISNUM, CARG3
++ | sltu TMP1, TISNUM, CARG4
++ | or TMP0, TMP0, TMP1 // TMP0=0: neither tag is above TISNUM.
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ if (vk) {
++ | beqz TMP0, ->BC_ISEQN_Z
++ } else {
++ | beqz TMP0, ->BC_ISNEN_Z
++ }
++ |// Either or both types are not numbers.
++ |.if FFI
++ | // Check if RA or RD is a cdata.
++ | addi.w TMP0, r0, LJ_TCDATA
++ | beq CARG3, TMP0, ->vmeta_equal_cd
++ | beq CARG4, TMP0, ->vmeta_equal_cd
++ |.endif
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ | decode_BC4b TMP2
++ | add.w TMP2, TMP2, TMP3 // (jump-0x8000)<<2
++ | bne CARG1, CARG2, >2
++ | // Tag and value are equal.
++ if (vk) {
++ |->BC_ISEQV_Z:
++ | add.d PC, PC, TMP2
++ }
++ |1:
++ | ins_next
++ |
++ |2: // Check if the tags are the same and it's a table or userdata.
++ | xor TMP3, CARG3, CARG4 // Same type? TMP3=0 if the tags match.
++ | sltui TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? TMP0=1
++ | masknez TMP0, TMP0, TMP3 // TMP0=0: types differ, or same type but not table/userdata
++ | cleartp TAB:TMP1, CARG1
++ if (vk) {
++ | beqz TMP0, <1
++ } else {
++ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction.
++ }
++ | // Different tables or userdatas. Need to check __eq metamethod.
++ | // Field metatable must be at same offset for GCtab and GCudata!
++ | ld.d TAB:TMP3, TAB:TMP1->metatable
++ if (vk) {
++ | beqz TAB:TMP3, <1 // No metatable?
++ | ld.bu TMP3, TAB:TMP3->nomm
++ | andi TMP3, TMP3, 1<<MM_eq
++ | addi.w TMP0, r0, 0 // ne = 0
++ | bnez TMP3, <1 // Or 'no __eq' flag set?
++ } else {
++ | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable?
++ | ld.bu TMP3, TAB:TMP3->nomm
++ | andi TMP3, TMP3, 1<<MM_eq
++ | addi.w TMP0, r0, 1 // ne = 1
++ | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set?
++ }
++ | b ->vmeta_equal // Handle __eq metamethod.
++ break;
++
++ case BC_ISEQS: case BC_ISNES:
++ vk = op == BC_ISEQS;
++ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
++ | add.d RA, BASE, RA
++ | addi.d PC, PC, 4
++ | ld.d CARG1, 0(RA)
++ | sub.d RD, KBASE, RD
++ | ld.hu TMP2, -4+OFS_RD(PC) // PC was already advanced by 4, hence the -4.
++ | ld.d CARG2, -8(RD) // KBASE-8-str_const*8
++ |.if FFI
++ | gettp CARG3, CARG1
++ | addi.w TMP1, r0, LJ_TCDATA
++ |.endif
++ | addi.w TMP0, r0, LJ_TSTR
++ | decode_BC4b TMP2
++ | settp CARG2, TMP0 // Tag the constant as a string so it can be compared with the slot.
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ |.if FFI
++ | beq CARG3, TMP1, ->vmeta_equal_cd
++ |.endif
++ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
++ | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
++ if (vk) {
++ | masknez TMP2, TMP2, TMP0 // Keep the jump offset only if A==D.
++ } else {
++ | maskeqz TMP2, TMP2, TMP0 // Keep the jump offset only if A!=D.
++ }
++ | add.d PC, PC, TMP2
++ | ins_next
++ break;
++
++ case BC_ISEQN: case BC_ISNEN:
++ vk = op == BC_ISEQN;
++ | // RA = src*8, RD = num_const*8, JMP with RD = target
++ | add.d RA, BASE, RA
++ | add.d RD, KBASE, RD
++ | ld.d CARG1, 0(RA)
++ | ld.d CARG2, 0(RD)
++ | ld.hu TMP2, OFS_RD(PC)
++ | addi.d PC, PC, 4
++ | gettp CARG3, CARG1
++ | gettp CARG4, CARG2
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ if (vk) {
++ |->BC_ISEQN_Z:
++ } else {
++ |->BC_ISNEN_Z:
++ }
++ | decode_BC4b TMP2
++ | bne CARG3, TISNUM, >4
++ | add.w TMP2, TMP2, TMP3
++ | bne CARG4, TISNUM, >6
++ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
++ |1:
++ if (vk) {
++ | masknez TMP2, TMP2, TMP0
++ | add.d PC, PC, TMP2
++ |2:
++ } else {
++ | maskeqz TMP2, TMP2, TMP0
++ |2:
++ | add.d PC, PC, TMP2
++ }
++ |3:
++ | ins_next
++ |
++ |4: // RA is not an integer.
++ | sltu TMP0, CARG3, TISNUM
++ | add.w TMP2, TMP2, TMP3
++ |.if FFI
++ | beqz TMP0, >7
++ |.else
++ | beqz TMP0, <2
++ |.endif
++ | movgr2fr.d FTMP0, CARG1
++ | movgr2fr.d FTMP2, CARG2
++ | bne CARG4, TISNUM, >5
++ |// RA is a number, RD is an integer.
++ | ffint.d.w FTMP2, FTMP2
++ |
++ |5: // RA and RD are both numbers.
++ | fcmp.cune.d FCC0, FTMP0, FTMP2
++ | movcf2gr TMP0, FCC0
++ | b <1
++ |
++ |6: // RA is an integer, RD is a number.
++ | sltu TMP0, CARG4, TISNUM
++ |.if FFI
++ | beqz TMP0, >8
++ |.else
++ | beqz TMP0, <2
++ |.endif
++ | movgr2fr.w FTMP0, CARG1
++ | movgr2fr.d FTMP2, CARG2
++ | ffint.d.w FTMP0, FTMP0
++ | b <5
++ |
++ |.if FFI
++ |7: // RA not int, not number
++ | addi.w TMP0, r0, LJ_TCDATA
++ | bne CARG3, TMP0, <2
++ | b ->vmeta_equal_cd
++ |
++ |8: // RD not int, not number
++ | addi.w TMP0, r0, LJ_TCDATA
++ | bne CARG4, TMP0, <2
++ | b ->vmeta_equal_cd
++ |.endif
++ break;
++
++ case BC_ISEQP: case BC_ISNEP:
++ vk = op == BC_ISEQP;
++ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
++ | add.d RA, BASE, RA
++ | srli.w TMP0, RD, 3
++ | ld.d TMP1, 0(RA)
++ | nor TMP0, TMP0, r0 // ~TMP0: ~0 ~1 ~2
++ | ld.hu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target
++ | gettp TMP1, TMP1
++ | addi.d PC, PC, 4
++ | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D
++ |.if FFI
++ | addi.w TMP3, r0, LJ_TCDATA
++ | beq TMP1, TMP3, ->vmeta_equal_cd
++ |.endif
++ | decode_BC4b TMP2
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
++ if (vk) {
++ | masknez TMP2, TMP2, TMP0
++ } else {
++ | maskeqz TMP2, TMP2, TMP0
++ }
++ | add.d PC, PC, TMP2
++ | ins_next
++ break;
++
++ /* -- Unary test and copy ops ------------------------------------------- */
++
++ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
++ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
++ | add.d RD, BASE, RD
++ | ld.hu TMP2, OFS_RD(PC)
++ | ld.d TMP0, 0(RD)
++ | addi.d PC, PC, 4
++ | gettp TMP0, TMP0
++ | add.d RA, BASE, RA
++ | sltui TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false
++ | decode_BC4b TMP2
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ | ld.d CRET1, 0(RD)
++ | add.w TMP2, TMP2, TMP3 // (jump-0x8000)<<2
++ if (op == BC_IST || op == BC_ISTC) {
++ | beqz TMP0, >1
++ if (op == BC_ISTC) {
++ | st.d CRET1, 0(RA)
++ }
++ } else {
++ | bnez TMP0, >1
++ if (op == BC_ISFC) {
++ | st.d CRET1, 0(RA)
++ }
++ }
++ | add.d PC, PC, TMP2
++ |1:
++ | ins_next
++ break;
++
++ case BC_ISTYPE:
++ | // RA = src*8, RD = -type*8
++ | add.d TMP0, BASE, RA
++ | srli.w TMP1, RD, 3
++ | ld.d TMP0, 0(TMP0)
++ | gettp TMP0, TMP0
++ | add.d TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0
++ | bnez TMP0, ->vmeta_istype
++ | ins_next
++ break;
++ case BC_ISNUM:
++ | // RA = src*8, RD = -(TISNUM-1)*8
++ | add.d TMP0, BASE, RA
++ | ld.d TMP0, 0(TMP0)
++ | checknum TMP0, ->vmeta_istype
++ | ins_next
++ break;
++
++ /* -- Unary ops --------------------------------------------------------- */
++
++ case BC_MOV:
++ | // RA = dst*8, RD = src*8
++ | add.d RD, BASE, RD
++ | add.d RA, BASE, RA
++ | ld.d TMP0, 0(RD)
++ | ins_next1
++ | st.d TMP0, 0(RA)
++ | ins_next2
++ break;
++ case BC_NOT:
++ | // RA = dst*8, RD = src*8
++ | add.d RD, BASE, RD
++ | add.d RA, BASE, RA
++ | ld.d TMP0, 0(RD)
++ | addi.d TMP1, r0, LJ_TTRUE
++ | ins_next1
++ | gettp TMP0, TMP0
++ | sltu TMP0, TMP1, TMP0
++ | addi.w TMP0, TMP0, 1
++ | slli.d TMP0, TMP0, 47
++ | nor TMP0, TMP0, r0
++ | st.d TMP0, 0(RA)
++ | ins_next2
++ break;
++ case BC_UNM:
++ | // RA = dst*8, RD = src*8
++ | add.d RB, BASE, RD
++ | add.d RA, BASE, RA
++ | ld.d TMP0, 0(RB)
++ | addu16i.d TMP1, r0, 0x8000
++ | gettp CARG3, TMP0
++ | bne CARG3, TISNUM, >1
++ | sub.w TMP0, r0, TMP0
++ | beq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
++ | bstrpick.d TMP0, TMP0, 31, 0
++ | settp TMP0, TISNUM
++ | b >2
++ |1:
++ | sltui TMP3, CARG3, LJ_TISNUM
++ | slli.d TMP1, TMP1, 32
++ | beqz TMP3, ->vmeta_unm
++ | xor TMP0, TMP0, TMP1 // sign => ~sign
++ |2:
++ | st.d TMP0, 0(RA)
++ | ins_next
++ break;
++ case BC_LEN:
++ | // RA = dst*8, RD = src*8
++ | add.d CARG2, BASE, RD
++ | ld.d TMP0, 0(CARG2)
++ | add.d RA, BASE, RA
++ | gettp TMP1, TMP0
++ | addi.d TMP2, TMP1, -LJ_TSTR
++ | cleartp STR:CARG1, TMP0
++ | bnez TMP2, >2
++ | ld.w CARG1, STR:CARG1->len
++ |1:
++ | settp CARG1, TISNUM
++ | st.d CARG1, 0(RA)
++ | ins_next
++ |2:
++ | addi.d TMP2, TMP1, -LJ_TTAB
++ | bnez TMP2, ->vmeta_len
++#if LJ_52
++ | ld.d TAB:TMP2, TAB:CARG1->metatable
++ | bnez TAB:TMP2, >9
++ |3:
++#endif
++ |->BC_LEN_Z:
++ | bl extern lj_tab_len // (GCtab *t)
++ | // Returns uint32_t (but less than 2^31).
++ | b <1
++#if LJ_52
++ |9:
++ | ld.bu TMP0, TAB:TMP2->nomm
++ | andi TMP0, TMP0, 1<<MM_len
++ | bnez TMP0, <3 // 'no __len' flag set: done.
++ | b ->vmeta_len
++#endif
++ break;
++
++ /* -- Binary ops -------------------------------------------------------- */
++
++ |.macro fpmod, a, b, c
++ | fdiv.d FARG1, b, c
++ | bl ->vm_floor // floor(b/c)
++ | fmul.d a, FRET1, c
++ | fsub.d a, b, a // b - floor(b/c)*c
++ |.endmacro
++ |
++ |.macro ins_arithpre
++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
++ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
++ ||if (vk == 1) {
++ | // RA = dst*8, RB = num_const*8, RC = src1*8
++ | decode_RB RC, INS
++ | decode_RDtoRC8 RB, RD
++ ||} else {
++ | // RA = dst*8, RB = src1*8, RC = num_const*8
++ | decode_RB RB, INS
++ | decode_RDtoRC8 RC, RD
++ ||}
++ ||switch (vk) {
++ ||case 0: // suffix is VN
++ | add.d RB, BASE, RB
++ | add.d RC, KBASE, RC
++ || break;
++ ||case 1: // suffix is NV
++ | add.d RC, BASE, RC
++ | add.d RB, KBASE, RB
++ || break;
++ ||default: // CAT or suffix is VV
++ | add.d RB, BASE, RB
++ | add.d RC, BASE, RC
++ || break;
++ ||}
++ |.endmacro
++ |
++ |.macro ins_arithfp, fpins, itype1, itype2
++ | fld.d FTMP0, 0(RB)
++ | sltu itype1, itype1, TISNUM
++ | sltu itype2, itype2, TISNUM
++ | fld.d FTMP2, 0(RC)
++ | and itype1, itype1, itype2
++ | add.d RA, BASE, RA
++ | beqz itype1, ->vmeta_arith
++ | fpins FRET1, FTMP0, FTMP2
++ | ins_next1
++ | fst.d FRET1, 0(RA)
++ | ins_next2
++ |.endmacro
++ |
++ |.macro ins_arithead, itype1, itype2, tval1, tval2
++ | ld.d tval1, 0(RB)
++ | ld.d tval2, 0(RC)
++ | // Check for two integers.
++ | gettp itype1, tval1
++ | gettp itype2, tval2
++ |.endmacro
++ |
++ |.macro ins_arithdn, intins, fpins
++ | ins_arithpre
++ | ins_arithead TMP0, TMP1, CARG1, CARG2
++ | bne TMP0, TISNUM, >1
++ | bne TMP1, TISNUM, >1
++ | slli.w CARG3, CARG1, 0
++ | slli.w CARG4, CARG2, 0
++ |.if "intins" == "add.w"
++ | intins CRET1, CARG3, CARG4
++ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow.
++ | xor TMP2, CRET1, CARG4
++ | and TMP1, TMP1, TMP2
++ | add.d RA, BASE, RA
++ | blt TMP1, r0, ->vmeta_arith
++ |.elif "intins" == "sub.w"
++ | intins CRET1, CARG3, CARG4
++ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow.
++ | xor TMP2, CARG3, CARG4
++ | and TMP1, TMP1, TMP2
++ | add.d RA, BASE, RA
++ | blt TMP1, r0, ->vmeta_arith
++ |.elif "intins" == "mulw.d.w"
++ | mul.w CRET1, CARG3, CARG4
++ | mulh.w TMP2, CARG3, CARG4
++ | srai.w TMP1, CRET1, 31 // 63-32bit not all 0 or 1: overflow.
++ | add.d RA, BASE, RA
++ | bne TMP1, TMP2, ->vmeta_arith
++ |.endif
++ | bstrpick.d CRET1, CRET1, 31, 0
++ | settp CRET1, TISNUM
++ | st.d CRET1, 0(RA)
++ | ins_next
++ |1: // Check for two numbers.
++ | ins_arithfp, fpins, TMP0, TMP1
++ |.endmacro
++ |
++ |.macro ins_arithdiv, fpins
++ | ins_arithpre
++ | ins_arithead TMP0, TMP1, CARG1, CARG2
++ | ins_arithfp, fpins, TMP0, TMP1
++ |.endmacro
++ |
++ |.macro ins_arithmod, fpins
++ | ins_arithpre
++ | ins_arithead TMP0, TMP1, CARG1, CARG2
++ | bne TMP0, TISNUM, >1
++ | bne TMP1, TISNUM, >1
++ | slli.w CARG1, CARG1, 0
++ | slli.w CARG2, CARG2, 0
++ | add.d RA, BASE, RA
++ | beqz CARG2, ->vmeta_arith
++ | bl extern lj_vm_modi
++ | bstrpick.d CRET1, CRET1, 31, 0
++ | settp CRET1, TISNUM
++ | st.d CRET1, 0(RA)
++ | ins_next
++ |1: // Check for two numbers.
++ | ins_arithfp, fpins, TMP0, TMP1
++ |.endmacro
++
++ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
++ | ins_arithdn add.w, fadd.d
++ break;
++ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
++ | ins_arithdn sub.w, fsub.d
++ break;
++ case BC_MULVN: case BC_MULNV: case BC_MULVV:
++ | ins_arithdn mulw.d.w, fmul.d
++ break;
++ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
++ | ins_arithdiv fdiv.d
++ break;
++ case BC_MODVN: case BC_MODNV: case BC_MODVV:
++ | ins_arithmod fpmod
++ break;
++ case BC_POW:
++ | ins_arithpre
++ | ld.d CARG1, 0(RB)
++ | ld.d CARG2, 0(RC)
++ | gettp TMP0, CARG1
++ | gettp TMP1, CARG2
++ | sltui TMP0, TMP0, LJ_TISNUM
++ | sltui TMP1, TMP1, LJ_TISNUM
++ | and TMP0, TMP0, TMP1
++ | add.d RA, BASE, RA
++ | beqz TMP0, ->vmeta_arith
++ | fld.d FARG1, 0(RB)
++ | fld.d FARG2, 0(RC)
++ | bl extern pow
++ | ins_next1
++ | fst.d FRET1, 0(RA)
++ | ins_next2
++ break;
++
++ case BC_CAT:
++ | // RA = dst*8, RB = src_start*8, RC = src_end*8
++ | decode_RB RB, INS
++ | decode_RDtoRC8 RC, RD
++ | sub.d CARG3, RC, RB
++ | st.d BASE, L->base
++ | add.d CARG2, BASE, RC
++ | or MULTRES, RB, r0
++ |->BC_CAT_Z:
++ | srli.w CARG3, CARG3, 3
++ | st.d PC, SAVE_PC(sp)
++ | or CARG1, L, r0
++ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
++ | // Returns NULL (finished) or TValue * (metamethod).
++ | ld.d BASE, L->base
++ | bnez CRET1, ->vmeta_binop
++ | add.d RB, BASE, MULTRES
++ | ld.d TMP0, 0(RB)
++ | add.d RA, BASE, RA
++ | st.d TMP0, 0(RA)
++ | ins_next
++ break;
++
++ /* -- Constant ops ------------------------------------------------------ */
++
++ case BC_KSTR:
++ | // RA = dst*8, RD = str_const*8 (~)
++ | sub.d TMP1, KBASE, RD
++ | addi.w TMP2, r0, LJ_TSTR
++ | ld.d TMP0, -8(TMP1) // KBASE-8-str_const*8
++ | add.d RA, BASE, RA
++ | settp TMP0, TMP2
++ | st.d TMP0, 0(RA)
++ | ins_next
++ break;
++ case BC_KCDATA:
++ |.if FFI
++ | // RA = dst*8, RD = cdata_const*8 (~)
++ | sub.d TMP1, KBASE, RD
++ | ld.d TMP0, -8(TMP1) // KBASE-8-cdata_const*8
++ | addi.w TMP2, r0, LJ_TCDATA
++ | add.d RA, BASE, RA
++ | settp TMP0, TMP2
++ | st.d TMP0, 0(RA)
++ | ins_next
++ |.endif
++ break;
++ case BC_KSHORT:
++ | // RA = dst*8, RD = int16_literal*8
++ | srai.w RD, INS, 16
++ | add.d RA, BASE, RA
++ | bstrpick.d RD, RD, 31, 0
++ | settp RD, TISNUM
++ | st.d RD, 0(RA)
++ | ins_next
++ break;
++ case BC_KNUM:
++ | // RA = dst*8, RD = num_const*8
++ | add.d RD, KBASE, RD
++ | add.d RA, BASE, RA
++ | ld.d TMP0, 0(RD)
++ | st.d TMP0, 0(RA)
++ | ins_next
++ break;
++ case BC_KPRI:
++ | // RA = dst*8, RD = primitive_type*8 (~)
++ | add.d RA, BASE, RA
++ | slli.d TMP0, RD, 44 // 44+3
++ | nor TMP0, TMP0, r0
++ | st.d TMP0, 0(RA)
++ | ins_next
++ break;
++ case BC_KNIL:
++ | // RA = base*8, RD = end*8
++ | add.d RA, BASE, RA
++ | st.d TISNIL, 0(RA)
++ | addi.d RA, RA, 8
++ | add.d RD, BASE, RD
++ |1:
++ | st.d TISNIL, 0(RA)
++ | slt TMP0, RA, RD
++ | addi.d RA, RA, 8
++ | bnez TMP0, <1
++ | ins_next
++ break;
++
++ /* -- Upvalue and function ops ------------------------------------------ */
++
++ case BC_UGET:
++ | // RA = dst*8, RD = uvnum*8
++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
++ | add.d RA, BASE, RA
++ | cleartp LFUNC:TMP0
++ | add.d RD, RD, LFUNC:TMP0
++ | ld.d UPVAL:TMP0, LFUNC:RD->uvptr
++ | ld.d TMP1, UPVAL:TMP0->v
++ | ld.d TMP2, 0(TMP1)
++ | ins_next1
++ | st.d TMP2, 0(RA)
++ | ins_next2
++ break;
++ case BC_USETV:
++ | // RA = uvnum*8, RD = src*8
++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
++ | add.d RD, BASE, RD
++ | cleartp LFUNC:TMP0
++ | add.d RA, RA, LFUNC:TMP0
++ | ld.d UPVAL:TMP0, LFUNC:RA->uvptr
++ | ld.d CRET1, 0(RD)
++ | ld.bu TMP3, UPVAL:TMP0->marked
++ | ld.d CARG2, UPVAL:TMP0->v
++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
++ | ld.bu TMP0, UPVAL:TMP0->closed
++ | gettp TMP2, CRET1
++ | st.d CRET1, 0(CARG2)
++ | or TMP3, TMP3, TMP0
++ | addi.d TMP0, r0, LJ_GC_BLACK|1
++ | addi.d TMP2, TMP2, -(LJ_TNUMX+1)
++ | beq TMP3, TMP0, >2 // Upvalue is closed and black?
++ |1:
++ | ins_next
++ |
++ |2: // Check if new value is collectable.
++ | sltui TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
++ | cleartp GCOBJ:CRET1, CRET1
++ | beqz TMP0, <1 // tvisgcv(v)
++ | ld.bu TMP3, GCOBJ:CRET1->gch.marked
++ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
++ | beqz TMP3, <1
++ | // Crossed a write barrier. Move the barrier forward.
++ | .ADD16I CARG1, DISPATCH, GG_DISP2G
++ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
++ | b <1
++ break;
++ case BC_USETS:
++ | // RA = uvnum*8, RD = str_const*8 (~)
++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
++ | sub.d TMP1, KBASE, RD
++ | cleartp LFUNC:TMP0
++ | add.d RA, RA, LFUNC:TMP0
++ | ld.d UPVAL:TMP0, LFUNC:RA->uvptr
++ | ld.d STR:TMP1, -8(TMP1) // KBASE-8-str_const*8
++ | ld.bu TMP2, UPVAL:TMP0->marked
++ | ld.d CARG2, UPVAL:TMP0->v
++ | ld.bu TMP3, STR:TMP1->marked
++ | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv)
++ | ld.bu TMP2, UPVAL:TMP0->closed
++ | addi.d TMP0, r0, LJ_TSTR
++ | settp TMP1, TMP0
++ | st.d TMP1, 0(CARG2)
++ | bnez TMP4, >2
++ |1:
++ | ins_next
++ |
++ |2: // Check if string is white and ensure upvalue is closed.
++ | beqz TMP2, <1
++ | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str)
++ | beqz TMP0, <1
++ | // Crossed a write barrier. Move the barrier forward.
++ | .ADD16I CARG1, DISPATCH, GG_DISP2G
++ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
++ | b <1
++ break;
++ case BC_USETN:
++ | // RA = uvnum*8, RD = num_const*8
++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
++ | add.d RD, KBASE, RD
++ | cleartp LFUNC:TMP0
++ | add.d TMP0, RA, LFUNC:TMP0
++ | ld.d UPVAL:TMP0, LFUNC:TMP0->uvptr
++ | ld.d TMP1, 0(RD)
++ | ld.d TMP0, UPVAL:TMP0->v
++ | st.d TMP1, 0(TMP0)
++ | ins_next
++ break;
++ case BC_USETP:
++ | // RA = uvnum*8, RD = primitive_type*8 (~)
++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
++ | slli.d TMP2, RD, 44
++ | cleartp LFUNC:TMP0
++ | add.d TMP0, RA, LFUNC:TMP0
++ | nor TMP2, TMP2, r0
++ | ld.d UPVAL:TMP0, LFUNC:TMP0->uvptr
++ | ld.d TMP1, UPVAL:TMP0->v
++ | st.d TMP2, 0(TMP1)
++ | ins_next
++ break;
++
++ case BC_UCLO:
++ | // RA = level*8, RD = target
++ | ld.d TMP2, L->openupval
++ | branch_RD // Do this first since RD is not saved.
++ | st.d BASE, L->base
++ | or CARG1, L, r0
++ | beqz TMP2, >1
++ | add.d CARG2, BASE, RA
++ | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
++ | ld.d BASE, L->base
++ |1:
++ | ins_next
++ break;
++
++ case BC_FNEW:
++ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
++ | sub.d TMP1, KBASE, RD
++ | ld.d CARG3, FRAME_FUNC(BASE)
++ | ld.d CARG2, -8(TMP1) // KBASE-8-proto_const*8
++ | st.d BASE, L->base
++ | st.d PC, SAVE_PC(sp)
++ | cleartp CARG3
++ | or CARG1, L, r0
++ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
++ | bl extern lj_func_newL_gc
++ | // Returns GCfuncL *.
++ | addi.d TMP0, r0, LJ_TFUNC
++ | ld.d BASE, L->base // Reload BASE after the call.
++ | settp CRET1, TMP0
++ | add.d RA, BASE, RA
++ | st.d CRET1, 0(RA)
++ | ins_next
++ break;
++
++ /* -- Table ops --------------------------------------------------------- */
++
++ case BC_TNEW:
++ case BC_TDUP:
++ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
++ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total)
++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold)
++ | st.d BASE, L->base
++ | sltu TMP2, TMP0, TMP1 // TMP2=1: gc.total < gc.threshold
++ | st.d PC, SAVE_PC(sp)
++ | beqz TMP2, >5
++ |1:
++ if (op == BC_TNEW) {
++ | srli.w CARG2, RD, 3
++ | andi CARG2, CARG2, 0x7ff // asize = low 11 bits of the operand.
++ | ori TMP0, r0, 0x801
++ | addi.w TMP2, CARG2, -0x7ff // TMP2=0 iff asize == 0x7ff.
++ | srli.w CARG3, RD, 14 // hbits = operand bits above asize.
++ | masknez TMP0, TMP0, TMP2
++ | maskeqz CARG2, CARG2, TMP2
++ | or CARG2, CARG2, TMP0 // asize == 0x7ff is replaced by 0x801.
++ | // (lua_State *L, int32_t asize, uint32_t hbits)
++ | or CARG1, L, r0
++ | bl extern lj_tab_new
++ | // Returns Table *.
++ } else {
++ | sub.d TMP1, KBASE, RD
++ | or CARG1, L, r0
++ | ld.d CARG2, -8(TMP1) // KBASE-8-tab_const*8
++ | bl extern lj_tab_dup // (lua_State *L, Table *kt)
++ | // Returns Table *.
++ }
++ | addi.d TMP0, r0, LJ_TTAB
++ | ld.d BASE, L->base
++ | ins_next1
++ | settp CRET1, TMP0
++ | add.d RA, BASE, RA
++ | st.d CRET1, 0(RA)
++ | ins_next2
++ |5: // gc.total >= gc.threshold: call lj_gc_step_fixtop, then retry.
++ | or MULTRES, RD, r0 // Save RD in MULTRES across the call.
++ | or CARG1, L, r0
++ | bl extern lj_gc_step_fixtop // (lua_State *L)
++ | or RD, MULTRES, r0
++ | b <1
++ break;
++
++ case BC_GGET:
++ | // RA = dst*8, RD = str_const*8 (~)
++ case BC_GSET:
++ | // RA = src*8, RD = str_const*8 (~)
++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
++ | sub.d TMP1, KBASE, RD
++ | ld.d STR:RC, -8(TMP1) // KBASE-8-str_const*8
++ | cleartp LFUNC:TMP0
++ | ld.d TAB:RB, LFUNC:TMP0->env
++ | add.d RA, BASE, RA
++ if (op == BC_GGET) {
++ | b ->BC_TGETS_Z
++ } else {
++ | b ->BC_TSETS_Z
++ }
++ break;
++
++ case BC_TGETV:
++ | // RA = dst*8, RB = table*8, RC = key*8
++ | decode_RB RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add.d CARG2, BASE, RB
++ | add.d CARG3, BASE, RC
++ | ld.d TAB:RB, 0(CARG2)
++ | ld.d TMP2, 0(CARG3)
++ | add.d RA, BASE, RA
++ | checktab TAB:RB, ->vmeta_tgetv
++ | gettp TMP3, TMP2
++ | ld.w TMP0, TAB:RB->asize
++ | bne TMP3, TISNUM, >5 // Integer key?
++ | slli.w TMP2, TMP2, 0
++ | ld.d TMP1, TAB:RB->array
++ | sltu TMP3, TMP2, TMP0 //array part (keys = [0, asize-1])
++ | slli.w TMP2, TMP2, 3
++ | beqz TMP3, ->vmeta_tgetv // Integer key and in array part?
++ | add.d TMP2, TMP1, TMP2
++ | ld.d CRET1, 0(TMP2)
++ | beq CRET1, TISNIL, >2
++ |1:
++ | st.d CRET1, 0(RA)
++ | ins_next
++ |
++ |2: // Check for __index if table value is nil.
++ | ld.d TAB:TMP2, TAB:RB->metatable
++ | beqz TAB:TMP2, <1 // No metatable: done.
++ | ld.bu TMP0, TAB:TMP2->nomm
++ | andi TMP0, TMP0, 1<<MM_index
++ | bnez TMP0, <1 // 'no __index' flag set: done.
++ | b ->vmeta_tgetv
++ |
++ |5:
++ | addi.d TMP0, r0, LJ_TSTR
++ | cleartp RC, TMP2
++ | bne TMP3, TMP0, ->vmeta_tgetv // String key?
++ | b ->BC_TGETS_Z
++ break;
++ case BC_TGETS:
++ | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
++ | decode_RB RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add.d CARG2, BASE, RB
++ | sub.d CARG3, KBASE, RC
++ | ld.d TAB:RB, 0(CARG2)
++ | add.d RA, BASE, RA
++ | ld.d STR:RC, -8(CARG3) // KBASE-8-str_const*8
++ | checktab TAB:RB, ->vmeta_tgets1
++ |->BC_TGETS_Z:
++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
++ | ld.w TMP0, TAB:RB->hmask
++ | ld.w TMP1, STR:RC->sid
++ | ld.d NODE:TMP2, TAB:RB->node
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
++ | slli.w TMP0, TMP1, 5
++ | slli.w TMP1, TMP1, 3
++ | sub.w TMP1, TMP0, TMP1
++ | addi.d TMP3, r0, LJ_TSTR
++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
++ | settp STR:RC, TMP3 // Tagged key to look for.
++ |1:
++ | ld.d CARG1, NODE:TMP2->key
++ | ld.d CARG2, NODE:TMP2->val
++ | ld.d NODE:TMP1, NODE:TMP2->next
++ | ld.d TAB:TMP3, TAB:RB->metatable
++ | bne CARG1, RC, >4
++ | beq CARG2, TISNIL, >5 // Key found, but nil value?
++ |3:
++ | st.d CARG2, 0(RA)
++ | ins_next
++ |
++ |4: // Follow hash chain.
++ | or NODE:TMP2, NODE:TMP1, r0
++ | bnez NODE:TMP1, <1
++ | // End of hash chain: key not found, nil result.
++ |
++ |5: // Check for __index if table value is nil.
++ | or CARG2, TISNIL, r0
++ | beqz TAB:TMP3, <3 // No metatable: done.
++ | ld.bu TMP0, TAB:TMP3->nomm
++ | andi TMP0, TMP0, 1<<MM_index
++ | bnez TMP0, <3 // 'no __index' flag set: done.
++ | b ->vmeta_tgets
++ break;
++ case BC_TGETB:
++ | // RA = dst*8, RB = table*8, RC = index*8
++ | decode_RB RB, INS
++ | add.d CARG2, BASE, RB
++ | decode_RDtoRC8 RC, RD
++ | ld.d TAB:RB, 0(CARG2)
++ | add.d RA, BASE, RA
++ | srli.w TMP0, RC, 3
++ | checktab TAB:RB, ->vmeta_tgetb
++ | ld.w TMP1, TAB:RB->asize
++ | ld.d TMP2, TAB:RB->array
++ | sltu TMP1, TMP0, TMP1
++ | add.d RC, TMP2, RC
++ | beqz TMP1, ->vmeta_tgetb
++ | ld.d CRET1, 0(RC)
++ | beq CRET1, TISNIL, >5
++ |1:
++ | st.d CRET1, 0(RA)
++ | ins_next
++ |
++ |5: // Check for __index if table value is nil.
++ | ld.d TAB:TMP2, TAB:RB->metatable
++ | beqz TAB:TMP2, <1 // No metatable: done.
++ | ld.bu TMP1, TAB:TMP2->nomm
++ | andi TMP1, TMP1, 1<<MM_index
++ | bnez TMP1, <1 // 'no __index' flag set: done.
++ | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
++ break;
++ case BC_TGETR:
++ | // RA = dst*8, RB = table*8, RC = key*8
++ | decode_RB RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add.d RB, BASE, RB
++ | add.d RC, BASE, RC
++ | ld.d TAB:CARG1, 0(RB)
++ | ld.w CARG2, 0(RC)
++ | add.d RA, BASE, RA
++ | cleartp TAB:CARG1
++ | ld.w TMP0, TAB:CARG1->asize
++ | ld.d TMP1, TAB:CARG1->array
++ | sltu TMP0, CARG2, TMP0
++ | slli.w TMP2, CARG2, 3
++ | add.d TMP3, TMP1, TMP2
++ | beqz TMP0, ->vmeta_tgetr // In array part?
++ | ld.d TMP1, 0(TMP3)
++ |->BC_TGETR_Z:
++ | ins_next1
++ | st.d TMP1, 0(RA)
++ | ins_next2
++ break;
++
++ case BC_TSETV:
++ | // RA = src*8, RB = table*8, RC = key*8
++ | decode_RB RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add.d CARG2, BASE, RB
++ | add.d CARG3, BASE, RC
++ | ld.d TAB:RB, 0(CARG2)
++ | ld.d TMP2, 0(CARG3)
++ | add.d RA, BASE, RA
++ | checktab TAB:RB, ->vmeta_tsetv
++ | slli.w RC, TMP2, 0
++ | checkint TMP2, >5
++ | ld.w TMP0, TAB:RB->asize
++ | ld.d TMP1, TAB:RB->array
++ | sltu TMP0, RC, TMP0
++ | slli.w TMP2, RC, 3
++ | beqz TMP0, ->vmeta_tsetv // Integer key and in array part?
++ | add.d TMP1, TMP1, TMP2
++ | ld.bu TMP3, TAB:RB->marked
++ | ld.d TMP0, 0(TMP1)
++ | ld.d CRET1, 0(RA)
++ | beq TMP0, TISNIL, >3
++ |1:
++ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table)
++ | st.d CRET1, 0(TMP1)
++ | bnez TMP2, >7
++ |2:
++ | ins_next
++ |
++ |3: // Check for __newindex if previous value is nil.
++ | ld.d TAB:TMP2, TAB:RB->metatable
++ | beqz TAB:TMP2, <1 // No metatable: done.
++ | ld.bu TMP2, TAB:TMP2->nomm
++ | andi TMP2, TMP2, 1<<MM_newindex
++ | bnez TMP2, <1 // 'no __newindex' flag set: done.
++ | b ->vmeta_tsetv
++ |5:
++ | gettp TMP0, TMP2
++ | addi.d TMP0, TMP0, -LJ_TSTR
++ | bnez TMP0, ->vmeta_tsetv
++ | cleartp STR:RC, TMP2
++ | b ->BC_TSETS_Z // String key?
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, TMP3, TMP0, <2
++ break;
++ case BC_TSETS:
++ | // RA = src*8, RB = table*8, RC = str_const*8 (~)
++ | decode_RB RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add.d CARG2, BASE, RB
++ | sub.d CARG3, KBASE, RC
++ | ld.d TAB:RB, 0(CARG2)
++ | ld.d RC, -8(CARG3) // KBASE-8-str_const*8
++ | add.d RA, BASE, RA
++ | cleartp STR:RC
++ | checktab TAB:RB, ->vmeta_tsets1
++ |->BC_TSETS_Z:
++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
++ | ld.w TMP0, TAB:RB->hmask
++ | ld.w TMP1, STR:RC->sid
++ | ld.d NODE:TMP2, TAB:RB->node
++ | st.b r0, TAB:RB->nomm // Clear metamethod cache.
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
++ | slli.w TMP0, TMP1, 5
++ | slli.w TMP1, TMP1, 3
++ | sub.w TMP1, TMP0, TMP1
++ | addi.d TMP3, r0, LJ_TSTR
++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
++ | settp STR:RC, TMP3 // Tagged key to look for.
++ | fld.d FTMP0, 0(RA) // Value to store, held in FTMP0 until written.
++ |1:
++ | ld.d TMP0, NODE:TMP2->key
++ | ld.d CARG2, NODE:TMP2->val
++ | ld.d NODE:TMP1, NODE:TMP2->next
++ | ld.bu TMP3, TAB:RB->marked
++ | bne TMP0, RC, >5
++ | ld.d TAB:TMP0, TAB:RB->metatable
++ | beq CARG2, TISNIL, >4 // Key found, but nil value?
++ |2:
++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(table)
++ | fst.d FTMP0, NODE:TMP2->val
++ | bnez TMP3, >7
++ |3:
++ | ins_next
++ |
++ |4: // Check for __newindex if previous value is nil.
++ | beqz TAB:TMP0, <2 // No metatable: done.
++ | ld.bu TMP0, TAB:TMP0->nomm
++ | andi TMP0, TMP0, 1<<MM_newindex
++ | bnez TMP0, <2 // 'no __newindex' flag set: done.
++ | b ->vmeta_tsets
++ |
++ |5: // Follow hash chain.
++ | or NODE:TMP2, NODE:TMP1, r0
++ | bnez NODE:TMP1, <1
++ | // End of hash chain: key not found, add a new one.
++ |
++ | // But check for __newindex first.
++ | ld.d TAB:TMP2, TAB:RB->metatable
++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv)
++ | beqz TAB:TMP2, >6 // No metatable: continue.
++ | ld.bu TMP0, TAB:TMP2->nomm
++ | andi TMP0, TMP0, 1<<MM_newindex
++ | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
++ |6:
++ | st.d RC, 0(CARG3) // Store the tagged key in g->tmptv.
++ | st.d BASE, L->base
++ | or CARG2, TAB:RB, r0
++ | st.d PC, SAVE_PC(sp)
++ | or CARG1, L, r0
++ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
++ | // Returns TValue *.
++ | ld.d BASE, L->base
++ | fst.d FTMP0, 0(CRET1)
++ | b <3 // No 2nd write barrier needed.
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, TMP3, TMP0, <3
++ break;
++ case BC_TSETB:
++ | // RA = src*8, RB = table*8, RC = index*8
++ | decode_RB RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add.d CARG2, BASE, RB
++ | add.d RA, BASE, RA
++ | ld.d TAB:RB, 0(CARG2)
++ | srli.w TMP0, RC, 3
++ | checktab RB, ->vmeta_tsetb
++ | ld.w TMP1, TAB:RB->asize
++ | ld.d TMP2, TAB:RB->array
++ | sltu TMP1, TMP0, TMP1
++ | add.d RC, TMP2, RC
++ | beqz TMP1, ->vmeta_tsetb
++ | ld.d TMP1, 0(RC)
++ | ld.bu TMP3, TAB:RB->marked
++ | beq TMP1, TISNIL, >5
++ |1:
++ | ld.d CRET1, 0(RA)
++ | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table)
++ | st.d CRET1, 0(RC)
++ | bnez TMP1, >7
++ |2:
++ | ins_next
++ |
++ |5: // Check for __newindex if previous value is nil.
++ | ld.d TAB:TMP2, TAB:RB->metatable
++ | beqz TAB:TMP2, <1 // No metatable: done.
++ | ld.bu TMP1, TAB:TMP2->nomm
++ | andi TMP1, TMP1, 1<<MM_newindex
++ | bnez TMP1, <1 // 'no __newindex' flag set: done.
++ | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, TMP3, TMP0, <2
++ break;
++ case BC_TSETR:
++ | // RA = dst*8, RB = table*8, RC = key*8
++ | decode_RB RB, INS
++ | decode_RDtoRC8 RC, RD
++ | add.d CARG1, BASE, RB
++ | add.d CARG3, BASE, RC
++ | ld.d TAB:CARG2, 0(CARG1)
++ | ld.w CARG3, 0(CARG3)
++ | cleartp TAB:CARG2
++ | ld.bu TMP3, TAB:CARG2->marked
++ | ld.w TMP0, TAB:CARG2->asize
++ | ld.d TMP1, TAB:CARG2->array
++ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table)
++ | add.d RA, BASE, RA
++ | bnez TMP2, >7
++ |2:
++ | sltu TMP0, CARG3, TMP0
++ | slli.w TMP2, CARG3, 3
++ | add.d CRET1, TMP1, TMP2
++ | beqz TMP0, ->vmeta_tsetr // In array part?
++ |->BC_TSETR_Z:
++ | ld.d TMP1, 0(RA)
++ | ins_next1
++ | st.d TMP1, 0(CRET1)
++ | ins_next2
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:CARG2, TMP3, CRET1, <2
++ break;
++
++ case BC_TSETM:
++ | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
++ | add.d RA, BASE, RA
++ |1:
++ | add.d TMP3, KBASE, RD
++ | ld.d TAB:CARG2, -8(RA) // Guaranteed to be a table.
++ | addi.w TMP0, MULTRES, -8
++ | ld.w TMP3, 0(TMP3) // Integer constant is in lo-word.
++ | srli.w CARG3, TMP0, 3
++ | beqz TMP0, >4 // Nothing to copy?
++ | cleartp TAB:CARG2
++ | add.w CARG3, CARG3, TMP3
++ | ld.w TMP2, TAB:CARG2->asize
++ | slli.w TMP1, TMP3, 3
++ | ld.bu TMP3, TAB:CARG2->marked
++ | ld.d CARG1, TAB:CARG2->array
++ | sltu TMP4, TMP2, CARG3
++ | add.d TMP2, RA, TMP0
++ | bnez TMP4, >5
++ | add.d TMP1, TMP1, CARG1
++ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
++ |3: // Copy result slots to table.
++ | ld.d CRET1, 0(RA)
++ | addi.d RA, RA, 8
++ | sltu TMP4, RA, TMP2
++ | st.d CRET1, 0(TMP1)
++ | addi.d TMP1, TMP1, 8
++ | bnez TMP4, <3
++ | bnez TMP0, >7
++ |4:
++ | ins_next
++ |
++ |5: // Need to resize array part.
++ | st.d BASE, L->base
++ | st.d PC, SAVE_PC(sp)
++ | or BASE, RD, r0
++ | or CARG1, L, r0
++ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
++ | // Must not reallocate the stack.
++ | or RD, BASE, r0
++ | ld.d BASE, L->base // Reload BASE for lack of a saved register.
++ | b <1
++ |
++ |7: // Possible table write barrier for any value. Skip valiswhite check.
++ | barrierback TAB:CARG2, TMP3, TMP0, <4
++ break;
++
++ /* -- Calls and vararg handling ----------------------------------------- */
++
++ case BC_CALLM:
++ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
++ | decode_RDtoRC8 NARGS8:RC, RD
++ | add.w NARGS8:RC, NARGS8:RC, MULTRES
++ | b ->BC_CALL_Z
++ break;
++ case BC_CALL:
++ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
++ | decode_RDtoRC8 NARGS8:RC, RD
++ |->BC_CALL_Z:
++ | or TMP2, BASE, r0
++ | add.d BASE, BASE, RA
++ | ld.d LFUNC:RB, 0(BASE)
++ | addi.d BASE, BASE, 16
++ | addi.w NARGS8:RC, NARGS8:RC, -8
++ | checkfunc RB, ->vmeta_call
++ | ins_call
++ break;
++
++ case BC_CALLMT:
++ | // RA = base*8, (RB = 0,) RC = extra_nargs*8
++ | add.w NARGS8:RD, NARGS8:RD, MULTRES
++ | b ->BC_CALLT_Z1
++ break;
++ case BC_CALLT:
++ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
++ |->BC_CALLT_Z1:
++ | add.d RA, BASE, RA
++ | ld.d LFUNC:RB, 0(RA)
++ | or NARGS8:RC, RD, r0
++ | ld.d TMP1, FRAME_PC(BASE)
++ | addi.d RA, RA, 16
++ | addi.w NARGS8:RC, NARGS8:RC, -8
++ | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt
++ |->BC_CALLT_Z:
++ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'.
++ | ld.bu TMP3, LFUNC:CARG3->ffid
++ | xori TMP2, TMP1, FRAME_VARG
++ | bnez TMP0, >7
++ |1:
++ | st.d LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
++ | sltui CARG4, TMP3, 2 // (> FF_C) Calling a fast function?
++ | or TMP2, BASE, r0
++ | or RB, CARG3, r0
++ | or TMP3, NARGS8:RC, r0
++ | beqz NARGS8:RC, >3
++ |2:
++ | ld.d CRET1, 0(RA)
++ | addi.d RA, RA, 8
++ | addi.w TMP3, TMP3, -8
++ | st.d CRET1, 0(TMP2)
++ | addi.d TMP2, TMP2, 8
++ | bnez TMP3, <2
++ |3:
++ | or TMP0, TMP0, CARG4
++ | beqz TMP0, >5
++ |4:
++ | ins_callt
++ |
++ |5: // Tailcall to a fast function with a Lua frame below.
++ | ld.w INS, -4(TMP1)
++ | decode_RA RA, INS
++ | sub.d TMP1, BASE, RA
++ | ld.d TMP1, -32(TMP1)
++ | cleartp LFUNC:TMP1
++ | ld.d TMP1, LFUNC:TMP1->pc
++ | ld.d KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
++ | b <4
++ |
++ |7: // Tailcall from a vararg function.
++ | andi CARG4, TMP2, FRAME_TYPEP
++ | sub.d TMP2, BASE, TMP2 // Relocate BASE down.
++ | bnez CARG4, <1 // Vararg frame below?
++ | or BASE, TMP2, r0
++ | ld.d TMP1, FRAME_PC(TMP2)
++ | andi TMP0, TMP1, FRAME_TYPE
++ | b <1
++ break;
++
++ case BC_ITERC:
++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
++ | or TMP2, BASE, r0 // Save old BASE for vmeta_call.
++ | add.d BASE, BASE, RA
++ | ld.d RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1.
++ | ld.d CARG1, -16(BASE)
++ | ld.d CARG2, -8(BASE)
++ | addi.d NARGS8:RC, r0, 16 // Iterators get 2 arguments.
++ | st.d RB, 0(BASE) // Copy callable.
++ | st.d CARG1, 16(BASE) // Copy state.
++ | st.d CARG2, 24(BASE) // Copy control var.
++ | addi.d BASE, BASE, 16
++ | checkfunc RB, ->vmeta_call
++ | ins_call
++ break;
++
++ case BC_ITERN:
++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
++ |.if JIT
++ | hotloop
++ |.endif
++ |->vm_IITERN:
++ | add.d RA, BASE, RA
++ | ld.d TAB:RB, -16(RA)
++ | ld.w RC, -8(RA) // Get index from control var.
++ | cleartp TAB:RB
++ | addi.d PC, PC, 4
++ | ld.w TMP0, TAB:RB->asize
++ | ld.d TMP1, TAB:RB->array
++ | slli.d CARG3, TISNUM, 47
++ |1: // Traverse array part.
++ | sltu TMP2, RC, TMP0
++ | slli.w TMP3, RC, 3
++ | beqz TMP2, >5 // Index points after array part?
++ | add.d TMP3, TMP1, TMP3
++ | ld.d CARG1, 0(TMP3)
++ | ld.hu RD, -4+OFS_RD(PC) // ITERL RD
++ | or TMP2, RC, CARG3
++ | addi.w RC, RC, 1
++ | beq CARG1, TISNIL, <1 // Skip holes in array part.
++ | st.d TMP2, 0(RA)
++ | st.d CARG1, 8(RA)
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ | decode_BC4b RD
++ | add.d RD, RD, TMP3
++ | st.w RC, -8(RA) // Update control var.
++ | add.d PC, PC, RD
++ |3:
++ | ins_next
++ |
++ |5: // Traverse hash part.
++ | ld.w TMP1, TAB:RB->hmask
++ | sub.w RC, RC, TMP0
++ | ld.d TMP2, TAB:RB->node
++ |6:
++ | sltu CARG1, TMP1, RC // End of iteration? Branch to ITERL+1.
++ | slli.w TMP3, RC, 5
++ | bnez CARG1, <3
++ | slli.w RB, RC, 3
++ | sub.w TMP3, TMP3, RB
++ | add.d NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8)
++ | ld.d CARG1, 0(NODE:TMP3)
++ | ld.hu RD, -4+OFS_RD(PC) // ITERL RD
++ | addi.w RC, RC, 1
++ | beq CARG1, TISNIL, <6 // Skip holes in hash part.
++ | ld.d CARG2, NODE:TMP3->key
++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
++ | st.d CARG1, 8(RA)
++ | add.w RC, RC, TMP0
++ | decode_BC4b RD
++ | add.w RD, RD, TMP3
++ | st.d CARG2, 0(RA)
++ | add.d PC, PC, RD
++ | st.w RC, -8(RA) // Update control var.
++ | b <3
++ break;
++
++ case BC_ISNEXT:
++ | // RA = base*8, RD = target (points to ITERN)
++ | add.d RA, BASE, RA
++ | srli.w TMP0, RD, 1
++ | ld.d CFUNC:CARG1, -24(RA)
++ | add.d TMP0, PC, TMP0
++ | ld.d CARG2, -16(RA)
++ | ld.d CARG3, -8(RA)
++ | addu16i.d TMP2, r0, -0x2 // -BCBIAS_J*4
++ | checkfunc CFUNC:CARG1, >5
++ | gettp CARG2, CARG2
++ | addi.d CARG2, CARG2, -LJ_TTAB
++ | ld.bu TMP1, CFUNC:CARG1->ffid
++ | addi.d CARG3, CARG3, -LJ_TNIL
++ | or TMP3, CARG2, CARG3
++ | addi.d TMP1, TMP1, -FF_next_N
++ | or TMP3, TMP3, TMP1
++ | addu16i.d TMP1, r0, 0xfffe // LJ_KEYINDEX >> 16
++ | bnez TMP3, >5
++ | add.d PC, TMP0, TMP2
++ | slli.d TMP1, TMP1, 16
++ | addu16i.d TMP1, TMP1, 0x7fff // LJ_KEYINDEX & 0xffff
++ | slli.d TMP1, TMP1, 16
++ | st.d TMP1, -8(RA)
++ |1:
++ | ins_next
++ |5: // Despecialize bytecode if any of the checks fail.
++ | addi.d TMP3, r0, BC_JMP
++ | addi.d TMP1, r0, BC_ITERC
++ | st.b TMP3, -4+OFS_OP(PC)
++ | add.d PC, TMP0, TMP2
++ |.if JIT
++ | ld.b TMP0, OFS_OP(PC)
++ | addi.d TMP3, r0, BC_ITERN
++ | ld.hu TMP2, OFS_RD(PC)
++ | bne TMP0, TMP3, >6
++ |.endif
++ | st.b TMP1, OFS_OP(PC)
++ | b <1
++ |.if JIT
++ |6: // Unpatch JLOOP.
++ | .LDXD TMP0, DISPATCH, DISPATCH_J(trace)
++ | slli.w TMP2, TMP2, 3
++ | add.d TMP0, TMP0, TMP2
++ | ld.d TRACE:TMP2, 0(TMP0)
++ | ld.w TMP0, TRACE:TMP2->startins
++ | addi.d TMP3, r0, -256
++ | and TMP0, TMP0, TMP3
++ | or TMP0, TMP0, TMP1
++ | st.w TMP0, 0(PC)
++ | b <1
++ |.endif
++ break;
++
++ case BC_VARG:
++ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
++ | ld.d TMP0, FRAME_PC(BASE)
++ | decode_RDtoRC8 RC, RD
++ | decode_RB RB, INS
++ | add.d RC, BASE, RC
++ | add.d RA, BASE, RA
++ | addi.d RC, RC, FRAME_VARG
++ | add.d TMP2, RA, RB
++ | addi.d TMP3, BASE, -16 // TMP3 = vtop
++ | sub.d RC, RC, TMP0 // RC = vbase
++ | // Note: RC may now be even _above_ BASE if nargs was < numparams.
++ | sub.d TMP1, TMP3, RC
++ | beqz RB, >5 // Copy all varargs?
++ | addi.d TMP2, TMP2, -16
++ |1: // Copy vararg slots to destination slots.
++ | ld.d CARG1, 0(RC)
++ | sltu TMP0, RC, TMP3
++ | addi.d RC, RC, 8
++ | maskeqz CARG1, CARG1, TMP0
++ | masknez TMP0, TISNIL, TMP0
++ | or CARG1, CARG1, TMP0
++ | st.d CARG1, 0(RA)
++ | sltu TMP0, RA, TMP2
++ | addi.d RA, RA, 8
++ | bnez TMP0, <1
++ |3:
++ | ins_next
++ |
++ |5: // Copy all varargs.
++ | ld.d TMP0, L->maxstack
++ | addi.d MULTRES, r0, 8 // MULTRES = (0+1)*8
++ | bge r0, TMP1, <3 // No vararg slots?
++ | add.d TMP2, RA, TMP1
++ | sltu TMP2, TMP0, TMP2
++ | addi.d MULTRES, TMP1, 8
++ | bnez TMP2, >7
++ |6:
++ | ld.d CRET1, 0(RC)
++ | addi.d RC, RC, 8
++ | st.d CRET1, 0(RA)
++ | sltu TMP0, RC, TMP3
++ | addi.d RA, RA, 8
++ | bnez TMP0, <6 // More vararg slots?
++ | b <3
++ |
++ |7: // Grow stack for varargs.
++ | st.d RA, L->top
++ | sub.d RA, RA, BASE
++ | st.d BASE, L->base
++ | sub.d BASE, RC, BASE // Need delta, because BASE may change.
++ | st.d PC, SAVE_PC(sp)
++ | srli.w CARG2, TMP1, 3
++ | or CARG1, L, r0
++ | bl extern lj_state_growstack // (lua_State *L, int n)
++ | or RC, BASE, r0
++ | ld.d BASE, L->base
++ | add.d RA, BASE, RA
++ | add.d RC, BASE, RC
++ | addi.d TMP3, BASE, -16
++ | b <6
++ break;
++
++ /* -- Returns ----------------------------------------------------------- */
++
++ case BC_RETM:
++ | // RA = results*8, RD = extra_nresults*8
++ | add.w RD, RD, MULTRES
++ | b ->BC_RET_Z1
++ break;
++
++ case BC_RET:
++ | // RA = results*8, RD = (nresults+1)*8
++ |->BC_RET_Z1:
++ | ld.d PC, FRAME_PC(BASE)
++ | add.d RA, BASE, RA
++ | or MULTRES, RD, r0
++ |1:
++ | andi TMP0, PC, FRAME_TYPE
++ | xori TMP1, PC, FRAME_VARG
++ | bnez TMP0, ->BC_RETV_Z
++ |
++ |->BC_RET_Z:
++ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
++ | ld.w INS, -4(PC)
++ | addi.d TMP2, BASE, -16
++ | addi.d RC, RD, -8
++ | decode_RA TMP0, INS
++ | decode_RB RB, INS
++ | add.d TMP3, TMP2, RB
++ | sub.d BASE, TMP2, TMP0
++ | beqz RC, >3
++ |2:
++ | ld.d CRET1, 0(RA)
++ | addi.d RA, RA, 8
++ | addi.d RC, RC, -8
++ | st.d CRET1, 0(TMP2)
++ | addi.d TMP2, TMP2, 8
++ | bnez RC, <2
++ |3:
++ | addi.d TMP3, TMP3, -8
++ |5:
++ | sltu TMP0, TMP2, TMP3
++ | ld.d LFUNC:TMP1, FRAME_FUNC(BASE)
++ | bnez TMP0, >6
++ | cleartp LFUNC:TMP1
++ | ld.d TMP1, LFUNC:TMP1->pc
++ | ld.d KBASE, PC2PROTO(k)(TMP1)
++ | ins_next
++ |
++ |6: // Fill up results with nil.
++ | st.d TISNIL, 0(TMP2)
++ | addi.d TMP2, TMP2, 8
++ | b <5
++ |
++ |->BC_RETV_Z: // Non-standard return case.
++ | andi TMP2, TMP1, FRAME_TYPEP
++ | bnez TMP2, ->vm_return
++ | // Return from vararg function: relocate BASE down.
++ | sub.d BASE, BASE, TMP1
++ | ld.d PC, FRAME_PC(BASE)
++ | b <1
++ break;
++
++ case BC_RET0: case BC_RET1:
++ | // RA = results*8, RD = (nresults+1)*8
++ | ld.d PC, FRAME_PC(BASE)
++ | add.d RA, BASE, RA
++ | or MULTRES, RD, r0
++ | andi TMP0, PC, FRAME_TYPE
++ | xori TMP1, PC, FRAME_VARG
++ | bnez TMP0, ->BC_RETV_Z
++ | ld.w INS, -4(PC)
++ | addi.d TMP2, BASE, -16
++ if (op == BC_RET1) {
++ | ld.d CRET1, 0(RA)
++ }
++ | decode_RB RB, INS
++ | decode_RA RA, INS
++ | sub.d BASE, TMP2, RA
++ if (op == BC_RET1) {
++ | st.d CRET1, 0(TMP2)
++ }
++ |5:
++ | sltu TMP0, RD, RB
++ | ld.d TMP1, FRAME_FUNC(BASE)
++ | bnez TMP0, >6
++ | cleartp LFUNC:TMP1
++ | ld.d TMP1, LFUNC:TMP1->pc
++ | ins_next1
++ | ld.d KBASE, PC2PROTO(k)(TMP1)
++ | ins_next2
++ |
++ |6: // Fill up results with nil.
++ | addi.d TMP2, TMP2, 8
++ | addi.d RD, RD, 8
++ if (op == BC_RET1) {
++ | st.d TISNIL, 0(TMP2)
++ } else {
++ | st.d TISNIL, -8(TMP2)
++ }
++ | b <5
++ break;
++
++ /* -- Loops and branches ------------------------------------------------ */
++
++ case BC_FORL:
++ |.if JIT
++ | hotloop
++ |.endif
++ | // Fall through. Assumes BC_IFORL follows.
++ break;
++
++ case BC_JFORI:
++ case BC_JFORL:
++#if !LJ_HASJIT
++ break;
++#endif
++ case BC_FORI:
++ case BC_IFORL:
++ | // RA = base*8, RD = target (after end of loop or start of loop)
++ vk = (op == BC_IFORL || op == BC_JFORL);
++ | add.d RA, BASE, RA
++ | ld.d CARG1, FORL_IDX*8(RA) // CARG1 = IDX
++ | ld.d CARG2, FORL_STEP*8(RA) // CARG2 = STEP
++ | ld.d CARG3, FORL_STOP*8(RA) // CARG3 = STOP
++ | gettp CARG4, CARG1
++ | gettp CARG5, CARG2
++ | gettp CARG6, CARG3
++ if (op != BC_JFORL) {
++ | srli.w RD, RD, 1
++ | addu16i.d TMP2, r0, -0x2 // -BCBIAS_J<<2
++ | add.d TMP2, RD, TMP2
++ }
++ | bne CARG4, TISNUM, >3
++ | slli.w CARG4, CARG1, 0 // start
++ | slli.w CARG3, CARG3, 0 // stop
++ if (!vk) { // init
++ | bne CARG6, TISNUM, ->vmeta_for
++ | bne CARG5, TISNUM, ->vmeta_for
++ | bstrpick.d TMP0, CARG2, 31, 31 // sign
++ | slt CARG2, CARG3, CARG4
++ | slt TMP1, CARG4, CARG3
++ | maskeqz TMP1, TMP1, TMP0
++ | masknez CARG2, CARG2, TMP0
++ | or CARG2, CARG2, TMP1 // CARG2=0: +,start <= stop or -,start >= stop
++ } else {
++ | slli.w CARG5, CARG2, 0 // step
++ | add.w CARG1, CARG4, CARG5 // start + step
++ | xor TMP3, CARG1, CARG4 // y^a
++ | xor TMP1, CARG1, CARG5 // y^b
++ | and TMP3, TMP3, TMP1
++ | slt TMP1, CARG1, CARG3 // start+step < stop ?
++ | slt CARG3, CARG3, CARG1 // stop < start+step ?
++ | slt TMP0, CARG5, r0 // step < 0 ?
++ | slt TMP3, TMP3, r0 // ((y^a) & (y^b)) < 0: overflow.
++ | maskeqz TMP1, TMP1, TMP0
++ | masknez CARG3, CARG3, TMP0
++ | or CARG3, CARG3, TMP1
++ | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue
++ | bstrpick.d CARG1, CARG1, 31, 0
++ | settp CARG1, TISNUM
++ | st.d CARG1, FORL_IDX*8(RA)
++ }
++ |1:
++ if (op == BC_FORI) {
++ | maskeqz TMP2, TMP2, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS
++ | add.d PC, PC, TMP2
++ } else if (op == BC_JFORI) {
++ | add.d PC, PC, TMP2
++ | ld.hu RD, -4+OFS_RD(PC)
++ } else if (op == BC_IFORL) {
++ | masknez TMP2, TMP2, CARG2 // CARG2!=0: next INS; CARG2==0: jump back
++ | add.d PC, PC, TMP2
++ }
++ | ins_next1
++ | st.d CARG1, FORL_EXT*8(RA)
++ |2:
++ if (op == BC_JFORI) {
++ | decode_BC8b RD
++ | beqz CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop
++ } else if (op == BC_JFORL) {
++ | beqz CARG2, =>BC_JLOOP
++ }
++ | ins_next2
++ |
++ |3: // FP loop.
++ | fld.d FTMP0, FORL_IDX*8(RA) // start
++ | fld.d FTMP1, FORL_STOP*8(RA) // stop
++ | ld.d TMP0, FORL_STEP*8(RA) // step
++ | slt TMP0, TMP0, r0 // step < 0 ?
++ | movgr2fr.d FTMP2, TMP0
++ if (!vk) {
++ | sltui TMP3, CARG4, LJ_TISNUM // start is number ?
++ | sltui TMP0, CARG5, LJ_TISNUM // step is number ?
++ | sltui TMP1, CARG6, LJ_TISNUM // stop is number ?
++ | and TMP3, TMP3, TMP1
++ | and TMP0, TMP0, TMP3
++ | beqz TMP0, ->vmeta_for // if start or step or stop isn't number
++ | fcmp.clt.d FCC0, FTMP0, FTMP1 // start < stop ?
++ | fcmp.clt.d FCC1, FTMP1, FTMP0 // stop < start ?
++ | movcf2fr FTMP3, FCC0
++ | movcf2fr FTMP4, FCC1
++ | movfr2cf FCC0, FTMP2
++ | fsel FTMP2, FTMP4, FTMP3, FCC0
++ | movfr2gr.d CARG2, FTMP2 // CARG2=0:+,start<stop or -,start>stop
++ | b <1
++ } else {
++ | fld.d FTMP3, FORL_STEP*8(RA)
++ | fadd.d FTMP0, FTMP0, FTMP3 // start + step
++ | fcmp.clt.d FCC0, FTMP0, FTMP1 // start + step < stop ?
++ | fcmp.clt.d FCC1, FTMP1, FTMP0
++ | movcf2fr FTMP3, FCC0
++ | movcf2fr FTMP4, FCC1
++ | movfr2cf FCC0, FTMP2
++ | fsel FTMP2, FTMP4, FTMP3, FCC0
++ | movfr2gr.d CARG2, FTMP2
++ if (op == BC_IFORL) {
++ | masknez TMP2, TMP2, CARG2
++ | add.d PC, PC, TMP2
++ }
++ | fst.d FTMP0, FORL_IDX*8(RA)
++ | ins_next1
++ | fst.d FTMP0, FORL_EXT*8(RA)
++ | b <2
++ }
++ break;
++
++ case BC_ITERL:
++ |.if JIT
++ | hotloop
++ |.endif
++ | // Fall through. Assumes BC_IITERL follows.
++ break;
++
++ case BC_JITERL:
++#if !LJ_HASJIT
++ break;
++#endif
++ case BC_IITERL:
++ | // RA = base*8, RD = target
++ | add.d RA, BASE, RA
++ | ld.d TMP1, 0(RA)
++ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil.
++ if (op == BC_JITERL) {
++ | st.d TMP1,-8(RA)
++ | b =>BC_JLOOP
++ } else {
++ | branch_RD // Otherwise save control var + branch.
++ | st.d TMP1, -8(RA)
++ }
++ |1:
++ | ins_next
++ break;
++
++ case BC_LOOP:
++ | // RA = base*8, RD = target (loop extent)
++ | // Note: RA/RD is only used by trace recorder to determine scope/extent
++ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
++ |.if JIT
++ | hotloop
++ |.endif
++ | // Fall through. Assumes BC_ILOOP follows.
++ break;
++
++ case BC_ILOOP:
++ | // RA = base*8, RD = target (loop extent)
++ | ins_next
++ break;
++
++ case BC_JLOOP:
++ |.if JIT
++ | // RA = base*8 (ignored), RD = traceno*8
++ | .LDXD TMP0, DISPATCH, DISPATCH_J(trace)
++ | add.d TMP0, TMP0, RD
++ | // Traces on LOONGARCH don't store the trace number, so use 0.
++ | .STXD r0, DISPATCH, DISPATCH_GL(vmstate)
++ | ld.d TRACE:TMP1, 0(TMP0)
++ | .STXD BASE, DISPATCH, DISPATCH_GL(jit_base) // store Current JIT code L->base
++ | ld.d TMP1, TRACE:TMP1->mcode
++ | .ADD16I JGL, DISPATCH, GG_DISP2G+32768
++ | .STXD L, DISPATCH, DISPATCH_GL(tmpbuf.L)
++ | jirl r0, TMP1, 0
++ |.endif
++ break;
++
++ case BC_JMP:
++ | // RA = base*8 (only used by trace recorder), RD = target
++ | branch_RD // PC + (jump - 0x8000)<<2
++ | ins_next
++ break;
++
++ /* -- Function headers -------------------------------------------------- */
++
++ case BC_FUNCF:
++ |.if JIT
++ | hotcall
++ |.endif
++ case BC_FUNCV: /* NYI: compiled vararg functions. */
++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
++ break;
++
++ case BC_JFUNCF:
++#if !LJ_HASJIT
++ break;
++#endif
++ case BC_IFUNCF:
++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
++ | ld.d TMP2, L->maxstack
++ | ld.bu TMP1, -4+PC2PROTO(numparams)(PC)
++ | ld.d KBASE, -4+PC2PROTO(k)(PC)
++ | sltu TMP0, TMP2, RA
++ | slli.w TMP1, TMP1, 3 // numparams*8
++ | bnez TMP0, ->vm_growstack_l
++ |2:
++ | sltu TMP0, NARGS8:RC, TMP1 // Check for missing parameters.
++ | bnez TMP0, >3
++ if (op == BC_JFUNCF) {
++ | decode_RD RD, INS
++ | b =>BC_JLOOP
++ } else {
++ | ins_next
++ }
++ |
++ |3: // Clear missing parameters.
++ | add.d TMP0, BASE, NARGS8:RC
++ | st.d TISNIL, 0(TMP0)
++ | addi.w NARGS8:RC, NARGS8:RC, 8
++ | b <2
++ break;
++
++ case BC_JFUNCV:
++#if !LJ_HASJIT
++ break;
++#endif
++ | NYI // NYI: compiled vararg functions
++ break; /* NYI: compiled vararg functions. */
++
++ case BC_IFUNCV:
++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
++ | addi.w TMP0, r0, LJ_TFUNC
++ | add.d TMP1, BASE, RC
++ | ld.d TMP2, L->maxstack
++ | settp LFUNC:RB, TMP0
++ | add.d TMP0, RA, RC
++ | st.d LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
++ | addi.d TMP3, RC, 16+FRAME_VARG
++ | sltu TMP0, TMP0, TMP2
++ | ld.d KBASE, -4+PC2PROTO(k)(PC)
++ | st.d TMP3, 8(TMP1) // Store delta + FRAME_VARG.
++ | beqz TMP0, ->vm_growstack_l
++ | ld.bu TMP2, -4+PC2PROTO(numparams)(PC)
++ | or RA, BASE, r0
++ | or RC, TMP1, r0
++ | ins_next1
++ | addi.d BASE, TMP1, 16
++ | beqz TMP2, >2
++ |1:
++ | ld.d TMP0, 0(RA)
++ | sltu CARG2, RA, RC // Less args than parameters?
++ | or CARG1, TMP0, r0
++ | addi.d RA, RA, 8
++ | addi.d TMP1, TMP1, 8
++ | addi.w TMP2, TMP2, -1
++ | beqz CARG2, >3
++ | masknez TMP3, CARG1, CARG2 // Clear old fixarg slot (help the GC).
++ | maskeqz CARG1, TISNIL, CARG2
++ | or CARG1, CARG1, TMP3
++ | st.d CARG1, -8(RA)
++ | st.d TMP0, 8(TMP1)
++ | bnez TMP2, <1
++ |2:
++ | ins_next2
++ |3:
++ | maskeqz TMP0, TMP0, CARG2 // Clear missing fixargs.
++ | masknez TMP3, TISNIL, CARG2
++ | or TMP0, TMP0, TMP3
++ | st.d TMP0, 8(TMP1)
++ | bnez TMP2, <1
++ | b <2
++ break;
++
++ case BC_FUNCC:
++ case BC_FUNCCW:
++ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
++ if (op == BC_FUNCC) {
++ | ld.d CARG4, CFUNC:RB->f
++ } else {
++ | .LDXD CARG4, DISPATCH, DISPATCH_GL(wrapf)
++ }
++ | add.d TMP1, RA, NARGS8:RC
++ | ld.d TMP2, L->maxstack
++ | add.d RC, BASE, NARGS8:RC
++ | st.d BASE, L->base // base of currently excuting function
++ | st.d RC, L->top
++ | sltu TMP3, TMP2, TMP1
++ | li_vmstate C // addi.w TMP0, r0, ~LJ_VMST_C
++ if (op == BC_FUNCCW) {
++ | ld.d CARG2, CFUNC:RB->f
++ }
++ | or CARG1, L, r0
++ | bnez TMP3, ->vm_growstack_c // Need to grow stack.
++ | st_vmstate // .STXW TMP0, DISPATCH, DISPATCH_GL(vmstate)
++ | jirl r1, CARG4, 0 // (lua_State *L [, lua_CFunction f])
++ | // Returns nresults.
++ | ld.d BASE, L->base
++ | ld.d TMP1, L->top
++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L)
++ | slli.w RD, CRET1, 3
++ | li_vmstate INTERP
++ | ld.d PC, FRAME_PC(BASE) // Fetch PC of caller.
++ | sub.d RA, TMP1, RD // RA = L->top - nresults*8
++ | st_vmstate
++ | b ->vm_returnc
++ break;
++
++ /* ---------------------------------------------------------------------- */
++
++ default:
++ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
++ exit(2);
++ break;
++ }
++}
++
++static int build_backend(BuildCtx *ctx)
++{ /* Emit the backend code: first the subroutines, then one build_ins()
++  ** call for every opcode number in 0..BC__MAX-1. Returns BC__MAX.
++  */
++ int op; /* Opcode number currently being emitted. */
++
++ dasm_growpc(Dst, BC__MAX); /* Presumably reserves BC__MAX DynASM pc labels (=>label targets) -- confirm against the DynASM API. */
++
++ build_subroutines(ctx); /* Called before the switch to the .code_op section below. */
++
++ |.code_op
++ for (op = 0; op < BC__MAX; op++)
++ build_ins(ctx, (BCOp)op, op); /* Same value passed twice: as the opcode and as an int (presumably its label number). */
++
++ return BC__MAX; /* Always the full opcode count, independent of ctx. */
++}
++
++/*
++** Emit pseudo frame-info (DWARF call frame information) for all assembler
++** functions.
++**
++** Output is written to ctx->fp as assembler directives and only in
++** BUILD_elfasm mode; every other build mode emits nothing.
++**
++** .debug_frame (always):
++**   CIE0, FDE0 covering fcofs bytes starting at .Lbegin, and with LJ_HASFFI
++**   FDE1 covering lj_vm_ffi_call for (codesz - fcofs) bytes.
++** .eh_frame (unless LJ_NO_UNWIND):
++**   CIE1 with lj_err_unwind_dwarf as personality routine, FDE2 for the
++**   range starting at .Lbegin, and with LJ_HASFFI CIE2/FDE3 for
++**   lj_vm_ffi_call (augmentation "zR", i.e. no personality routine).
++**
++** Register save offsets are given in units of the CIE data alignment
++** factor (-4), hence the "2*n" notation for 8-byte slots.
++*/
++static void emit_asm_debug(BuildCtx *ctx)
++{
++ /* Byte offset of the vm_ffi_call label from the start of the code. */
++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
++ int i;
++ switch (ctx->mode) {
++ case BUILD_elfasm:
++ /* ELF section type for GNU as is spelled "@progbits". */
++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
++ fprintf(ctx->fp,
++ ".Lframe0:\n"
++ "\t.4byte .LECIE0-.LSCIE0\n"
++ ".LSCIE0:\n"
++ "\t.4byte 0xffffffff\n"
++ "\t.byte 0x1\n"
++ "\t.string \"\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -4\n"
++ "\t.byte 1\n" /* Return address is in ra. */
++ "\t.byte 0xc\n\t.uleb128 3\n\t.uleb128 0\n" /* def_cfa sp 0 */
++ "\t.align 3\n"
++ ".LECIE0:\n\n");
++ /* FDE0: 0xe = DW_CFA_def_cfa_offset CFRAME_SIZE, 0x80+reg = DW_CFA_offset. */
++ fprintf(ctx->fp,
++ ".LSFDE0:\n"
++ "\t.4byte .LEFDE0-.LASFDE0\n"
++ ".LASFDE0:\n"
++ "\t.4byte .Lframe0\n"
++ "\t.8byte .Lbegin\n"
++ "\t.8byte %d\n"
++ "\t.byte 0xe\n\t.uleb128 %d\n"
++ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra*/
++ "\t.byte 0x96\n\t.uleb128 2*6\n", /* offset fp */
++ fcofs, CFRAME_SIZE);
++ /* r31 gets slot 2*7, descending to r23 at slot 2*15. */
++ for (i = 31; i >= 23; i--) /* offset r31-r23 */
++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(31-i+7));
++ /* FP registers use DWARF numbers 32+i; f31 gets slot 2*16, f24 slot 2*23. */
++ for (i = 31; i >= 24; i--) /* offset f31-f24 */
++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(31-i+16));
++ fprintf(ctx->fp,
++ "\t.align 3\n"
++ ".LEFDE0:\n\n");
++#if LJ_HASFFI
++ /* NOTE(review): FDE0 uses .8byte for address and range, this FDE uses
++ ** .4byte for both -- looks inconsistent for a 64-bit target; confirm.
++ */
++ fprintf(ctx->fp,
++ ".LSFDE1:\n"
++ "\t.4byte .LEFDE1-.LASFDE1\n"
++ ".LASFDE1:\n"
++ "\t.4byte .Lframe0\n"
++ "\t.4byte lj_vm_ffi_call\n"
++ "\t.4byte %d\n"
++ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra*/
++ "\t.byte 0x96\n\t.uleb128 2*6\n" /* offset fp */
++ "\t.align 3\n"
++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
++#endif
++#if !LJ_NO_UNWIND
++ /* ELF section type for GNU as is spelled "@progbits". */
++ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
++ /* CIE1: 0x1b = DW_EH_PE_pcrel|DW_EH_PE_sdata4 pointer encoding. */
++ fprintf(ctx->fp,
++ ".Lframe1:\n"
++ "\t.4byte .LECIE1-.LSCIE1\n"
++ ".LSCIE1:\n"
++ "\t.4byte 0\n"
++ "\t.byte 0x1\n"
++ "\t.string \"zPR\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -4\n"
++ "\t.byte 1\n" /* Return address is in ra. */
++ "\t.uleb128 6\n" /* augmentation length */
++ "\t.byte 0x1b\n"
++ "\t.4byte lj_err_unwind_dwarf-.\n"
++ "\t.byte 0x1b\n"
++ "\t.byte 0xc\n\t.uleb128 3\n\t.uleb128 0\n" /* def_cfa sp 0 */
++ "\t.align 2\n"
++ ".LECIE1:\n\n");
++ /* FDE2: same register save layout as FDE0, with pc-relative addresses. */
++ fprintf(ctx->fp,
++ ".LSFDE2:\n"
++ "\t.4byte .LEFDE2-.LASFDE2\n"
++ ".LASFDE2:\n"
++ "\t.4byte .LASFDE2-.Lframe1\n"
++ "\t.4byte .Lbegin-.\n"
++ "\t.4byte %d\n"
++ "\t.uleb128 0\n" /* augmentation length */
++ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra*/
++ "\t.byte 0x96\n\t.uleb128 2*6\n", /* offset fp */
++ fcofs);
++ for (i = 31; i >= 23; i--) /* offset r23-r31 */
++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(31-i+7));
++ for (i = 31; i >= 24; i--) /* offset f24-f31 */
++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(31-i+16));
++ fprintf(ctx->fp,
++ "\t.align 2\n"
++ ".LEFDE2:\n\n");
++#if LJ_HASFFI
++ /* CIE2/FDE3: frame info for lj_vm_ffi_call, without personality routine. */
++ fprintf(ctx->fp,
++ ".Lframe2:\n"
++ "\t.4byte .LECIE2-.LSCIE2\n"
++ ".LSCIE2:\n"
++ "\t.4byte 0\n"
++ "\t.byte 0x1\n"
++ "\t.string \"zR\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -4\n"
++ "\t.byte 1\n" /* Return address is in ra. */
++ "\t.uleb128 1\n" /* augmentation length */
++ "\t.byte 0x1b\n"
++ "\t.byte 0xc\n\t.uleb128 3\n\t.uleb128 0\n" /* def_cfa sp 0 */
++ "\t.align 2\n"
++ ".LECIE2:\n\n");
++ fprintf(ctx->fp,
++ ".LSFDE3:\n"
++ "\t.4byte .LEFDE3-.LASFDE3\n"
++ ".LASFDE3:\n"
++ "\t.4byte .LASFDE3- .Lframe2\n"
++ "\t.4byte lj_vm_ffi_call-.\n"
++ "\t.4byte %d\n"
++ "\t.uleb128 0\n" /* augmentation length */
++ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra*/
++ "\t.byte 0x96\n\t.uleb128 2*6\n" /* offset fp */
++ "\t.align 2\n"
++ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
++#endif
++#endif
++#if !LJ_NO_UNWIND
++ /* NYI */
++#endif
++ break;
++ default:
++ break;
++ }
++}
++
diff -Nru luajit-2.1.0+openresty20240815/debian/patches/series luajit-2.1.0+openresty20240815/debian/patches/series
--- luajit-2.1.0+openresty20240815/debian/patches/series 2024-08-18 23:22:02.000000000 +0200
+++ luajit-2.1.0+openresty20240815/debian/patches/series 2024-11-02 10:07:38.000000000 +0100
@@ -1,2 +1,4 @@
0001-consider-Hurd-as-a-POSIX-system.patch
0002-Get-rid-of-LUAJIT_VERSION_SYM-that-changes-ABI-on-ev.patch
+0003_support_riscv64.patch
+0004_support_loong64.patch
More information about the pkg-lua-devel
mailing list