diff --git a/.patch/xbyak/0001-rvalue-optimize.patch b/.patch/xbyak/0001-rvalue-optimize.patch index baf76db1b1..97ca01b0d9 100644 --- a/.patch/xbyak/0001-rvalue-optimize.patch +++ b/.patch/xbyak/0001-rvalue-optimize.patch @@ -4408,3 +4408,778 @@ index 8375f06..bf935f5 100644 +void vpbroadcastq(Xmm x, Reg64 r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); } #endif #endif +diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h +index 176ee58..18c6aa7 100644 +--- a/xbyak/xbyak.h ++++ b/xbyak/xbyak.h +@@ -764,7 +764,7 @@ public: + bool operator!=(const Operand& rhs) const { return !operator==(rhs); } + const Address& getAddress() const; + Address getAddress(int immSize) const; +- const Reg& getReg() const; ++ Reg getReg() const; + }; + + inline void Operand::setBit(int bit) +@@ -845,10 +845,10 @@ public: + Reg operator|(const ApxFlagZU&) const { Reg r(*this); r.setZU(); return r; } + }; + +-inline const Reg& Operand::getReg() const ++inline Reg Operand::getReg() const + { + assert(!isMEM()); +- return static_cast(*this); ++ return static_cast(*this); + } + + struct Reg8 : public Reg { +@@ -1007,7 +1007,7 @@ public: + #endif + XBYAK_CONSTEXPR RegExp() : scale_(0), disp_(0), label_(0), rip_(false), asPtr_(false) { } + XBYAK_CONSTEXPR RegExp(size_t disp) : scale_(0), disp_(disp), label_(0), rip_(false), asPtr_(false) { } +- XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1) ++ XBYAK_CONSTEXPR RegExp(Reg r, int scale = 1) + : scale_(scale) + , disp_(0) + , label_(0) +@@ -1035,7 +1035,7 @@ public: + { + } + #ifdef XBYAK64 +- XBYAK_CONSTEXPR RegExp(const RegRip& /*rip*/) ++ XBYAK_CONSTEXPR RegExp(RegRip /*rip*/) + : scale_(0) + , disp_(0) + , label_(0) +@@ -1055,12 +1055,12 @@ public: + } + return exp; + } +- bool operator==(const RegExp& rhs) const ++ bool operator==(RegExp rhs) const + { + return base_ == rhs.base_ && index_ == rhs.index_ && disp_ == rhs.disp_ && scale_ == rhs.scale_; + } +- const Reg& getBase() const { return base_; } +- const Reg& getIndex() const { 
return index_; } ++ Reg getBase() const { return base_; } ++ Reg getIndex() const { return index_; } + const Label *getLabel() const { return label_; } + bool isOnlyDisp() const { return !base_.getBit() && !index_.getBit(); } // for mov eax + int getScale() const { return scale_; } +@@ -1073,9 +1073,9 @@ public: + if (base_.getBit() && base_.getBit() != index_.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + } + } +- friend RegExp operator+(const RegExp& a, const RegExp& b); +- friend RegExp operator+(const RegExp& e, unsigned long long disp); +- friend RegExp operator-(const RegExp& e, size_t disp); ++ friend RegExp operator+(RegExp a, RegExp b); ++ friend RegExp operator+(RegExp e, unsigned long long disp); ++ friend RegExp operator-(RegExp e, size_t disp); + private: + /* + [base_ + index_ * scale_ + disp_] +@@ -1090,7 +1090,7 @@ private: + bool asPtr_; // disp_ contains a pointer + }; + +-inline RegExp operator+(const RegExp& a, const RegExp& b) ++inline RegExp operator+(RegExp a, RegExp b) + { + if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) + if (a.label_ && b.label_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) +@@ -1116,33 +1116,33 @@ inline RegExp operator+(const RegExp& a, const RegExp& b) + ret.disp_ += b.disp_; + return ret; + } +-inline RegExp operator*(const Reg& r, int scale) ++inline RegExp operator*(Reg r, int scale) + { + return RegExp(r, scale); + } +-inline RegExp operator*(int scale, const Reg& r) ++inline RegExp operator*(int scale, Reg r) + { + return r * scale; + } + + // backward compatibility for eax+&x (pointer address) +-inline RegExp operator+(const RegExp& a, const void* b) { return a + RegExp(b); } ++inline RegExp operator+(RegExp a, const void* b) { return a + RegExp(b); } + + // since what size_t is typedef'd to depends on the implementation, use unsigned long long (assume u64) for the implementation. 
+-inline RegExp operator+(const RegExp& e, unsigned long long disp) ++inline RegExp operator+(RegExp e, unsigned long long disp) + { + RegExp ret = e; + ret.disp_ += static_cast(disp); + return ret; + } + // overload for integer literals (e.g. eax+0) to avoid ambiguity with the void* overload +-inline RegExp operator+(const RegExp& e, int disp) { return e + static_cast(disp); } +-inline RegExp operator+(const RegExp& e, long disp) { return e + static_cast(disp); } +-inline RegExp operator+(const RegExp& e, long long disp) { return e + static_cast(disp); } +-inline RegExp operator+(const RegExp& e, unsigned int disp) { return e + static_cast(disp); } +-inline RegExp operator+(const RegExp& e, unsigned long disp) { return e + static_cast(disp); } ++inline RegExp operator+(RegExp e, int disp) { return e + static_cast(disp); } ++inline RegExp operator+(RegExp e, long disp) { return e + static_cast(disp); } ++inline RegExp operator+(RegExp e, long long disp) { return e + static_cast(disp); } ++inline RegExp operator+(RegExp e, unsigned int disp) { return e + static_cast(disp); } ++inline RegExp operator+(RegExp e, unsigned long disp) { return e + static_cast(disp); } + +-inline RegExp operator-(const RegExp& e, size_t disp) ++inline RegExp operator-(RegExp e, size_t disp) + { + RegExp ret = e; + ret.disp_ -= disp; +@@ -1392,7 +1392,7 @@ public: + XBYAK_CONSTEXPR Address() + : Operand(0, MEM, 0), e_(), label_(NULL), mode_(inner::M_ModRM), immSize(0), + disp8N(0), permitVsib(false), broadcast_(false), optimize_(true) { } +- XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e) ++ XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, RegExp e) + : Operand(0, MEM, sizeBit), e_(e), label_(e.label_), mode_(), immSize(0), + disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true) + { +@@ -1467,13 +1467,11 @@ inline XBYAK_CONSTEXPR bool Operand::hasRex2() const + } + + class AddressFrame { +- void operator=(const AddressFrame&); +- 
AddressFrame(const AddressFrame&); + public: + const uint32_t bit_; + const bool broadcast_; + explicit XBYAK_CONSTEXPR AddressFrame(uint32_t bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { } +- Address operator[](const RegExp& e) const ++ Address operator[](RegExp e) const + { + return Address(bit_, broadcast_, e); + } +@@ -1861,7 +1859,7 @@ private: + // SSE instructions do not support XMM16 - XMM31 + return !(op.isXMM() && op.getIdx() >= 16); + } +- static inline uint8_t rexRXB(int bit, int bit3, const Reg& r, const Reg& b, const Reg& x = Reg()) ++ static inline uint8_t rexRXB(int bit, int bit3, Reg r, Reg b, Reg x = Reg()) + { + int v = bit3 ? 8 : 0; + if (r.hasIdxBit(bit)) v |= 4; +@@ -1869,7 +1867,7 @@ private: + if (b.hasIdxBit(bit)) v |= 1; + return uint8_t(v); + } +- void rex2(int bit3, int rex4bit, const Reg& r, const Reg& b, const Reg& x = Reg()) ++ void rex2(int bit3, int rex4bit, Reg r, Reg b, Reg x = Reg()) + { + db(0xD5); + db((rexRXB(4, bit3, r, b, x) << 4) | rex4bit); +@@ -1894,11 +1892,11 @@ private: + } + bool is0F = type & T_0F; + if (p2->isMEM()) { +- const Reg& r = *static_cast(p1); ++ Reg r = *static_cast(p1); + const Address& addr = p2->getAddress(); + const RegExp e = addr.getRegExp(); +- const Reg& base = e.getBase(); +- const Reg& idx = e.getIndex(); ++ Reg base = e.getBase(); ++ Reg idx = e.getIndex(); + if (BIT == 64 && addr.is32bit()) db(0x67); + rex = rexRXB(3, r.isREG(64), r, base, idx); + if (r.hasRex2() || addr.hasRex2()) { +@@ -1908,8 +1906,8 @@ private: + } + if (rex || r.isExt8bit()) rex |= 0x40; + } else { +- const Reg& r1 = static_cast(op1); +- const Reg& r2 = static_cast(op2); ++ Reg r1 = static_cast(op1); ++ Reg r2 = static_cast(op2); + // ModRM(reg, base); + rex = rexRXB(3, r1.isREG(64) || r2.isREG(64), r2, r1); + if (r1.hasRex2() || r2.hasRex2()) { +@@ -1980,7 +1978,7 @@ private: + if (type & T_MAP5) return 5; + return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 
3 : 0; + } +- void vex(const Reg& reg, const Reg& base, const Operand *v, uint64_t type, int code, bool x = false) ++ void vex(Reg reg, Reg base, const Operand *v, uint64_t type, int code, bool x = false) + { + int w = (type & T_W1) ? 1 : 0; + bool is256 = (type & T_L1) ? true : reg.isYMM(); +@@ -1998,12 +1996,12 @@ private: + } + db(code); + } +- void verifySAE(const Reg& r, uint64_t type) const ++ void verifySAE(Reg r, uint64_t type) const + { + if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return; + XBYAK_THROW(ERR_SAE_IS_INVALID) + } +- void verifyER(const Reg& r, uint64_t type) const ++ void verifyER(Reg r, uint64_t type) const + { + if ((type & T_ER_R) && r.isREG(32|64)) return; + if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return; +@@ -2016,7 +2014,7 @@ private: + if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0) + return v; + } +- int evex(const Reg& reg, const Reg& base, const Operand *v, uint64_t type, int code, const Reg *x = 0, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false) ++ int evex(Reg reg, Reg base, const Operand *v, uint64_t type, int code, const Reg *x = 0, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false) + { + if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0) + int w = (type & T_EW1) ? 
1 : 0; +@@ -2072,7 +2070,7 @@ private: + return disp8N; + } + // evex of Legacy +- void evexLeg(const Reg& r, const Reg& b, const Reg& x, const Reg& v, uint64_t type, int sc = NONE) ++ void evexLeg(Reg r, Reg b, Reg x, Reg v, uint64_t type, int sc = NONE) + { + int M = getMap(type); if (M == 0) M = 4; // legacy + int R3 = !r.isExtIdx(); +@@ -2105,7 +2103,7 @@ private: + } + void setSIB(const Address& addr, int reg) + { +- const RegExp& e = addr.getRegExp(); ++ RegExp e = addr.getRegExp(); + const Label *label = e.getLabel(); + int disp8N = addr.disp8N; + uint64_t disp64 = e.getDisp(); +@@ -2121,8 +2119,8 @@ private: + #endif + #endif + uint32_t disp = static_cast(disp64); +- const Reg& base = e.getBase(); +- const Reg& index = e.getIndex(); ++ Reg base = e.getBase(); ++ Reg index = e.getIndex(); + const int baseIdx = base.getIdx(); + const int baseBit = base.getBit(); + const int indexBit = index.getBit(); +@@ -2175,7 +2173,7 @@ private: + } + } + LabelManager labelMgr_; +- void writeCode(uint64_t type, const Reg& r, int code, bool rex2 = false) ++ void writeCode(uint64_t type, Reg r, int code, bool rex2 = false) + { + if (!(type&T_APX || rex2)) { + if (type & T_0F) { +@@ -2188,7 +2186,7 @@ private: + } + db(code | (((type & T_SENTRY) == 0 || (type & T_CODE1_IF1)) && !r.isBit(8))); + } +- void opRR(const Reg& r1, const Reg& r2, uint64_t type, int code) ++ void opRR(Reg r1, Reg r2, uint64_t type, int code) + { + if (!(type & T_ALLOW_DIFF_SIZE) && r1.isREG() && r2.isREG() && r1.getBit() != r2.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + if (!(type & T_ALLOW_ABCDH) && (isBadCombination(r1, r2) || isBadCombination(r2, r1))) XBYAK_THROW(ERR_CANT_USE_ABCDH) +@@ -2196,7 +2194,7 @@ private: + writeCode(type, r1, code, rex2); + setModRM(3, r1.getIdx(), r2.getIdx()); + } +- void opMR(const Address& addr, const Reg& r, uint64_t type, int code, uint64_t type2 = 0, int code2 = NONE) ++ void opMR(const Address& addr, Reg r, uint64_t type, int code, uint64_t type2 = 0, int 
code2 = NONE) + { + if (code2 == NONE) code2 = code; + if (type2 && opROO(Reg(), addr, r, type2, code2)) return; +@@ -2208,7 +2206,7 @@ private: + writeCode(type, r, code, rex2); + opAddr(addr, r.getIdx()); + } +- void opLoadSeg(const Address& addr, const Reg& reg, uint64_t type, int code) ++ void opLoadSeg(const Address& addr, Reg reg, uint64_t type, int code) + { + if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) +@@ -2219,7 +2217,7 @@ private: + opAddr(addr, reg.getIdx()); + } + // for only MPX(bnd*) +- void opMIB(const Address& addr, const Reg& reg, uint64_t type, int code) ++ void opMIB(const Address& addr, Reg reg, uint64_t type, int code) + { + if (addr.getMode() != inner::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS) + opMR(addr.cloneNoOptimize(), reg, type, code); +@@ -2307,14 +2305,14 @@ private: + } + } + } +- void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&) = 0, int imm8 = NONE) ++ void opSSE(Reg r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&) = 0, int imm8 = NONE) + { + if (isValid && !isValid(r, op)) XBYAK_THROW(ERR_BAD_COMBINATION) + if (!isValidSSE(r) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED) + opRO(r, op, type, code, true, (imm8 != NONE) ? 
1 : 0); + if (imm8 != NONE) db(imm8); + } +- void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext) ++ void opMMX_IMM(Mmx mmx, int imm8, int code, int ext) + { + if (!isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED) + uint64_t type = T_0F; +@@ -2322,7 +2320,7 @@ private: + opRR(Reg32(ext), mmx, type, code); + db(imm8); + } +- void opMMX(const Mmx& mmx, const Operand& op, int code, uint64_t type = T_0F, uint64_t pref = T_66, int imm8 = NONE) ++ void opMMX(Mmx mmx, const Operand& op, int code, uint64_t type = T_0F, uint64_t pref = T_66, int imm8 = NONE) + { + if (mmx.isXMM()) type |= pref; + opSSE(mmx, op, type, code, isXMMorMMX_MEM, imm8); +@@ -2339,7 +2337,7 @@ private: + } + } + // pextr{w,b,d}, extractps +- void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false) ++ void opExt(const Operand& op, Mmx mmx, int code, int imm, bool hasMMX2 = false) + { + if (!isValidSSE(op) || !isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED) + if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */ +@@ -2350,21 +2348,21 @@ private: + } + } + // r1 is [abcd]h and r2 is reg with rex +- bool isBadCombination(const Reg& r1, const Reg& r2) const ++ bool isBadCombination(Reg r1, Reg r2) const + { + if (!r1.isHigh8bit()) return false; + if (r2.isExt8bit() || r2.getIdx() >= 8) return true; + return false; + } + // (r, r, m) or (r, m, r) +- bool opROO(const Reg& d, const Operand& op1, const Operand& op2, uint64_t type, int code, int immSize = 0, int sc = NONE) ++ bool opROO(Reg d, const Operand& op1, const Operand& op2, uint64_t type, int code, int immSize = 0, int sc = NONE) + { + if (!(type & T_MUST_EVEX) && !d.isREG() && !(d.hasRex2NFZU() || op1.hasRex2NFZU() || op2.hasRex2NFZU())) return false; + const Operand *p1 = &op1, *p2 = &op2; + if (p1->isMEM()) { std::swap(p1, p2); } else { if (p2->isMEM()) code |= 2; } + if (p1->isMEM()) XBYAK_THROW_RET(ERR_BAD_COMBINATION, false) + if (p2->isMEM()) { +- const Reg& r = *static_cast(p1); ++ Reg r = 
*static_cast(p1); + Address addr = p2->getAddress(); + const RegExp e = addr.getRegExp(); + evexLeg(r, e.getBase(), e.getIndex(), d, type, sc); +@@ -2372,7 +2370,7 @@ private: + addr.immSize = immSize; + opAddr(addr, r.getIdx()); + } else { +- evexLeg(static_cast(op2), static_cast(op1), Reg(), d, type, sc); ++ evexLeg(static_cast(op2), static_cast(op1), Reg(), d, type, sc); + writeCode(type, d, code); + setModRM(3, op2.getIdx(), op1.getIdx()); + } +@@ -2405,7 +2403,7 @@ private: + opRext(op, 0, ext&7, type, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), false, (imm != 1) ? 1 : 0, d); + if (imm != 1) db(imm); + } +- void opShift(const Operand& op, const Reg8& _cl, int ext, const Reg *d = 0) ++ void opShift(const Operand& op, Reg8 _cl, int ext, const Reg *d = 0) + { + if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) + if (d && op.getBit() != 0 && d->getBit() != op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) +@@ -2413,7 +2411,7 @@ private: + opRext(op, 0, ext&7, type, 0xD2, false, 0, d); + } + // condR assumes that op.isREG() is true +- void opRO(const Reg& r, const Operand& op, uint64_t type, int code, bool condR = true, int immSize = 0) ++ void opRO(Reg r, const Operand& op, uint64_t type, int code, bool condR = true, int immSize = 0) + { + if (op.isMEM()) { + opMR(op.getAddress(immSize), r, type, code); +@@ -2423,7 +2421,7 @@ private: + XBYAK_THROW(ERR_BAD_COMBINATION) + } + } +- void opShxd(const Reg& d, const Operand& op, const Reg& reg, uint8_t imm, int code, int code2, const Reg8 *_cl = 0) ++ void opShxd(Reg d, const Operand& op, Reg reg, uint8_t imm, int code, int code2, const Reg8 *_cl = 0) + { + if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) + if (!reg.isREG(16|i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) +@@ -2443,7 +2441,7 @@ private: + if (!op1.isREG()) XBYAK_THROW(ERR_BAD_COMBINATION) + opMR(op2.getAddress(), op1.getReg(), 0, code | 2); + } else { +- opRO(static_cast(op2), op1, 0, code, op1.getKind() == 
op2.getKind()); ++ opRO(static_cast(op2), op1, 0, code, op1.getKind() == op2.getKind()); + } + } + bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; } +@@ -2472,14 +2470,14 @@ private: + db(imm, immBit / 8); + } + // (r, r/m, imm) +- void opROI(const Reg& d, const Operand& op, uint32_t imm, uint64_t type, int ext) ++ void opROI(Reg d, const Operand& op, uint32_t imm, uint64_t type, int ext) + { + uint32_t immBit = getImmBit(d, imm); + int code = immBit < (std::min)(d.getBit(), 32U) ? 2 : 0; + opROO(d, op, Reg(ext, Operand::REG, d.getBit()), type, 0x80 | code, immBit / 8); + db(imm, immBit / 8); + } +- void opIncDec(const Reg& d, const Operand& op, int ext) ++ void opIncDec(Reg d, const Operand& op, int ext) + { + #ifdef XBYAK64 + if (d.isREG()) { +@@ -2504,7 +2502,7 @@ private: + void opPushPop(const Operand& op, int code, int ext, int alt) + { + if (op.isREG() && op.hasRex2()) { +- const Reg& r = static_cast(op); ++ Reg r = static_cast(op); + rex2(0, rexRXB(3, 0, Reg(), r), Reg(), r); + db(alt | (r.getIdx() & 7)); + return; +@@ -2531,7 +2529,7 @@ private: + /* + mov(r, imm) = db(imm, mov_imm(r, imm)) + */ +- int mov_imm(const Reg& reg, uint64_t imm) ++ int mov_imm(Reg reg, uint64_t imm) + { + int bit = reg.getBit(); + const int idx = reg.getIdx(); +@@ -2571,7 +2569,7 @@ private: + JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? inner::LaddTop : inner::Labs), disp); + labelMgr_.addUndefinedLabel(label, jmp); + } +- void opMovxx(const Reg& reg, const Operand& op, uint8_t code) ++ void opMovxx(Reg reg, const Operand& op, uint8_t code) + { + if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) + int w = op.isBit(16); +@@ -2590,24 +2588,24 @@ private: + } + // use code1 if reg1 == st0 + // use code2 if reg1 != st0 && reg2 == st0 +- void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32_t code1, uint32_t code2) ++ void opFpuFpu(Fpu reg1, Fpu reg2, uint32_t code1, uint32_t code2) + { + uint32_t code = reg1.getIdx() == 0 ? 
code1 : reg2.getIdx() == 0 ? code2 : 0; + if (!code) XBYAK_THROW(ERR_BAD_ST_COMBINATION) + db(uint8_t(code >> 8)); + db(uint8_t(code | (reg1.getIdx() | reg2.getIdx()))); + } +- void opFpu(const Fpu& reg, uint8_t code1, uint8_t code2) ++ void opFpu(Fpu reg, uint8_t code1, uint8_t code2) + { + db(code1); db(code2 | reg.getIdx()); + } +- void opVex(const Reg& r, const Operand *p1, const Operand& op2, uint64_t type, int code, int imm8 = NONE) ++ void opVex(Reg r, const Operand *p1, const Operand& op2, uint64_t type, int code, int imm8 = NONE) + { + if (op2.isMEM()) { + Address addr = op2.getAddress(); +- const RegExp& regExp = addr.getRegExp(); +- const Reg& base = regExp.getBase(); +- const Reg& index = regExp.getIndex(); ++ RegExp regExp = addr.getRegExp(); ++ Reg base = regExp.getBase(); ++ Reg index = regExp.getIndex(); + if (BIT == 64 && addr.is32bit()) db(0x67); + int disp8N = 0; + if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx() || addr.hasRex2()) { +@@ -2628,7 +2626,7 @@ private: + if (imm8 != NONE) addr.immSize = 1; + opAddr(addr, r.getIdx()); + } else { +- const Reg& base = op2.getReg(); ++ Reg base = op2.getReg(); + if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) { + evex(r, base, p1, type, code); + } else { +@@ -2640,7 +2638,7 @@ private: + } + // (r, r, r/m) + // opRRO(a, b, c) == opROO(b, c, a) +- void opRRO(const Reg& d, const Reg& r1, const Operand& op2, uint64_t type, uint8_t code, int imm8 = NONE) ++ void opRRO(Reg d, Reg r1, const Operand& op2, uint64_t type, uint8_t code, int imm8 = NONE) + { + const unsigned int bit = d.getBit(); + if (r1.getBit() != bit || (op2.isREG() && op2.getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION) +@@ -2652,7 +2650,7 @@ private: + opVex(d, &r1, op2, type, code, imm8); + } + } +- void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, uint64_t type, int code, int imm8 = NONE) ++ void 
opAVX_X_X_XM(Xmm x1, const Operand& op1, const Operand& op2, uint64_t type, int code, int imm8 = NONE) + { + const Xmm *x2 = static_cast(&op1); + const Operand *op = &op2; +@@ -2664,7 +2662,7 @@ private: + if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) XBYAK_THROW(ERR_BAD_COMBINATION) + opVex(x1, x2, *op, type, code, imm8); + } +- void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, uint64_t type, int code, int imm8 = NONE) ++ void opAVX_K_X_XM(const Opmask& k, Xmm x2, const Operand& op3, uint64_t type, int code, int imm8 = NONE) + { + if (!op3.isMEM() && (x2.getKind() != op3.getKind())) XBYAK_THROW(ERR_BAD_COMBINATION) + opVex(k, &x2, op3, type, code, imm8); +@@ -2675,21 +2673,21 @@ private: + if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) + } + // (x, x/m), (x, y/m256), (y, z/m) +- void checkCvt2(const Xmm& x, const Operand& op) const ++ void checkCvt2(Xmm x, const Operand& op) const + { + if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION) + } +- void opCvt(const Xmm& x, const Operand& op, uint64_t type, int code) ++ void opCvt(Xmm x, const Operand& op, uint64_t type, int code) + { + Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? 
Operand::YMM : Operand::XMM) : Operand::ZMM; + opVex(x.copyAndSetKind(kind), &xm0, op, type, code); + } +- void opCvt2(const Xmm& x, const Operand& op, uint64_t type, int code) ++ void opCvt2(Xmm x, const Operand& op, uint64_t type, int code) + { + checkCvt2(x, op); + opCvt(x, op, type, code); + } +- void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, uint64_t type64, uint64_t type32, uint8_t code) ++ void opCvt3(Xmm x1, Xmm x2, const Operand& op, uint64_t type, uint64_t type64, uint64_t type32, uint8_t code) + { + if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + Xmm x(op.getIdx()); +@@ -2697,19 +2695,19 @@ private: + opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code); + } + // (x, x/y/xword/yword), (y, z/m) +- void checkCvt4(const Xmm& x, const Operand& op) const ++ void checkCvt4(Xmm x, const Operand& op) const + { + if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION) + } + // (x, x/y/z/xword/yword/zword) +- void opCvt5(const Xmm& x, const Operand& op, uint64_t type, int code) ++ void opCvt5(Xmm x, const Operand& op, uint64_t type, int code) + { + if (!(x.isXMM() && op.isBit(128|256|512))) XBYAK_THROW(ERR_BAD_COMBINATION) + Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM; + opVex(x.copyAndSetKind(kind), &xm0, op, type, code); + } + // (x, x, x/m), (x, y, y/m), (y, z, z/m) +- void opCvt6(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code) ++ void opCvt6(Xmm x1, Xmm x2, const Operand& op, uint64_t type, int code) + { + int b1 = x1.getBit(); + int b2 = x2.getBit(); +@@ -2720,25 +2718,25 @@ private: + } + XBYAK_THROW(ERR_BAD_COMBINATION); + } +- const Xmm& cvtIdx0(const Operand& x) const ++ Xmm cvtIdx0(const Operand& x) const + { + return x.isZMM() ? zm0 : x.isYMM() ? 
ym0 : xm0; + } + // support (x, x/m, imm), (y, y/m, imm) +- void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, uint64_t type, int code, int imm8 = NONE) ++ void opAVX_X_XM_IMM(Xmm x, const Operand& op, uint64_t type, int code, int imm8 = NONE) + { + opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8); + } +- void opCnt(const Reg& reg, const Operand& op, uint8_t code) ++ void opCnt(Reg reg, const Operand& op, uint8_t code) + { + if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM()); + if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) + opRO(reg, op, T_F3 | T_0F, code); + } +- void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, uint64_t type, uint8_t code, int mode) ++ void opGather(Xmm x1, const Address& addr, Xmm x2, uint64_t type, uint8_t code, int mode) + { +- const RegExp& regExp = addr.getRegExp(); ++ RegExp regExp = addr.getRegExp(); + if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) + const int y_vx_y = 0; + const int y_vy_y = 1; +@@ -2766,7 +2764,7 @@ private: + xx_yx_zy = 1, + xx_xy_yz = 2 + }; +- void checkGather2(const Xmm& x1, const Reg& x2, int mode) const ++ void checkGather2(Xmm x1, Reg x2, int mode) const + { + if (x1.isXMM() && x2.isXMM()) return; + switch (mode) { +@@ -2779,10 +2777,10 @@ private: + } + XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) + } +- void opGather2(const Xmm& x, const Address& addr, uint64_t type, uint8_t code, int mode) ++ void opGather2(Xmm x, const Address& addr, uint64_t type, uint8_t code, int mode) + { + if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO) +- const RegExp& regExp = addr.getRegExp(); ++ RegExp regExp = addr.getRegExp(); + checkGather2(x, regExp.getIndex(), mode); + int maskIdx = x.getOpmaskIdx(); + if ((type & T_M_K) && addr.getOpmaskIdx()) maskIdx = addr.getOpmaskIdx(); +@@ -2794,7 +2792,7 @@ private: + xx_xy_yz ; mode = true + xx_xy_xz ; mode = false + */ +- void 
opVmov(const Operand& op, const Xmm& x, uint64_t type, uint8_t code, bool mode) ++ void opVmov(const Operand& op, Xmm x, uint64_t type, uint8_t code, bool mode) + { + if (mode) { + if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION) +@@ -2803,13 +2801,13 @@ private: + } + opVex(x, 0, op, type, code); + } +- void opGatherFetch(const Address& addr, const Xmm& x, uint64_t type, uint8_t code, Operand::Kind kind) ++ void opGatherFetch(const Address& addr, Xmm x, uint64_t type, uint8_t code, Operand::Kind kind) + { + if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO) + if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) + opVex(x, 0, addr, type, code); + } +- void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding enc, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0) ++ void opEncoding(Xmm x1, Xmm x2, const Operand& op, uint64_t type, int code, PreferredEncoding enc, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0) + { + opAVX_X_X_XM(x1, x2, op, type | orEvexIf(enc, typeVex, typeEvex, sel), code, imm); + } +@@ -2828,7 +2826,7 @@ private: + enc = getEncoding(enc, sel); + return ((sel == 0 && enc == VexEncoding) || (sel == 1 && enc != AVX10v2Encoding)) ? 
typeVex : (T_MUST_EVEX | typeEvex); + } +- void opInOut(const Reg& a, const Reg& d, uint8_t code) ++ void opInOut(Reg a, Reg d, uint8_t code) + { + if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) { + switch (a.getBit()) { +@@ -2839,7 +2837,7 @@ private: + } + XBYAK_THROW(ERR_BAD_COMBINATION) + } +- void opInOut(const Reg& a, uint8_t code, uint8_t v) ++ void opInOut(Reg a, uint8_t code, uint8_t v) + { + if (a.getIdx() == Operand::AL) { + switch (a.getBit()) { +@@ -2873,7 +2871,7 @@ private: + opROO(Reg(15 - dfv, Operand::REG, opBit), op, Reg(0, Operand::REG, opBit), T_APX|T_CODE1_IF1, 0xF6, immBit / 8, sc); + db(imm, immBit / 8); + } +- void opCfcmov(const Reg& d, const Operand& op1, const Operand& op2, int code) ++ void opCfcmov(Reg d, const Operand& op1, const Operand& op2, int code) + { + const int dBit = d.getBit(); + const int op2Bit = op2.getBit(); +@@ -2884,7 +2882,7 @@ private: + uint64_t type = dBit > 0 ? (T_MUST_EVEX|T_NF) : T_MUST_EVEX; + opROO(d, op2, op1, type, code); + } else { +- opROO(d, op1, static_cast(op2)|T_nf, T_MUST_EVEX|T_NF, code); ++ opROO(d, op1, static_cast(op2)|T_nf, T_MUST_EVEX|T_NF, code); + } + } + #ifdef XBYAK64 +@@ -2922,9 +2920,9 @@ private: + const Operand *p1 = &k, *p2 = &op; + if (code == 0x93) { std::swap(p1, p2); } + if (opROO(Reg(), *p2, *p1, T_APX|type, code)) return; +- opVex(static_cast(*p1), 0, *p2, type, code); ++ opVex(static_cast(*p1), 0, *p2, type, code); + } +- void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2) ++ void opEncodeKey(Reg32 r1, Reg32 r2, uint8_t code1, uint8_t code2) + { + if (r1.getIdx() < 8 && r2.getIdx() < 8) { + db(0xF3); db(0x0F); db(0x38); db(code1); setModRM(3, r1.getIdx(), r2.getIdx()); +@@ -2932,7 +2930,7 @@ private: + } + opROO(Reg(), r2, r1, T_MUST_EVEX|T_F3, code2); + } +- void opSSE_APX(const Xmm& x, const Operand& op, uint64_t type1, uint8_t code1, uint64_t type2, uint8_t code2, int imm = NONE) ++ void opSSE_APX(Xmm x, const 
Operand& op, uint64_t type1, uint8_t code1, uint64_t type2, uint8_t code2, int imm = NONE) + { + if (x.getIdx() <= 15 && op.hasRex2() && opROO(Reg(), op, x, type2, code2, imm != NONE ? 1 : 0)) { + if (imm != NONE) db(imm); +@@ -2972,9 +2970,9 @@ public: + const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; + const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7; +- const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7; +- const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7; +- const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7; ++ const Xmm xm0, xm1, xm2, xm3, xm4, xm5, xm6, xm7; ++ const Ymm ym0, ym1, ym2, ym3, ym4, ym5, ym6, ym7; ++ const Zmm zm0, zm1, zm2, zm3, zm4, zm5, zm6, zm7; + const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi; + const Reg16 ax, cx, dx, bx, sp, bp, si, di; + const Reg8 al, cl, dl, bl, ah, ch, dh, bh; +@@ -3007,15 +3005,15 @@ public: + const Zmm zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23; + const Zmm zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31; + const Tmm tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7; +- const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience +- const Xmm &xm16, &xm17, &xm18, &xm19, &xm20, &xm21, &xm22, &xm23; +- const Xmm &xm24, &xm25, &xm26, &xm27, &xm28, &xm29, &xm30, &xm31; +- const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15; +- const Ymm &ym16, &ym17, &ym18, &ym19, &ym20, &ym21, &ym22, &ym23; +- const Ymm &ym24, &ym25, &ym26, &ym27, &ym28, &ym29, &ym30, &ym31; +- const Zmm &zm8, &zm9, &zm10, &zm11, &zm12, &zm13, &zm14, &zm15; +- const Zmm &zm16, &zm17, &zm18, &zm19, &zm20, &zm21, &zm22, &zm23; +- const Zmm &zm24, &zm25, &zm26, &zm27, &zm28, &zm29, &zm30, &zm31; ++ const Xmm xm8, xm9, xm10, xm11, xm12, xm13, xm14, xm15; // for my convenience ++ const Xmm xm16, xm17, xm18, xm19, xm20, xm21, xm22, xm23; ++ const Xmm xm24, xm25, xm26, xm27, xm28, xm29, xm30, xm31; ++ 
const Ymm ym8, ym9, ym10, ym11, ym12, ym13, ym14, ym15; ++ const Ymm ym16, ym17, ym18, ym19, ym20, ym21, ym22, ym23; ++ const Ymm ym24, ym25, ym26, ym27, ym28, ym29, ym30, ym31; ++ const Zmm zm8, zm9, zm10, zm11, zm12, zm13, zm14, zm15; ++ const Zmm zm16, zm17, zm18, zm19, zm20, zm21, zm22, zm23; ++ const Zmm zm24, zm25, zm26, zm27, zm28, zm29, zm30, zm31; + const RegRip rip; + #endif + #ifndef XBYAK_DISABLE_SEGMENT +@@ -3064,7 +3062,7 @@ public: + #endif + void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); } + +- void test(const Operand& op, const Reg& reg) ++ void test(const Operand& op, Reg reg) + { + opRO(reg, op, 0, 0x84, op.getKind() == reg.getKind()); + } +@@ -3080,7 +3078,7 @@ public: + } + db(imm, immSize); + } +- void imul(const Reg& reg, const Operand& op, int imm) ++ void imul(Reg reg, const Operand& op, int imm) + { + int s = inner::IsInDisp8(imm) ? 1 : 0; + int immSize = s ? 1 : reg.isREG(16) ? 2 : 4; +@@ -3203,7 +3201,7 @@ public: + return; + } + if (p1->isREG() && p2->isREG()) std::swap(p1, p2); // adapt to NASM 2.16.03 behavior to pass tests +- opRO(static_cast(*p1), *p2, 0, 0x86 | (p1->isBit(8) ? 0 : 1), (p1->isREG() && (p1->getBit() == p2->getBit()))); ++ opRO(static_cast(*p1), *p2, 0, 0x86 | (p1->isBit(8) ? 0 : 1), (p1->isREG() && (p1->getBit() == p2->getBit()))); + } + + #ifndef XBYAK_DISABLE_SEGMENT +@@ -3370,7 +3368,7 @@ public: + defaultEncoding_[1] = enc; + } + +- void bswap(const Reg32e& r) ++ void bswap(Reg32e r) + { + int idx = r.getIdx(); + uint8_t rex = (r.isREG(64) ? 8 : 0) | ((idx & 8) ? 
1 : 0); diff --git a/.patch/xbyak/0002-address-frame-ctor.patch b/.patch/xbyak/0002-address-frame-ctor.patch deleted file mode 100644 index 43892c9940..0000000000 --- a/.patch/xbyak/0002-address-frame-ctor.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h -index 176ee58..b44d62a 100644 ---- a/xbyak/xbyak.h -+++ b/xbyak/xbyak.h -@@ -1467,8 +1467,6 @@ inline XBYAK_CONSTEXPR bool Operand::hasRex2() const - } - - class AddressFrame { -- void operator=(const AddressFrame&); -- AddressFrame(const AddressFrame&); - public: - const uint32_t bit_; - const bool broadcast_; diff --git a/externals/cpmfile.json b/externals/cpmfile.json index 7f4e552159..9d292daff0 100644 --- a/externals/cpmfile.json +++ b/externals/cpmfile.json @@ -63,8 +63,7 @@ "bundled": true, "skip_updates": true, "patches": [ - "0001-rvalue-optimize.patch", - "0002-address-frame-ctor.patch" + "0001-rvalue-optimize.patch" ] }, "oaknut": { diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index e003380832..45f4d3fdce 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -384,7 +384,7 @@ endif() if (CMAKE_SYSTEM_NAME STREQUAL "Windows") target_compile_definitions(dynarmic PRIVATE FMT_USE_WINDOWS_H=0) endif() -if (xbyak_ADDED) +if (NOT DEFINED xbyak_ADDED) target_compile_definitions(dynarmic PRIVATE XBYAK_BUNDLED=1) endif() target_compile_definitions(dynarmic PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)