From 17a10ca9a3c0c4035825861e9bb7d319713dc3ea Mon Sep 17 00:00:00 2001 From: Nikita Edel Date: Sun, 8 Mar 2026 22:39:41 +0100 Subject: [PATCH 1/3] fix wstring_to_utf8 --- .../Platform/Linux/Linux_Minecraft.cpp | 44 +++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp b/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp index a9722780b..8e31ac885 100644 --- a/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp +++ b/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp @@ -991,15 +991,53 @@ Vec3::resetPool(); std::vector vRichPresenceStrings; // convert std::wstring to UTF-8 string +// wchar_t is 32bit on all Linux systems, and interpreted as UTF-32 +// the code base stores all strings internally as UCS-2 (16bit, subset of UTF-16), +// which, since it only stores BMP code points, is trivially convertible +// to UTF-32 as well as UTF-16. hence this parser simply parses UTF-32 + +// all implementations of libc (including glibc, musl, uClibc...) implement +// wchar_t as 4byte/32bit (since around 1999), it would break the libc ABI, +// if this ever will get changed, hence this assert +static_assert( sizeof(wchar_t) == 4, "Linux with non 32bit wchar_t") + std::string wstring_to_utf8 (const std::wstring& str) { - std::wstring_convert> myconv; - return myconv.to_bytes(str); + std::string result; + // generous pre allocation. will never need to resize. 
+ // it will get destructed instantly in the function that it gets called + result.reserve(str.size() * 4); + + for (size_t i = 0; i < str.size(); ++i) { + uint32_t cp = static_cast(str[i]); + + // outside of valid unicode range or preserved UTF-16LE surrogate pair + if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) { + cp = 0xFFFD; // unicode replacement character + } + + if (cp < 0x80) { + // ASCII + result += static_cast(cp); + // extract multibyte unicode into multiple bytes of UTF-8 + } else if (cp < 0x800) { + result += static_cast(0xC0 | (cp >> 6)); + result += static_cast(0x80 | (cp & 0x3F)); + } else if (cp < 0x10000) { + result += static_cast(0xE0 | (cp >> 12)); + result += static_cast(0x80 | ((cp >> 6) & 0x3F)); + result += static_cast(0x80 | (cp & 0x3F)); + } else { + result += static_cast(0xF0 | (cp >> 18)); + result += static_cast(0x80 | ((cp >> 12) & 0x3F)); + result += static_cast(0x80 | ((cp >> 6) & 0x3F)); + result += static_cast(0x80 | (cp & 0x3F)); + } + } } uint8_t *mallocAndCreateUTF8ArrayFromString(int iID) { - int result; LPCWSTR wchString=app.GetString(iID); std::wstring srcString = wchString; From 85df07e232e182702881d374a8dcf5677fdd95c7 Mon Sep 17 00:00:00 2001 From: Nikita Edel Date: Sun, 8 Mar 2026 22:59:29 +0100 Subject: [PATCH 2/3] fixed typos --- Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp b/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp index 8e31ac885..8399e4ed2 100644 --- a/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp +++ b/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp @@ -999,19 +999,20 @@ std::vector vRichPresenceStrings; // all implementations of libc (including glibc, musl, uClibc...) 
implement // wchar_t as 4byte/32bit (since around 1999), it would break the libc ABI, // if this ever will get changed, hence this assert -static_assert( sizeof(wchar_t) == 4, "Linux with non 32bit wchar_t") +static_assert( sizeof(wchar_t) == 4, "Linux with non 32bit wchar_t"); std::string wstring_to_utf8 (const std::wstring& str) { std::string result; - // generous pre allocation. will never need to resize. - // it will get destructed instantly in the function that it gets called + // preallocation, so it will never need to resize. + // same allocation size as for the 4byte wstring representation. + // it will get destructed instantly, in the function that it gets called from result.reserve(str.size() * 4); for (size_t i = 0; i < str.size(); ++i) { uint32_t cp = static_cast(str[i]); - // outside of valid unicode range or preserved UTF-16LE surrogate pair + // outside of valid unicode range or preserved UTF-16 surrogate pairs (just in case) if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) { cp = 0xFFFD; // unicode replacement character } From 1c4aa32b13e0ca92c24f55150a6c33574311c175 Mon Sep 17 00:00:00 2001 From: Nikita Edel Date: Sun, 8 Mar 2026 23:06:52 +0100 Subject: [PATCH 3/3] typo --- Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp b/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp index 8399e4ed2..2779b7c1a 100644 --- a/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp +++ b/Minecraft.Client/Platform/Linux/Linux_Minecraft.cpp @@ -1035,6 +1035,8 @@ std::string wstring_to_utf8 (const std::wstring& str) result += static_cast(0x80 | (cp & 0x3F)); } } + + return result; } uint8_t *mallocAndCreateUTF8ArrayFromString(int iID)