diff --git a/Minecraft.Client/ChatScreen.cpp b/Minecraft.Client/ChatScreen.cpp index d1a6784b..b61fcf0a 100644 --- a/Minecraft.Client/ChatScreen.cpp +++ b/Minecraft.Client/ChatScreen.cpp @@ -6,6 +6,7 @@ #include "..\Minecraft.World\SharedConstants.h" #include "..\Minecraft.World\StringHelpers.h" #include "..\Minecraft.World\ChatPacket.h" +#include "..\Minecraft.World\ArabicShaping.h" const wstring ChatScreen::allowedChars = SharedConstants::acceptableLetters; vector ChatScreen::s_chatHistory; @@ -154,14 +155,21 @@ void ChatScreen::render(int xm, int ym, float a) int x = 4; drawString(font, prefix, x, height - 12, 0xe0e0e0); x += font->width(prefix); - wstring beforeCursor = message.substr(0, cursorIndex); - wstring afterCursor = message.substr(cursorIndex); - drawStringLiteral(font, beforeCursor, x, height - 12, 0xe0e0e0); - x += font->widthLiteral(beforeCursor); + + // Shape the full message as one unit so letter connections and word order + // are correct. Track where the logical cursor maps in the visual string. 
+ int visualCursorPos = 0; + wstring shaped = shapeArabicText(message, cursorIndex, &visualCursorPos); + + // Render the full shaped message without re-shaping it + drawStringPreshaped(font, shaped, x, height - 12, 0xe0e0e0); + + // Place the cursor at the correct visual position + wstring beforeCursorVisual = shaped.substr(0, visualCursorPos); + int cursorX = x + font->widthPreshaped(beforeCursorVisual); if (frame / 6 % 2 == 0) - drawString(font, L"_", x, height - 12, 0xe0e0e0); - x += font->width(L"_"); - drawStringLiteral(font, afterCursor, x, height - 12, 0xe0e0e0); + drawString(font, L"_", cursorX, height - 12, 0xe0e0e0); + Screen::render(xm, ym, a); } diff --git a/Minecraft.Client/Common/UI/UIControl_Base.cpp b/Minecraft.Client/Common/UI/UIControl_Base.cpp index 87c2862f..4d727fa9 100644 --- a/Minecraft.Client/Common/UI/UIControl_Base.cpp +++ b/Minecraft.Client/Common/UI/UIControl_Base.cpp @@ -3,6 +3,7 @@ #include "UIControl.h" #include "..\..\..\Minecraft.World\StringHelpers.h" #include "..\..\..\Minecraft.World\JavaMath.h" +#include "..\..\..\Minecraft.World\ArabicShaping.h" UIControl_Base::UIControl_Base() { @@ -32,13 +33,16 @@ void UIControl_Base::tick() //app.DebugPrintf("Calling SetLabel - '%ls'\n", m_label.c_str()); m_bLabelChanged = false; + // Shape the text before sending to Iggy; m_label stays unshaped for future updates + wstring shaped = shapeArabicText(m_label.getString()); + IggyDataValue result; IggyDataValue value[1]; value[0].type = IGGY_DATATYPE_string_UTF16; IggyStringUTF16 stringVal; - stringVal.string = (IggyUTF16*) m_label.c_str(); - stringVal.length = m_label.length(); + stringVal.string = (IggyUTF16*) shaped.c_str(); + stringVal.length = (int)shaped.length(); value[0].string16 = stringVal; IggyResult out = IggyPlayerCallMethodRS ( m_parentScene->getMovie() , &result, getIggyValuePath() , m_setLabelFunc , 1 , value ); @@ -56,13 +60,16 @@ void UIControl_Base::setLabel(UIString label, bool instant, bool force) { m_bLabelChanged = false; 
+ // Shape the text before sending to Iggy; m_label stays unshaped for future updates + wstring shaped = shapeArabicText(m_label.getString()); + IggyDataValue result; IggyDataValue value[1]; value[0].type = IGGY_DATATYPE_string_UTF16; IggyStringUTF16 stringVal; - stringVal.string = (IggyUTF16*)m_label.c_str(); - stringVal.length = m_label.length(); + stringVal.string = (IggyUTF16*) shaped.c_str(); + stringVal.length = (int)shaped.length(); value[0].string16 = stringVal; IggyResult out = IggyPlayerCallMethodRS ( m_parentScene->getMovie() , &result, getIggyValuePath() , m_setLabelFunc , 1 , value ); diff --git a/Minecraft.Client/Common/UI/UIControl_Label.cpp b/Minecraft.Client/Common/UI/UIControl_Label.cpp index 47374d21..21666d49 100644 --- a/Minecraft.Client/Common/UI/UIControl_Label.cpp +++ b/Minecraft.Client/Common/UI/UIControl_Label.cpp @@ -2,6 +2,7 @@ #include "UI.h" #include "UIControl_Label.h" #include "..\..\..\Minecraft.World\StringHelpers.h" +#include "..\..\..\Minecraft.World\ArabicShaping.h" UIControl_Label::UIControl_Label() { @@ -22,13 +23,15 @@ void UIControl_Label::init(UIString label) { m_label = label; + wstring shaped = shapeArabicText(m_label.getString()); + IggyDataValue result; IggyDataValue value[1]; value[0].type = IGGY_DATATYPE_string_UTF16; IggyStringUTF16 stringVal; - stringVal.string = (IggyUTF16*)label.c_str(); - stringVal.length = label.length(); + stringVal.string = (IggyUTF16*)shaped.c_str(); + stringVal.length = (int)shaped.length(); value[0].string16 = stringVal; IggyResult out = IggyPlayerCallMethodRS ( m_parentScene->getMovie() , &result, getIggyValuePath() , m_initFunc , 1 , value ); } diff --git a/Minecraft.Client/Common/UI/UIControl_SaveList.cpp b/Minecraft.Client/Common/UI/UIControl_SaveList.cpp index 5ae9c8f0..00aaa93a 100644 --- a/Minecraft.Client/Common/UI/UIControl_SaveList.cpp +++ b/Minecraft.Client/Common/UI/UIControl_SaveList.cpp @@ -1,6 +1,7 @@ #include "stdafx.h" #include "UI.h" #include "UIControl_SaveList.h" 
+#include "..\..\..\Minecraft.World\ArabicShaping.h" bool UIControl_SaveList::setupControl(UIScene *scene, IggyValuePath *parent, const string &controlName) { @@ -69,12 +70,14 @@ void UIControl_SaveList::addItem(const string &label, const wstring &iconName, i void UIControl_SaveList::addItem(const wstring &label, const wstring &iconName, int data) { + wstring shaped = shapeArabicText(label); + IggyDataValue result; IggyDataValue value[3]; IggyStringUTF16 stringVal; - stringVal.string = (IggyUTF16*)label.c_str(); - stringVal.length = static_cast(label.length()); + stringVal.string = (IggyUTF16*)shaped.c_str(); + stringVal.length = static_cast(shaped.length()); value[0].type = IGGY_DATATYPE_string_UTF16; value[0].string16 = stringVal; diff --git a/Minecraft.Client/Common/UI/UIScene_LoadMenu.cpp b/Minecraft.Client/Common/UI/UIScene_LoadMenu.cpp index b6d9ffe4..7a560550 100644 --- a/Minecraft.Client/Common/UI/UIScene_LoadMenu.cpp +++ b/Minecraft.Client/Common/UI/UIScene_LoadMenu.cpp @@ -255,7 +255,7 @@ UIScene_LoadMenu::UIScene_LoadMenu(int iPad, void *initData, UILayer *parentLaye { wchar_t wSaveName[128]; ZeroMemory(wSaveName, sizeof(wSaveName)); - mbstowcs(wSaveName, params->saveDetails->UTF8SaveName, 127); + MultiByteToWideChar(CP_UTF8, 0, params->saveDetails->UTF8SaveName, -1, wSaveName, 127); m_levelName = wstring(wSaveName); m_labelGameName.init(m_levelName); } diff --git a/Minecraft.Client/Common/UI/UIScene_LoadOrJoinMenu.cpp b/Minecraft.Client/Common/UI/UIScene_LoadOrJoinMenu.cpp index 88f042b6..23a64227 100644 --- a/Minecraft.Client/Common/UI/UIScene_LoadOrJoinMenu.cpp +++ b/Minecraft.Client/Common/UI/UIScene_LoadOrJoinMenu.cpp @@ -49,7 +49,7 @@ static wstring ReadLevelNameFromSaveFile(const wstring& filePath, bool *outHardc if (len > 0) { wchar_t wbuf[128] = {}; - mbstowcs(wbuf, buf, 127); + MultiByteToWideChar(CP_UTF8, 0, buf, -1, wbuf, 127); return wstring(wbuf); } } @@ -785,8 +785,11 @@ void UIScene_LoadOrJoinMenu::tick() if (!levelName.empty()) { 
m_buttonListSaves.addItem(levelName, wstring(L"")); - wcstombs(m_saveDetails[i].UTF8SaveName, levelName.c_str(), 127); - m_saveDetails[i].UTF8SaveName[127] = '\0'; + { + int n = WideCharToMultiByte(CP_UTF8, 0, levelName.c_str(), -1, m_saveDetails[i].UTF8SaveName, 127, nullptr, nullptr); + if (n <= 0) m_saveDetails[i].UTF8SaveName[0] = '\0'; + m_saveDetails[i].UTF8SaveName[127] = '\0'; + } } else { @@ -1431,9 +1434,9 @@ int UIScene_LoadOrJoinMenu::KeyboardCompleteWorldNameCallback(LPVOID lpParam,boo for (int k = 0; k < 127 && ui16Text[k]; k++) wNewName[k] = static_cast(ui16Text[k]); - // Convert to narrow for storage and in-memory update - char narrowName[128] = {}; - wcstombs(narrowName, wNewName, 127); + // Convert to narrow for storage and in-memory update (UTF-8 to preserve Unicode) + char narrowName[256] = {}; + WideCharToMultiByte(CP_UTF8, 0, wNewName, -1, narrowName, 255, nullptr, nullptr); // Build the sidecar path: Windows64\GameHDD\{folder}\worldname.txt wchar_t wFilename[MAX_SAVEFILENAME_LENGTH] = {}; @@ -1449,7 +1452,7 @@ int UIScene_LoadOrJoinMenu::KeyboardCompleteWorldNameCallback(LPVOID lpParam,boo // Update the in-memory display name so the list reflects it immediately strncpy_s(pClass->m_saveDetails[listPos].UTF8SaveName, narrowName, 127); - pClass->m_saveDetails[listPos].UTF8SaveName[127] = '\0'; + pClass->m_saveDetails[listPos].UTF8SaveName[127] = '\0'; // NOTE: only 127 bytes are copied, so a longer UTF-8 narrowName (up to 255 bytes) is truncated here, possibly mid-character - TODO(review): trim to a code-point boundary // Reuse the existing callback to trigger the list repopulate UIScene_LoadOrJoinMenu::RenameSaveDataReturned(pClass, true); @@ -2517,7 +2520,7 @@ int UIScene_LoadOrJoinMenu::SaveOptionsDialogReturned(void *pParam,int iPad,C4JS { wchar_t wSaveName[128]; ZeroMemory(wSaveName, 128 * sizeof(wchar_t)); - mbstowcs_s(nullptr, wSaveName, 128, pClass->m_saveDetails[pClass->m_iSaveListIndex - pClass->m_iDefaultButtonsC].UTF8SaveName, _TRUNCATE); + MultiByteToWideChar(CP_UTF8, 0, 
pClass->m_saveDetails[pClass->m_iSaveListIndex - pClass->m_iDefaultButtonsC].UTF8SaveName, -1, wSaveName, 127); UIKeyboardInitData kbData; kbData.title = app.GetString(IDS_RENAME_WORLD_TITLE); kbData.defaultText = wSaveName; diff --git a/Minecraft.Client/Common/XUI/XUI_Chat.cpp b/Minecraft.Client/Common/XUI/XUI_Chat.cpp index 64005621..a58f71ec 100644 --- a/Minecraft.Client/Common/XUI/XUI_Chat.cpp +++ b/Minecraft.Client/Common/XUI/XUI_Chat.cpp @@ -2,6 +2,7 @@ #include "XUI_Chat.h" #include "..\..\Minecraft.h" #include "..\..\Gui.h" +#include "..\..\..\Minecraft.World\ArabicShaping.h" HRESULT CScene_Chat::OnInit( XUIMessageInit* pInitData, BOOL& bHandled ) { @@ -29,7 +30,8 @@ HRESULT CScene_Chat::OnTimer( XUIMessageTimer *pXUIMessageTimer, BOOL &bHandled) { m_Backgrounds[i].SetOpacity(opacity); m_Labels[i].SetOpacity(opacity); - m_Labels[i].SetText( pGui->getMessage(m_iPad,i).c_str() ); + wstring shaped = shapeArabicText(pGui->getMessage(m_iPad, i)); + m_Labels[i].SetText( shaped.c_str() ); } else { diff --git a/Minecraft.Client/Common/XUI/XUI_Ctrl_4JList.cpp b/Minecraft.Client/Common/XUI/XUI_Ctrl_4JList.cpp index 7523c523..4b7eb5f6 100644 --- a/Minecraft.Client/Common/XUI/XUI_Ctrl_4JList.cpp +++ b/Minecraft.Client/Common/XUI/XUI_Ctrl_4JList.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "XUI_Ctrl_4JList.h" +#include "..\..\..\Minecraft.World\ArabicShaping.h" static bool TimeSortFn(const void *a, const void *b); @@ -294,8 +295,16 @@ HRESULT CXuiCtrl4JList::OnGetSourceDataText(XUIMessageGetSourceText *pGetSourceT if( ( 0 == pGetSourceTextData->iData ) && ( ( pGetSourceTextData->bItemData ) ) ) { EnterCriticalSection(&m_AccessListData); - pGetSourceTextData->szText = - GetData(pGetSourceTextData->iItem).pwszText; + LPCWSTR rawText = GetData(pGetSourceTextData->iItem).pwszText; + if (rawText) + { + m_shapedTextCache = shapeArabicText(rawText); + pGetSourceTextData->szText = m_shapedTextCache.c_str(); + } + else + { + pGetSourceTextData->szText = rawText; + } 
LeaveCriticalSection(&m_AccessListData); bHandled = TRUE; } diff --git a/Minecraft.Client/Common/XUI/XUI_Ctrl_4JList.h b/Minecraft.Client/Common/XUI/XUI_Ctrl_4JList.h index 11bdd456..99813ff6 100644 --- a/Minecraft.Client/Common/XUI/XUI_Ctrl_4JList.h +++ b/Minecraft.Client/Common/XUI/XUI_Ctrl_4JList.h @@ -75,4 +75,5 @@ private: static bool IndexSortFn(const void *a, const void *b); HXUIOBJ m_hSelectionChangedHandlerObj; + std::wstring m_shapedTextCache; // temp buffer for Arabic-shaped text in OnGetSourceDataText }; diff --git a/Minecraft.Client/Font.cpp b/Minecraft.Client/Font.cpp index b49328e3..15f6544e 100644 --- a/Minecraft.Client/Font.cpp +++ b/Minecraft.Client/Font.cpp @@ -8,6 +8,7 @@ #include "..\Minecraft.World\net.minecraft.h" #include "..\Minecraft.World\StringHelpers.h" #include "..\Minecraft.World\Random.h" +#include "..\Minecraft.World\ArabicShaping.h" Font::Font(Options *options, const wstring& name, Textures* textures, bool enforceUnicode, ResourceLocation *textureLocation, int cols, int rows, int charWidth, int charHeight, unsigned short charMap[]/* = nullptr */) : textures(textures) { @@ -289,6 +290,74 @@ void Font::drawLiteral(const wstring& str, int x, int y, int color) } } +// Like sanitize() but skips the shapeArabicText() call - for pre-shaped strings. 
+wstring Font::sanitizePreshaped(const wstring& str) +{ + wstring sb = str; + for (unsigned int i = 0; i < sb.length(); i++) + { + if (CharacterExists(sb[i])) + sb[i] = MapCharacter(sb[i]); + else if (unicodeWidth[sb[i]] != 0) + { + // Leave as-is: raw codepoint for glyph page rendering + } + else + { + sb[i] = 0; + } + } + return sb; +} + +void Font::drawLiteralPreshaped(const wstring& str, int x, int y, int color) +{ + if (str.empty()) return; + if ((color & 0xFC000000) == 0) color |= 0xFF000000; + textures->bindTexture(m_textureLocation); + glColor4f((color >> 16 & 255) / 255.0F, (color >> 8 & 255) / 255.0F, (color & 255) / 255.0F, (color >> 24 & 255) / 255.0F); + xPos = static_cast(x); + yPos = static_cast(y); + wstring cleanStr = sanitizePreshaped(str); + for (size_t i = 0; i < cleanStr.length(); ++i) + { + wchar_t c = cleanStr.at(i); + if (isUnicodeGlyphChar(c)) + { + renderUnicodeCharacter(c); + textures->bindTexture(m_textureLocation); + lastBoundTexture = fontTexture; + } + else + { + renderCharacter(c); + } + } +} + +void Font::drawShadowLiteralPreshaped(const wstring& str, int x, int y, int color) +{ + int shadowColor = (color & 0xFCFCFC) >> 2 | (color & 0xFF000000); + drawLiteralPreshaped(str, x + 1, y + 1, shadowColor); + drawLiteralPreshaped(str, x, y, color); +} + +int Font::widthPreshaped(const wstring& str) +{ + wstring cleanStr = sanitizePreshaped(str); + if (cleanStr.empty()) return 0; + int len = 0; + for (size_t i = 0; i < cleanStr.length(); ++i) + { + wchar_t wc = cleanStr.at(i); + if (isUnicodeGlyphChar(wc)) + len += (int)unicodeCharWidth(wc); + else + len += charWidths[static_cast(wc)]; + } + return len; +} + void Font::drawShadowWordWrap(const wstring &str, int x, int y, int w, int color, int h) { drawWordWrapInternal(str, x + 1, y + 1, w, color, true, h); @@ -473,7 +542,7 @@ int Font::widthLiteral(const wstring& str) wstring Font::sanitize(const wstring& str) { - wstring sb = str; + wstring sb = shapeArabicText(str); for (unsigned int i = 
0; i < sb.length(); i++) { diff --git a/Minecraft.Client/Font.h b/Minecraft.Client/Font.h index d711e570..0a50ad22 100644 --- a/Minecraft.Client/Font.h +++ b/Minecraft.Client/Font.h @@ -78,10 +78,14 @@ private: void renderUnicodeCharacter(wchar_t c); float unicodeCharWidth(wchar_t c); bool isUnicodeGlyphChar(wchar_t c); + wstring sanitizePreshaped(const wstring& str); // sanitize without re-shaping Arabic + void drawLiteralPreshaped(const wstring& str, int x, int y, int color); public: int width(const wstring& str); int widthLiteral(const wstring& str); // width without skipping ยง codes (for chat input) + int widthPreshaped(const wstring& str); // width of already-shaped text, no re-shaping + void drawShadowLiteralPreshaped(const wstring& str, int x, int y, int color); wstring sanitize(const wstring& str); void drawWordWrap(const wstring &string, int x, int y, int w, int col, int h); // 4J Added h param diff --git a/Minecraft.Client/GuiComponent.cpp b/Minecraft.Client/GuiComponent.cpp index c6b5f518..f104f0b7 100644 --- a/Minecraft.Client/GuiComponent.cpp +++ b/Minecraft.Client/GuiComponent.cpp @@ -110,6 +110,11 @@ void GuiComponent::drawStringLiteral(Font *font, const wstring& str, int x, int font->drawShadowLiteral(str, x, y, color); } +void GuiComponent::drawStringPreshaped(Font *font, const wstring& str, int x, int y, int color) +{ + font->drawShadowLiteralPreshaped(str, x, y, color); +} + void GuiComponent::blit(int x, int y, int sx, int sy, int w, int h) { float us = 1 / 256.0f; diff --git a/Minecraft.Client/GuiComponent.h b/Minecraft.Client/GuiComponent.h index 41496664..8959582e 100644 --- a/Minecraft.Client/GuiComponent.h +++ b/Minecraft.Client/GuiComponent.h @@ -16,5 +16,6 @@ public: void drawCenteredString(Font *font, const wstring& str, int x, int y, int color); void drawString(Font *font, const wstring& str, int x, int y, int color); void drawStringLiteral(Font* font, const wstring& str, int x, int y, int color); + void drawStringPreshaped(Font* font, 
const wstring& str, int x, int y, int color); void blit(int x, int y, int sx, int sy, int w, int h); }; diff --git a/Minecraft.World/ArabicShaping.cpp b/Minecraft.World/ArabicShaping.cpp new file mode 100644 index 00000000..a5255818 --- /dev/null +++ b/Minecraft.World/ArabicShaping.cpp @@ -0,0 +1,556 @@ +#include "stdafx.h" +#include "ArabicShaping.h" +#include +#include + +// Arabic text shaping - contextual form selection and RTL visual reordering. +// Ported from "Arabic Writer" JS reference by Omar Muhammad (GPL). + +// Each entry: base char, isolated, initial, medial, final forms +struct ArabicCharEntry +{ + wchar_t base; + wchar_t isolated; + wchar_t initial; + wchar_t medial; + wchar_t final_; + bool connectsLeft; + bool connectsRight; +}; + +// Core Arabic + extended (Farsi/Urdu) characters with presentation forms +static const ArabicCharEntry arabicChars[] = +{ + // base isolated initial medial final cL cR + { 0x0621, 0xFE80, 0xFE80, 0xFE80, 0xFE80, false, false }, // hamza + { 0x0622, 0xFE81, 0xFE81, 0xFE82, 0xFE82, false, true }, // alef madda + { 0x0623, 0xFE83, 0xFE83, 0xFE84, 0xFE84, false, true }, // alef hamza above + { 0x0624, 0xFE85, 0xFE85, 0xFE86, 0xFE86, false, true }, // waw hamza + { 0x0625, 0xFE87, 0xFE87, 0xFE88, 0xFE88, false, true }, // alef hamza below + { 0x0626, 0xFE89, 0xFE8B, 0xFE8C, 0xFE8A, true, true }, // yeh hamza + { 0x0627, 0xFE8D, 0xFE8D, 0xFE8E, 0xFE8E, false, true }, // alef + { 0x0628, 0xFE8F, 0xFE91, 0xFE92, 0xFE90, true, true }, // beh + { 0x0629, 0xFE93, 0xFE93, 0xFE94, 0xFE94, false, true }, // teh marbuta + { 0x062A, 0xFE95, 0xFE97, 0xFE98, 0xFE96, true, true }, // teh + { 0x062B, 0xFE99, 0xFE9B, 0xFE9C, 0xFE9A, true, true }, // theh + { 0x062C, 0xFE9D, 0xFE9F, 0xFEA0, 0xFE9E, true, true }, // jeem + { 0x062D, 0xFEA1, 0xFEA3, 0xFEA4, 0xFEA2, true, true }, // hah + { 0x062E, 0xFEA5, 0xFEA7, 0xFEA8, 0xFEA6, true, true }, // khah + { 0x062F, 0xFEA9, 0xFEA9, 0xFEAA, 0xFEAA, false, true }, // dal + { 0x0630, 0xFEAB, 0xFEAB, 
0xFEAC, 0xFEAC, false, true }, // thal + { 0x0631, 0xFEAD, 0xFEAD, 0xFEAE, 0xFEAE, false, true }, // reh + { 0x0632, 0xFEAF, 0xFEAF, 0xFEB0, 0xFEB0, false, true }, // zain + { 0x0633, 0xFEB1, 0xFEB3, 0xFEB4, 0xFEB2, true, true }, // seen + { 0x0634, 0xFEB5, 0xFEB7, 0xFEB8, 0xFEB6, true, true }, // sheen + { 0x0635, 0xFEB9, 0xFEBB, 0xFEBC, 0xFEBA, true, true }, // sad + { 0x0636, 0xFEBD, 0xFEBF, 0xFEC0, 0xFEBE, true, true }, // dad + { 0x0637, 0xFEC1, 0xFEC3, 0xFEC4, 0xFEC2, true, true }, // tah + { 0x0638, 0xFEC5, 0xFEC7, 0xFEC8, 0xFEC6, true, true }, // zah + { 0x0639, 0xFEC9, 0xFECB, 0xFECC, 0xFECA, true, true }, // ain + { 0x063A, 0xFECD, 0xFECF, 0xFED0, 0xFECE, true, true }, // ghain + { 0x0640, 0x0640, 0x0640, 0x0640, 0x0640, true, true }, // tatweel + { 0x0641, 0xFED1, 0xFED3, 0xFED4, 0xFED2, true, true }, // feh + { 0x0642, 0xFED5, 0xFED7, 0xFED8, 0xFED6, true, true }, // qaf + { 0x0643, 0xFED9, 0xFEDB, 0xFEDC, 0xFEDA, true, true }, // kaf + { 0x0644, 0xFEDD, 0xFEDF, 0xFEE0, 0xFEDE, true, true }, // lam + { 0x0645, 0xFEE1, 0xFEE3, 0xFEE4, 0xFEE2, true, true }, // meem + { 0x0646, 0xFEE5, 0xFEE7, 0xFEE8, 0xFEE6, true, true }, // noon + { 0x0647, 0xFEE9, 0xFEEB, 0xFEEC, 0xFEEA, true, true }, // heh + { 0x0648, 0xFEED, 0xFEED, 0xFEEE, 0xFEEE, false, true }, // waw + { 0x0649, 0xFEEF, 0xFEEF, 0xFEF0, 0xFEF0, false, true }, // alef maksura + { 0x064A, 0xFEF1, 0xFEF3, 0xFEF4, 0xFEF2, true, true }, // yeh + // Extended - Farsi/Urdu + { 0x067E, 0xFB56, 0xFB58, 0xFB59, 0xFB57, true, true }, // peh + { 0x0686, 0xFB7A, 0xFB7C, 0xFB7D, 0xFB7B, true, true }, // tcheh + { 0x0698, 0xFB8A, 0xFB8A, 0xFB8B, 0xFB8B, false, true }, // jeh + { 0x06A9, 0xFB8E, 0xFB90, 0xFB91, 0xFB8F, true, true }, // keheh (Farsi kaf) + { 0x06AF, 0xFB92, 0xFB94, 0xFB95, 0xFB93, true, true }, // gaf + { 0x06CC, 0xFBFC, 0xFBFE, 0xFBFF, 0xFBFD, true, true }, // Farsi yeh + // Urdu + { 0x0679, 0xFB66, 0xFB68, 0xFB69, 0xFB67, true, true }, // tteh + { 0x0688, 0xFB88, 0xFB88, 0xFB89, 0xFB89, false, 
true }, // ddal + { 0x0691, 0xFB8C, 0xFB8C, 0xFB8D, 0xFB8D, false, true }, // rreh + { 0x06C1, 0xFBA6, 0xFBA8, 0xFBA9, 0xFBA7, true, true }, // heh goal + { 0x06D2, 0xFBAE, 0xFBAE, 0xFBAF, 0xFBAF, false, true }, // yeh barree +}; + +static const int ARABIC_CHAR_COUNT = sizeof(arabicChars) / sizeof(arabicChars[0]); + +// Laam-Alef ligatures: when lam (0x0644) is followed by certain alef forms +struct LaamAlefEntry +{ + wchar_t alef; // the alef variant + wchar_t isolated; // ligature isolated form + wchar_t final_; // ligature final form +}; + +static const LaamAlefEntry laamAlefTable[] = +{ + { 0x0622, 0xFEF5, 0xFEF6 }, // lam + alef madda + { 0x0623, 0xFEF7, 0xFEF8 }, // lam + alef hamza above + { 0x0625, 0xFEF9, 0xFEFA }, // lam + alef hamza below + { 0x0627, 0xFEFB, 0xFEFC }, // lam + alef +}; + +// Build lookup map on first use +static std::unordered_map charMap; +static bool tablesInitialized = false; + +static void initTables() +{ + if (tablesInitialized) return; + for (int i = 0; i < ARABIC_CHAR_COUNT; i++) + { + charMap[arabicChars[i].base] = &arabicChars[i]; + } + tablesInitialized = true; +} + +static bool isArabicChar(wchar_t c) +{ + return (c >= 0x0600 && c <= 0x06FF) || (c >= 0xFB50 && c <= 0xFDFF) || (c >= 0xFE70 && c <= 0xFEFF); +} + +static bool isHaraka(wchar_t c) +{ + // Arabic diacritics (tashkeel/harakat): U+0610-U+061A, U+064B-U+065F, U+0670 + return (c >= 0x064B && c <= 0x065F) || (c >= 0x0610 && c <= 0x061A) || c == 0x0670; +} + +static bool isLaamAlefLigature(wchar_t c) +{ + // Laam-Alef presentation forms: U+FEF5-U+FEFC + return c >= 0xFEF5 && c <= 0xFEFC; +} + +static bool isDigit(wchar_t c) +{ + // Western digits and Arabic-Indic digits + return (c >= L'0' && c <= L'9') || (c >= 0x0660 && c <= 0x0669); +} + +// Neutral characters that inherit direction from surrounding context +static bool isNeutralChar(wchar_t c) +{ + return c == L' ' + || c == L'.' || c == L',' || c == L'!' || c == L'?' 
+ || c == L':' || c == L';' || c == L'-' || c == L'(' + || c == L')' || c == L'[' || c == L']' + || c == 0x060C // Arabic comma + || c == 0x061B // Arabic semicolon + || c == 0x061F; // Arabic question mark +} + +static const ArabicCharEntry* findEntry(wchar_t c) +{ + auto it = charMap.find(c); + if (it != charMap.end()) return it->second; + return nullptr; +} + +static const LaamAlefEntry* findLaamAlef(wchar_t alef) +{ + for (int i = 0; i < 4; i++) + { + if (laamAlefTable[i].alef == alef) return &laamAlefTable[i]; + } + return nullptr; +} + +// ------------------------------------------------------------------------- +// Core shaping logic, shared by both public overloads. +// +// logicalToVisual: if non-null, maps input[i] -> position in output string. +// Must be pre-sized to input.size()+1. +// ------------------------------------------------------------------------- +static std::wstring shapeArabicTextInternal(const std::wstring& input, + std::vector* logicalToVisual) +{ + if (input.empty()) return input; + + initTables(); + + // Fast path: check if any base Arabic characters exist + bool hasArabic = false; + for (size_t i = 0; i < input.size(); i++) + { + if (input[i] >= 0x0600 && input[i] <= 0x06FF) + { + hasArabic = true; + break; + } + } + if (!hasArabic) + { + // Identity mapping + if (logicalToVisual) + { + for (size_t i = 0; i <= input.size(); i++) + (*logicalToVisual)[i] = (int)i; + } + return input; + } + + // ----------------------------------------------------------------------- + // Split into runs: Arabic vs non-Arabic. + // Track the starting logical index of each run. 
+ // ----------------------------------------------------------------------- + struct Run + { + std::wstring text; + bool arabic; + int logicalStart; // index into input[] where this run begins + }; + std::vector runs; + + size_t i = 0; + while (i < input.size()) + { + bool curArabic = isArabicChar(input[i]) || isHaraka(input[i]); + Run run; + run.arabic = curArabic; + run.logicalStart = (int)i; + while (i < input.size()) + { + bool charArabic = isArabicChar(input[i]) || isHaraka(input[i]); + if (charArabic == curArabic) + { + run.text += input[i]; + i++; + } + else + { + break; + } + } + runs.push_back(run); + } + + // ----------------------------------------------------------------------- + // Merge neutral runs that sit between two Arabic runs into the preceding + // Arabic run (with the following Arabic run appended too). This keeps + // inter-word spaces inside the Arabic run so the whole phrase reverses + // together, producing correct RTL word order. + // ----------------------------------------------------------------------- + for (size_t r = 1; r + 1 < runs.size(); r++) + { + if (!runs[r].arabic && runs[r - 1].arabic && runs[r + 1].arabic) + { + bool allNeutral = true; + for (wchar_t c : runs[r].text) + { + if (!isNeutralChar(c)) { allNeutral = false; break; } + } + if (allNeutral) + { + // Absorb runs[r] and runs[r+1] into runs[r-1] + runs[r - 1].text += runs[r].text + runs[r + 1].text; + runs.erase(runs.begin() + r, runs.begin() + r + 2); + r--; // re-check from same position + } + } + } + + // Recompute logical starts after merging (run text lengths may have grown) + { + int pos = 0; + for (size_t r = 0; r < runs.size(); r++) + { + runs[r].logicalStart = pos; + pos += (int)runs[r].text.size(); + } + } + + // ----------------------------------------------------------------------- + // Shape each Arabic run. + // For each run we also build a posMap: posMap[localLogical] = localVisual + // (local means within the run's text, before run offsets are added). 
+ // ----------------------------------------------------------------------- + std::vector> runPosMap(runs.size()); // per-run local maps + + for (size_t r = 0; r < runs.size(); r++) + { + if (!runs[r].arabic) + { + // Non-Arabic: identity mapping + runPosMap[r].resize(runs[r].text.size() + 1); + for (size_t j = 0; j <= runs[r].text.size(); j++) + runPosMap[r][j] = (int)j; + continue; + } + + std::wstring& text = runs[r].text; + const int textLen = (int)text.size(); + + // Collect base character indices (skip harakat) + std::vector baseIndices; + for (size_t j = 0; j < text.size(); j++) + { + if (!isHaraka(text[j])) + baseIndices.push_back(j); + } + + // ------------------------------------------------------------------ + // Laam-Alef ligatures + // ------------------------------------------------------------------ + std::vector consumed(text.size(), false); + std::vector> ligatures; // lam idx -> ligature char + + for (size_t bi = 0; bi + 1 < baseIndices.size(); bi++) + { + size_t idx = baseIndices[bi]; + size_t nextIdx = baseIndices[bi + 1]; + if (text[idx] == 0x0644) // lam + { + const LaamAlefEntry* la = findLaamAlef(text[nextIdx]); + if (la) + { + bool connectsToPrev = false; + if (bi > 0) + { + size_t prevIdx = baseIndices[bi - 1]; + const ArabicCharEntry* prevEntry = findEntry(text[prevIdx]); + if (prevEntry && prevEntry->connectsLeft) + connectsToPrev = true; + } + wchar_t ligChar = connectsToPrev ? 
la->final_ : la->isolated; + ligatures.push_back({ idx, ligChar }); + consumed[nextIdx] = true; + bi++; // skip the alef + } + } + } + + // Apply ligature characters + for (size_t li = 0; li < ligatures.size(); li++) + text[ligatures[li].first] = ligatures[li].second; + + // Rebuild base indices after ligature consumption + baseIndices.clear(); + for (size_t j = 0; j < text.size(); j++) + { + if (!isHaraka(text[j]) && !consumed[j]) + baseIndices.push_back(j); + } + + // ------------------------------------------------------------------ + // Contextual form selection + // ------------------------------------------------------------------ + // Neighbour lookups read an unshaped snapshot: text[] is overwritten below with presentation forms, which are not charMap keys. + const std::wstring logical = text; + for (size_t bi = 0; bi < baseIndices.size(); bi++) + { + size_t idx = baseIndices[bi]; + const ArabicCharEntry* entry = findEntry(logical[idx]); + if (!entry) continue; + + bool prevConnects = false; + if (bi > 0) + { + size_t prevIdx = baseIndices[bi - 1]; + const ArabicCharEntry* prevEntry = findEntry(logical[prevIdx]); + if (prevEntry && prevEntry->connectsLeft) + prevConnects = true; // a preceding lam-alef ligature has no entry, so it never joins forward + } + + bool nextConnects = false; + if (bi + 1 < baseIndices.size()) + { + size_t nextIdx = baseIndices[bi + 1]; + const ArabicCharEntry* nextEntry = findEntry(logical[nextIdx]); + if (nextEntry && nextEntry->connectsRight) + nextConnects = true; + else if (!nextEntry && isLaamAlefLigature(logical[nextIdx])) + nextConnects = true; // lam-alef ligatures always connect to the right + } + + bool canConnectPrev = prevConnects && entry->connectsRight; + bool canConnectNext = nextConnects && entry->connectsLeft; + + if (canConnectPrev && canConnectNext) text[idx] = entry->medial; + else if (canConnectPrev) text[idx] = entry->final_; + else if (canConnectNext) text[idx] = entry->initial; + else text[idx] = entry->isolated; + } + + // ------------------------------------------------------------------ + // Build shaped string (drop consumed ligature 
partners) + // and a forward mapping: shapedIdx[logicalPos] = position in shaped + // (for consumed chars, map to the ligature position) + // ------------------------------------------------------------------ + std::vector logToShaped(textLen + 1, 0); + std::wstring shaped; + + // Track where each position of text[] lands in shaped[] + // consumed chars map to the position of their ligature replacement + { + int shapedPos = 0; + // First record the ligature lam positions so consumed alefs can map there + // We need to process in order + for (int j = 0; j < textLen; j++) + { + logToShaped[j] = shapedPos; + if (!consumed[j]) + { + shaped += text[j]; + shapedPos++; + } + // consumed chars: logToShaped[j] stays pointing to the lam position + // (shapedPos is not incremented, so it already equals lam's slot) + } + logToShaped[textLen] = shapedPos; // end-of-run cursor + } + + // ------------------------------------------------------------------ + // Reverse for RTL visual order, preserving LTR digit sequences. + // Also build reversedPos[posInShaped] -> posInReversed. + // ------------------------------------------------------------------ + const int shapedLen = (int)shaped.size(); + std::wstring reversed; + reversed.reserve(shapedLen); + + // reversedOf[i] = where shaped[i] ended up in reversed[] + std::vector reversedOf(shapedLen, 0); + + // We'll do a two-pass approach: + // 1. Walk shaped backwards, collecting digit runs and individual chars + // 2. 
For each element we emit, record reversedOf[] + + // Collect output segments: each segment is (isDigitSeq, startInShaped, len) + struct Seg { int start; int len; bool isDigit; }; + std::vector segs; + { + int j = shapedLen - 1; + while (j >= 0) + { + if (isDigit(shaped[j])) + { + // Find the full digit run (going left from j) + int end = j; + while (j >= 0 && isDigit(shaped[j])) j--; + int start = j + 1; + segs.push_back({ start, end - start + 1, true }); + } + else + { + segs.push_back({ j, 1, false }); + j--; + } + } + } + + // Emit segments in order, recording reversedOf[] + { + int outPos = 0; + for (size_t s = 0; s < segs.size(); s++) + { + const Seg& seg = segs[s]; + if (seg.isDigit) + { + // Digit sequence: output in LTR order (start..start+len-1) + for (int k = seg.start; k < seg.start + seg.len; k++) + { + reversedOf[k] = outPos; + reversed += shaped[k]; + outPos++; + } + } + else + { + // Single non-digit char + reversedOf[seg.start] = outPos; + reversed += shaped[seg.start]; + outPos++; + } + } + } + + runs[r].text = reversed; + + // ------------------------------------------------------------------ + // Build the local logical->visual position map for this run. + // Cursor positions are between characters: for RTL text, cursor + // at shaped position p maps to visual position (shapedLen - p). 
+ // ------------------------------------------------------------------ + if (logicalToVisual) + { + runPosMap[r].resize(textLen + 1); + for (int j = 0; j <= textLen; j++) + { + int sp = logToShaped[j]; + runPosMap[r][j] = shapedLen - sp; + } + } + } + + // ----------------------------------------------------------------------- + // Concatenate runs and compute absolute visual positions + // ----------------------------------------------------------------------- + std::wstring result; + result.reserve(input.size()); + + // Compute the visual start offset for each run + std::vector runVisualStart(runs.size(), 0); + { + int voff = 0; + for (size_t r = 0; r < runs.size(); r++) + { + runVisualStart[r] = voff; + voff += (int)runs[r].text.size(); + } + } + + for (size_t r = 0; r < runs.size(); r++) + result += runs[r].text; + + // ----------------------------------------------------------------------- + // Fill the caller's logicalToVisual[] array + // ----------------------------------------------------------------------- + if (logicalToVisual) + { + // For each input position, find which run it belongs to and map it + for (size_t r = 0; r < runs.size(); r++) + { + int logStart = runs[r].logicalStart; + + if (r < runPosMap.size() && !runPosMap[r].empty()) + { + int localLen = (int)runPosMap[r].size() - 1; // number of logical chars + for (int j = 0; j <= localLen; j++) + { + int logIdx = logStart + j; + if (logIdx <= (int)input.size()) + (*logicalToVisual)[logIdx] = runVisualStart[r] + runPosMap[r][j]; + } + } + } + + } + + return result; +} + +// ------------------------------------------------------------------------- +// Public API +// ------------------------------------------------------------------------- + +std::wstring shapeArabicText(const std::wstring& input) +{ + return shapeArabicTextInternal(input, nullptr); +} + +std::wstring shapeArabicText(const std::wstring& input, int logicalCursorPos, int* visualCursorPos) +{ + std::vector ltv(input.size() + 1, 0); + 
std::wstring result = shapeArabicTextInternal(input, &ltv); + + if (visualCursorPos) + { + int clamped = logicalCursorPos; + if (clamped < 0) clamped = 0; + if (clamped > (int)input.size()) clamped = (int)input.size(); + *visualCursorPos = ltv[clamped]; // ltv has input.size()+1 slots, so clamped is always a valid index + } + + return result; +} diff --git a/Minecraft.World/ArabicShaping.h b/Minecraft.World/ArabicShaping.h new file mode 100644 index 00000000..103d3a7e --- /dev/null +++ b/Minecraft.World/ArabicShaping.h @@ -0,0 +1,10 @@ +#pragma once +#include <string> + +// Shape Arabic text for visual display. Returns the visually-ordered string. +std::wstring shapeArabicText(const std::wstring& input); + +// Same as above, but also maps a logical cursor position to its visual position +// in the returned string. Pass the logical cursor index; visualCursorPos receives +// the index into the returned string where the cursor should be drawn. +std::wstring shapeArabicText(const std::wstring& input, int logicalCursorPos, int* visualCursorPos); diff --git a/Minecraft.World/Minecraft.World.vcxproj b/Minecraft.World/Minecraft.World.vcxproj index b6af9153..0c9241a4 100644 --- a/Minecraft.World/Minecraft.World.vcxproj +++ b/Minecraft.World/Minecraft.World.vcxproj @@ -2404,6 +2404,7 @@ + @@ -3479,6 +3480,7 @@ + diff --git a/Minecraft.World/Minecraft.World.vcxproj.filters b/Minecraft.World/Minecraft.World.vcxproj.filters index bf872596..132c128a 100644 --- a/Minecraft.World/Minecraft.World.vcxproj.filters +++ b/Minecraft.World/Minecraft.World.vcxproj.filters @@ -1445,6 +1445,9 @@ ConsoleHelpers + + ConsoleHelpers + net\minecraft\world\item\crafting @@ -4144,6 +4147,9 @@ ConsoleHelpers + + ConsoleHelpers + net\minecraft\world\level\chunk\storage