Add Arabic text shaping support for chat functionality

This commit introduces Arabic text shaping in the chat application by adding `ArabicShaping.cpp` and `ArabicShaping.h` for handling contextual forms and visual reordering.

The rendering logic in `ChatScreen.cpp` is updated to utilize this new functionality, adjusting cursor positions accordingly. Other UI components, including `UIControl_Base.cpp`, `UIControl_Label.cpp`, and `UIControl_SaveList.cpp`, are modified to ensure proper display of Arabic text.

Additionally, `Font.cpp` is enhanced with methods for efficient rendering of pre-shaped text.
This commit is contained in:
Revela 2026-03-17 17:08:58 -05:00
parent d473ef0dc5
commit e786604228
17 changed files with 717 additions and 28 deletions

View file

@ -6,6 +6,7 @@
#include "..\Minecraft.World\SharedConstants.h"
#include "..\Minecraft.World\StringHelpers.h"
#include "..\Minecraft.World\ChatPacket.h"
#include "..\Minecraft.World\ArabicShaping.h"
const wstring ChatScreen::allowedChars = SharedConstants::acceptableLetters;
vector<wstring> ChatScreen::s_chatHistory;
@ -154,14 +155,21 @@ void ChatScreen::render(int xm, int ym, float a)
int x = 4;
drawString(font, prefix, x, height - 12, 0xe0e0e0);
x += font->width(prefix);
wstring beforeCursor = message.substr(0, cursorIndex);
wstring afterCursor = message.substr(cursorIndex);
drawStringLiteral(font, beforeCursor, x, height - 12, 0xe0e0e0);
x += font->widthLiteral(beforeCursor);
// Shape the full message as one unit so letter connections and word order
// are correct. Track where the logical cursor maps in the visual string.
int visualCursorPos = 0;
wstring shaped = shapeArabicText(message, cursorIndex, &visualCursorPos);
// Render the full shaped message without re-shaping it
drawStringPreshaped(font, shaped, x, height - 12, 0xe0e0e0);
// Place the cursor at the correct visual position
wstring beforeCursorVisual = shaped.substr(0, visualCursorPos);
int cursorX = x + font->widthPreshaped(beforeCursorVisual);
if (frame / 6 % 2 == 0)
drawString(font, L"_", x, height - 12, 0xe0e0e0);
x += font->width(L"_");
drawStringLiteral(font, afterCursor, x, height - 12, 0xe0e0e0);
drawString(font, L"_", cursorX, height - 12, 0xe0e0e0);
Screen::render(xm, ym, a);
}

View file

@ -3,6 +3,7 @@
#include "UIControl.h"
#include "..\..\..\Minecraft.World\StringHelpers.h"
#include "..\..\..\Minecraft.World\JavaMath.h"
#include "..\..\..\Minecraft.World\ArabicShaping.h"
UIControl_Base::UIControl_Base()
{
@ -32,13 +33,16 @@ void UIControl_Base::tick()
//app.DebugPrintf("Calling SetLabel - '%ls'\n", m_label.c_str());
m_bLabelChanged = false;
// Shape the text before sending to Iggy; m_label stays unshaped for future updates
wstring shaped = shapeArabicText(m_label.getString());
IggyDataValue result;
IggyDataValue value[1];
value[0].type = IGGY_DATATYPE_string_UTF16;
IggyStringUTF16 stringVal;
stringVal.string = (IggyUTF16*) m_label.c_str();
stringVal.length = m_label.length();
stringVal.string = (IggyUTF16*) shaped.c_str();
stringVal.length = (int)shaped.length();
value[0].string16 = stringVal;
IggyResult out = IggyPlayerCallMethodRS ( m_parentScene->getMovie() , &result, getIggyValuePath() , m_setLabelFunc , 1 , value );
@ -56,13 +60,16 @@ void UIControl_Base::setLabel(UIString label, bool instant, bool force)
{
m_bLabelChanged = false;
// Shape the text before sending to Iggy; m_label stays unshaped for future updates
wstring shaped = shapeArabicText(m_label.getString());
IggyDataValue result;
IggyDataValue value[1];
value[0].type = IGGY_DATATYPE_string_UTF16;
IggyStringUTF16 stringVal;
stringVal.string = (IggyUTF16*)m_label.c_str();
stringVal.length = m_label.length();
stringVal.string = (IggyUTF16*) shaped.c_str();
stringVal.length = (int)shaped.length();
value[0].string16 = stringVal;
IggyResult out = IggyPlayerCallMethodRS ( m_parentScene->getMovie() , &result, getIggyValuePath() , m_setLabelFunc , 1 , value );

View file

@ -2,6 +2,7 @@
#include "UI.h"
#include "UIControl_Label.h"
#include "..\..\..\Minecraft.World\StringHelpers.h"
#include "..\..\..\Minecraft.World\ArabicShaping.h"
UIControl_Label::UIControl_Label()
{
@ -22,13 +23,15 @@ void UIControl_Label::init(UIString label)
{
m_label = label;
wstring shaped = shapeArabicText(m_label.getString());
IggyDataValue result;
IggyDataValue value[1];
value[0].type = IGGY_DATATYPE_string_UTF16;
IggyStringUTF16 stringVal;
stringVal.string = (IggyUTF16*)label.c_str();
stringVal.length = label.length();
stringVal.string = (IggyUTF16*)shaped.c_str();
stringVal.length = (int)shaped.length();
value[0].string16 = stringVal;
IggyResult out = IggyPlayerCallMethodRS ( m_parentScene->getMovie() , &result, getIggyValuePath() , m_initFunc , 1 , value );
}

View file

@ -1,6 +1,7 @@
#include "stdafx.h"
#include "UI.h"
#include "UIControl_SaveList.h"
#include "..\..\..\Minecraft.World\ArabicShaping.h"
bool UIControl_SaveList::setupControl(UIScene *scene, IggyValuePath *parent, const string &controlName)
{
@ -69,12 +70,14 @@ void UIControl_SaveList::addItem(const string &label, const wstring &iconName, i
void UIControl_SaveList::addItem(const wstring &label, const wstring &iconName, int data)
{
wstring shaped = shapeArabicText(label);
IggyDataValue result;
IggyDataValue value[3];
IggyStringUTF16 stringVal;
stringVal.string = (IggyUTF16*)label.c_str();
stringVal.length = static_cast<S32>(label.length());
stringVal.string = (IggyUTF16*)shaped.c_str();
stringVal.length = static_cast<S32>(shaped.length());
value[0].type = IGGY_DATATYPE_string_UTF16;
value[0].string16 = stringVal;

View file

@ -255,7 +255,7 @@ UIScene_LoadMenu::UIScene_LoadMenu(int iPad, void *initData, UILayer *parentLaye
{
wchar_t wSaveName[128];
ZeroMemory(wSaveName, sizeof(wSaveName));
mbstowcs(wSaveName, params->saveDetails->UTF8SaveName, 127);
MultiByteToWideChar(CP_UTF8, 0, params->saveDetails->UTF8SaveName, -1, wSaveName, 127);
m_levelName = wstring(wSaveName);
m_labelGameName.init(m_levelName);
}

View file

@ -49,7 +49,7 @@ static wstring ReadLevelNameFromSaveFile(const wstring& filePath, bool *outHardc
if (len > 0)
{
wchar_t wbuf[128] = {};
mbstowcs(wbuf, buf, 127);
MultiByteToWideChar(CP_UTF8, 0, buf, -1, wbuf, 127);
return wstring(wbuf);
}
}
@ -785,8 +785,11 @@ void UIScene_LoadOrJoinMenu::tick()
if (!levelName.empty())
{
m_buttonListSaves.addItem(levelName, wstring(L""));
wcstombs(m_saveDetails[i].UTF8SaveName, levelName.c_str(), 127);
m_saveDetails[i].UTF8SaveName[127] = '\0';
{
int n = WideCharToMultiByte(CP_UTF8, 0, levelName.c_str(), -1, m_saveDetails[i].UTF8SaveName, 127, nullptr, nullptr);
if (n <= 0) m_saveDetails[i].UTF8SaveName[0] = '\0';
m_saveDetails[i].UTF8SaveName[127] = '\0';
}
}
else
{
@ -1431,9 +1434,9 @@ int UIScene_LoadOrJoinMenu::KeyboardCompleteWorldNameCallback(LPVOID lpParam,boo
for (int k = 0; k < 127 && ui16Text[k]; k++)
wNewName[k] = static_cast<wchar_t>(ui16Text[k]);
// Convert to narrow for storage and in-memory update
char narrowName[128] = {};
wcstombs(narrowName, wNewName, 127);
// Convert to narrow for storage and in-memory update (UTF-8 to preserve Unicode)
char narrowName[256] = {};
WideCharToMultiByte(CP_UTF8, 0, wNewName, -1, narrowName, 255, nullptr, nullptr);
// Build the sidecar path: Windows64\GameHDD\{folder}\worldname.txt
wchar_t wFilename[MAX_SAVEFILENAME_LENGTH] = {};
@ -1449,7 +1452,7 @@ int UIScene_LoadOrJoinMenu::KeyboardCompleteWorldNameCallback(LPVOID lpParam,boo
// Update the in-memory display name so the list reflects it immediately
strncpy_s(pClass->m_saveDetails[listPos].UTF8SaveName, narrowName, 127);
pClass->m_saveDetails[listPos].UTF8SaveName[127] = '\0';
pClass->m_saveDetails[listPos].UTF8SaveName[127] = '\0'; // UTF8SaveName is still 128 bytes; narrowName fits as Arabic is <=2 bytes/char in UTF-8
// Reuse the existing callback to trigger the list repopulate
UIScene_LoadOrJoinMenu::RenameSaveDataReturned(pClass, true);
@ -2517,7 +2520,7 @@ int UIScene_LoadOrJoinMenu::SaveOptionsDialogReturned(void *pParam,int iPad,C4JS
{
wchar_t wSaveName[128];
ZeroMemory(wSaveName, 128 * sizeof(wchar_t));
mbstowcs_s(nullptr, wSaveName, 128, pClass->m_saveDetails[pClass->m_iSaveListIndex - pClass->m_iDefaultButtonsC].UTF8SaveName, _TRUNCATE);
MultiByteToWideChar(CP_UTF8, 0, pClass->m_saveDetails[pClass->m_iSaveListIndex - pClass->m_iDefaultButtonsC].UTF8SaveName, -1, wSaveName, 127);
UIKeyboardInitData kbData;
kbData.title = app.GetString(IDS_RENAME_WORLD_TITLE);
kbData.defaultText = wSaveName;

View file

@ -2,6 +2,7 @@
#include "XUI_Chat.h"
#include "..\..\Minecraft.h"
#include "..\..\Gui.h"
#include "..\..\..\Minecraft.World\ArabicShaping.h"
HRESULT CScene_Chat::OnInit( XUIMessageInit* pInitData, BOOL& bHandled )
{
@ -29,7 +30,8 @@ HRESULT CScene_Chat::OnTimer( XUIMessageTimer *pXUIMessageTimer, BOOL &bHandled)
{
m_Backgrounds[i].SetOpacity(opacity);
m_Labels[i].SetOpacity(opacity);
m_Labels[i].SetText( pGui->getMessage(m_iPad,i).c_str() );
wstring shaped = shapeArabicText(pGui->getMessage(m_iPad, i));
m_Labels[i].SetText( shaped.c_str() );
}
else
{

View file

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "XUI_Ctrl_4JList.h"
#include "..\..\..\Minecraft.World\ArabicShaping.h"
static bool TimeSortFn(const void *a, const void *b);
@ -294,8 +295,16 @@ HRESULT CXuiCtrl4JList::OnGetSourceDataText(XUIMessageGetSourceText *pGetSourceT
if( ( 0 == pGetSourceTextData->iData ) && ( ( pGetSourceTextData->bItemData ) ) )
{
EnterCriticalSection(&m_AccessListData);
pGetSourceTextData->szText =
GetData(pGetSourceTextData->iItem).pwszText;
LPCWSTR rawText = GetData(pGetSourceTextData->iItem).pwszText;
if (rawText)
{
m_shapedTextCache = shapeArabicText(rawText);
pGetSourceTextData->szText = m_shapedTextCache.c_str();
}
else
{
pGetSourceTextData->szText = rawText;
}
LeaveCriticalSection(&m_AccessListData);
bHandled = TRUE;
}

View file

@ -75,4 +75,5 @@ private:
static bool IndexSortFn(const void *a, const void *b);
HXUIOBJ m_hSelectionChangedHandlerObj;
std::wstring m_shapedTextCache; // temp buffer for Arabic-shaped text in OnGetSourceDataText
};

View file

@ -8,6 +8,7 @@
#include "..\Minecraft.World\net.minecraft.h"
#include "..\Minecraft.World\StringHelpers.h"
#include "..\Minecraft.World\Random.h"
#include "..\Minecraft.World\ArabicShaping.h"
Font::Font(Options *options, const wstring& name, Textures* textures, bool enforceUnicode, ResourceLocation *textureLocation, int cols, int rows, int charWidth, int charHeight, unsigned short charMap[]/* = nullptr */) : textures(textures)
{
@ -289,6 +290,74 @@ void Font::drawLiteral(const wstring& str, int x, int y, int color)
}
}
// Variant of sanitize() for strings that were already run through
// shapeArabicText(): it performs the per-character clean-up only and never
// shapes the text a second time.
//
// Per character:
//   - known to CharacterExists()      -> replaced by MapCharacter()
//   - non-zero entry in unicodeWidth  -> kept as the raw code point
//   - anything else                   -> replaced by 0
wstring Font::sanitizePreshaped(const wstring& str)
{
    wstring cleaned = str;
    for (wchar_t& glyph : cleaned)
    {
        if (CharacterExists(glyph))
        {
            glyph = MapCharacter(glyph);
            continue;
        }
        // A non-zero unicode width means the raw code point is left alone
        // for glyph-page rendering; a zero width means it cannot be drawn.
        if (unicodeWidth[glyph] == 0)
            glyph = 0;
    }
    return cleaned;
}
void Font::drawLiteralPreshaped(const wstring& str, int x, int y, int color)
{
if (str.empty()) return;
if ((color & 0xFC000000) == 0) color |= 0xFF000000;
textures->bindTexture(m_textureLocation);
glColor4f((color >> 16 & 255) / 255.0F, (color >> 8 & 255) / 255.0F, (color & 255) / 255.0F, (color >> 24 & 255) / 255.0F);
xPos = static_cast<float>(x);
yPos = static_cast<float>(y);
wstring cleanStr = sanitizePreshaped(str);
for (size_t i = 0; i < cleanStr.length(); ++i)
{
wchar_t c = cleanStr.at(i);
if (isUnicodeGlyphChar(c))
{
renderUnicodeCharacter(c);
textures->bindTexture(m_textureLocation);
lastBoundTexture = fontTexture;
}
else
{
renderCharacter(c);
}
}
}
// Draws an already-shaped string with a drop shadow: first a darkened copy
// offset by (+1, +1), then the string itself on top.
void Font::drawShadowLiteralPreshaped(const wstring& str, int x, int y, int color)
{
    // Shadow colour: each RGB channel divided by four, alpha byte kept as-is.
    const int dimmed = ((color & 0xFCFCFC) >> 2) | (color & 0xFF000000);

    drawLiteralPreshaped(str, x + 1, y + 1, dimmed);
    drawLiteralPreshaped(str, x, y, color);
}
int Font::widthPreshaped(const wstring& str)
{
wstring cleanStr = sanitizePreshaped(str);
if (cleanStr.empty()) return 0;
int len = 0;
for (size_t i = 0; i < cleanStr.length(); ++i)
{
wchar_t wc = cleanStr.at(i);
if (isUnicodeGlyphChar(wc))
len += (int)unicodeCharWidth(wc);
else
len += charWidths[static_cast<unsigned>(wc)];
}
return len;
}
void Font::drawShadowWordWrap(const wstring &str, int x, int y, int w, int color, int h)
{
drawWordWrapInternal(str, x + 1, y + 1, w, color, true, h);
@ -473,7 +542,7 @@ int Font::widthLiteral(const wstring& str)
wstring Font::sanitize(const wstring& str)
{
wstring sb = str;
wstring sb = shapeArabicText(str);
for (unsigned int i = 0; i < sb.length(); i++)
{

View file

@ -78,10 +78,14 @@ private:
void renderUnicodeCharacter(wchar_t c);
float unicodeCharWidth(wchar_t c);
bool isUnicodeGlyphChar(wchar_t c);
wstring sanitizePreshaped(const wstring& str); // sanitize without re-shaping Arabic
void drawLiteralPreshaped(const wstring& str, int x, int y, int color);
public:
int width(const wstring& str);
int widthLiteral(const wstring& str); // width without skipping § codes (for chat input)
int widthPreshaped(const wstring& str); // width of already-shaped text, no re-shaping
void drawShadowLiteralPreshaped(const wstring& str, int x, int y, int color);
wstring sanitize(const wstring& str);
void drawWordWrap(const wstring &string, int x, int y, int w, int col, int h); // 4J Added h param

View file

@ -110,6 +110,11 @@ void GuiComponent::drawStringLiteral(Font *font, const wstring& str, int x, int
font->drawShadowLiteral(str, x, y, color);
}
// Draws `str` at (x, y) in `color` by forwarding to
// Font::drawShadowLiteralPreshaped. Intended for text that has already been
// shaped, so it is not shaped a second time on the way to the screen.
void GuiComponent::drawStringPreshaped(Font *font, const wstring& str, int x, int y, int color)
{
font->drawShadowLiteralPreshaped(str, x, y, color);
}
void GuiComponent::blit(int x, int y, int sx, int sy, int w, int h)
{
float us = 1 / 256.0f;

View file

@ -16,5 +16,6 @@ public:
void drawCenteredString(Font *font, const wstring& str, int x, int y, int color);
void drawString(Font *font, const wstring& str, int x, int y, int color);
void drawStringLiteral(Font* font, const wstring& str, int x, int y, int color);
void drawStringPreshaped(Font* font, const wstring& str, int x, int y, int color);
void blit(int x, int y, int sx, int sy, int w, int h);
};

View file

@ -0,0 +1,556 @@
#include "stdafx.h"
#include "ArabicShaping.h"
#include <vector>
#include <unordered_map>
// Arabic text shaping - contextual form selection and RTL visual reordering.
// Ported from "Arabic Writer" JS reference by Omar Muhammad (GPL).
// One row of the shaping table: a base letter, its four contextual
// presentation forms, and its joining behaviour.
struct ArabicCharEntry
{
wchar_t base; // code point as it appears in the logical (unshaped) text
wchar_t isolated; // form used when joined to neither neighbour
wchar_t initial; // form used when joined only to the following letter
wchar_t medial; // form used when joined to both neighbours
wchar_t final_; // form used when joined only to the preceding letter
bool connectsLeft; // true if the letter can join to the letter that follows it
bool connectsRight; // true if the letter can join to the letter that precedes it
};
// Core Arabic + extended (Farsi/Urdu) characters with presentation forms.
// Column order follows ArabicCharEntry; cL / cR are connectsLeft /
// connectsRight. For letters with cL == false the initial column repeats the
// isolated form and the medial column repeats the final form, so a lookup
// never yields a joining shape the letter does not have.
static const ArabicCharEntry arabicChars[] =
{
// base isolated initial medial final cL cR
{ 0x0621, 0xFE80, 0xFE80, 0xFE80, 0xFE80, false, false }, // hamza
{ 0x0622, 0xFE81, 0xFE81, 0xFE82, 0xFE82, false, true }, // alef madda
{ 0x0623, 0xFE83, 0xFE83, 0xFE84, 0xFE84, false, true }, // alef hamza above
{ 0x0624, 0xFE85, 0xFE85, 0xFE86, 0xFE86, false, true }, // waw hamza
{ 0x0625, 0xFE87, 0xFE87, 0xFE88, 0xFE88, false, true }, // alef hamza below
{ 0x0626, 0xFE89, 0xFE8B, 0xFE8C, 0xFE8A, true, true }, // yeh hamza
{ 0x0627, 0xFE8D, 0xFE8D, 0xFE8E, 0xFE8E, false, true }, // alef
{ 0x0628, 0xFE8F, 0xFE91, 0xFE92, 0xFE90, true, true }, // beh
{ 0x0629, 0xFE93, 0xFE93, 0xFE94, 0xFE94, false, true }, // teh marbuta
{ 0x062A, 0xFE95, 0xFE97, 0xFE98, 0xFE96, true, true }, // teh
{ 0x062B, 0xFE99, 0xFE9B, 0xFE9C, 0xFE9A, true, true }, // theh
{ 0x062C, 0xFE9D, 0xFE9F, 0xFEA0, 0xFE9E, true, true }, // jeem
{ 0x062D, 0xFEA1, 0xFEA3, 0xFEA4, 0xFEA2, true, true }, // hah
{ 0x062E, 0xFEA5, 0xFEA7, 0xFEA8, 0xFEA6, true, true }, // khah
{ 0x062F, 0xFEA9, 0xFEA9, 0xFEAA, 0xFEAA, false, true }, // dal
{ 0x0630, 0xFEAB, 0xFEAB, 0xFEAC, 0xFEAC, false, true }, // thal
{ 0x0631, 0xFEAD, 0xFEAD, 0xFEAE, 0xFEAE, false, true }, // reh
{ 0x0632, 0xFEAF, 0xFEAF, 0xFEB0, 0xFEB0, false, true }, // zain
{ 0x0633, 0xFEB1, 0xFEB3, 0xFEB4, 0xFEB2, true, true }, // seen
{ 0x0634, 0xFEB5, 0xFEB7, 0xFEB8, 0xFEB6, true, true }, // sheen
{ 0x0635, 0xFEB9, 0xFEBB, 0xFEBC, 0xFEBA, true, true }, // sad
{ 0x0636, 0xFEBD, 0xFEBF, 0xFEC0, 0xFEBE, true, true }, // dad
{ 0x0637, 0xFEC1, 0xFEC3, 0xFEC4, 0xFEC2, true, true }, // tah
{ 0x0638, 0xFEC5, 0xFEC7, 0xFEC8, 0xFEC6, true, true }, // zah
{ 0x0639, 0xFEC9, 0xFECB, 0xFECC, 0xFECA, true, true }, // ain
{ 0x063A, 0xFECD, 0xFECF, 0xFED0, 0xFECE, true, true }, // ghain
{ 0x0640, 0x0640, 0x0640, 0x0640, 0x0640, true, true }, // tatweel
{ 0x0641, 0xFED1, 0xFED3, 0xFED4, 0xFED2, true, true }, // feh
{ 0x0642, 0xFED5, 0xFED7, 0xFED8, 0xFED6, true, true }, // qaf
{ 0x0643, 0xFED9, 0xFEDB, 0xFEDC, 0xFEDA, true, true }, // kaf
{ 0x0644, 0xFEDD, 0xFEDF, 0xFEE0, 0xFEDE, true, true }, // lam
{ 0x0645, 0xFEE1, 0xFEE3, 0xFEE4, 0xFEE2, true, true }, // meem
{ 0x0646, 0xFEE5, 0xFEE7, 0xFEE8, 0xFEE6, true, true }, // noon
{ 0x0647, 0xFEE9, 0xFEEB, 0xFEEC, 0xFEEA, true, true }, // heh
{ 0x0648, 0xFEED, 0xFEED, 0xFEEE, 0xFEEE, false, true }, // waw
{ 0x0649, 0xFEEF, 0xFEEF, 0xFEF0, 0xFEF0, false, true }, // alef maksura
{ 0x064A, 0xFEF1, 0xFEF3, 0xFEF4, 0xFEF2, true, true }, // yeh
// Extended - Farsi/Urdu
{ 0x067E, 0xFB56, 0xFB58, 0xFB59, 0xFB57, true, true }, // peh
{ 0x0686, 0xFB7A, 0xFB7C, 0xFB7D, 0xFB7B, true, true }, // tcheh
{ 0x0698, 0xFB8A, 0xFB8A, 0xFB8B, 0xFB8B, false, true }, // jeh
{ 0x06A9, 0xFB8E, 0xFB90, 0xFB91, 0xFB8F, true, true }, // keheh (Farsi kaf)
{ 0x06AF, 0xFB92, 0xFB94, 0xFB95, 0xFB93, true, true }, // gaf
{ 0x06CC, 0xFBFC, 0xFBFE, 0xFBFF, 0xFBFD, true, true }, // Farsi yeh
// Urdu
{ 0x0679, 0xFB66, 0xFB68, 0xFB69, 0xFB67, true, true }, // tteh
{ 0x0688, 0xFB88, 0xFB88, 0xFB89, 0xFB89, false, true }, // ddal
{ 0x0691, 0xFB8C, 0xFB8C, 0xFB8D, 0xFB8D, false, true }, // rreh
{ 0x06C1, 0xFBA6, 0xFBA8, 0xFBA9, 0xFBA7, true, true }, // heh goal
{ 0x06D2, 0xFBAE, 0xFBAE, 0xFBAF, 0xFBAF, false, true }, // yeh barree
};
// Number of rows in arabicChars, derived from the array size so it cannot
// drift when rows are added or removed.
static const int ARABIC_CHAR_COUNT = sizeof(arabicChars) / sizeof(arabicChars[0]);
// Laam-Alef ligatures: when lam (0x0644) is followed by certain alef forms
// the pair is replaced by a single ligature glyph. One row per alef variant.
struct LaamAlefEntry
{
wchar_t alef; // the alef variant
wchar_t isolated; // ligature isolated form (lam not joined to the preceding letter)
wchar_t final_; // ligature final form (lam joined to the preceding letter)
};
// Ligature glyphs for lam followed by each supported alef variant.
// Columns: alef code point, isolated ligature, final ligature.
static const LaamAlefEntry laamAlefTable[] =
{
{ 0x0622, 0xFEF5, 0xFEF6 }, // lam + alef madda
{ 0x0623, 0xFEF7, 0xFEF8 }, // lam + alef hamza above
{ 0x0625, 0xFEF9, 0xFEFA }, // lam + alef hamza below
{ 0x0627, 0xFEFB, 0xFEFC }, // lam + alef
};
// Build lookup map on first use
static std::unordered_map<wchar_t, const ArabicCharEntry*> charMap;
static bool tablesInitialized = false;
static void initTables()
{
if (tablesInitialized) return;
for (int i = 0; i < ARABIC_CHAR_COUNT; i++)
{
charMap[arabicChars[i].base] = &arabicChars[i];
}
tablesInitialized = true;
}
// True for code points in the Arabic block (U+0600-U+06FF) or in either
// presentation-form range (U+FB50-U+FDFF, U+FE70-U+FEFF).
static bool isArabicChar(wchar_t c)
{
    if (c >= 0x0600 && c <= 0x06FF)
        return true;    // base Arabic block
    if (c >= 0xFB50 && c <= 0xFDFF)
        return true;    // presentation forms, first range
    return c >= 0xFE70 && c <= 0xFEFF; // presentation forms, second range
}
// True for Arabic diacritics (tashkeel/harakat):
// U+0610-U+061A, U+064B-U+065F and U+0670.
static bool isHaraka(wchar_t c)
{
    if (c == 0x0670)
        return true;
    const bool inMainRange  = c >= 0x064B && c <= 0x065F;
    const bool inEarlyRange = c >= 0x0610 && c <= 0x061A;
    return inMainRange || inEarlyRange;
}
// True for the Laam-Alef ligature presentation forms, U+FEF5-U+FEFC.
static bool isLaamAlefLigature(wchar_t c)
{
    const wchar_t firstLigature = 0xFEF5;
    const wchar_t lastLigature  = 0xFEFC;
    return !(c < firstLigature || c > lastLigature);
}
// True for Western digits ('0'-'9') and Arabic-Indic digits (U+0660-U+0669).
static bool isDigit(wchar_t c)
{
    const bool western     = c >= L'0' && c <= L'9';
    const bool arabicIndic = c >= 0x0660 && c <= 0x0669;
    return western || arabicIndic;
}
// Neutral characters that inherit direction from surrounding context:
// the space, common ASCII punctuation and brackets, and the Arabic comma,
// semicolon and question mark.
static bool isNeutralChar(wchar_t c)
{
    switch (c)
    {
    case L' ':
    case L'.': case L',': case L'!': case L'?':
    case L':': case L';': case L'-':
    case L'(': case L')': case L'[': case L']':
    case 0x060C: // Arabic comma
    case 0x061B: // Arabic semicolon
    case 0x061F: // Arabic question mark
        return true;
    default:
        return false;
    }
}
// Returns the shaping-table row whose base code point is `c`, or nullptr
// when `c` has no row. Relies on charMap having been filled by initTables().
static const ArabicCharEntry* findEntry(wchar_t c)
{
    const auto found = charMap.find(c);
    return found == charMap.end() ? nullptr : found->second;
}
// Returns the ligature row for lam followed by `alef`, or nullptr when
// `alef` is not one of the alef variants listed in laamAlefTable.
static const LaamAlefEntry* findLaamAlef(wchar_t alef)
{
    for (const LaamAlefEntry& row : laamAlefTable)
    {
        if (row.alef == alef)
            return &row;
    }
    return nullptr;
}
// -------------------------------------------------------------------------
// Core shaping logic, shared by both public overloads.
//
// logicalToVisual: if non-null, maps input[i] -> position in output string.
// Must be pre-sized to input.size()+1.
// -------------------------------------------------------------------------
static std::wstring shapeArabicTextInternal(const std::wstring& input,
std::vector<int>* logicalToVisual)
{
if (input.empty()) return input;
initTables();
// Fast path: check if any base Arabic characters exist
bool hasArabic = false;
for (size_t i = 0; i < input.size(); i++)
{
if (input[i] >= 0x0600 && input[i] <= 0x06FF)
{
hasArabic = true;
break;
}
}
if (!hasArabic)
{
// Identity mapping
if (logicalToVisual)
{
for (size_t i = 0; i <= input.size(); i++)
(*logicalToVisual)[i] = (int)i;
}
return input;
}
// -----------------------------------------------------------------------
// Split into runs: Arabic vs non-Arabic.
// Track the starting logical index of each run.
// -----------------------------------------------------------------------
struct Run
{
std::wstring text;
bool arabic;
int logicalStart; // index into input[] where this run begins
};
std::vector<Run> runs;
size_t i = 0;
while (i < input.size())
{
bool curArabic = isArabicChar(input[i]) || isHaraka(input[i]);
Run run;
run.arabic = curArabic;
run.logicalStart = (int)i;
while (i < input.size())
{
bool charArabic = isArabicChar(input[i]) || isHaraka(input[i]);
if (charArabic == curArabic)
{
run.text += input[i];
i++;
}
else
{
break;
}
}
runs.push_back(run);
}
// -----------------------------------------------------------------------
// Merge neutral runs that sit between two Arabic runs into the preceding
// Arabic run (with the following Arabic run appended too). This keeps
// inter-word spaces inside the Arabic run so the whole phrase reverses
// together, producing correct RTL word order.
// -----------------------------------------------------------------------
for (size_t r = 1; r + 1 < runs.size(); r++)
{
if (!runs[r].arabic && runs[r - 1].arabic && runs[r + 1].arabic)
{
bool allNeutral = true;
for (wchar_t c : runs[r].text)
{
if (!isNeutralChar(c)) { allNeutral = false; break; }
}
if (allNeutral)
{
// Absorb runs[r] and runs[r+1] into runs[r-1]
runs[r - 1].text += runs[r].text + runs[r + 1].text;
runs.erase(runs.begin() + r, runs.begin() + r + 2);
r--; // re-check from same position
}
}
}
// Recompute logical starts after merging (run text lengths may have grown)
{
int pos = 0;
for (size_t r = 0; r < runs.size(); r++)
{
runs[r].logicalStart = pos;
pos += (int)runs[r].text.size();
}
}
// -----------------------------------------------------------------------
// Shape each Arabic run.
// For each run we also build a posMap: posMap[localLogical] = localVisual
// (local means within the run's text, before run offsets are added).
// -----------------------------------------------------------------------
std::vector<std::vector<int>> runPosMap(runs.size()); // per-run local maps
for (size_t r = 0; r < runs.size(); r++)
{
if (!runs[r].arabic)
{
// Non-Arabic: identity mapping
runPosMap[r].resize(runs[r].text.size() + 1);
for (size_t j = 0; j <= runs[r].text.size(); j++)
runPosMap[r][j] = (int)j;
continue;
}
std::wstring& text = runs[r].text;
const int textLen = (int)text.size();
// Collect base character indices (skip harakat)
std::vector<size_t> baseIndices;
for (size_t j = 0; j < text.size(); j++)
{
if (!isHaraka(text[j]))
baseIndices.push_back(j);
}
// ------------------------------------------------------------------
// Laam-Alef ligatures
// ------------------------------------------------------------------
std::vector<bool> consumed(text.size(), false);
std::vector<std::pair<size_t, wchar_t>> ligatures; // lam idx -> ligature char
for (size_t bi = 0; bi + 1 < baseIndices.size(); bi++)
{
size_t idx = baseIndices[bi];
size_t nextIdx = baseIndices[bi + 1];
if (text[idx] == 0x0644) // lam
{
const LaamAlefEntry* la = findLaamAlef(text[nextIdx]);
if (la)
{
bool connectsToPrev = false;
if (bi > 0)
{
size_t prevIdx = baseIndices[bi - 1];
const ArabicCharEntry* prevEntry = findEntry(text[prevIdx]);
if (prevEntry && prevEntry->connectsLeft)
connectsToPrev = true;
}
wchar_t ligChar = connectsToPrev ? la->final_ : la->isolated;
ligatures.push_back({ idx, ligChar });
consumed[nextIdx] = true;
bi++; // skip the alef
}
}
}
// Apply ligature characters
for (size_t li = 0; li < ligatures.size(); li++)
text[ligatures[li].first] = ligatures[li].second;
// Rebuild base indices after ligature consumption
baseIndices.clear();
for (size_t j = 0; j < text.size(); j++)
{
if (!isHaraka(text[j]) && !consumed[j])
baseIndices.push_back(j);
}
// ------------------------------------------------------------------
// Contextual form selection
// ------------------------------------------------------------------
for (size_t bi = 0; bi < baseIndices.size(); bi++)
{
size_t idx = baseIndices[bi];
const ArabicCharEntry* entry = findEntry(text[idx]);
if (!entry) continue;
bool prevConnects = false;
if (bi > 0)
{
size_t prevIdx = baseIndices[bi - 1];
const ArabicCharEntry* prevEntry = findEntry(text[prevIdx]);
if (prevEntry && prevEntry->connectsLeft)
prevConnects = true;
else if (!prevEntry && text[prevIdx] >= 0xFE70)
prevConnects = false; // lam-alef ligature doesn't connect left
}
bool nextConnects = false;
if (bi + 1 < baseIndices.size())
{
size_t nextIdx = baseIndices[bi + 1];
const ArabicCharEntry* nextEntry = findEntry(text[nextIdx]);
if (nextEntry && nextEntry->connectsRight)
nextConnects = true;
else if (!nextEntry && isLaamAlefLigature(text[nextIdx]))
nextConnects = true; // lam-alef ligatures always connect to the right
}
bool canConnectPrev = prevConnects && entry->connectsRight;
bool canConnectNext = nextConnects && entry->connectsLeft;
if (canConnectPrev && canConnectNext) text[idx] = entry->medial;
else if (canConnectPrev) text[idx] = entry->final_;
else if (canConnectNext) text[idx] = entry->initial;
else text[idx] = entry->isolated;
}
// ------------------------------------------------------------------
// Build shaped string (drop consumed ligature partners)
// and a forward mapping: shapedIdx[logicalPos] = position in shaped
// (for consumed chars, map to the ligature position)
// ------------------------------------------------------------------
std::vector<int> logToShaped(textLen + 1, 0);
std::wstring shaped;
// Track where each position of text[] lands in shaped[]
// consumed chars map to the position of their ligature replacement
{
int shapedPos = 0;
// First record the ligature lam positions so consumed alefs can map there
// We need to process in order
for (int j = 0; j < textLen; j++)
{
logToShaped[j] = shapedPos;
if (!consumed[j])
{
shaped += text[j];
shapedPos++;
}
// consumed chars: logToShaped[j] stays pointing to the lam position
// (shapedPos is not incremented, so it already equals lam's slot)
}
logToShaped[textLen] = shapedPos; // end-of-run cursor
}
// ------------------------------------------------------------------
// Reverse for RTL visual order, preserving LTR digit sequences.
// Also build reversedPos[posInShaped] -> posInReversed.
// ------------------------------------------------------------------
const int shapedLen = (int)shaped.size();
std::wstring reversed;
reversed.reserve(shapedLen);
// reversedOf[i] = where shaped[i] ended up in reversed[]
std::vector<int> reversedOf(shapedLen, 0);
// We'll do a two-pass approach:
// 1. Walk shaped backwards, collecting digit runs and individual chars
// 2. For each element we emit, record reversedOf[]
// Collect output segments: each segment is (isDigitSeq, startInShaped, len)
struct Seg { int start; int len; bool isDigit; };
std::vector<Seg> segs;
{
int j = shapedLen - 1;
while (j >= 0)
{
if (isDigit(shaped[j]))
{
// Find the full digit run (going left from j)
int end = j;
while (j >= 0 && isDigit(shaped[j])) j--;
int start = j + 1;
segs.push_back({ start, end - start + 1, true });
}
else
{
segs.push_back({ j, 1, false });
j--;
}
}
}
// Emit segments in order, recording reversedOf[]
{
int outPos = 0;
for (size_t s = 0; s < segs.size(); s++)
{
const Seg& seg = segs[s];
if (seg.isDigit)
{
// Digit sequence: output in LTR order (start..start+len-1)
for (int k = seg.start; k < seg.start + seg.len; k++)
{
reversedOf[k] = outPos;
reversed += shaped[k];
outPos++;
}
}
else
{
// Single non-digit char
reversedOf[seg.start] = outPos;
reversed += shaped[seg.start];
outPos++;
}
}
}
runs[r].text = reversed;
// ------------------------------------------------------------------
// Build the local logical->visual position map for this run.
// Cursor positions are between characters: for RTL text, cursor
// at shaped position p maps to visual position (shapedLen - p).
// ------------------------------------------------------------------
if (logicalToVisual)
{
runPosMap[r].resize(textLen + 1);
for (int j = 0; j <= textLen; j++)
{
int sp = logToShaped[j];
runPosMap[r][j] = shapedLen - sp;
}
}
}
// -----------------------------------------------------------------------
// Concatenate runs and compute absolute visual positions
// -----------------------------------------------------------------------
std::wstring result;
result.reserve(input.size());
// Compute the visual start offset for each run
std::vector<int> runVisualStart(runs.size(), 0);
{
int voff = 0;
for (size_t r = 0; r < runs.size(); r++)
{
runVisualStart[r] = voff;
voff += (int)runs[r].text.size();
}
}
for (size_t r = 0; r < runs.size(); r++)
result += runs[r].text;
// -----------------------------------------------------------------------
// Fill the caller's logicalToVisual[] array
// -----------------------------------------------------------------------
if (logicalToVisual)
{
// For each input position, find which run it belongs to and map it
for (size_t r = 0; r < runs.size(); r++)
{
int logStart = runs[r].logicalStart;
if (r < runPosMap.size() && !runPosMap[r].empty())
{
int localLen = (int)runPosMap[r].size() - 1; // number of logical chars
for (int j = 0; j <= localLen; j++)
{
int logIdx = logStart + j;
if (logIdx <= (int)input.size())
(*logicalToVisual)[logIdx] = runVisualStart[r] + runPosMap[r][j];
}
}
}
}
return result;
}
// -------------------------------------------------------------------------
// Public API
// -------------------------------------------------------------------------
// Shapes `input` for display and returns the visually ordered string.
// No cursor mapping is requested (a null map is passed to the shared
// implementation).
std::wstring shapeArabicText(const std::wstring& input)
{
return shapeArabicTextInternal(input, nullptr);
}
// Shapes `input` for display and additionally translates a logical cursor
// index into the matching index of the returned string.
//
// logicalCursorPos: cursor index into `input`; values outside
//                   [0, input.size()] are clamped to that range.
// visualCursorPos:  optional output; left untouched when null.
std::wstring shapeArabicText(const std::wstring& input, int logicalCursorPos, int* visualCursorPos)
{
    std::vector<int> cursorMap(input.size() + 1, 0);
    std::wstring visual = shapeArabicTextInternal(input, &cursorMap);

    if (!visualCursorPos)
        return visual;

    const int lastPos = (int)input.size();
    const int logical = logicalCursorPos < 0
        ? 0
        : (logicalCursorPos > lastPos ? lastPos : logicalCursorPos);
    *visualCursorPos = cursorMap[logical];
    return visual;
}

View file

@ -0,0 +1,10 @@
#pragma once
#include <string>
// Shape Arabic text for visual display. Returns the visually-ordered string.
// Text containing no character in U+0600-U+06FF is returned unchanged.
std::wstring shapeArabicText(const std::wstring& input);
// Same as above, but also maps a logical cursor position to its visual position
// in the returned string. Pass the logical cursor index (values outside
// [0, input.size()] are clamped); visualCursorPos receives the index into the
// returned string where the cursor should be drawn. visualCursorPos may be
// null, in which case only the shaped string is produced.
std::wstring shapeArabicText(const std::wstring& input, int logicalCursorPos, int* visualCursorPos);

View file

@ -2404,6 +2404,7 @@
<ClInclude Include="AnimatePacket.h" />
<ClInclude Include="AnvilTile.h" />
<ClInclude Include="AnvilTileItem.h" />
<ClInclude Include="ArabicShaping.h" />
<ClInclude Include="ArmorDyeRecipe.h" />
<ClInclude Include="ArmorItem.h" />
<ClInclude Include="ArmorRecipes.h" />
@ -3479,6 +3480,7 @@
<ClCompile Include="AnimatePacket.cpp" />
<ClCompile Include="AnvilTile.cpp" />
<ClCompile Include="AnvilTileItem.cpp" />
<ClCompile Include="ArabicShaping.cpp" />
<ClCompile Include="ArmorDyeRecipe.cpp" />
<ClCompile Include="ArmorItem.cpp" />
<ClCompile Include="ArmorRecipes.cpp" />

View file

@ -1445,6 +1445,9 @@
<ClInclude Include="StringHelpers.h">
<Filter>ConsoleHelpers</Filter>
</ClInclude>
<ClInclude Include="ArabicShaping.h">
<Filter>ConsoleHelpers</Filter>
</ClInclude>
<ClInclude Include="net.minecraft.world.item.crafting.h">
<Filter>net\minecraft\world\item\crafting</Filter>
</ClInclude>
@ -4144,6 +4147,9 @@
<ClCompile Include="StringHelpers.cpp">
<Filter>ConsoleHelpers</Filter>
</ClCompile>
<ClCompile Include="ArabicShaping.cpp">
<Filter>ConsoleHelpers</Filter>
</ClCompile>
<ClCompile Include="MemoryChunkStorage.cpp">
<Filter>net\minecraft\world\level\chunk\storage</Filter>
</ClCompile>