302 lines
9.8 KiB
C
302 lines
9.8 KiB
C
|
/**
|
|||
|
* @file lv_txt_ap.c
|
|||
|
*
|
|||
|
*/
|
|||
|
|
|||
|
/*********************
|
|||
|
* INCLUDES
|
|||
|
*********************/
|
|||
|
#include <stddef.h>
|
|||
|
#include "lv_bidi.h"
|
|||
|
#include "lv_txt.h"
|
|||
|
#include "lv_txt_ap.h"
|
|||
|
#include "lv_mem.h"
|
|||
|
#include "../draw/lv_draw.h"
|
|||
|
|
|||
|
/*********************
|
|||
|
* DEFINES
|
|||
|
*********************/
|
|||
|
|
|||
|
/**********************
|
|||
|
* TYPEDEFS
|
|||
|
**********************/
|
|||
|
typedef struct {
|
|||
|
uint8_t char_offset;
|
|||
|
uint16_t char_end_form;
|
|||
|
int8_t char_begining_form_offset;
|
|||
|
int8_t char_middle_form_offset;
|
|||
|
int8_t char_isolated_form_offset;
|
|||
|
struct {
|
|||
|
uint8_t conj_to_previous;
|
|||
|
uint8_t conj_to_next;
|
|||
|
} ap_chars_conjunction;
|
|||
|
} ap_chars_map_t;
|
|||
|
|
|||
|
/**********************
|
|||
|
* STATIC PROTOTYPES
|
|||
|
**********************/
|
|||
|
#if LV_USE_ARABIC_PERSIAN_CHARS == 1
|
|||
|
static uint32_t lv_ap_get_char_index(uint16_t c);
|
|||
|
static uint32_t lv_txt_lam_alef(uint32_t ch_curr, uint32_t ch_next);
|
|||
|
static bool lv_txt_is_arabic_vowel(uint16_t c);
|
|||
|
|
|||
|
/**********************
|
|||
|
* STATIC VARIABLES
|
|||
|
**********************/
|
|||
|
|
|||
|
const ap_chars_map_t ap_chars_map[] = {
|
|||
|
/*{Key Offset, End, Beginning, Middle, Isolated, {conjunction}}*/
|
|||
|
{1, 0xFE84, -1, 0, -1, {1, 0}}, // أ
|
|||
|
{2, 0xFE86, -1, 0, -1, {1, 0}}, // ؤ
|
|||
|
{3, 0xFE88, -1, 0, -1, {1, 0}}, // ﺇ
|
|||
|
{4, 0xFE8A, 1, 2, -1, {1, 0}}, // ئ
|
|||
|
{5, 0xFE8E, -1, 0, -1, {1, 0}}, // آ
|
|||
|
{6, 0xFE90, 1, 2, -1, {1, 1}}, // ب
|
|||
|
{92, 0xFB57, 1, 2, -1, {1, 1}}, // پ
|
|||
|
{8, 0xFE96, 1, 2, -1, {1, 1}}, // ت
|
|||
|
{9, 0xFE9A, 1, 2, -1, {1, 1}}, // ث
|
|||
|
{10, 0xFE9E, 1, 2, -1, {1, 1}}, // ج
|
|||
|
{100, 0xFB7B, 1, 2, -1, {1, 1}}, // چ
|
|||
|
{11, 0xFEA2, 1, 2, -1, {1, 1}}, // ح
|
|||
|
{12, 0xFEA6, 1, 2, -1, {1, 1}}, // خ
|
|||
|
{13, 0xFEAA, -1, 0, -1, {1, 0}}, // د
|
|||
|
{14, 0xFEAC, -1, 0, -1, {1, 0}}, // ذ
|
|||
|
{15, 0xFEAE, -1, 0, -1, {1, 0}}, // ر
|
|||
|
{16, 0xFEB0, -1, 0, -1, {1, 0}}, // ز
|
|||
|
{118, 0xFB8B, -1, 0, -1, {1, 0}}, // ژ
|
|||
|
{17, 0xFEB2, 1, 2, -1, {1, 1}}, // س
|
|||
|
{18, 0xFEB6, 1, 2, -1, {1, 1}}, // ش
|
|||
|
{19, 0xFEBA, 1, 2, -1, {1, 1}}, // ص
|
|||
|
{20, 0xFEBE, 1, 2, -1, {1, 1}}, // ض
|
|||
|
{21, 0xFEC2, 1, 2, -1, {1, 1}}, // ط
|
|||
|
{22, 0xFEC6, 1, 2, -1, {1, 1}}, // ظ
|
|||
|
{23, 0xFECA, 1, 2, -1, {1, 1}}, // ع
|
|||
|
{24, 0xFECE, 1, 2, -1, {1, 1}}, // غ
|
|||
|
{30, 0x0640, 0, 0, 0, {1, 1}}, // - (mad, hyphen)
|
|||
|
{31, 0xFED2, 1, 2, -1, {1, 1}}, // ف
|
|||
|
{32, 0xFED6, 1, 2, -1, {1, 1}}, // ق
|
|||
|
{135, 0xFB8F, 1, 2, -1, {1, 1}}, // ک
|
|||
|
{33, 0xFEDA, 1, 2, -1, {1, 1}}, // ﻙ
|
|||
|
{141, 0xFB93, 1, 2, -1, {1, 1}}, // گ
|
|||
|
{34, 0xFEDE, 1, 2, -1, {1, 1}}, // ل
|
|||
|
{35, 0xFEE2, 1, 2, -1, {1, 1}}, // م
|
|||
|
{36, 0xFEE6, 1, 2, -1, {1, 1}}, // ن
|
|||
|
{38, 0xFEEE, -1, 0, -1, {1, 0}}, // و
|
|||
|
{37, 0xFEEA, 1, 2, -1, {1, 1}}, // ه
|
|||
|
{39, 0xFEF0, 0, 0, -1, {1, 0}}, // ى
|
|||
|
{40, 0xFEF2, 1, 2, -1, {1, 1}}, // ي
|
|||
|
{170, 0xFBFD, 1, 2, -1, {1, 1}}, // ی
|
|||
|
{7, 0xFE94, 1, 2, -1, {1, 0}}, // ة
|
|||
|
{206, 0x06F0, 1, 2, -1, {0, 0}}, // ۰
|
|||
|
{207, 0x06F1, 0, 0, 0, {0, 0}}, // ۱
|
|||
|
{208, 0x06F2, 0, 0, 0, {0, 0}}, // ۲
|
|||
|
{209, 0x06F3, 0, 0, 0, {0, 0}}, // ۳
|
|||
|
{210, 0x06F4, 0, 0, 0, {0, 0}}, // ۴
|
|||
|
{211, 0x06F5, 0, 0, 0, {0, 0}}, // ۵
|
|||
|
{212, 0x06F6, 0, 0, 0, {0, 0}}, // ۶
|
|||
|
{213, 0x06F7, 0, 0, 0, {0, 0}}, // ۷
|
|||
|
{214, 0x06F8, 0, 0, 0, {0, 0}}, // ۸
|
|||
|
{215, 0x06F9, 0, 0, 0, {0, 0}}, // ۹
|
|||
|
LV_AP_END_CHARS_LIST
|
|||
|
};
|
|||
|
/**********************
|
|||
|
* MACROS
|
|||
|
**********************/
|
|||
|
|
|||
|
/**********************
|
|||
|
* GLOBAL FUNCTIONS
|
|||
|
**********************/
|
|||
|
uint32_t _lv_txt_ap_calc_bytes_cnt(const char * txt)
|
|||
|
{
|
|||
|
uint32_t txt_length = 0;
|
|||
|
uint32_t chars_cnt = 0;
|
|||
|
uint32_t current_ap_idx = 0;
|
|||
|
uint32_t i, j;
|
|||
|
uint32_t ch_enc;
|
|||
|
|
|||
|
txt_length = _lv_txt_get_encoded_length(txt);
|
|||
|
|
|||
|
i = 0;
|
|||
|
j = 0;
|
|||
|
while(i < txt_length) {
|
|||
|
ch_enc = _lv_txt_encoded_next(txt, &j);
|
|||
|
current_ap_idx = lv_ap_get_char_index(ch_enc);
|
|||
|
|
|||
|
if(current_ap_idx != LV_UNDEF_ARABIC_PERSIAN_CHARS)
|
|||
|
ch_enc = ap_chars_map[current_ap_idx].char_end_form;
|
|||
|
|
|||
|
if(ch_enc < 0x80)
|
|||
|
chars_cnt++;
|
|||
|
else if(ch_enc < 0x0800)
|
|||
|
chars_cnt += 2;
|
|||
|
else if(ch_enc < 0x010000)
|
|||
|
chars_cnt += 3;
|
|||
|
else
|
|||
|
chars_cnt += 4;
|
|||
|
|
|||
|
i++;
|
|||
|
}
|
|||
|
|
|||
|
return chars_cnt + 1;
|
|||
|
}
|
|||
|
|
|||
|
void _lv_txt_ap_proc(const char * txt, char * txt_out)
|
|||
|
{
|
|||
|
uint32_t txt_length = 0;
|
|||
|
uint32_t index_current, idx_next, idx_previous, i, j;
|
|||
|
uint32_t * ch_enc;
|
|||
|
uint32_t * ch_fin;
|
|||
|
char * txt_out_temp;
|
|||
|
|
|||
|
txt_length = _lv_txt_get_encoded_length(txt);
|
|||
|
|
|||
|
ch_enc = (uint32_t *)lv_mem_alloc(sizeof(uint32_t) * (txt_length + 1));
|
|||
|
ch_fin = (uint32_t *)lv_mem_alloc(sizeof(uint32_t) * (txt_length + 1));
|
|||
|
|
|||
|
i = 0;
|
|||
|
j = 0;
|
|||
|
while(j < txt_length)
|
|||
|
ch_enc[j++] = _lv_txt_encoded_next(txt, &i);
|
|||
|
|
|||
|
ch_enc[j] = 0;
|
|||
|
|
|||
|
i = 0;
|
|||
|
j = 0;
|
|||
|
idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS;
|
|||
|
while(i < txt_length) {
|
|||
|
index_current = lv_ap_get_char_index(ch_enc[i]);
|
|||
|
idx_next = lv_ap_get_char_index(ch_enc[i + 1]);
|
|||
|
|
|||
|
if(lv_txt_is_arabic_vowel(ch_enc[i])) { // Current character is a vowel
|
|||
|
ch_fin[j] = ch_enc[i];
|
|||
|
i++;
|
|||
|
j++;
|
|||
|
continue; // Skip this character
|
|||
|
}
|
|||
|
else if(lv_txt_is_arabic_vowel(ch_enc[i + 1])) { // Next character is a vowel
|
|||
|
idx_next = lv_ap_get_char_index(ch_enc[i + 2]); // Skip the vowel character to join with the character after it
|
|||
|
}
|
|||
|
|
|||
|
if(index_current == LV_UNDEF_ARABIC_PERSIAN_CHARS) {
|
|||
|
ch_fin[j] = ch_enc[i];
|
|||
|
j++;
|
|||
|
i++;
|
|||
|
idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS;
|
|||
|
continue;
|
|||
|
}
|
|||
|
|
|||
|
uint8_t conjunction_to_previuse = (i == 0 ||
|
|||
|
idx_previous == LV_UNDEF_ARABIC_PERSIAN_CHARS) ? 0 : ap_chars_map[idx_previous].ap_chars_conjunction.conj_to_next;
|
|||
|
uint8_t conjunction_to_next = ((i == txt_length - 1) ||
|
|||
|
idx_next == LV_UNDEF_ARABIC_PERSIAN_CHARS) ? 0 : ap_chars_map[idx_next].ap_chars_conjunction.conj_to_previous;
|
|||
|
|
|||
|
uint32_t lam_alef = lv_txt_lam_alef(index_current, idx_next);
|
|||
|
if(lam_alef) {
|
|||
|
if(conjunction_to_previuse) {
|
|||
|
lam_alef ++;
|
|||
|
}
|
|||
|
ch_fin[j] = lam_alef;
|
|||
|
idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS;
|
|||
|
i += 2;
|
|||
|
j++;
|
|||
|
continue;
|
|||
|
}
|
|||
|
|
|||
|
if(conjunction_to_previuse && conjunction_to_next)
|
|||
|
ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_middle_form_offset;
|
|||
|
else if(!conjunction_to_previuse && conjunction_to_next)
|
|||
|
ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_begining_form_offset;
|
|||
|
else if(conjunction_to_previuse && !conjunction_to_next)
|
|||
|
ch_fin[j] = ap_chars_map[index_current].char_end_form;
|
|||
|
else
|
|||
|
ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_isolated_form_offset;
|
|||
|
idx_previous = index_current;
|
|||
|
i++;
|
|||
|
j++;
|
|||
|
}
|
|||
|
ch_fin[j] = 0;
|
|||
|
for(i = 0; i < txt_length; i++)
|
|||
|
ch_enc[i] = 0;
|
|||
|
for(i = 0; i < j; i++)
|
|||
|
ch_enc[i] = ch_fin[i];
|
|||
|
lv_mem_free(ch_fin);
|
|||
|
|
|||
|
txt_out_temp = txt_out;
|
|||
|
i = 0;
|
|||
|
|
|||
|
while(i < txt_length) {
|
|||
|
if(ch_enc[i] < 0x80) {
|
|||
|
*(txt_out_temp++) = ch_enc[i] & 0xFF;
|
|||
|
}
|
|||
|
else if(ch_enc[i] < 0x0800) {
|
|||
|
*(txt_out_temp++) = ((ch_enc[i] >> 6) & 0x1F) | 0xC0;
|
|||
|
*(txt_out_temp++) = ((ch_enc[i] >> 0) & 0x3F) | 0x80;
|
|||
|
}
|
|||
|
else if(ch_enc[i] < 0x010000) {
|
|||
|
*(txt_out_temp++) = ((ch_enc[i] >> 12) & 0x0F) | 0xE0;
|
|||
|
*(txt_out_temp++) = ((ch_enc[i] >> 6) & 0x3F) | 0x80;
|
|||
|
*(txt_out_temp++) = ((ch_enc[i] >> 0) & 0x3F) | 0x80;
|
|||
|
}
|
|||
|
else if(ch_enc[i] < 0x110000) {
|
|||
|
*(txt_out_temp++) = ((ch_enc[i] >> 18) & 0x07) | 0xF0;
|
|||
|
*(txt_out_temp++) = ((ch_enc[i] >> 12) & 0x3F) | 0x80;
|
|||
|
*(txt_out_temp++) = ((ch_enc[i] >> 6) & 0x3F) | 0x80;
|
|||
|
*(txt_out_temp++) = ((ch_enc[i] >> 0) & 0x3F) | 0x80;
|
|||
|
}
|
|||
|
|
|||
|
i++;
|
|||
|
}
|
|||
|
*(txt_out_temp) = '\0';
|
|||
|
lv_mem_free(ch_enc);
|
|||
|
}
|
|||
|
/**********************
|
|||
|
* STATIC FUNCTIONS
|
|||
|
**********************/
|
|||
|
|
|||
|
static uint32_t lv_ap_get_char_index(uint16_t c)
|
|||
|
{
|
|||
|
for(uint8_t i = 0; ap_chars_map[i].char_end_form; i++) {
|
|||
|
if(c == (ap_chars_map[i].char_offset + LV_AP_ALPHABET_BASE_CODE))
|
|||
|
return i;
|
|||
|
else if(c == ap_chars_map[i].char_end_form //is it an End form
|
|||
|
|| c == (ap_chars_map[i].char_end_form + ap_chars_map[i].char_begining_form_offset) //is it a Beginning form
|
|||
|
|| c == (ap_chars_map[i].char_end_form + ap_chars_map[i].char_middle_form_offset) //is it a middle form
|
|||
|
|| c == (ap_chars_map[i].char_end_form + ap_chars_map[i].char_isolated_form_offset)) { //is it an isolated form
|
|||
|
return i;
|
|||
|
}
|
|||
|
}
|
|||
|
return LV_UNDEF_ARABIC_PERSIAN_CHARS;
|
|||
|
}
|
|||
|
|
|||
|
static uint32_t lv_txt_lam_alef(uint32_t ch_curr, uint32_t ch_next)
|
|||
|
{
|
|||
|
uint32_t ch_code = 0;
|
|||
|
if(ap_chars_map[ch_curr].char_offset != 34) {
|
|||
|
return 0;
|
|||
|
}
|
|||
|
if(ch_next == LV_UNDEF_ARABIC_PERSIAN_CHARS) {
|
|||
|
return 0;
|
|||
|
}
|
|||
|
ch_code = ap_chars_map[ch_next].char_offset + LV_AP_ALPHABET_BASE_CODE;
|
|||
|
if(ch_code == 0x0622) {
|
|||
|
return 0xFEF5; // (lam-alef) mad
|
|||
|
}
|
|||
|
if(ch_code == 0x0623) {
|
|||
|
return 0xFEF7; // (lam-alef) top hamza
|
|||
|
}
|
|||
|
if(ch_code == 0x0625) {
|
|||
|
return 0xFEF9; // (lam-alef) bot hamza
|
|||
|
}
|
|||
|
if(ch_code == 0x0627) {
|
|||
|
return 0xFEFB; // (lam-alef) alef
|
|||
|
}
|
|||
|
return 0;
|
|||
|
}
|
|||
|
|
|||
|
static bool lv_txt_is_arabic_vowel(uint16_t c)
|
|||
|
{
|
|||
|
return (c >= 0x064B) && (c <= 0x0652);
|
|||
|
}
|
|||
|
|
|||
|
#endif
|