diff --git a/devel/0162.md b/devel/0162.md new file mode 100644 index 0000000000..a5f37b7dea --- /dev/null +++ b/devel/0162.md @@ -0,0 +1,91 @@ +# [0162] 字体处理性能优化 + +## 1 相关文档 +- [dddd.md](dddd.md) - 任务文档模板 + +## 2 任务相关的代码文件 +- `src/Graphics/Fonts/smart_font.cpp` +- `src/Graphics/Fonts/find_font.cpp` +- `src/Graphics/Fonts/font_translate.cpp` +- `src/Data/String/unicode.cpp` +- `src/Data/String/unicode.hpp` +- `tests/Graphics/Fonts/smart_font_test.cpp` +- `tests/Graphics/Fonts/font_size_test.cpp` + +## 3 如何测试 + +### 3.1 确定性测试(单元测试) +```bash +xmake b smart_font_test +xmake r smart_font_test +xmake b font_size_test +xmake r font_size_test +``` + +### 3.2 性能测试 +```bash +xmake b smart_font_test +TM_DEBUG_BENCH=1 xmake r smart_font_test +``` + +## 4 如何提交 + +```bash +xmake b smart_font_test +xmake r smart_font_test +xmake b font_size_test +xmake r font_size_test +``` + +## 5 What +对字体处理代码做实验性质的性能优化,采用 TDD 方式开发。 + +## 6 Why +字体处理代码逻辑非常绕,性能有瓶颈。每次字符解析都涉及大量字符串操作和哈希查找,在文档渲染时成为热点。 + +## 7 How +- 先写性能基准测试 +- 用 `bench_start`/`bench_end` 定位热点 +- 针对性优化,每个小优化一个 commit +- 关键位置用 `cout` 加日志验证优化效果 + +## 8 已完成的优化 + +### Commit 1: 优化 smart_font resolve 避免重复 get_unicode_range 调用 +- `resolve(string c)` 中对同一个字符的 `get_unicode_range` 调用提取到循环外,只计算一次后传给内层循环 +- 新增 `resolve(string c, string range, string fam, int attempt)` 重载 + +### Commit 2: 缓存 get_unicode_range 和 get_utf8_code 结果避免重复 UTF-8 转换 +- `unicode.cpp` 中新增 `unicode_range_cache` 和 `utf8_code_cache` +- `get_utf8_code` 改为委托给缓存版本 `get_utf8_code_cached` +- 避免对同一字符反复做 `strict_cork_to_utf8` + `decode_from_utf8` + +### Commit 3: 优化 find_font 减少重复 as_string 调用 +- `find_font(string family, ...)` 中缓存 `sz_str` 和 `dpi_str` +- 避免在构造 `t1/t2/t3/panic` 等 tree 时重复调用 `as_string` + +### Commit 4: 优化 smart_font_bis 字符串构造并移除多余 normalize +- 缓存 `vdpi_str`/`hdpi_str`,减少字符串拼接 +- 移除 `smart_font`/`math_smart_font`/`prog_smart_font` 中多余的 `normalize_half_multiple_size` 调用(已在调用方处理) + +### Commit 5: 移除 get_extents/draw_fixed 中不必要的字符串拷贝 +- 
`get_extents`/`get_xpositions`/`draw_fixed` 中 `string r= s` 改为 `string r` +- 避免循环中不必要的引用计数增减 + +### Commit 6: 优化 closest_font 减少重复 as_string 调用 +- `closest_font` 中缓存 `dpi_str` 和 `attempt_str` + +### Commit 7: 缓存 substitute_math_letter 结果避免重复调用 +- `smart_font_rep` 中新增 `math_letter_cache` +- 避免数学字母替换的重复计算 + +### Commit 8: 将 resolve 内层循环中的不变检查提升到循环外 +- `is_rubber(c)` 和 `starts(c, "contains (c)) return unicode_range_cache[c]; string uc= strict_cork_to_utf8 (c); - if (N (uc) == 0) return ""; + if (N (uc) == 0) { + unicode_range_cache (c)= ""; + return ""; + } int pos = 0; uint32_t code = lolly::data::decode_from_utf8 (uc, pos); string range= lolly::data::unicode_get_range (code); - if (pos == N (uc)) return range; - return ""; + if (pos != N (uc)) range= ""; + unicode_range_cache (c)= range; + return range; +} + +static hashmap utf8_code_cache (-1); + +int +get_utf8_code_cached (string c) { + if (utf8_code_cache->contains (c)) return utf8_code_cache[c]; + int c_N= N (c); + if (c_N <= 2 || c_N > 6) { + utf8_code_cache (c)= -1; + return -1; + } + string uc = strict_cork_to_utf8 (c); + int pos = 0; + int code= lolly::data::decode_from_utf8 (uc, pos); + if (pos == c_N) { + utf8_code_cache (c)= code; + return code; + } + utf8_code_cache (c)= -1; + return -1; } bool diff --git a/src/Data/String/unicode.hpp b/src/Data/String/unicode.hpp index edd4e46df3..7aa91814f0 100644 --- a/src/Data/String/unicode.hpp +++ b/src/Data/String/unicode.hpp @@ -17,6 +17,7 @@ #include "string.hpp" string get_unicode_range (string c); +int get_utf8_code_cached (string c); bool is_emoji_character (int uc); #endif diff --git a/src/Graphics/Fonts/find_font.cpp b/src/Graphics/Fonts/find_font.cpp index 4e01acb20c..4bc91ee134 100644 --- a/src/Graphics/Fonts/find_font.cpp +++ b/src/Graphics/Fonts/find_font.cpp @@ -220,12 +220,12 @@ find_magnified_font (tree t, double zoomx, double zoomy) { font find_font (string family, string variant, string series, string shape, double sz, int dpi) { - // 
浮点尺寸字符串处理:整数如"10",0.5倍数如"10.5" string sz_str; - if (sz == round (sz)) sz_str= as_string ((int) sz); // 整数 - else sz_str= as_string (sz); // 0.5倍数,保留一位小数 + if (sz == round (sz)) sz_str= as_string ((int) sz); + else sz_str= as_string (sz); + string dpi_str= as_string (dpi); string s= family * "-" * variant * "-" * series * "-" * shape * "-" * sz_str * - "-" * as_string (dpi); + "-" * dpi_str; if (font::instances->contains (s)) return font (s); if (ends (shape, "-poorit")) { @@ -284,10 +284,8 @@ find_font (string family, string variant, string series, string shape, t1[1]= variant; t1[2]= series; t1[3]= shape; - // 浮点尺寸字符串处理 - if (sz == round (sz)) t1[4]= as_string ((int) sz); // 整数 - else t1[4]= as_string (sz); // 0.5倍数,保留一位小数 - t1[5] = as_string (dpi); + t1[4]= sz_str; + t1[5] = dpi_str; font fn= find_font (t1); if (!is_nil (fn)) { font::instances (s)= (pointer) fn.rep; @@ -298,8 +296,8 @@ find_font (string family, string variant, string series, string shape, t2[0]= family; t2[1]= variant; t2[2]= series; - t2[3]= as_string (sz); - t2[4]= as_string (dpi); + t2[3]= sz_str; + t2[4]= dpi_str; fn = find_font (t2); if (!is_nil (fn)) { font::instances (s)= (pointer) fn.rep; @@ -309,15 +307,15 @@ find_font (string family, string variant, string series, string shape, tree t3 (TUPLE, 4); t3[0]= family; t3[1]= variant; - t3[2]= as_string (sz); - t3[3]= as_string (dpi); + t3[2]= sz_str; + t3[3]= dpi_str; fn = find_font (t3); if (!is_nil (fn)) { font::instances (s)= (pointer) fn.rep; return fn; } - tree panic (TUPLE, "tex", "cmr", as_string (sz), as_string (dpi)); + tree panic (TUPLE, "tex", "cmr", sz_str, dpi_str); fn = find_font (panic); font::instances (s)= (pointer) fn.rep; return fn; diff --git a/src/Graphics/Fonts/font_translate.cpp b/src/Graphics/Fonts/font_translate.cpp index df7034ac16..45a4c635c6 100644 --- a/src/Graphics/Fonts/font_translate.cpp +++ b/src/Graphics/Fonts/font_translate.cpp @@ -290,11 +290,12 @@ find_closest (string& family, string& variant, string& 
series, string& shape, font closest_font (string family, string variant, string series, string shape, double sz, int dpi, int attempt) { - // 浮点尺寸字符串处理:整数如"10",0.5倍数如"10.5" string sz_str; - if (sz == round (sz)) sz_str= as_string ((int) sz); // 整数 - else sz_str= as_string (sz); // 0.5倍数,保留一位小数 - string extra= sz_str * "-" * as_string (dpi) * "-" * as_string (attempt); + if (sz == round (sz)) sz_str= as_string ((int) sz); + else sz_str= as_string (sz); + string dpi_str = as_string (dpi); + string attempt_str= as_string (attempt); + string extra = sz_str * "-" * dpi_str * "-" * attempt_str; string s= family * "-" * variant * "-" * series * "-" * shape * "-" * extra; if (font::instances->contains (s)) return font (s); string orig_family= family; diff --git a/src/Graphics/Fonts/smart_font.cpp b/src/Graphics/Fonts/smart_font.cpp index 4d222cc41a..e7a85ae43c 100644 --- a/src/Graphics/Fonts/smart_font.cpp +++ b/src/Graphics/Fonts/smart_font.cpp @@ -269,16 +269,7 @@ init_unicode_substitution () { int get_utf8_code (string c) { - int c_N= N (c); - if (c_N <= 2 || c_N > 6) { - // the largest unicode is U+10FFFF - return -1; - } - string uc = strict_cork_to_utf8 (c); - int pos = 0; - int code= decode_from_utf8 (uc, pos); - if (pos == c_N) return code; - else return -1; + return get_utf8_code_cached (c); } string @@ -906,34 +897,40 @@ is_wanted (string c, string family, array rules, array given) { int smart_font_rep::resolve (string c, string fam, int attempt) { - string range= get_unicode_range (c); + return resolve (c, get_unicode_range (c), fam, attempt); +} + +int +smart_font_rep::resolve (string c, string range, string fam, int attempt) { if (DEBUG_VERBOSE) { debug_fonts << "Resolve " << c << " in math_kind " << math_kind << " in unicode range " << range << " in fam " << fam << " mfam " << mfam << ", attempt " << attempt << LF; } - array a= trimmed_tokenize (fam, "="); - if (N (a) >= 2) { - fam = a[1]; - array b = tokenize (a[0], " "); - bool ok= is_wanted (c, fam, b, 
given_font); - if (!ok) { - return -1; - } + if (occurs ("=", fam)) { + array a= trimmed_tokenize (fam, "="); + if (N (a) >= 2) { + fam = a[1]; + array b = tokenize (a[0], " "); + bool ok= is_wanted (c, fam, b, given_font); + if (!ok) { + return -1; + } - fam= tex_gyre_fix (fam, series, shape); - fam= kepler_fix (fam, series, shape); - // fam= stix_fix (fam, series, shape); + fam= tex_gyre_fix (fam, series, shape); + fam= kepler_fix (fam, series, shape); + // fam= stix_fix (fam, series, shape); - if (math_kind != 0 && shape == "mathitalic" && - (range == "greek" || (starts (c, "")) || - c == "" || c == "" || c == "")) { - font cfn= smart_font_bis (fam, variant, series, shape, sz, hdpi, dpi); - if (cfn->supports (c)) { - tree key= tuple ("subfont", fam); - int nr = sm->add_font (key, REWRITE_NONE); - maybe_initialize_font (nr); - return sm->add_char (key, c); + if (math_kind != 0 && shape == "mathitalic" && + (range == "greek" || (starts (c, "")) || + c == "" || c == "" || c == "")) { + font cfn= smart_font_bis (fam, variant, series, shape, sz, hdpi, dpi); + if (cfn->supports (c)) { + tree key= tuple ("subfont", fam); + int nr = sm->add_font (key, REWRITE_NONE); + maybe_initialize_font (nr); + return sm->add_char (key, c); + } } } } @@ -1240,24 +1237,28 @@ smart_font_rep::resolve (string c) { (c[0] < 'A' || c[0] > 'Z') && (c[0] < 'a' || c[0] > 'z')) return sm->add_char (tuple ("italic-roman"), c); + string sf= substitute_math_letter (c, math_kind); + bool rubber = is_rubber (c); + bool wide = starts (c, "supports (c); for (int attempt= 1; attempt <= FONT_ATTEMPTS; attempt++) { - if (attempt > 1 && substitute_math_letter (c, math_kind) != "") break; + if (attempt > 1 && sf != "") break; for (int i= 0; i < N (a); i++) { - int nr= resolve (c, a[i], attempt); + int nr= resolve (c, range, a[i], attempt); if (nr >= 0) { // initialize_font (nr); // cout << "Found " << c << " in " << fn[nr]->res_name << "\n"; return nr; } - if (is_rubber (c)) { + if (rubber) { nr= 
resolve_rubber (c, a[i], attempt); if (nr >= 0) { // cout << "Found " << c << " in poor-rubber\n"; return nr; } } - if (starts (c, "supports (c)) { + if (wide) { + if (main_supp) { // cout << "Found " << c << " in main\n"; return sm->add_char (tuple ("main"), c); } @@ -1269,7 +1270,6 @@ smart_font_rep::resolve (string c) { } } - string sf= substitute_math_letter (c, math_kind); if (sf != "") { // cout << "Found " << c << " in " << sf << " (math-letter)\n"; return sm->add_char (tuple (sf), c); @@ -1446,7 +1446,7 @@ smart_font_rep::get_extents (string s, metric& ex) { if (n == 0) fn[0]->get_extents (empty_string, ex); else { int nr; - string r= s; + string r; metric ey; while (true) { advance (s, i, r, nr); @@ -1484,7 +1484,7 @@ smart_font_rep::get_xpositions (string s, SI* xpos) { xpos[0]= x; while (i < n) { int nr; - string r = s; + string r; int start= i; advance (s, i, r, nr); if (nr >= 0) { @@ -1516,7 +1516,7 @@ smart_font_rep::get_xpositions (string s, SI* xpos, SI xk) { xpos[0]= x; while (i < n) { int nr; - string r = s; + string r; int start= i; advance (s, i, r, nr); if (nr >= 0) { @@ -1546,7 +1546,7 @@ smart_font_rep::draw_fixed (renderer ren, string s, SI x, SI y) { int i= 0, n= N (s); while (i < n) { int nr; - string r= s; + string r; metric ey; advance (s, i, r, nr); if (nr >= 0) { @@ -1564,7 +1564,7 @@ smart_font_rep::draw_fixed (renderer ren, string s, SI x, SI y, SI xk) { int i= 0, n= N (s); while (i < n) { int nr; - string r= s; + string r; metric ey; advance (s, i, r, nr); if (nr >= 0) { @@ -1769,11 +1769,14 @@ smart_font_bis (string family, string variant, string series, string shape, sz_str= as_string (sz); // 0.5倍数,保留一位小数 } - string name= family * "-" * variant * "-" * series * "-" * shape * "-" * - sz_str * "-" * as_string (vdpi) * "-smart"; - if (hdpi != vdpi) - name= family * "-" * variant * "-" * series * "-" * shape * "-" * sz_str * - "-" * as_string (hdpi) * "-" * as_string (vdpi) * "-smart"; + string vdpi_str= as_string (vdpi); + string 
name = family * "-" * variant * "-" * series * "-" * shape * "-" * + sz_str * "-" * vdpi_str * "-smart"; + if (hdpi != vdpi) { + string hdpi_str= as_string (hdpi); + name = family * "-" * variant * "-" * series * "-" * shape * "-" * + sz_str * "-" * hdpi_str * "-" * vdpi_str * "-smart"; + } if (font::instances->contains (name)) return font (name); if (starts (family, "tc")) { // FIXME: temporary hack for symbols from std-symbol.ts @@ -1815,7 +1818,6 @@ smart_font_bis (string family, string variant, string series, string shape, font smart_font (string family, string variant, string series, string shape, double sz, int dpi) { - sz= normalize_half_multiple_size (sz); if (variant == "rm") return smart_font_bis (family, variant, series, shape, sz, dpi, dpi); array lfn1= logical_font (family, "rm", series, shape); @@ -1836,7 +1838,6 @@ font math_smart_font (string family, string variant, string series, string shape, string tfam, string tvar, string tser, string tsh, double sz, int dpi) { - sz= normalize_half_multiple_size (sz); if (tfam == "roman" || starts (tfam, "sys-")) { tfam= family; } @@ -1852,7 +1853,6 @@ font prog_smart_font (string family, string variant, string series, string shape, string tfam, string tvar, string tser, string tsh, double sz, int dpi) { - sz= normalize_half_multiple_size (sz); if (tfam == "roman") { tfam= family; } diff --git a/src/Graphics/Fonts/smart_font.hpp b/src/Graphics/Fonts/smart_font.hpp index 9e44ede7b8..53c20e5f13 100644 --- a/src/Graphics/Fonts/smart_font.hpp +++ b/src/Graphics/Fonts/smart_font.hpp @@ -117,6 +117,7 @@ struct smart_font_rep : font_rep { void advance (string s, int& pos, string& r, int& nr); int resolve (string c, string fam, int attempt); + int resolve (string c, string range, string fam, int attempt); bool is_italic_prime (string c); int resolve_rubber (string c, string fam, int attempt); int resolve (string c); diff --git a/tests/Graphics/Fonts/smart_font_test.cpp b/tests/Graphics/Fonts/smart_font_test.cpp index 
7024836304..b73004cc72 100644 --- a/tests/Graphics/Fonts/smart_font_test.cpp +++ b/tests/Graphics/Fonts/smart_font_test.cpp @@ -40,6 +40,7 @@ private slots: void test_cursor_position_iii (); void test_performance (); void test_math_performance (); + void test_resolve_mixed_chars (); }; void @@ -188,5 +189,22 @@ TestSmartFont::test_math_performance () { fn->get_extents (math_text, ex); } +void +TestSmartFont::test_resolve_mixed_chars () { + font fn= smart_font ("sys-chinese", "rm", "medium", "right", 10, 600); + smart_font_rep* fn_rep= (smart_font_rep*) fn.rep; + + // Verify resolve works correctly for a mix of characters + array<string> chars; + chars << "中" << "国" << "文" << "字" << "测" << "试" << "α" << "β" << "γ" + << "δ" << "€" << "©" << "®" << "™" << "←" << "↑" << "→" << "↓" + << "∀" << "∃" << "∈" << "∉" << "∑" << "∏" << "√" << "∞"; + + for (int j= 0; j < N (chars); j++) { + int nr= fn_rep->resolve (chars[j]); + QVERIFY (nr >= 0); + } +} + QTEST_MAIN (TestSmartFont) #include "smart_font_test.moc"