From 1d50a3e24e254c965b07d8857d4edb162afac028 Mon Sep 17 00:00:00 2001
From: MM20 <15646950+MM2-0@users.noreply.github.com>
Date: Sat, 15 Oct 2022 15:58:22 +0200
Subject: [PATCH] Separate ligatures in search string matching

---
 ktx/src/main/java/de/mm20/launcher2/ktx/String.kt | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/ktx/src/main/java/de/mm20/launcher2/ktx/String.kt b/ktx/src/main/java/de/mm20/launcher2/ktx/String.kt
index 62af5606..f003659a 100644
--- a/ktx/src/main/java/de/mm20/launcher2/ktx/String.kt
+++ b/ktx/src/main/java/de/mm20/launcher2/ktx/String.kt
@@ -10,16 +10,22 @@ fun String.decodeUrl(charset: String): String? {
 }
 
 /**
- * Normalize a string to lowercase ASCII
- * TODO: Only supports Chinese/Pinyin at the moment
+ * Normalize a string to a lowercase string
+ * This is used for substring matching.
+ * Characters must be normalized independently so that
+ * A.contains(B) -> A.normalize().contains(B.normalize()) is true.
  */
 fun String.normalize(): String {
     return StringUtils.stripAccents(this.romanize().lowercase(Locale.getDefault()))
+        .replace("æ", "ae")
+        .replace("œ", "oe")
+        .replace("ß", "ss")
 }
 
 /**
  * Romanize a string, transliterate non-latin characters into latin
- * TODO: Only supports Chinese/Pinyin at the moment
+ * This is used for sorting. The resulting string represents the position where the original
+ * string should be sorted in the latin alphabet.
  */
 fun String.romanize(): String {
     return Pinyin.toPinyin(this, "")