Separate ligatures in search string matching

2022-10-15 15:58:22 +02:00 · 2022-10-15 15:58:22 +02:00 · 1d50a3e24e
commit 1d50a3e24e
parent 0731282ea1
1 changed file with 9 additions and 3 deletions
--- a/ktx/src/main/java/de/mm20/launcher2/ktx/String.kt
+++ b/ktx/src/main/java/de/mm20/launcher2/ktx/String.kt
@@ -10,16 +10,22 @@ fun String.decodeUrl(charset: String): String? {
 }

 /**
- * Normalize a string to lowercase ASCII
- * TODO: Only supports Chinese/Pinyin at the moment
+ * Normalize a string to lowercase, strip accents and expand the ligatures æ, œ and ß.
+ * This is used for substring matching.
+ * Characters must be normalized independently of each other so that
+ * A.contains(B) implies A.normalize().contains(B.normalize()).
 */
 fun String.normalize(): String {
    return StringUtils.stripAccents(this.romanize().lowercase(Locale.getDefault()))
+        .replace("æ", "ae")
+        .replace("œ", "oe")
+        .replace("ß", "ss")
 }

 /**
 * Romanize a string, transliterate non-latin characters into latin
- * TODO: Only supports Chinese/Pinyin at the moment
+ * This is used for sorting. The resulting string represents the position at which the original
+ * string should be sorted in the Latin alphabet.
 */
 fun String.romanize(): String {
    return Pinyin.toPinyin(this, "")