Separate ligatures in search string matching
This commit is contained in:
parent
0731282ea1
commit
1d50a3e24e
@ -10,16 +10,22 @@ fun String.decodeUrl(charset: String): String? {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Normalize a string to lowercase ASCII
|
* Normalize a string to a lowercase, accent-free string
|
||||||
* TODO: Only supports Chinese/Pinyin at the moment
|
* This is used for substring matching.
|
||||||
|
* Characters must be normalized independently so that
|
||||||
|
* A.contains(B) implies A.normalize().contains(B.normalize()).
|
||||||
*/
|
*/
|
||||||
fun String.normalize(): String {
|
fun String.normalize(): String {
|
||||||
return StringUtils.stripAccents(this.romanize().lowercase(Locale.getDefault()))
|
return StringUtils.stripAccents(this.romanize().lowercase(Locale.getDefault()))
|
||||||
|
.replace("æ", "ae")
|
||||||
|
.replace("œ", "oe")
|
||||||
|
.replace("ß", "ss")
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Romanize a string, transliterate non-latin characters into latin
|
* Romanize a string, transliterate non-latin characters into latin
|
||||||
* TODO: Only supports Chinese/Pinyin at the moment
|
* This is used for sorting. The resulting string represents the position where the original
|
||||||
|
* string should be sorted in the Latin alphabet.
|
||||||
*/
|
*/
|
||||||
fun String.romanize(): String {
|
fun String.romanize(): String {
|
||||||
return Pinyin.toPinyin(this, "")
|
return Pinyin.toPinyin(this, "")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user