🚩flag /u
JS ⟩ value ⟩ object ⟩ regex ⟩ flag ⟩ /u
// support unicode
/abc/u // treated as sequence of code points.
// ❗️'/u' flag doesn't do well with grapheme clusters.
/<.>/u.test("<🏳️🌈>") // false❗️
/<....>/u.test("<🏳️🌈>") // true ⭐️
The `/u` flag doesn't do well with grapheme clusters❗
// ❗️'/u' flag doesn't do well with grapheme clusters.
/<.>/u.test("<🏳️🌈>"), // false❗️
/<....>/u.test("<🏳️🌈>"), // true ⭐️
The `/u` flag works on code points, not on grapheme clusters❗
replit ⟩ /u flag, require ⟩ String extension
// ⭐ import
// Loaded for the `.codeUnits` / `.codePoints` accessors used on the string
// literals below.
// NOTE(review): presumably the module installs them on String.prototype as a
// side effect — confirm against ./ext/String_ext.js.
const _String = require('./ext/String_ext.js'); // String extension

// The rainbow-flag emoji is a ZWJ sequence made of four code points:
//   U+1F3F3 (white flag) + U+FE0F (variation selector-16)
//   + U+200D (zero width joiner) + U+1F308 (rainbow).
// Two of them are invisible and are easily dropped when the literal is
// copy/pasted, which silently changes every result below. Spelling the
// sequence with escapes keeps the examples in sync with their annotations.
const RAINBOW_FLAG = '\u{1F3F3}\uFE0F\u200D\u{1F308}';

// Each entry is evaluated once and printed, in order, by the forEach at the end.
[
    '🍎'.codeUnits,           // [ 55356, 57166 ]   // surrogate pair (2 code units)
    '🍎'.codePoints,          // [ 127822 ]         // 1 code point
    RAINBOW_FLAG.codeUnits,   // [ 55356, 57331, 65039, 8205, 55356, 57096 ]
    RAINBOW_FLAG.codePoints,  // [ 127987, 65039, 8205, 127752 ] // 4 code points❗️

    // --------------------------------------------
    // ❗️regex works on "code units", by default.
    // --------------------------------------------

    /🍎{3}/.test("🍎🍎🍎"),   // false❗️
    // let 🍎 = ab (where a = 55356, b = 57166)
    // then /🍎{3}/ = /ab{3}/ = /abbb/❗️ (the quantifier binds to the last code unit only)
    // which is not /🍎🍎🍎/ = /ababab/❗️

    /<.>/.test("<🍎>"),       // false❗️
    // <🍎> = <ab>, which is not <.> ('.' matches a single code unit)

    // --------------------
    // ✅ enable /u flag
    // --------------------

    /<.>/u.test("<🍎>"),      // true ⭐️ ('.' now matches a whole code point)

    // ❗️'/u' flag doesn't do well with grapheme clusters:
    // one visible emoji, but four code points, so it needs four dots.
    /<.>/u.test(`<${RAINBOW_FLAG}>`),     // false❗️
    /<....>/u.test(`<${RAINBOW_FLAG}>`),  // true ⭐️

    // ⭐️ Search for Han characters (Unicode property escapes require /u)
    `Hello Привет 你好`.match(/\p{sc=Han}/gu),  // [ '你', '好' ]

    // ⭐️ Script
    /\p{Script=Greek}/u.test("α"),    // → true
    /\p{Script=Arabic}/u.test("α"),   // → false

    // ⭐️ Alphabetic
    /\p{Alphabetic}/u.test("α"),      // → true
    /\p{Alphabetic}/u.test("!"),      // → false
    /\p{Alphabetic}/u.test("漢"),     // → true
].forEach(x => console.log(x));
Last updated