bidi.min.js (8431B)
import { lst } from "./misc.js"

// BIDI HELPERS

// Walk the spans in `order` and call `f(start, end, dir, index)` for
// each one that overlaps the range from..to, with the span clipped to
// that range. `dir` is "rtl" for level-1 spans and "ltr" for all
// others. An empty range (from == to) matches a span that ends at
// that position. When `order` is falsy the whole range is reported
// as a single "ltr" section with index 0; when no span matches, the
// range is reported as "ltr" without an index.
export function iterateBidiSections(order, from, to, f) {
  if (!order) return f(from, to, "ltr", 0)
  let matched = false
  for (let idx = 0; idx < order.length; idx++) {
    const span = order[idx]
    const overlaps = span.from < to && span.to > from
    if (!overlaps && !(from == to && span.to == from)) continue
    const dir = span.level == 1 ? "rtl" : "ltr"
    f(Math.max(span.from, from), Math.min(span.to, to), dir, idx)
    matched = true
  }
  if (!matched) f(from, to, "ltr")
}

// Set by getBidiPartAt: index of the other span touching the
// position that was looked up (the one not preferred by `sticky`),
// or null when there is none.
export let bidiOther = null

// Find the index of the span in `order` that position `ch` belongs
// to. A span strictly containing `ch` wins immediately. Otherwise,
// when `ch` sits on a span boundary, a non-empty span ending at `ch`
// is preferred if `sticky` is "before", and a non-empty span starting
// at `ch` is preferred for any other `sticky` value; every other span
// touching `ch` is recorded in `bidiOther`. Falls back to `bidiOther`
// when no preferred span was found.
export function getBidiPartAt(order, ch, sticky) {
  let match
  bidiOther = null
  for (let idx = 0; idx < order.length; idx++) {
    const span = order[idx]
    if (span.from < ch && span.to > ch) return idx
    if (span.to == ch) {
      if (span.from != span.to && sticky == "before") match = idx
      else bidiOther = idx
    }
    if (span.from == ch) {
      if (span.from != span.to && sticky != "before") match = idx
      else bidiOther = idx
    }
  }
  if (match != null) return match
  return bidiOther
}

// Bidirectional ordering algorithm
// See http://unicode.org/reports/tr9/tr9-13.html for the algorithm
// that this (partially) implements.

// One-char codes used for character types:
// L (L):   Left-to-Right
// R (R):   Right-to-Left
// r (AL):  Right-to-Left Arabic
// 1 (EN):  European Number
// + (ES):  European Number Separator
// % (ET):  European Number Terminator
// n (AN):  Arabic Number
// , (CS):  Common Number Separator
// m (NSM): Non-Spacing Mark
// b (BN):  Boundary Neutral
// s (B):   Paragraph Separator
// t (S):   Segment Separator
// w (WS):  Whitespace
// N (ON):  Other Neutrals

// Returns false if the string is empty or, for the "ltr" direction,
// contains no right-to-left characters (i.e. characters are ordered
// as they appear). Otherwise returns an array of sections
// ({from, to, level} objects) in the order in which they occur
// visually.
// Computes the visual ordering of a string. Called as
// bidiOrdering(str, direction), where direction is "ltr" or anything
// else (treated as right-to-left). Returns false when the string is
// empty or, for "ltr", contains no character matched by bidiRE;
// otherwise returns an array of BidiSpan objects.
let bidiOrdering = (function() {
  // Character types for codepoints 0 to 0xff
  let lowTypes = "bbbbbbbbbtstwsbbbbbbbbbbbbbbssstwNN%%%NNNNNN,N,N1111111111NNNNNNNLLLLLLLLLLLLLLLLLLLLLLLLLLNNNNNNLLLLLLLLLLLLLLLLLLLLLLLLLLNNNNbbbbbbsbbbbbbbbbbbbbbbbbbbbbbbbbb,N%%%%NNNNLNNNNN%%11NLNNN1LNNNNNLLLLLLLLLLLLLLLLLLLLLLLNLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLN"
  // Character types for codepoints 0x600 to 0x6f9
  let arabicTypes = "nnnnnnNNr%%r,rNNmmmmmmmmmmmrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnn%nnrrrmrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrmmmmmmmnNmmmmmmrrmmNmmmmrr1111111111"

  // Map a character code (as returned by String.prototype.charCodeAt)
  // to its one-char type code. Codes not covered by any table or
  // range below are treated as left-to-right ("L").
  function charType(code) {
    if (code <= 0xf7) return lowTypes.charAt(code)
    else if (0x590 <= code && code <= 0x5f4) return "R"
    else if (0x600 <= code && code <= 0x6f9) return arabicTypes.charAt(code - 0x600)
    else if (0x6ee <= code && code <= 0x8ac) return "r"
    else if (0x2000 <= code && code <= 0x200b) return "w"
    else if (code == 0x200c) return "b"
    else return "L"
  }

  // Matches any character in the right-to-left ranges handled above;
  // used to skip the whole algorithm for plain "ltr" text.
  let bidiRE = /[\u0590-\u05f4\u0600-\u06ff\u0700-\u08ac]/
  let isNeutral = /[stwN]/, isStrong = /[LRr]/, countsAsLeft = /[Lb1n]/, countsAsNum = /[1n]/

  // A section of the string, covering offsets from..to, with its
  // level (0, 1 or 2).
  function BidiSpan(level, from, to) {
    this.level = level
    this.from = from; this.to = to
  }

  return function(str, direction) {
    // Type used for start-of-run / end-of-run (sor/eor).
    let outerType = direction == "ltr" ? "L" : "R"

    if (str.length == 0 || direction == "ltr" && !bidiRE.test(str)) return false
    let len = str.length, types = []
    for (let i = 0; i < len; ++i)
      types.push(charType(str.charCodeAt(i)))

    // W1. Examine each non-spacing mark (NSM) in the level run, and
    // change the type of the NSM to the type of the previous
    // character. If the NSM is at the start of the level run, it will
    // get the type of sor.
    for (let i = 0, prev = outerType; i < len; ++i) {
      let type = types[i]
      if (type == "m") types[i] = prev
      else prev = type
    }

    // W2. Search backwards from each instance of a European number
    // until the first strong type (R, L, AL, or sor) is found. If an
    // AL is found, change the type of the European number to Arabic
    // number.
    // W3. Change all ALs to R.
    for (let i = 0, cur = outerType; i < len; ++i) {
      let type = types[i]
      if (type == "1" && cur == "r") types[i] = "n"
      else if (isStrong.test(type)) { cur = type; if (type == "r") types[i] = "R" }
    }

    // W4. A single European separator between two European numbers
    // changes to a European number. A single common separator between
    // two numbers of the same type changes to that type.
    for (let i = 1, prev = types[0]; i < len - 1; ++i) {
      let type = types[i]
      if (type == "+" && prev == "1" && types[i+1] == "1") types[i] = "1"
      else if (type == "," && prev == types[i+1] &&
               (prev == "1" || prev == "n")) types[i] = prev
      prev = type
    }

    // W5. A sequence of European terminators adjacent to European
    // numbers changes to all European numbers.
    // W6. Otherwise, separators and terminators change to Other
    // Neutral.
    for (let i = 0; i < len; ++i) {
      let type = types[i]
      if (type == ",") types[i] = "N"
      else if (type == "%") {
        let end
        for (end = i + 1; end < len && types[end] == "%"; ++end) {}
        // The run of terminators i..end is adjacent to a European
        // number when the character on either side has type "1".
        let afterNumber = i > 0 && types[i-1] == "1"
        let beforeNumber = end < len && types[end] == "1"
        let replace = afterNumber || beforeNumber ? "1" : "N"
        for (let j = i; j < end; ++j) types[j] = replace
        i = end - 1
      }
    }

    // W7. Search backwards from each instance of a European number
    // until the first strong type (R, L, or sor) is found. If an L is
    // found, then change the type of the European number to L.
    for (let i = 0, cur = outerType; i < len; ++i) {
      let type = types[i]
      if (cur == "L" && type == "1") types[i] = "L"
      else if (isStrong.test(type)) cur = type
    }

    // N1. A sequence of neutrals takes the direction of the
    // surrounding strong text if the text on both sides has the same
    // direction. European and Arabic numbers act as if they were R in
    // terms of their influence on neutrals. Start-of-level-run (sor)
    // and end-of-level-run (eor) are used at level run boundaries.
    // N2. Any remaining neutrals take the embedding direction.
    for (let i = 0; i < len; ++i) {
      if (isNeutral.test(types[i])) {
        let end
        for (end = i + 1; end < len && isNeutral.test(types[end]); ++end) {}
        let before = (i ? types[i-1] : outerType) == "L"
        let after = (end < len ? types[end] : outerType) == "L"
        let replace = before == after ? (before ? "L" : "R") : outerType
        for (let j = i; j < end; ++j) types[j] = replace
        i = end - 1
      }
    }

    // Here we depart from the documented algorithm, in order to avoid
    // building up an actual levels array. Since there are only three
    // levels (0, 1, 2) in an implementation that doesn't take
    // explicit embedding into account, we can build up the order on
    // the fly, without following the level-based algorithm.
    let order = [], m
    for (let i = 0; i < len;) {
      if (countsAsLeft.test(types[i])) {
        // Run of left-counting characters: one level-0 span.
        let start = i
        for (++i; i < len && countsAsLeft.test(types[i]); ++i) {}
        order.push(new BidiSpan(0, start, i))
      } else {
        // Run extending up to the next "L". Its pieces are all
        // inserted at the same index `at`, so later pieces end up
        // before earlier ones in `order`.
        let pos = i, at = order.length
        for (++i; i < len && types[i] != "L"; ++i) {}
        for (let j = pos; j < i;) {
          if (countsAsNum.test(types[j])) {
            if (pos < j) order.splice(at, 0, new BidiSpan(1, pos, j))
            let nstart = j
            for (++j; j < i && countsAsNum.test(types[j]); ++j) {}
            order.splice(at, 0, new BidiSpan(2, nstart, j))
            pos = j
          } else ++j
        }
        if (pos < i) order.splice(at, 0, new BidiSpan(1, pos, i))
      }
    }
    if (direction == "ltr") {
      // Split leading/trailing whitespace off a level-1 span at
      // either edge of the order into its own level-0 span.
      if (order[0].level == 1 && (m = str.match(/^\s+/))) {
        order[0].from = m[0].length
        order.unshift(new BidiSpan(0, 0, m[0].length))
      }
      if (lst(order).level == 1 && (m = str.match(/\s+$/))) {
        lst(order).to -= m[0].length
        order.push(new BidiSpan(0, len - m[0].length, len))
      }
    }

    return direction == "rtl" ? order.reverse() : order
  }
})()

// Get the bidi ordering for the given line (and cache it on
// line.order). Returns false when bidiOrdering does (empty text, or
// "ltr" text without right-to-left characters), and an array of
// BidiSpan objects otherwise. A cached value, including false, is
// returned as-is without recomputing.
export function getOrder(line, direction) {
  let order = line.order
  if (order == null) order = line.order = bidiOrdering(line.text, direction)
  return order
}