openrat-cms

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

bidi.js (8431B)


      1 import { lst } from "./misc.js"
      2 
      3 // BIDI HELPERS
      4 
      5 export function iterateBidiSections(order, from, to, f) {
      6   if (!order) return f(from, to, "ltr", 0)
      7   let found = false
      8   for (let i = 0; i < order.length; ++i) {
      9     let part = order[i]
     10     if (part.from < to && part.to > from || from == to && part.to == from) {
     11       f(Math.max(part.from, from), Math.min(part.to, to), part.level == 1 ? "rtl" : "ltr", i)
     12       found = true
     13     }
     14   }
     15   if (!found) f(from, to, "ltr")
     16 }
     17 
     18 export let bidiOther = null
     19 export function getBidiPartAt(order, ch, sticky) {
     20   let found
     21   bidiOther = null
     22   for (let i = 0; i < order.length; ++i) {
     23     let cur = order[i]
     24     if (cur.from < ch && cur.to > ch) return i
     25     if (cur.to == ch) {
     26       if (cur.from != cur.to && sticky == "before") found = i
     27       else bidiOther = i
     28     }
     29     if (cur.from == ch) {
     30       if (cur.from != cur.to && sticky != "before") found = i
     31       else bidiOther = i
     32     }
     33   }
     34   return found != null ? found : bidiOther
     35 }
     36 
     37 // Bidirectional ordering algorithm
     38 // See http://unicode.org/reports/tr9/tr9-13.html for the algorithm
     39 // that this (partially) implements.
     40 
     41 // One-char codes used for character types:
     42 // L (L):   Left-to-Right
     43 // R (R):   Right-to-Left
     44 // r (AL):  Right-to-Left Arabic
     45 // 1 (EN):  European Number
     46 // + (ES):  European Number Separator
     47 // % (ET):  European Number Terminator
     48 // n (AN):  Arabic Number
     49 // , (CS):  Common Number Separator
     50 // m (NSM): Non-Spacing Mark
     51 // b (BN):  Boundary Neutral
     52 // s (B):   Paragraph Separator
     53 // t (S):   Segment Separator
     54 // w (WS):  Whitespace
     55 // N (ON):  Other Neutrals
     56 
     57 // Returns null if characters are ordered as they appear
     58 // (left-to-right), or an array of sections ({from, to, level}
     59 // objects) in the order in which they occur visually.
     60 let bidiOrdering = (function() {
     61   // Character types for codepoints 0 to 0xff
     62   let lowTypes = "bbbbbbbbbtstwsbbbbbbbbbbbbbbssstwNN%%%NNNNNN,N,N1111111111NNNNNNNLLLLLLLLLLLLLLLLLLLLLLLLLLNNNNNNLLLLLLLLLLLLLLLLLLLLLLLLLLNNNNbbbbbbsbbbbbbbbbbbbbbbbbbbbbbbbbb,N%%%%NNNNLNNNNN%%11NLNNN1LNNNNNLLLLLLLLLLLLLLLLLLLLLLLNLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLN"
     63   // Character types for codepoints 0x600 to 0x6f9
     64   let arabicTypes = "nnnnnnNNr%%r,rNNmmmmmmmmmmmrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnn%nnrrrmrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrmmmmmmmnNmmmmmmrrmmNmmmmrr1111111111"
     65   function charType(code) {
     66     if (code <= 0xf7) return lowTypes.charAt(code)
     67     else if (0x590 <= code && code <= 0x5f4) return "R"
     68     else if (0x600 <= code && code <= 0x6f9) return arabicTypes.charAt(code - 0x600)
     69     else if (0x6ee <= code && code <= 0x8ac) return "r"
     70     else if (0x2000 <= code && code <= 0x200b) return "w"
     71     else if (code == 0x200c) return "b"
     72     else return "L"
     73   }
     74 
     75   let bidiRE = /[\u0590-\u05f4\u0600-\u06ff\u0700-\u08ac]/
     76   let isNeutral = /[stwN]/, isStrong = /[LRr]/, countsAsLeft = /[Lb1n]/, countsAsNum = /[1n]/
     77 
     78   function BidiSpan(level, from, to) {
     79     this.level = level
     80     this.from = from; this.to = to
     81   }
     82 
     83   return function(str, direction) {
     84     let outerType = direction == "ltr" ? "L" : "R"
     85 
     86     if (str.length == 0 || direction == "ltr" && !bidiRE.test(str)) return false
     87     let len = str.length, types = []
     88     for (let i = 0; i < len; ++i)
     89       types.push(charType(str.charCodeAt(i)))
     90 
     91     // W1. Examine each non-spacing mark (NSM) in the level run, and
     92     // change the type of the NSM to the type of the previous
     93     // character. If the NSM is at the start of the level run, it will
     94     // get the type of sor.
     95     for (let i = 0, prev = outerType; i < len; ++i) {
     96       let type = types[i]
     97       if (type == "m") types[i] = prev
     98       else prev = type
     99     }
    100 
    101     // W2. Search backwards from each instance of a European number
    102     // until the first strong type (R, L, AL, or sor) is found. If an
    103     // AL is found, change the type of the European number to Arabic
    104     // number.
    105     // W3. Change all ALs to R.
    106     for (let i = 0, cur = outerType; i < len; ++i) {
    107       let type = types[i]
    108       if (type == "1" && cur == "r") types[i] = "n"
    109       else if (isStrong.test(type)) { cur = type; if (type == "r") types[i] = "R" }
    110     }
    111 
    112     // W4. A single European separator between two European numbers
    113     // changes to a European number. A single common separator between
    114     // two numbers of the same type changes to that type.
    115     for (let i = 1, prev = types[0]; i < len - 1; ++i) {
    116       let type = types[i]
    117       if (type == "+" && prev == "1" && types[i+1] == "1") types[i] = "1"
    118       else if (type == "," && prev == types[i+1] &&
    119                (prev == "1" || prev == "n")) types[i] = prev
    120       prev = type
    121     }
    122 
    123     // W5. A sequence of European terminators adjacent to European
    124     // numbers changes to all European numbers.
    125     // W6. Otherwise, separators and terminators change to Other
    126     // Neutral.
    127     for (let i = 0; i < len; ++i) {
    128       let type = types[i]
    129       if (type == ",") types[i] = "N"
    130       else if (type == "%") {
    131         let end
    132         for (end = i + 1; end < len && types[end] == "%"; ++end) {}
    133         let replace = (i && types[i-1] == "!") || (end < len && types[end] == "1") ? "1" : "N"
    134         for (let j = i; j < end; ++j) types[j] = replace
    135         i = end - 1
    136       }
    137     }
    138 
    139     // W7. Search backwards from each instance of a European number
    140     // until the first strong type (R, L, or sor) is found. If an L is
    141     // found, then change the type of the European number to L.
    142     for (let i = 0, cur = outerType; i < len; ++i) {
    143       let type = types[i]
    144       if (cur == "L" && type == "1") types[i] = "L"
    145       else if (isStrong.test(type)) cur = type
    146     }
    147 
    148     // N1. A sequence of neutrals takes the direction of the
    149     // surrounding strong text if the text on both sides has the same
    150     // direction. European and Arabic numbers act as if they were R in
    151     // terms of their influence on neutrals. Start-of-level-run (sor)
    152     // and end-of-level-run (eor) are used at level run boundaries.
    153     // N2. Any remaining neutrals take the embedding direction.
    154     for (let i = 0; i < len; ++i) {
    155       if (isNeutral.test(types[i])) {
    156         let end
    157         for (end = i + 1; end < len && isNeutral.test(types[end]); ++end) {}
    158         let before = (i ? types[i-1] : outerType) == "L"
    159         let after = (end < len ? types[end] : outerType) == "L"
    160         let replace = before == after ? (before ? "L" : "R") : outerType
    161         for (let j = i; j < end; ++j) types[j] = replace
    162         i = end - 1
    163       }
    164     }
    165 
    166     // Here we depart from the documented algorithm, in order to avoid
    167     // building up an actual levels array. Since there are only three
    168     // levels (0, 1, 2) in an implementation that doesn't take
    169     // explicit embedding into account, we can build up the order on
    170     // the fly, without following the level-based algorithm.
    171     let order = [], m
    172     for (let i = 0; i < len;) {
    173       if (countsAsLeft.test(types[i])) {
    174         let start = i
    175         for (++i; i < len && countsAsLeft.test(types[i]); ++i) {}
    176         order.push(new BidiSpan(0, start, i))
    177       } else {
    178         let pos = i, at = order.length
    179         for (++i; i < len && types[i] != "L"; ++i) {}
    180         for (let j = pos; j < i;) {
    181           if (countsAsNum.test(types[j])) {
    182             if (pos < j) order.splice(at, 0, new BidiSpan(1, pos, j))
    183             let nstart = j
    184             for (++j; j < i && countsAsNum.test(types[j]); ++j) {}
    185             order.splice(at, 0, new BidiSpan(2, nstart, j))
    186             pos = j
    187           } else ++j
    188         }
    189         if (pos < i) order.splice(at, 0, new BidiSpan(1, pos, i))
    190       }
    191     }
    192     if (direction == "ltr") {
    193       if (order[0].level == 1 && (m = str.match(/^\s+/))) {
    194         order[0].from = m[0].length
    195         order.unshift(new BidiSpan(0, 0, m[0].length))
    196       }
    197       if (lst(order).level == 1 && (m = str.match(/\s+$/))) {
    198         lst(order).to -= m[0].length
    199         order.push(new BidiSpan(0, len - m[0].length, len))
    200       }
    201     }
    202 
    203     return direction == "rtl" ? order.reverse() : order
    204   }
    205 })()
    206 
    207 // Get the bidi ordering for the given line (and cache it). Returns
    208 // false for lines that are fully left-to-right, and an array of
    209 // BidiSpan objects otherwise.
    210 export function getOrder(line, direction) {
    211   let order = line.order
    212   if (order == null) order = line.order = bidiOrdering(line.text, direction)
    213   return order
    214 }