/* eslint-disable no-plusplus, no-bitwise */

// Taken from https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330 - public domain
// This is needed because Cantonese uses characters on Unicode Plane 2,
// which cannot be processed correctly by indexing a JavaScript string.
//
// e.g. 𨋢 lip1 (lift) is a Unicode Plane 2 character; it occupies 4 bytes in UTF-8
// and is stored as a surrogate pair, so JavaScript sees it as two characters.
//
// encodeUTF8() turns the string into a UTF-8 byte array.
// decodeUTF8() combines the bytes back and returns a string array,
// so that the i'th item in this array is the i'th character
// (a run of ASCII letters is kept together as one item, and '_' is dropped).

/**
 * Encodes a JavaScript (UTF-16) string as UTF-8 bytes.
 *
 * A high surrogate followed by a low surrogate is combined into a single
 * code point and written as a 4-byte sequence. A low surrogate that is not
 * preceded by a high surrogate is written as a 3-byte sequence as-is.
 *
 * @param s - The string to encode.
 * @returns A view over the encoded bytes (a subarray of a buffer sized `s.length * 4`).
 * @throws Error if a high surrogate is the last code unit of `s`, or is not
 *         followed by a low surrogate.
 */
export const encodeUTF8 = (s: string): Uint8Array => {
    const out = new Uint8Array(s.length * 4);
    let written = 0;
    let pos = 0;

    while (pos !== s.length) {
        let code = s.charCodeAt(pos);

        if (code < 0x80) {
            // 1 byte: plain ASCII.
            out[written++] = code;
        } else if (code < 0x800) {
            // 2 bytes: 110xxxxx 10xxxxxx
            out[written++] = (code >> 6) | 0xc0;
            out[written++] = (code & 0x3f) | 0x80;
        } else if (code > 0xd7ff && code < 0xdc00) {
            // High surrogate: consume the following low surrogate as well.
            pos += 1;
            if (pos >= s.length) {
                throw new Error('UTF-8 encode: incomplete surrogate pair');
            }

            const low = s.charCodeAt(pos);
            if (low < 0xdc00 || low > 0xdfff) {
                throw new Error(`UTF-8 encode: second surrogate character 0x${low.toString(16)} at index ${pos} out of range`);
            }

            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            code = 0x10000 + ((code & 0x03ff) << 10) + (low & 0x03ff);
            out[written++] = (code >> 18) | 0xf0;
            out[written++] = ((code >> 12) & 0x3f) | 0x80;
            out[written++] = ((code >> 6) & 0x3f) | 0x80;
            out[written++] = (code & 0x3f) | 0x80;
        } else {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            out[written++] = (code >> 12) | 0xe0;
            out[written++] = ((code >> 6) & 0x3f) | 0x80;
            out[written++] = (code & 0x3f) | 0x80;
        }

        pos += 1;
    }

    return out.subarray(0, written);
};

/** Returns true when `code` is the character code of an ASCII letter (A-Z or a-z). */
const isAsciiLetter = (code: number): boolean =>
    (code >= 0x41 && code <= 0x5a) || (code >= 0x61 && code <= 0x7a);

/**
 * Decodes UTF-8 bytes into an array of strings, one item per character.
 *
 * Two special cases apply on top of plain decoding:
 * - a run of consecutive ASCII letters is returned as a single item;
 * - underscores (`_`) are skipped and produce no item.
 *
 * Code points above U+FFFF are returned as a single item holding the
 * corresponding surrogate pair.
 *
 * @param bytes - UTF-8 encoded input.
 * @returns The decoded items, in input order.
 * @throws Error if a multi-byte sequence is truncated, if a byte is not a
 *         valid sequence start, or if a decoded code point exceeds U+10FFFF.
 */
export const decodeUTF8 = (bytes: Uint8Array): string[] => {
    const items: string[] = [];
    let i = 0;
    while (i < bytes.length) {
        let c = bytes[i++];
        if (c > 127) {
            if (c > 191 && c < 224) {
                if (i >= bytes.length) {
                    throw new Error('UTF-8 decode: incomplete 2-byte sequence');
                }
                c = ((c & 31) << 6) | (bytes[i++] & 63);
            } else if (c > 223 && c < 240) {
                if (i + 1 >= bytes.length) {
                    throw new Error('UTF-8 decode: incomplete 3-byte sequence');
                }
                c = ((c & 15) << 12) | ((bytes[i++] & 63) << 6) | (bytes[i++] & 63);
            } else if (c > 239 && c < 248) {
                if (i + 2 >= bytes.length) {
                    throw new Error('UTF-8 decode: incomplete 4-byte sequence');
                }
                c = ((c & 7) << 18) | ((bytes[i++] & 63) << 12) | ((bytes[i++] & 63) << 6) | (bytes[i++] & 63);
            } else {
                throw new Error('UTF-8 decode: unknown multibyte start 0x' + c.toString(16) + ' at index ' + (i - 1));
            }
        }

        // Compare the numeric code point directly. String.fromCharCode() keeps
        // only the low 16 bits, so testing its result would mistake astral
        // code points such as U+20041 for "A" (or U+2005F for "_").
        if (isAsciiLetter(c)) {
            // Collect the whole run of ASCII letters as one item. ASCII letters
            // are always single bytes, so the raw bytes can be inspected.
            let word = String.fromCharCode(c);
            while (i < bytes.length && isAsciiLetter(bytes[i])) {
                word += String.fromCharCode(bytes[i++]);
            }
            items.push(word);
        } else if (c === 0x5f) {
            // Underscore: escaped space between English words, ignored.
        } else if (c <= 0xffff) {
            items.push(String.fromCharCode(c));
        } else if (c <= 0x10ffff) {
            // Split into a UTF-16 surrogate pair, kept together as one item.
            c -= 0x10000;
            items.push(String.fromCharCode((c >> 10) | 0xd800) + String.fromCharCode((c & 0x3ff) | 0xdc00));
        } else {
            throw new Error('UTF-8 decode: code point 0x' + c.toString(16) + ' exceeds UTF-16 reach');
        }
    }
    return items;
};

/**
 * Counts the items `decodeUTF8` produces for `word` after the bracket
 * characters `(`, `)`, `{`, `}`, `<`, `>`, `[` and `]` have been removed.
 *
 * @param word - The text to measure.
 * @returns The length of the array returned by `decodeUTF8`.
 */
export const countChar = (word: string): number => {
    const withoutBrackets = word.replace(/[)(}{><\][]/g, '');
    const encoded = encodeUTF8(withoutBrackets);
    const items = decodeUTF8(encoded);
    return items.length;
};
