import { trie } from '../cantoneseTrie';

/** True for UTF-16 code units U+0020–U+0027 and U+002A–U+007E (printable ASCII except the parentheses). */
const isAsciiRunUnit = (code: number): boolean =>
    (code >= 0x20 && code <= 0x27) || (code >= 0x2a && code <= 0x7e);

/** True for UTF-16 code units U+0000–U+001F and U+007F–U+2E7F. */
const isOtherRunUnit = (code: number): boolean =>
    code <= 0x1f || (code >= 0x7f && code <= 0x2e7f);

/** Counts how many consecutive code units, starting at `start`, satisfy `accepts`. */
const runLength = (
    text: string,
    start: number,
    accepts: (code: number) => boolean
): number => {
    let end = start;
    while (end < text.length && accepts(text.charCodeAt(end))) {
        ++end;
    }
    return end - start;
};

/** Returns 2 when a well-formed surrogate pair starts at `index`, otherwise 1. */
const codePointLength = (text: string, index: number): number => {
    const lead = text.charCodeAt(index);
    if (lead >= 0xd800 && lead <= 0xdbff && index + 1 < text.length) {
        const trail = text.charCodeAt(index + 1);
        if (trail >= 0xdc00 && trail <= 0xdfff) {
            return 2;
        }
    }
    return 1;
};

/**
 * Returns the longest substring starting at `start` for which `trie.contains`
 * is truthy, or '' when there is none. The candidate grows one UTF-16 code
 * unit at a time and the walk stops as soon as `trie.find` reports no entries
 * for the current candidate.
 */
const longestTrieMatch = (text: string, start: number): string => {
    let longest = '';
    for (let end = start + 1; end <= text.length; ++end) {
        const candidate = text.slice(start, end);
        if (!(trie.find(candidate).length > 0)) {
            break;
        }
        if (trie.contains(candidate)) {
            longest = candidate;
        }
    }
    return longest;
};

/**
 * Splits `inputText` into tokens and returns them joined into one string.
 *
 * At each position the token is chosen by the first rule that applies:
 *  1. the longest trie entry starting at that position;
 *  2. a maximal run of ASCII code units (U+0020–U+0027, U+002A–U+007E);
 *  3. a maximal run of code units in U+0000–U+001F or U+007F–U+2E7F;
 *  4. a single character. A surrogate pair is kept together so that
 *     characters outside the Basic Multilingual Plane are not torn into
 *     two lone surrogates.
 *
 * @param inputText Text to segment.
 * @returns The tokens, each prefixed with one space (so a non-empty result
 * starts with a space), with every "\n " sequence collapsed to "\n".
 * An empty input yields an empty string.
 */
const segment = (inputText: string): string => {
    const tokens: string[] = [];
    let i = 0;
    while (i < inputText.length) {
        let token = longestTrieMatch(inputText, i);
        if (token === '') {
            let length = runLength(inputText, i, isAsciiRunUnit);
            if (length === 0) {
                length = runLength(inputText, i, isOtherRunUnit);
            }
            if (length === 0) {
                // Neither class matched: emit one whole character.
                length = codePointLength(inputText, i);
            }
            token = inputText.slice(i, i + length);
        }
        tokens.push(token);
        // Every token has at least one code unit, so the scan always advances.
        i += token.length;
    }
    return tokens
        .map((token) => ' ' + token)
        .join('')
        .replace(/\n /g, '\n');
};

export default segment;
