sparkast/test/unicodeToBytes.test.ts

18 lines
642 B
TypeScript

import { expect, test } from "bun:test";
import unicodeToBytes from "../lib/nlp/tokenize/unicodeToBytes";
/**
 * Checks that the table returned by `unicodeToBytes()` turns each character of
 * a byte-level encoded string back into a raw byte value, and that those bytes
 * decode as UTF-8 to the expected text.
 */
test("unicodeToBytes: test", () => {
  // The table is indexed by a single character and yields a byte value.
  const charToByte = unicodeToBytes();

  // Byte-level spelling of the UTF-8 bytes E5 A4 A9 E6 B0 94 ("天气"), one
  // stand-in character per byte. Written with escapes so the literal cannot be
  // silently re-decoded into real CJK characters by a tool that mis-detects
  // the file encoding (which would leave characters with no table entry).
  // NOTE(review): assumes the table follows the GPT-2 byte-level mapping,
  // where byte 0x94 is represented by U+0136 — confirm against the implementation.
  const encodedText = "\u00e5\u00a4\u00a9\u00e6\u00b0\u0136";

  const byteValues: number[] = [];
  for (const chr of encodedText) {
    const byteValue = charToByte[chr];
    // Fail loudly: an undefined entry would otherwise be coerced to 0 by
    // Uint8Array and surface only as a confusing string mismatch.
    if (byteValue === undefined) {
      throw new Error(`unicodeToBytes() has no entry for ${JSON.stringify(chr)}`);
    }
    byteValues.push(byteValue);
  }

  const decodedText = new TextDecoder("utf-8").decode(new Uint8Array(byteValues));
  expect(decodedText).toEqual("天气");
});