18 lines
642 B
TypeScript
18 lines
642 B
TypeScript
import { expect, test } from "bun:test";
|
|
import unicodeToBytes from "../lib/nlp/tokenize/unicodeToBytes";
|
|
|
|
/**
 * Checks that a byte-level placeholder string can be turned back into text.
 *
 * Every character of the input is looked up in the table returned by
 * `unicodeToBytes()` to obtain one byte value; the collected bytes are then
 * decoded as UTF-8 and compared against the expected text.
 */
test("unicodeToBytes: test", () => {
  const unicodeToByteMap = unicodeToBytes();
  const byteNumArray: number[] = [];

  // Six placeholder characters (rendered: "å¤©æ°Ķ"), one per UTF-8 byte of
  // "天气" (E5 A4 A9 E6 B0 94). Written as escapes so that encoding-repair
  // tools cannot collapse the first three placeholders into the real "天",
  // which would leave only four bytes and make the expectation unreachable.
  // NOTE(review): assumes unicodeToBytes() is the inverse of the GPT-2
  // byte-to-unicode table — confirm against the implementation.
  const stringToConvert = "\u00e5\u00a4\u00a9\u00e6\u00b0\u0136";

  for (const chr of stringToConvert) {
    const byteNumber = unicodeToByteMap[chr];
    // Uint8Array silently coerces `undefined` to 0, so fail here instead of
    // producing a confusing decoded string further down.
    expect(byteNumber).toBeDefined();
    byteNumArray.push(byteNumber);
  }

  const byteArray = new Uint8Array(byteNumArray);
  const decoder = new TextDecoder("utf-8");
  const utf8String = decoder.decode(byteArray);

  expect(utf8String).toEqual("天气");
});
|