diff --git a/lib/nlp/getEmbedding.ts b/lib/nlp/getEmbedding.ts
new file mode 100644
index 0000000..e20b863
--- /dev/null
+++ b/lib/nlp/getEmbedding.ts
@@ -0,0 +1,41 @@
+type EmbeddingDict = { [key: number]: Float32Array };
+
+// Parse a binary embedding table: each 514-byte entry is a uint16 token id
+// followed by 128 little-endian float32 values.
+function getEmbeddingLayer(buffer: Buffer): EmbeddingDict {
+  const dict: EmbeddingDict = {};
+
+  const entrySize = 514;
+  const numEntries = buffer.length / entrySize;
+
+  for (let i = 0; i < numEntries; i++) {
+    const offset = i * entrySize;
+    const key = buffer.readUInt16LE(offset);
+    const floatArray = new Float32Array(128);
+
+    for (let j = 0; j < 128; j++) {
+      floatArray[j] = buffer.readFloatLE(offset + 2 + j * 4);
+    }
+
+    dict[key] = floatArray;
+  }
+
+  return dict;
+}
+
+// Concatenate the embeddings of the first `contextSize` tokens into one flat
+// vector, zero-padding when the query is shorter than the context window.
+function getEmbedding(tokenIds: number[], embeddingDict: EmbeddingDict, contextSize: number): Float32Array {
+  let result: number[] = [];
+  for (let i = 0; i < contextSize; i++) {
+    if (i < tokenIds.length) {
+      const tokenId = tokenIds[i];
+      result = result.concat(Array.from(embeddingDict[tokenId]));
+    } else {
+      result = result.concat(new Array(128).fill(0));
+    }
+  }
+  return new Float32Array(result);
+}
+
+export { getEmbeddingLayer, getEmbedding };
\ No newline at end of file
diff --git a/lib/nlp/tokenizer.ts b/lib/nlp/tokenizer.ts
new file mode 100644
index 0000000..02dd516
--- /dev/null
+++ b/lib/nlp/tokenizer.ts
@@ -0,0 +1,57 @@
+type TokenDict = { [key: string]: number };
+
+function tokenize(query: string, tokenDict: TokenDict): number[] {
+  const tokenIds: number[] = [];
+  let index = 0;
+
+  // Step 1: Preprocess the query: prepend "▁", replace spaces with "▁"
+  // and newlines with the "<0x0A>" byte token
+  query = "▁" + query.replace(/ /g, "▁");
+  query = query.replace(/\n/g, "<0x0A>");
+
+  while (index < query.length) {
+    let bestToken = null;
+    let bestLength = 0;
+
+    // Step 2: Find the longest token that matches the beginning of the remaining query
+    for (const token in tokenDict) {
+      if (query.startsWith(token, index) && token.length > bestLength) {
+        bestToken = token;
+        bestLength = token.length;
+      }
+    }
+
+    if (bestToken) {
+      tokenIds.push(tokenDict[bestToken]);
+      index += bestLength;
+      continue;
+    }
+
+    // Step 3: Handle the case where no token matches
+    const char = query[index];
+    if (char.charCodeAt(0) <= 127) {
+      // If the character is ASCII and doesn't match any token, skip it as unknown
+      console.error(`Unknown token: ${char}`);
+      index++;
+      continue;
+    }
+
+    // If the character is non-ASCII, encode it as UTF-8 and emit one byte token per byte
+    const bytes = new TextEncoder().encode(char);
+    for (const byte of bytes) {
+      // Pad to two hex digits so e.g. byte 0x0A yields "<0x0A>", not "<0xA>"
+      const byteToken = `<0x${byte.toString(16).toUpperCase().padStart(2, "0")}>`;
+      if (tokenDict[byteToken] === undefined) {
+        console.error(`Unknown byte token: ${byteToken}`);
+        continue; // skip this byte; `index` advances once per character below
+      }
+      tokenIds.push(tokenDict[byteToken]);
+    }
+    index++;
+  }
+
+  return tokenIds;
+}
+
+export default tokenize;
+export type { TokenDict };
diff --git a/public/model b/public/model
deleted file mode 100644
index 0c4ef7e..0000000
Binary files a/public/model and /dev/null differ
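
Taken together, the two new modules turn a query string into a fixed-length input vector: `tokenize` maps text to token ids via longest-match lookup with a byte-level fallback, and `getEmbedding` concatenates the 128-dimensional vector for each id, zero-padding up to the context window. A minimal usage sketch follows; the asset paths (`model/embeddings.bin`, `model/tokens.json`), the JSON token-map format, and the context size of 64 are all assumptions for illustration, not part of this diff (which only deletes the old `public/model` binary):

```ts
import { readFileSync } from "fs";
import tokenize, { TokenDict } from "./lib/nlp/tokenizer";
import { getEmbeddingLayer, getEmbedding } from "./lib/nlp/getEmbedding";

// Hypothetical assets: a 514-byte-per-entry binary embedding table and a
// JSON map from token strings to ids.
const embeddingDict = getEmbeddingLayer(readFileSync("model/embeddings.bin"));
const tokenDict: TokenDict = JSON.parse(readFileSync("model/tokens.json", "utf8"));

const contextSize = 64; // assumed context window
const tokenIds = tokenize("hello world", tokenDict);
const input = getEmbedding(tokenIds, embeddingDict, contextSize);
console.log(input.length); // 8192 (= contextSize * 128 floats)
```

Flattening per-token embeddings into a single `Float32Array` keeps the downstream consumer simple: whatever model reads this always receives exactly `contextSize * 128` floats regardless of query length.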