Compare commits

No commits in common. "44c216c81b1baff599b975a967fd844dd7908b79" and "1acb0a7f1183173b439e7369c9c920f4f21cf059" have entirely different histories.

6 changed files with 29 additions and 51 deletions

View File

@@ -21,9 +21,6 @@ import tokenize from "lib/nlp/tokenize/tokenizer";
 import { getEmbedding, getEmbeddingLayer } from "lib/nlp/getEmbedding";
 import { loadVocab } from "lib/nlp/tokenize/loadVocab";
 import BPETokenizer from "lib/nlp/tokenize/BPEtokenizer";
-import energyScore from "lib/nlp/energyScore";
-import bytesToUnicode from "lib/nlp/tokenize/bytesToUnicode";
-import { searchboxLastInputAtom } from "lib/state/searchboxLastInput";

 interface EmbeddingLayer {
     [key: number]: Float32Array<ArrayBufferLike>;
@@ -34,7 +31,6 @@ export default function OneSearch() {
     const [embeddingLayer, setEmbeddingLayer] = useState<EmbeddingLayer | null>(null);
     const [NLUsession, setNLUsession] = useState<ort.InferenceSession | null>(null);
     const [tokenizer, setTokenizer] = useState<BPETokenizer | null>(null);
-    const lastInput = useAtomValue(searchboxLastInputAtom);
     const lastRequestTimeRef = useRef(0);
     const selected = useAtomValue(selectedSuggestionAtom);
     const settings = useAtomValue(settingsAtom);
@@ -71,7 +67,7 @@ export default function OneSearch() {
            // Handle fetch error
            sendError(error);
        });
-    }, [lastInput]);
+    }, [query]);

     function updateSuggestion(data: suggestionItem[]) {
         setFinalSuggetsion((cur: suggestionItem[]) => {
@@ -134,12 +130,16 @@ export default function OneSearch() {
     }

     async function getNLUResult(query: string) {
+        const start = new Date().getTime();
         if (embeddingLayer === null || NLUsession === null || tokenizer == null) return;
-        const tokenIds = await tokenize(bytesToUnicode(query), tokenizer);
+        const tokenIds = await tokenize(query, tokenizer);
+        console.log(new Date().getTime() - start, "ms");
         const embeddings = getEmbedding(tokenIds, embeddingLayer, 64);
         const inputTensor = new ort.Tensor("float32", embeddings, [1, 64, 96]);
         const feeds = { input: inputTensor };
+        console.log(new Date().getTime() - start, "ms");
         const results = await NLUsession.run(feeds);
+        console.log(new Date().getTime() - start, "ms");
         return results;
     }
@@ -171,15 +171,9 @@ export default function OneSearch() {

         (async function () {
             const result = await getNLUResult(query);
-            if (result === undefined) return;
-            const rawData = result.output.data;
-            const data: number[] = [];
-            for (let i=0;i<rawData.length;i++){
-                data.push(rawData[i] as number);
-            }
-            console.log(data, energyScore(data));
+            console.log(result);
         })();
-    }, [lastInput, engineName]);
+    }, [query, engineName]);

     return (
         <SuggestionBox>

View File

@@ -6,14 +6,12 @@ import { selectedSuggestionAtom } from "lib/state/suggestionSelection";
 import handleEnter from "lib/onesearch/handleEnter";
 import { suggestionAtom } from "lib/state/suggestion";
 import { useTranslation } from "react-i18next";
-import { searchboxLastInputAtom } from "lib/state/searchboxLastInput";

 export default function Search(props: { onFocus: () => void }) {
     const { t } = useTranslation();
     const settings = useAtomValue(settingsAtom);
     const [query, setQuery] = useAtom(queryAtom);
     const [selectedSuggestion, setSelected] = useAtom(selectedSuggestionAtom);
-    const [_, setLastInput] = useAtom(searchboxLastInputAtom)
     const suggestions = useAtomValue(suggestionAtom);
     const searchBoxRef = useRef<HTMLInputElement>(null);
@@ -27,21 +25,11 @@ export default function Search(props: { onFocus: () => void }) {
         } else if (e.key == "ArrowUp") {
             e.preventDefault();
             const len = suggestions.length;
-            const lastSelectedIndex = (selectedSuggestion - 1 + len) % len;
-            const lastSuggeston = suggestions[lastSelectedIndex];
-            setSelected(lastSelectedIndex);
-            if (["QUERY", "NAVIGATION", "default"].includes(lastSuggeston.type)) {
-                setQuery(lastSuggeston.suggestion);
-            }
+            setSelected((selectedSuggestion - 1 + len) % len);
         } else if (e.key == "ArrowDown") {
             e.preventDefault();
             const len = suggestions.length;
-            const nextSelectedIndex = (selectedSuggestion + 1 + len) % len;
-            const nextSuggeston = suggestions[nextSelectedIndex];
-            setSelected(nextSelectedIndex);
-            if (["QUERY", "NAVIGATION", "default"].includes(nextSuggeston.type)) {
-                setQuery(nextSuggeston.suggestion);
-            }
+            setSelected((selectedSuggestion + 1) % len);
         }
     }
@@ -60,10 +48,11 @@ export default function Search(props: { onFocus: () => void }) {
                 placeholder={t("search.placeholder")}
                 onFocus={props.onFocus}
                 onKeyDown={handleKeydown}
-                onChange={(e) => {
-                    setLastInput(new Date().getTime());
-                    setQuery(() => e.target.value);
-                }}
+                onChange={(e) =>
+                    setQuery(() => {
+                        return e.target.value;
+                    })
+                }
                 autoComplete="off"
                 autoCorrect="off"
                 autoCapitalize="off"

View File

@@ -1,13 +0,0 @@
-function logsumexp(arr: number[]): number {
-    const maxVal = Math.max(...arr);
-    const sumExp = arr.reduce((sum, val) => sum + Math.exp(val - maxVal), 0);
-    return Math.log(sumExp) + maxVal;
-}
-
-function minusEnergyScore(logits: number[]): number {
-    return logsumexp(logits);
-}
-
-const energyScore = minusEnergyScore;
-
-export default energyScore;

View File

@@ -1,5 +0,0 @@
-import { atom } from "jotai";
-
-const searchboxLastInputAtom = atom(0);
-
-export { searchboxLastInputAtom };

View File

@@ -1,5 +1,5 @@
 import TLDtxt from "./tlds.txt?raw";

 export function getTLD() {
-    return TLDtxt.split("\r\n").filter((line) => line[0] !== "#");
+    return TLDtxt.split("\n").filter((line) => line[0] !== "#");
 }

test/tokenize.test.ts Normal file
View File

@@ -0,0 +1,13 @@
+import { describe, expect, test } from "bun:test";
+import tokenize from "../lib/nlp/tokenize/tokenizer";
+
+describe("Test if tokenizer works", () => {
+    test("Using without a mirror", async () => {
+        let result = await tokenize("你好,世界!", "Qwen/Qwen2.5-3B", false);
+        expect(result).toEqual([108386, 3837, 99489, 6313]);
+    });
+    test("Using with a mirror", async () => {
+        let result = await tokenize("你好,世界!", "Qwen/Qwen2.5-3B", true);
+        expect(result).toEqual([108386, 3837, 99489, 6313]);
+    });
+});