Compare commits

..

No commits in common. "44c216c81b1baff599b975a967fd844dd7908b79" and "1acb0a7f1183173b439e7369c9c920f4f21cf059" have entirely different histories.

6 changed files with 29 additions and 51 deletions

View File

@ -21,9 +21,6 @@ import tokenize from "lib/nlp/tokenize/tokenizer";
import { getEmbedding, getEmbeddingLayer } from "lib/nlp/getEmbedding";
import { loadVocab } from "lib/nlp/tokenize/loadVocab";
import BPETokenizer from "lib/nlp/tokenize/BPEtokenizer";
import energyScore from "lib/nlp/energyScore";
import bytesToUnicode from "lib/nlp/tokenize/bytesToUnicode";
import { searchboxLastInputAtom } from "lib/state/searchboxLastInput";
interface EmbeddingLayer {
[key: number]: Float32Array<ArrayBufferLike>;
@ -34,7 +31,6 @@ export default function OneSearch() {
const [embeddingLayer, setEmbeddingLayer] = useState<EmbeddingLayer | null>(null);
const [NLUsession, setNLUsession] = useState<ort.InferenceSession | null>(null);
const [tokenizer, setTokenizer] = useState<BPETokenizer | null>(null);
const lastInput = useAtomValue(searchboxLastInputAtom);
const lastRequestTimeRef = useRef(0);
const selected = useAtomValue(selectedSuggestionAtom);
const settings = useAtomValue(settingsAtom);
@ -71,7 +67,7 @@ export default function OneSearch() {
// Handle fetch error
sendError(error);
});
}, [lastInput]);
}, [query]);
function updateSuggestion(data: suggestionItem[]) {
setFinalSuggetsion((cur: suggestionItem[]) => {
@ -134,12 +130,16 @@ export default function OneSearch() {
}
/**
 * Runs the NLU model on a search query and returns the raw session output.
 *
 * Reads `embeddingLayer`, `NLUsession` and `tokenizer` from the enclosing
 * scope. If any of them is still null (or, for the tokenizer, undefined) the
 * function resolves to `undefined` without doing any work.
 *
 * @param query Text to tokenize and feed to the model.
 * @returns The value resolved by `NLUsession.run`, or `undefined` when the
 *          required resources are not available yet.
 */
async function getNLUResult(query: string) {
// Reference timestamp (ms) for the elapsed-time logs below.
const start = new Date().getTime();
// Bail out until the embedding layer, the session and the tokenizer all exist.
if (embeddingLayer === null || NLUsession === null || tokenizer == null) return;
// NOTE(review): this view interleaves both sides of a diff hunk, so the two
// `tokenIds` declarations below look like alternative revisions rather than
// code that coexists. One variant passes the query through `bytesToUnicode`
// before tokenizing, the other tokenizes the raw query. Confirm which one
// the target revision keeps.
const tokenIds = await tokenize(bytesToUnicode(query), tokenizer);
const tokenIds = await tokenize(query, tokenizer);
// Elapsed time after tokenization.
console.log(new Date().getTime() - start, "ms");
// NOTE(review): 64 is presumably the fixed sequence length; it matches the
// middle dimension of the tensor shape below — confirm against getEmbedding.
const embeddings = getEmbedding(tokenIds, embeddingLayer, 64);
// float32 tensor with shape [1, 64, 96]; 96 is presumably the embedding width — TODO confirm.
const inputTensor = new ort.Tensor("float32", embeddings, [1, 64, 96]);
// The tensor is passed to the session under the input name "input".
const feeds = { input: inputTensor };
// Elapsed time after building the input tensor.
console.log(new Date().getTime() - start, "ms");
const results = await NLUsession.run(feeds);
// Elapsed time after inference.
console.log(new Date().getTime() - start, "ms");
return results;
}
@ -171,15 +171,9 @@ export default function OneSearch() {
(async function () {
const result = await getNLUResult(query);
if (result === undefined) return;
const rawData = result.output.data;
const data: number[] = [];
for (let i=0;i<rawData.length;i++){
data.push(rawData[i] as number);
}
console.log(data, energyScore(data));
console.log(result);
})();
}, [lastInput, engineName]);
}, [query, engineName]);
return (
<SuggestionBox>

View File

@ -6,14 +6,12 @@ import { selectedSuggestionAtom } from "lib/state/suggestionSelection";
import handleEnter from "lib/onesearch/handleEnter";
import { suggestionAtom } from "lib/state/suggestion";
import { useTranslation } from "react-i18next";
import { searchboxLastInputAtom } from "lib/state/searchboxLastInput";
export default function Search(props: { onFocus: () => void }) {
const { t } = useTranslation();
const settings = useAtomValue(settingsAtom);
const [query, setQuery] = useAtom(queryAtom);
const [selectedSuggestion, setSelected] = useAtom(selectedSuggestionAtom);
const [_, setLastInput] = useAtom(searchboxLastInputAtom)
const suggestions = useAtomValue(suggestionAtom);
const searchBoxRef = useRef<HTMLInputElement>(null);
@ -27,21 +25,11 @@ export default function Search(props: { onFocus: () => void }) {
} else if (e.key == "ArrowUp") {
e.preventDefault();
const len = suggestions.length;
const lastSelectedIndex = (selectedSuggestion - 1 + len) % len;
const lastSuggeston = suggestions[lastSelectedIndex];
setSelected(lastSelectedIndex);
if (["QUERY", "NAVIGATION", "default"].includes(lastSuggeston.type)) {
setQuery(lastSuggeston.suggestion);
}
setSelected((selectedSuggestion - 1 + len) % len);
} else if (e.key == "ArrowDown") {
e.preventDefault();
const len = suggestions.length;
const nextSelectedIndex = (selectedSuggestion + 1 + len) % len;
const nextSuggeston = suggestions[nextSelectedIndex];
setSelected(nextSelectedIndex);
if (["QUERY", "NAVIGATION", "default"].includes(nextSuggeston.type)) {
setQuery(nextSuggeston.suggestion);
}
setSelected((selectedSuggestion + 1) % len);
}
}
@ -60,10 +48,11 @@ export default function Search(props: { onFocus: () => void }) {
placeholder={t("search.placeholder")}
onFocus={props.onFocus}
onKeyDown={handleKeydown}
onChange={(e) => {
setLastInput(new Date().getTime());
setQuery(() => e.target.value);
}}
onChange={(e) =>
setQuery(() => {
return e.target.value;
})
}
autoComplete="off"
autoCorrect="off"
autoCapitalize="off"

View File

@ -1,13 +0,0 @@
/**
 * Computes `log(sum(exp(x)))` over `arr`, shifting by the maximum element
 * before exponentiating so that large inputs do not overflow.
 *
 * Edge cases:
 * - an empty array, or one containing only `-Infinity`, yields `-Infinity`
 *   (the logarithm of an empty / all-zero sum);
 * - an array containing `+Infinity` (and no NaN) yields `+Infinity`;
 * - any NaN element yields NaN.
 *
 * @param arr Values to combine.
 * @returns The log-sum-exp of `arr`.
 */
function logsumexp(arr: number[]): number {
  // Find the maximum with a loop rather than `Math.max(...arr)`: spreading a
  // very large array into call arguments can throw a RangeError.
  let maxVal = -Infinity;
  for (const val of arr) {
    if (Number.isNaN(val)) return NaN;
    if (val > maxVal) maxVal = val;
  }
  // With a non-finite maximum the shift `val - maxVal` would evaluate
  // `Infinity - Infinity` and poison the result with NaN; the correct limit
  // is the maximum itself.
  if (!Number.isFinite(maxVal)) return maxVal;

  let sumExp = 0;
  for (const val of arr) {
    sumExp += Math.exp(val - maxVal);
  }
  return Math.log(sumExp) + maxVal;
}
/**
 * Scores a logit vector by its log-sum-exp.
 *
 * NOTE(review): going by the function name, this is the energy score with its
 * sign flipped (i.e. `-E`); confirm against the caller's expectations.
 *
 * @param logits Raw model outputs.
 * @returns `logsumexp(logits)`.
 */
function minusEnergyScore(logits: number[]): number {
  const negatedEnergy = logsumexp(logits);
  return negatedEnergy;
}

// The module's default export is the same function under a shorter name.
const energyScore = minusEnergyScore;

export default energyScore;

View File

@ -1,5 +0,0 @@
import { atom } from "jotai";
/**
 * Jotai atom holding a number, initialised to 0.
 *
 * NOTE(review): the name suggests it stores the time of the most recent
 * search-box input — confirm against the components that read and write it.
 */
export const searchboxLastInputAtom = atom(0);

View File

@ -1,5 +1,5 @@
import TLDtxt from "./tlds.txt?raw";
/**
 * Returns the lines of the bundled `tlds.txt` (imported as raw text), with
 * comment lines removed.
 *
 * A line is treated as a comment when its first character is `#`. Blank
 * lines are not filtered out.
 *
 * @returns The remaining lines, in file order.
 */
export function getTLD() {
  // Accept both CRLF and LF line endings so the result does not depend on how
  // the text file was checked out; splitting on "\n" alone would leave a
  // trailing "\r" on every entry of a CRLF file, and splitting on "\r\n"
  // alone would not split an LF file at all.
  return TLDtxt.split(/\r?\n/).filter((line) => line[0] !== "#");
}

13
test/tokenize.test.ts Normal file
View File

@ -0,0 +1,13 @@
import { describe, expect, test } from "bun:test";
import tokenize from "../lib/nlp/tokenize/tokenizer";
// Checks that `tokenize` produces the same token ids for a fixed sample
// sentence whether its third argument is false or true.
// NOTE(review): judging by the test names, that flag selects a download
// mirror — confirm against the tokenizer implementation.
describe("Test if tokenizer works", () => {
  // Fixtures shared by both cases.
  const sampleText = "你好,世界!";
  const modelName = "Qwen/Qwen2.5-3B";
  const expectedTokenIds = [108386, 3837, 99489, 6313];

  test("Using without a mirror", async () => {
    const tokenIds = await tokenize(sampleText, modelName, false);
    expect(tokenIds).toEqual(expectedTokenIds);
  });

  test("Using with a mirror", async () => {
    const tokenIds = await tokenize(sampleText, modelName, true);
    expect(tokenIds).toEqual(expectedTokenIds);
  });
});