Compare commits
No commits in common. "44c216c81b1baff599b975a967fd844dd7908b79" and "1acb0a7f1183173b439e7369c9c920f4f21cf059" have entirely different histories.
44c216c81b...1acb0a7f11
@@ -21,9 +21,6 @@ import tokenize from "lib/nlp/tokenize/tokenizer";
import { getEmbedding, getEmbeddingLayer } from "lib/nlp/getEmbedding";
import { loadVocab } from "lib/nlp/tokenize/loadVocab";
import BPETokenizer from "lib/nlp/tokenize/BPEtokenizer";
import energyScore from "lib/nlp/energyScore";
import bytesToUnicode from "lib/nlp/tokenize/bytesToUnicode";
import { searchboxLastInputAtom } from "lib/state/searchboxLastInput";

interface EmbeddingLayer {
    [key: number]: Float32Array<ArrayBufferLike>;
@@ -34,7 +31,6 @@ export default function OneSearch() {
    const [embeddingLayer, setEmbeddingLayer] = useState<EmbeddingLayer | null>(null);
    const [NLUsession, setNLUsession] = useState<ort.InferenceSession | null>(null);
    const [tokenizer, setTokenizer] = useState<BPETokenizer | null>(null);
    const lastInput = useAtomValue(searchboxLastInputAtom);
    const lastRequestTimeRef = useRef(0);
    const selected = useAtomValue(selectedSuggestionAtom);
    const settings = useAtomValue(settingsAtom);
@@ -71,7 +67,7 @@ export default function OneSearch() {
                // Handle fetch error
                sendError(error);
            });
    }, [lastInput]);
    }, [query]);

    function updateSuggestion(data: suggestionItem[]) {
        setFinalSuggetsion((cur: suggestionItem[]) => {
@@ -134,12 +130,16 @@ export default function OneSearch() {
    }

    async function getNLUResult(query: string) {
        const start = new Date().getTime();
        if (embeddingLayer === null || NLUsession === null || tokenizer == null) return;
        const tokenIds = await tokenize(bytesToUnicode(query), tokenizer);
        const tokenIds = await tokenize(query, tokenizer);
        console.log(new Date().getTime() - start, "ms");
        const embeddings = getEmbedding(tokenIds, embeddingLayer, 64);
        const inputTensor = new ort.Tensor("float32", embeddings, [1, 64, 96]);
        const feeds = { input: inputTensor };
        console.log(new Date().getTime() - start, "ms");
        const results = await NLUsession.run(feeds);
        console.log(new Date().getTime() - start, "ms");
        return results;
    }
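Editor's note: for readers unfamiliar with onnxruntime-web, here is a minimal sketch of the inference flow used in getNLUResult above. The model path "/models/nlu.onnx" is a placeholder, while the input name "input", the output name "output", and the [1, 64, 96] shape (batch, padded sequence length, embedding size) are taken from the surrounding code; none of this is a confirmed project detail.

```typescript
import * as ort from "onnxruntime-web";

// Sketch only: "/models/nlu.onnx" is a placeholder path, not the project's actual asset.
async function createNLUSession(): Promise<ort.InferenceSession> {
    return ort.InferenceSession.create("/models/nlu.onnx");
}

async function runNLU(session: ort.InferenceSession, embeddings: Float32Array): Promise<number[]> {
    // The component feeds a float32 tensor of shape [1, 64, 96] under the name "input",
    // so `embeddings` is expected to hold 1 * 64 * 96 values.
    const input = new ort.Tensor("float32", embeddings, [1, 64, 96]);
    const results = await session.run({ input });
    // "output" mirrors the result.output.data access in the component; .data is a flat typed array of logits.
    return Array.from(results.output.data as Float32Array);
}
```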
@@ -171,15 +171,9 @@ export default function OneSearch() {

        (async function () {
            const result = await getNLUResult(query);
            if (result === undefined) return;
            const rawData = result.output.data;
            const data: number[] = [];
            for (let i=0;i<rawData.length;i++){
                data.push(rawData[i] as number);
            }
            console.log(data, energyScore(data));
            console.log(result);
        })();
    }, [lastInput, engineName]);
    }, [query, engineName]);

    return (
        <SuggestionBox>
@@ -6,14 +6,12 @@ import { selectedSuggestionAtom } from "lib/state/suggestionSelection";
import handleEnter from "lib/onesearch/handleEnter";
import { suggestionAtom } from "lib/state/suggestion";
import { useTranslation } from "react-i18next";
import { searchboxLastInputAtom } from "lib/state/searchboxLastInput";

export default function Search(props: { onFocus: () => void }) {
    const { t } = useTranslation();
    const settings = useAtomValue(settingsAtom);
    const [query, setQuery] = useAtom(queryAtom);
    const [selectedSuggestion, setSelected] = useAtom(selectedSuggestionAtom);
    const [_, setLastInput] = useAtom(searchboxLastInputAtom)
    const suggestions = useAtomValue(suggestionAtom);
    const searchBoxRef = useRef<HTMLInputElement>(null);
@@ -27,21 +25,11 @@ export default function Search(props: { onFocus: () => void }) {
        } else if (e.key == "ArrowUp") {
            e.preventDefault();
            const len = suggestions.length;
            const lastSelectedIndex = (selectedSuggestion - 1 + len) % len;
            const lastSuggeston = suggestions[lastSelectedIndex];
            setSelected(lastSelectedIndex);
            if (["QUERY", "NAVIGATION", "default"].includes(lastSuggeston.type)) {
                setQuery(lastSuggeston.suggestion);
            }
            setSelected((selectedSuggestion - 1 + len) % len);
        } else if (e.key == "ArrowDown") {
            e.preventDefault();
            const len = suggestions.length;
            const nextSelectedIndex = (selectedSuggestion + 1 + len) % len;
            const nextSuggeston = suggestions[nextSelectedIndex];
            setSelected(nextSelectedIndex);
            if (["QUERY", "NAVIGATION", "default"].includes(nextSuggeston.type)) {
                setQuery(nextSuggeston.suggestion);
            }
            setSelected((selectedSuggestion + 1) % len);
        }
    }
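Editor's note on the index arithmetic in this hunk: adding `len` before the modulo keeps the ArrowUp index non-negative when the first suggestion is selected, so the selection wraps around to the end of the list. A tiny standalone illustration:

```typescript
// With 4 suggestions and index 0 selected, ArrowUp should land on index 3.
const len = 4;
console.log((0 - 1 + len) % len); // 3: wraps to the last suggestion
console.log((3 + 1) % len);       // 0: ArrowDown wraps back to the first
```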
@@ -60,10 +48,11 @@ export default function Search(props: { onFocus: () => void }) {
            placeholder={t("search.placeholder")}
            onFocus={props.onFocus}
            onKeyDown={handleKeydown}
            onChange={(e) => {
                setLastInput(new Date().getTime());
                setQuery(() => e.target.value);
            }}
            onChange={(e) =>
                setQuery(() => {
                    return e.target.value;
                })
            }
            autoComplete="off"
            autoCorrect="off"
            autoCapitalize="off"
@@ -1,13 +0,0 @@
function logsumexp(arr: number[]): number {
    const maxVal = Math.max(...arr);
    const sumExp = arr.reduce((sum, val) => sum + Math.exp(val - maxVal), 0);
    return Math.log(sumExp) + maxVal;
}

function minusEnergyScore(logits: number[]): number {
    return logsumexp(logits);
}

const energyScore = minusEnergyScore;

export default energyScore;
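Editor's note: the deleted helper is the standard numerically stable log-sum-exp, where subtracting the maximum before exponentiating avoids overflow. In the usual energy-based out-of-distribution formulation the energy is the negative of this quantity, which matches the minusEnergyScore name; since energyScore is an alias for it, the value logged in OneSearch is the negative energy. For reference:

```latex
% Numerically stable log-sum-exp, with the maximum subtracted to avoid overflow:
\operatorname{logsumexp}(x_1,\dots,x_n) = m + \log \sum_{i=1}^{n} e^{x_i - m},
\qquad m = \max_i x_i
% Energy score over logits z, as commonly defined:
E(\mathbf{z}) = -\operatorname{logsumexp}(z_1,\dots,z_n)
```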
@@ -1,5 +0,0 @@
import { atom } from "jotai";

const searchboxLastInputAtom = atom(0);

export { searchboxLastInputAtom };
@@ -1,5 +1,5 @@
import TLDtxt from "./tlds.txt?raw";

export function getTLD() {
    return TLDtxt.split("\r\n").filter((line) => line[0] !== "#");
    return TLDtxt.split("\n").filter((line) => line[0] !== "#");
}
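Editor's note: the two return lines above differ only in the line separator they split on. If tlds.txt may ship with either CRLF or LF endings, a separator-agnostic split would handle both; this is a sketch, not what either commit does, and the getTLDFrom name and the raw-text parameter are illustrative choices to keep it self-contained.

```typescript
// Sketch: tolerate both "\r\n" and "\n", and drop comment lines and blanks.
export function getTLDFrom(raw: string): string[] {
    return raw
        .split(/\r?\n/)
        .filter((line) => line.length > 0 && line[0] !== "#");
}
```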
test/tokenize.test.ts (new file, 13 lines)
@@ -0,0 +1,13 @@
import { describe, expect, test } from "bun:test";
import tokenize from "../lib/nlp/tokenize/tokenizer";

describe("Test if tokenizer works", () => {
    test("Using without a mirror", async () => {
        let result = await tokenize("你好,世界!", "Qwen/Qwen2.5-3B", false);
        expect(result).toEqual([108386, 3837, 99489, 6313]);
    });
    test("Using with a mirror", async () => {
        let result = await tokenize("你好,世界!", "Qwen/Qwen2.5-3B", true);
        expect(result).toEqual([108386, 3837, 99489, 6313]);
    });
});
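Editor's note: this test implies a tokenizer entry point that takes the input text, a Hugging Face model id, and a flag for whether to fetch vocabulary through a mirror, and resolves to an array of token ids. A minimal usage sketch under that assumption; the import path follows the test file.

```typescript
import tokenize from "../lib/nlp/tokenize/tokenizer";

// Assumed signature, inferred from the test above:
// tokenize(text, modelId, useMirror) resolves to number[] of BPE token ids.
(async () => {
    const ids = await tokenize("你好,世界!", "Qwen/Qwen2.5-3B", false);
    console.log(ids); // expected per the test: [108386, 3837, 99489, 6313]
})();
```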