fix: unable to properly segment raw TLD data

feat: searchbox's value changes while selecting suggestion
This commit is contained in:
alikia2x (寒寒) 2025-01-20 03:03:28 +08:00
parent de7c5990bb
commit f7de462a28
Signed by: alikia2x
GPG Key ID: 56209E0CCD8420C6
5 changed files with 28 additions and 23 deletions

View File

@ -23,6 +23,7 @@ import { loadVocab } from "lib/nlp/tokenize/loadVocab";
import BPETokenizer from "lib/nlp/tokenize/BPEtokenizer";
import energyScore from "lib/nlp/energyScore";
import bytesToUnicode from "lib/nlp/tokenize/bytesToUnicode";
import { searchboxLastInputAtom } from "lib/state/searchboxLastInput";
interface EmbeddingLayer {
[key: number]: Float32Array<ArrayBufferLike>;
@ -33,6 +34,7 @@ export default function OneSearch() {
const [embeddingLayer, setEmbeddingLayer] = useState<EmbeddingLayer | null>(null);
const [NLUsession, setNLUsession] = useState<ort.InferenceSession | null>(null);
const [tokenizer, setTokenizer] = useState<BPETokenizer | null>(null);
const lastInput = useAtomValue(searchboxLastInputAtom);
const lastRequestTimeRef = useRef(0);
const selected = useAtomValue(selectedSuggestionAtom);
const settings = useAtomValue(settingsAtom);
@ -69,7 +71,7 @@ export default function OneSearch() {
// Handle fetch error
sendError(error);
});
}, [query]);
}, [lastInput]);
function updateSuggestion(data: suggestionItem[]) {
setFinalSuggetsion((cur: suggestionItem[]) => {
@ -177,7 +179,7 @@ export default function OneSearch() {
}
console.log(data, energyScore(data));
})();
}, [query, engineName]);
}, [lastInput, engineName]);
return (
<SuggestionBox>

View File

@ -6,12 +6,14 @@ import { selectedSuggestionAtom } from "lib/state/suggestionSelection";
import handleEnter from "lib/onesearch/handleEnter";
import { suggestionAtom } from "lib/state/suggestion";
import { useTranslation } from "react-i18next";
import { searchboxLastInputAtom } from "lib/state/searchboxLastInput";
export default function Search(props: { onFocus: () => void }) {
const { t } = useTranslation();
const settings = useAtomValue(settingsAtom);
const [query, setQuery] = useAtom(queryAtom);
const [selectedSuggestion, setSelected] = useAtom(selectedSuggestionAtom);
const [_, setLastInput] = useAtom(searchboxLastInputAtom)
const suggestions = useAtomValue(suggestionAtom);
const searchBoxRef = useRef<HTMLInputElement>(null);
@ -25,11 +27,21 @@ export default function Search(props: { onFocus: () => void }) {
} else if (e.key == "ArrowUp") {
e.preventDefault();
const len = suggestions.length;
setSelected((selectedSuggestion - 1 + len) % len);
const lastSelectedIndex = (selectedSuggestion - 1 + len) % len;
const lastSuggeston = suggestions[lastSelectedIndex];
setSelected(lastSelectedIndex);
if (["QUERY", "NAVIGATION", "default"].includes(lastSuggeston.type)) {
setQuery(lastSuggeston.suggestion);
}
} else if (e.key == "ArrowDown") {
e.preventDefault();
const len = suggestions.length;
setSelected((selectedSuggestion + 1) % len);
const nextSelectedIndex = (selectedSuggestion + 1 + len) % len;
const nextSuggeston = suggestions[nextSelectedIndex];
setSelected(nextSelectedIndex);
if (["QUERY", "NAVIGATION", "default"].includes(nextSuggeston.type)) {
setQuery(nextSuggeston.suggestion);
}
}
}
@ -48,11 +60,10 @@ export default function Search(props: { onFocus: () => void }) {
placeholder={t("search.placeholder")}
onFocus={props.onFocus}
onKeyDown={handleKeydown}
onChange={(e) =>
setQuery(() => {
return e.target.value;
})
}
onChange={(e) => {
setLastInput(new Date().getTime());
setQuery(() => e.target.value);
}}
autoComplete="off"
autoCorrect="off"
autoCapitalize="off"

View File

@ -0,0 +1,5 @@
import { atom } from "jotai";
/**
 * Jotai atom holding a number, initialised to 0.
 *
 * The search box's change handler writes `new Date().getTime()` into it on
 * every input event, and consumers can subscribe to it to react to user typing.
 */
export const searchboxLastInputAtom = atom(0);

View File

@ -1,5 +1,5 @@
import TLDtxt from "./tlds.txt?raw";
/**
 * Returns the entries of the bundled raw TLD list, one per line.
 *
 * Lines are split on either LF or CRLF, so the result does not depend on the
 * line endings the text file was saved or checked out with. Splitting on a
 * fixed "\n" leaves a trailing "\r" on every entry of a CRLF file, and
 * splitting on a fixed "\r\n" returns the whole file as one entry for an LF
 * file.
 *
 * @returns The lines of the file in their original order, excluding lines
 *          whose first character is "#". Blank lines are not removed.
 */
export function getTLD(): string[] {
    return TLDtxt.split(/\r?\n/).filter((line) => line[0] !== "#");
}

View File

@ -1,13 +0,0 @@
import { describe, expect, test } from "bun:test";
import tokenize from "../lib/nlp/tokenize/tokenizer";
// Runs the tokenizer on one sample sentence and checks the resulting token ids,
// once for each value of the third argument (presumably a download-mirror
// toggle, going by the test titles; confirm against the tokenizer module).
describe("Test if tokenizer works", () => {
    const expectedTokenIds = [108386, 3837, 99489, 6313];
    const cases = [
        { title: "Using without a mirror", useMirror: false },
        { title: "Using with a mirror", useMirror: true },
    ];

    for (const { title, useMirror } of cases) {
        test(title, async () => {
            const tokenIds = await tokenize("你好,世界!", "Qwen/Qwen2.5-3B", useMirror);
            expect(tokenIds).toEqual(expectedTokenIds);
        });
    }
});