temp: NLP base class

This commit is contained in:
Alikia2x 2024-06-17 03:11:17 +08:00
parent f85162622d
commit e7f6f69dfe
3 changed files with 24 additions and 13 deletions

19
lib/nlp/base.ts Normal file
View File

@ -0,0 +1,19 @@
import { NLPResult } from "../onesearch/NLPResult";
import { stopwords } from "./stopwords";
/**
 * Base class for NLP processing.
 *
 * Stores the incoming `query` and `task` as public properties and creates an
 * empty {@link NLPResult} in `result`. The only behavior implemented here is
 * stopword removal.
 */
class NLP {
    /** Result object, created with its default values in the constructor. */
    result: NLPResult;

    // NOTE(review): `String` is the boxed wrapper type; the primitive `string`
    // is the idiomatic choice. Left unchanged here to keep the constructor
    // signature identical for existing callers.
    constructor(
        public query: String,
        public task: String
    ) {
        this.result = new NLPResult();
    }

    /**
     * Removes stopwords from `str`, matching whole words case-insensitively.
     *
     * Each match is replaced with an empty string, so the whitespace that
     * surrounded a removed word is left in place.
     *
     * @param str - Text to strip stopwords from.
     * @param extraStopwords - Additional words to remove.
     * @param disableDefault - When true, only `extraStopwords` are removed and
     *   the default `stopwords` list is ignored.
     * @returns The text with every matching stopword removed.
     */
    public removeStopwords(
        str: string,
        extraStopwords: string[] = [],
        disableDefault: boolean = false
    ): string {
        const list = disableDefault ? extraStopwords : stopwords.concat(extraStopwords);
        // Longest entries first: otherwise "it" would strip the head of "it's"
        // and leave a dangling "'s" that the later "it's" entry can no longer
        // match. Sorting a copy keeps the caller's array untouched.
        const ordered = [...list].sort((a, b) => b.length - a.length);
        for (const word of ordered) {
            if (word.length === 0) {
                continue; // an empty pattern can never remove anything
            }
            // Escape regex metacharacters so each entry is matched literally;
            // unescaped, "(" throws a SyntaxError and "." matches any character.
            const literal = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            str = str.replace(new RegExp(`\\b${literal}\\b`, "gi"), "");
        }
        return str;
    }
}

View File

@ -1,9 +1 @@
/**
 * Default English stopwords removed by `removeStopwords`.
 *
 * Entries are lowercase; matching is done case-insensitively by the consumer.
 */
export const stopwords = [
    "a", "about", "above", "after", "again", "against", "all", "am", "an", "and",
    "any", "are", "aren't", "as", "at", "be", "because", "been", "before", "being",
    "below", "between", "both", "but", "by", "can't", "cannot", "could", "couldn't", "did",
    "didn't", "do", "does", "doesn't", "doing", "don't", "down", "during", "each", "few",
    "for", "from", "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having",
    "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him",
    "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if",
    "in", "into", "is", "isn't", "it", "it's", "its", "itself", "let's", "me",
    "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off",
    // "ours" and "ourselves" are separate entries; fused into one string they
    // could never match either word on its own.
    "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out",
    "over", "own", "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't",
    "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them",
    "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've",
    "this", "those", "through", "to", "too", "under", "until", "up", "very", "was",
    "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's",
    "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why",
    "why's", "with", "won't", "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've",
    "your", "yours", "yourself", "yourselves"
];
/**
 * Removes stopwords from `str`, matching whole words case-insensitively.
 *
 * Each match is replaced with an empty string, so the whitespace that
 * surrounded a removed word is left in place.
 *
 * @param str - Text to strip stopwords from.
 * @param extraStopwords - Additional words to remove.
 * @param disableDefault - When true, only `extraStopwords` are removed and the
 *   default `stopwords` list is ignored.
 * @returns The text with every matching stopword removed.
 */
export default function removeStopwords(
    str: string,
    extraStopwords: string[] = [],
    disableDefault: boolean = false
): string {
    const list = disableDefault ? extraStopwords : stopwords.concat(extraStopwords);
    // Longest entries first: otherwise "it" would strip the head of "it's" and
    // leave a dangling "'s" that the later "it's" entry can no longer match.
    // Sorting a copy keeps the caller's array untouched.
    const ordered = [...list].sort((a, b) => b.length - a.length);
    for (const word of ordered) {
        if (word.length === 0) {
            continue; // an empty pattern can never remove anything
        }
        // Escape regex metacharacters so each entry is matched literally;
        // unescaped, "(" throws a SyntaxError and "." matches any character.
        const literal = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        str = str.replace(new RegExp(`\\b${literal}\\b`, "gi"), "");
    }
    return str;
}

View File

@ -1,9 +1,9 @@
export class NLPResult { export class NLPResult {
constructor( constructor(
public suggestion: string | null, public suggestion: string | null = null,
public intention: string | null, public intention: string | null = null,
public probability: number, public probability: number = 0,
public confidence: number, public confidence: number = 0,
public relevanceBase: number = 2000, public relevanceBase: number = 2000,
public confidenceWeight: number = 0.2, public confidenceWeight: number = 0.2,
public type: string = "text", public type: string = "text",