formatting and clean-up 🧹

This commit is contained in:
Koen van Eijk 2024-10-01 11:46:26 +02:00
parent 34b740e0df
commit 225a2750e3
8 changed files with 67 additions and 41 deletions

1
.gitignore vendored
View File

@ -6,3 +6,4 @@ dist
build build
.pytest_cache .pytest_cache
.vscode .vscode
.aider*

View File

@ -137,9 +137,7 @@ def timeline():
def search(): def search():
q = request.args.get("q") q = request.args.get("q")
entries = get_all_entries() entries = get_all_entries()
embeddings = [ embeddings = [np.frombuffer(entry.embedding, dtype=np.float64) for entry in entries]
np.frombuffer(entry.embedding, dtype=np.float64) for entry in entries
]
query_embedding = get_embedding(q) query_embedding = get_embedding(q)
similarities = [cosine_similarity(query_embedding, emb) for emb in embeddings] similarities = [cosine_similarity(query_embedding, emb) for emb in embeddings]
indices = np.argsort(similarities)[::-1] indices = np.argsort(similarities)[::-1]

View File

@ -2,9 +2,7 @@ import os
import sys import sys
import argparse import argparse
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(description="OpenRecall")
description="OpenRecall"
)
parser.add_argument( parser.add_argument(
"--storage-path", "--storage-path",
@ -38,6 +36,7 @@ def get_appdata_folder(app_name="openrecall"):
os.makedirs(path) os.makedirs(path)
return path return path
if args.storage_path: if args.storage_path:
appdata_folder = args.storage_path appdata_folder = args.storage_path
screenshots_path = os.path.join(appdata_folder, "screenshots") screenshots_path = os.path.join(appdata_folder, "screenshots")

View File

@ -9,7 +9,11 @@ from openrecall.config import screenshots_path, args
from openrecall.database import insert_entry from openrecall.database import insert_entry
from openrecall.nlp import get_embedding from openrecall.nlp import get_embedding
from openrecall.ocr import extract_text_from_image from openrecall.ocr import extract_text_from_image
from openrecall.utils import get_active_app_name, get_active_window_title, is_user_active from openrecall.utils import (
get_active_app_name,
get_active_window_title,
is_user_active,
)
def mean_structured_similarity_index(img1, img2, L=255): def mean_structured_similarity_index(img1, img2, L=255):
@ -57,6 +61,7 @@ def take_screenshots(monitor=1):
def record_screenshots_thread(): def record_screenshots_thread():
# TODO: fix the error from huggingface tokenizers # TODO: fix the error from huggingface tokenizers
import os import os
os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["TOKENIZERS_PARALLELISM"] = "false"
last_screenshots = take_screenshots() last_screenshots = take_screenshots()

View File

@ -1,5 +1,6 @@
import sys import sys
def human_readable_time(timestamp): def human_readable_time(timestamp):
import datetime import datetime
@ -35,6 +36,7 @@ def get_active_app_name_osx():
except: except:
return "" return ""
def get_active_window_title_osx(): def get_active_window_title_osx():
from Quartz import ( from Quartz import (
CGWindowListCopyWindowInfo, CGWindowListCopyWindowInfo,
@ -80,11 +82,12 @@ def get_active_window_title_windows():
def get_active_app_name_linux(): def get_active_app_name_linux():
return '' return ""
def get_active_window_title_linux(): def get_active_window_title_linux():
return '' return ""
def get_active_app_name(): def get_active_app_name():
if sys.platform == "win32": if sys.platform == "win32":
@ -107,6 +110,7 @@ def get_active_window_title():
else: else:
raise NotImplementedError("This platform is not supported") raise NotImplementedError("This platform is not supported")
def is_user_active_osx(): def is_user_active_osx():
import subprocess import subprocess
@ -115,10 +119,10 @@ def is_user_active_osx():
output = subprocess.check_output(["ioreg", "-c", "IOHIDSystem"]).decode() output = subprocess.check_output(["ioreg", "-c", "IOHIDSystem"]).decode()
# Find the line containing "HIDIdleTime" # Find the line containing "HIDIdleTime"
for line in output.split('\n'): for line in output.split("\n"):
if "HIDIdleTime" in line: if "HIDIdleTime" in line:
# Extract the idle time value # Extract the idle time value
idle_time = int(line.split('=')[-1].strip()) idle_time = int(line.split("=")[-1].strip())
# Convert idle time from nanoseconds to seconds # Convert idle time from nanoseconds to seconds
idle_seconds = idle_time / 1000000000 idle_seconds = idle_time / 1000000000
@ -137,6 +141,7 @@ def is_user_active_osx():
# If there's any other error, assume the user is not idle # If there's any other error, assume the user is not idle
return True return True
def is_user_active(): def is_user_active():
if sys.platform == "win32": if sys.platform == "win32":
return True return True
@ -146,4 +151,3 @@ def is_user_active():
return True return True
else: else:
raise NotImplementedError("This platform is not supported") raise NotImplementedError("This platform is not supported")

View File

@ -17,12 +17,17 @@ install_requires = [
"shapely==2.0.4", "shapely==2.0.4",
"h5py==3.11.0", "h5py==3.11.0",
"rapidfuzz==3.9.3", "rapidfuzz==3.9.3",
"Pillow==10.3.0" "Pillow==10.3.0",
] ]
# Define OS-specific dependencies # Define OS-specific dependencies
extras_require = {"windows": ["pywin32", "psutil"], "macos": ["pyobjc==10.3"], "linux": [], extras_require = {
'python-doctr': ['python-doctr @ git+https://github.com/koenvaneijk/doctr.git@af711bc04eb8876a7189923fb51ec44481ee18cd'] "windows": ["pywin32", "psutil"],
"macos": ["pyobjc==10.3"],
"linux": [],
"python-doctr": [
"python-doctr @ git+https://github.com/koenvaneijk/doctr.git@af711bc04eb8876a7189923fb51ec44481ee18cd"
],
} }
# Determine the current OS # Determine the current OS

View File

@ -2,29 +2,35 @@ import pytest
from unittest import mock from unittest import mock
from openrecall.config import get_appdata_folder from openrecall.config import get_appdata_folder
def test_get_appdata_folder_windows(tmp_path): def test_get_appdata_folder_windows(tmp_path):
with mock.patch('sys.platform', 'win32'): with mock.patch("sys.platform", "win32"):
with mock.patch.dict('os.environ', {'APPDATA': str(tmp_path)}): with mock.patch.dict("os.environ", {"APPDATA": str(tmp_path)}):
expected_path = tmp_path / 'openrecall' expected_path = tmp_path / "openrecall"
assert get_appdata_folder() == str(expected_path) assert get_appdata_folder() == str(expected_path)
assert expected_path.exists() assert expected_path.exists()
def test_get_appdata_folder_windows_no_appdata(): def test_get_appdata_folder_windows_no_appdata():
with mock.patch('sys.platform', 'win32'): with mock.patch("sys.platform", "win32"):
with mock.patch.dict('os.environ', {}, clear=True): with mock.patch.dict("os.environ", {}, clear=True):
with pytest.raises(EnvironmentError, match="APPDATA environment variable is not set."): with pytest.raises(
EnvironmentError, match="APPDATA environment variable is not set."
):
get_appdata_folder() get_appdata_folder()
def test_get_appdata_folder_darwin(tmp_path): def test_get_appdata_folder_darwin(tmp_path):
with mock.patch('sys.platform', 'darwin'): with mock.patch("sys.platform", "darwin"):
with mock.patch('os.path.expanduser', return_value=str(tmp_path)): with mock.patch("os.path.expanduser", return_value=str(tmp_path)):
expected_path = tmp_path / 'Library' / 'Application Support' / 'openrecall' expected_path = tmp_path / "Library" / "Application Support" / "openrecall"
assert get_appdata_folder() == str(expected_path) assert get_appdata_folder() == str(expected_path)
assert expected_path.exists() assert expected_path.exists()
def test_get_appdata_folder_linux(tmp_path): def test_get_appdata_folder_linux(tmp_path):
with mock.patch('sys.platform', 'linux'): with mock.patch("sys.platform", "linux"):
with mock.patch('os.path.expanduser', return_value=str(tmp_path)): with mock.patch("os.path.expanduser", return_value=str(tmp_path)):
expected_path = tmp_path / '.local' / 'share' / 'openrecall' expected_path = tmp_path / ".local" / "share" / "openrecall"
assert get_appdata_folder() == str(expected_path) assert get_appdata_folder() == str(expected_path)
assert expected_path.exists() assert expected_path.exists()

View File

@ -2,34 +2,42 @@ import pytest
import numpy as np import numpy as np
from openrecall.nlp import cosine_similarity from openrecall.nlp import cosine_similarity
def test_cosine_similarity_identical_vectors(): def test_cosine_similarity_identical_vectors():
a = np.array([1, 0, 0]) a = np.array([1, 0, 0])
b = np.array([1, 0, 0]) b = np.array([1, 0, 0])
assert cosine_similarity(a, b) == 1.0 assert cosine_similarity(a, b) == 1.0
def test_cosine_similarity_orthogonal_vectors(): def test_cosine_similarity_orthogonal_vectors():
a = np.array([1, 0, 0]) a = np.array([1, 0, 0])
b = np.array([0, 1, 0]) b = np.array([0, 1, 0])
assert cosine_similarity(a, b) == 0.0 assert cosine_similarity(a, b) == 0.0
def test_cosine_similarity_opposite_vectors(): def test_cosine_similarity_opposite_vectors():
a = np.array([1, 0, 0]) a = np.array([1, 0, 0])
b = np.array([-1, 0, 0]) b = np.array([-1, 0, 0])
assert cosine_similarity(a, b) == -1.0 assert cosine_similarity(a, b) == -1.0
def test_cosine_similarity_non_unit_vectors(): def test_cosine_similarity_non_unit_vectors():
a = np.array([3, 0, 0]) a = np.array([3, 0, 0])
b = np.array([1, 0, 0]) b = np.array([1, 0, 0])
assert cosine_similarity(a, b) == 1.0 assert cosine_similarity(a, b) == 1.0
def test_cosine_similarity_arbitrary_vectors(): def test_cosine_similarity_arbitrary_vectors():
a = np.array([1, 2, 3]) a = np.array([1, 2, 3])
b = np.array([4, 5, 6]) b = np.array([4, 5, 6])
expected_similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) expected_similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
assert cosine_similarity(a, b) == pytest.approx(expected_similarity) assert cosine_similarity(a, b) == pytest.approx(expected_similarity)
def test_cosine_similarity_zero_vector(): def test_cosine_similarity_zero_vector():
a = np.array([0, 0, 0]) a = np.array([0, 0, 0])
b = np.array([1, 0, 0]) b = np.array([1, 0, 0])
result = cosine_similarity(a, b) result = cosine_similarity(a, b)
assert np.isnan(result), "Expected result to be NaN when one of the vectors is a zero vector" assert np.isnan(
result
), "Expected result to be NaN when one of the vectors is a zero vector"