# Files
# 2026-01-30 22:38:08 +01:00
#
# 41 lines
# 1.1 KiB
# Python

from typing import Optional
from .client import EmbeddedClient
class Chunk:
    """A piece of text with an identifier and an optional embedding.

    ``id`` and ``text`` are stored as given; ``size`` is ``len(text)``
    measured once at construction time. The embedding starts out unset and
    is filled in by :meth:`embed`.
    """

    def __init__(self, id: int, text: str):
        """Store *id* and *text* and record the length of *text*.

        Args:
            id: Identifier for this chunk.
            text: The chunk's text content.
        """
        self.id = id
        self.text = text
        self.size = len(text)
        # Stays None until embed() has been called.
        self.__embedding: Optional[list[float]] = None

    def __repr__(self) -> str:
        """Return a short debug representation (the embedding is omitted)."""
        return (
            f"{type(self).__name__}(id={self.id!r}, size={self.size!r}, "
            f"has_embedding={self.has_embedding!r})"
        )

    def embed(self, client: "EmbeddedClient") -> list[float]:
        """Compute, store, and return the embedding of this chunk's text.

        Each call invokes ``client.embed`` again and overwrites any
        previously stored embedding.

        Args:
            client: Object whose ``embed(text)`` method produces the
                embedding for a string.

        Returns:
            The value returned by ``client.embed(self.text)``.
        """
        self.__embedding = client.embed(self.text)
        return self.__embedding

    @property
    def embedding(self) -> Optional[list[float]]:
        """The stored embedding, or ``None`` if none has been stored yet."""
        return self.__embedding

    @property
    def has_embedding(self) -> bool:
        """``True`` when the stored embedding is not ``None``."""
        return self.__embedding is not None
class Chunks:
    """Split text into fixed-size windows that overlap their neighbours.

    Consecutive windows start ``size - overlap`` characters apart, and each
    window holds at most ``size`` characters.
    """

    def __init__(self, size: int = 5000, overlap: int = 200) -> None:
        """Configure the chunker.

        Args:
            size: Maximum number of characters per chunk.
            overlap: Number of characters shared between the end of one
                chunk and the start of the next.
        """
        self.size = size
        self.overlap = overlap
        # Number of chunks produced by the most recent chunk() call.
        self.__count = 0

    def chunk(self, text: str) -> "list[Chunk]":
        """Split *text* into ``Chunk`` objects with ids starting at 1.

        Args:
            text: The string to split.

        Returns:
            The chunks in order of their start offset; an empty list when
            *text* is empty.

        Raises:
            ValueError: If ``size`` is not positive or ``overlap`` is not
                smaller than ``size``. ``count`` is left unchanged in that
                case.
        """
        # size/overlap are public and mutable, so they are checked on every
        # call. A non-positive step would never advance through the text.
        step = self.size - self.overlap
        if self.size <= 0 or step <= 0:
            raise ValueError(
                "size must be positive and greater than overlap "
                f"(size={self.size}, overlap={self.overlap})"
            )
        chunks = [
            Chunk(id=number, text=text[start:start + self.size])
            for number, start in enumerate(range(0, len(text), step), start=1)
        ]
        self.__count = len(chunks)
        return chunks

    @property
    def count(self) -> int:
        """Number of chunks produced by the last ``chunk()`` call (0 initially)."""
        return self.__count