feat(vector): Automatic indexing of documents in s3 storage
This commit is contained in:
56
store/s3.py
Normal file
56
store/s3.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from typing import Iterable, Callable
|
||||
from minio import Minio
|
||||
from minio.commonconfig import Tags
|
||||
from minio.datatypes import Object
|
||||
|
||||
class S3Storage:
    """Wrapper around a MinIO client that operates on a single bucket.

    All object and tag operations are issued against the bucket name given
    at construction time. The underlying client is exposed as ``client``.
    """

    def __init__(
        self,
        endpoint: str,
        access_key: str,
        secret_key: str,
        bucket: str,
        *,
        secure: bool = False,
    ) -> None:
        """Create the MinIO client and remember the target bucket.

        Args:
            endpoint: Endpoint string handed to ``Minio`` as-is.
            access_key: Access key used to authenticate.
            secret_key: Secret key used to authenticate.
            bucket: Name of the bucket every method operates on.
            secure: Forwarded to ``Minio(secure=...)``. Defaults to ``False``
                to keep the previous behaviour for existing callers.
        """
        self.__endpoint = endpoint
        self.__access_key = access_key
        self.__secret_key = secret_key
        self.__bucket_name = bucket
        self.client = Minio(
            self.__endpoint,
            access_key=self.__access_key,
            secret_key=self.__secret_key,
            secure=secure,
        )

    def get(self, object_name: str) -> str:
        """Download an object and return its content as text.

        The payload is decoded as UTF-8; bytes that cannot be decoded are
        dropped (``errors="ignore"``) rather than raising.

        Args:
            object_name: Name of the object inside the bucket.

        Returns:
            The decoded object content.
        """
        response = self.client.get_object(
            bucket_name=self.__bucket_name,
            object_name=object_name,
        )
        # Release the response and its connection even when reading fails,
        # otherwise the connection is never handed back.
        try:
            raw = response.read()
        finally:
            response.close()
            response.release_conn()
        return raw.decode("utf-8", errors="ignore")

    def list(self, recursive: bool = True) -> Iterable[Object]:
        """Return the iterable of objects produced by the client's listing.

        Args:
            recursive: Forwarded to ``list_objects``.
        """
        return self.client.list_objects(
            bucket_name=self.__bucket_name,
            recursive=recursive,
        )

    def list_filtered(
        self,
        filter_func: Callable[[Object], bool],
        recursive: bool = True,
    ) -> Iterable[Object]:
        """Return a list of the objects for which ``filter_func`` is truthy.

        Args:
            filter_func: Predicate applied to every listed object.
            recursive: Forwarded to ``list``.
        """
        # A comprehension avoids calling the builtin ``list`` by name inside
        # a class that also defines a ``list`` method.
        return [obj for obj in self.list(recursive=recursive) if filter_func(obj)]

    def count(self, recursive: bool = True) -> int:
        """Count the listed objects by consuming the listing once.

        Args:
            recursive: Forwarded to ``list``.
        """
        return sum(1 for _ in self.list(recursive=recursive))

    def set_tag(self, object_name: str, tags: dict[str, str]) -> None:
        """Set the given key/value pairs as the tags of an object.

        Args:
            object_name: Name of the object inside the bucket.
            tags: Tag keys mapped to tag values.
        """
        new_tag = Tags.new_object_tags()
        # Assign item by item (not ``update``) so that the item assignment
        # of ``Tags`` runs for every pair.
        # NOTE(review): Tags appears to validate keys/values on assignment;
        # confirm against the minio version in use.
        for key, value in tags.items():
            new_tag[key] = value
        self.client.set_object_tags(
            bucket_name=self.__bucket_name,
            object_name=object_name,
            tags=new_tag,
        )

    def get_tag(self, object_name: str) -> dict[str, str]:
        """Return the tags of an object as a plain ``dict``.

        Args:
            object_name: Name of the object inside the bucket.

        Returns:
            A new dict with the object's tags, or an empty dict when the
            client returns ``None``.
        """
        tags = self.client.get_object_tags(
            bucket_name=self.__bucket_name,
            object_name=object_name,
        )
        if tags is None:
            return {}
        return dict(tags)

    def check_tag(self, object_name: str, key: str, value: str) -> bool:
        """Tell whether the object carries tag ``key`` with exactly ``value``.

        Args:
            object_name: Name of the object inside the bucket.
            key: Tag key to look up.
            value: Expected tag value.

        Returns:
            ``True`` when the tag exists and equals ``value``; ``False`` when
            it is missing or different.
        """
        return self.get_tag(object_name).get(key) == value

    def delete_tag(self, object_name: str) -> None:
        """Delete the tags of an object via ``delete_object_tags``.

        Args:
            object_name: Name of the object inside the bucket.
        """
        self.client.delete_object_tags(
            bucket_name=self.__bucket_name,
            object_name=object_name,
        )

    def delete_object(self, object_name: str) -> None:
        """Remove an object from the bucket.

        Args:
            object_name: Name of the object inside the bucket.
        """
        self.client.remove_object(
            bucket_name=self.__bucket_name,
            object_name=object_name,
        )
|
||||
Reference in New Issue
Block a user