在本地存储 lancedb 数据库时,表名需要先经过处理。
如果是有效表名,则直接使用;否则编码为特化的 base64 字符串(将 `+/=` 替换为 `-_.`),编码后的字符串以 `_b64u8_` 前缀开头。
python
import base64
import datetime as dt

from lancedb.util import validate_table_name
# ----------------------------------------------------------------------------------------------------------------
# Marker prepended to every base64-encoded table name; _decode_name only
# decodes names that start with it.
_enc_prefix = '_b64u8_'
# Byte translation tables swapping the base64 characters "+/=" for "-_."
# (encode) and back again (decode).
_b64_chars, _name_chars = b'+/=', b'-_.'
_encode_translation = bytes.maketrans(_b64_chars, _name_chars)
_decode_translation = bytes.maketrans(_name_chars, _b64_chars)
def _encode_name(name: str) -> str:
    """Translate an arbitrary string into a name accepted by ``validate_table_name``.

    A name that already passes ``validate_table_name`` is returned unchanged,
    with one exception: a name that itself starts with ``_enc_prefix`` is always
    encoded. Otherwise it would be indistinguishable from an encoded name and
    ``_decode_name`` would try to base64-decode it, breaking the round trip.

    Every other name is UTF-8 encoded, base64 encoded, passed through
    ``_encode_translation`` (replacing ``+/=`` with ``-_.``) and prefixed with
    ``_enc_prefix``.

    Raises:
        ValueError: propagated from the final ``validate_table_name`` call if
            even the encoded form is rejected.
    """
    if not name.startswith(_enc_prefix):
        try:
            validate_table_name(name)
        except ValueError:
            # Not a legal table name as-is: fall through and encode it.
            pass
        else:
            return name
    payload = base64.b64encode(name.encode('u8')).translate(_encode_translation)
    encoded = _enc_prefix + payload.decode('u8')
    # Sanity check: the encoded form must itself be a legal table name.
    validate_table_name(encoded)
    return encoded
def _decode_name(name: str):
    """Translate a stored table name back into the original string.

    Names without ``_enc_prefix`` are returned untouched. For prefixed names the
    prefix is dropped, ``_decode_translation`` restores the base64 characters
    ``+/=``, and the result is base64-decoded and then UTF-8 decoded.
    """
    if not name.startswith(_enc_prefix):
        return name
    payload = name[len(_enc_prefix):].encode('u8')
    raw = base64.b64decode(payload.translate(_decode_translation))
    return raw.decode('u8')
def encode_names(names: str|list[str]):
    """Encode one name or a sequence of names with ``_encode_name``.

    A single ``str`` yields a single encoded ``str``; a list or tuple yields a
    list with each element encoded, in the same order.
    """
    if not isinstance(names, str):
        assert isinstance(names, (tuple, list))
        return list(map(_encode_name, names))
    return _encode_name(names)
def decode_names(names: str|list[str]):
    """Decode one name or a sequence of names with ``_decode_name``.

    A single ``str`` yields a single decoded ``str``; a list or tuple yields a
    list with each element decoded, in the same order.
    """
    if not isinstance(names, str):
        assert isinstance(names, (tuple, list))
        return list(map(_decode_name, names))
    return _decode_name(names)
def dt_to_lance_ts_str(t: dt.datetime):
    """Render *t* as a ``to_timestamp('...')`` expression string for use with lancedb.

    The timestamp is formatted as ``YYYY-MM-DD HH:MM:SS.ffffff``. The format
    carries no UTC offset, so any tzinfo on *t* is not reflected in the output.
    """
    stamp = t.strftime("%Y-%m-%d %H:%M:%S.%f")
    return "to_timestamp('" + stamp + "')"
# ----------------------------------------------------------------------------------------------------------------