三维点云数据的哈希快速查找方法
对于三维空间中的浮点坐标点云数据,实现快速查找需要考虑浮点数的精度问题以及空间分布特性。以下是几种有效的哈希查找方法:
1. 网格哈希 (Grid Hashing)
原理:将三维空间划分为均匀的网格,每个点根据坐标分配到对应的网格单元。
Python 实现示例:
import math
class GridHash:
    """Uniform-grid spatial hash for 3-D points.

    Space is divided into axis-aligned cubic cells with edge length
    ``cell_size``. Each inserted point is stored in the bucket of the cell
    that contains it, keyed by the cell's integer ``(ix, iy, iz)`` index.
    """

    def __init__(self, cell_size=0.1):
        """Create an empty grid.

        Args:
            cell_size: Edge length of one grid cell; must be positive.

        Raises:
            ValueError: If ``cell_size`` is not a positive number.
        """
        # Reject 0, negatives and NaN up front; otherwise a zero size would
        # only surface later as a ZeroDivisionError on the first insert.
        if not cell_size > 0:
            raise ValueError("cell_size must be a positive number")
        self.cell_size = cell_size
        self.hash_table = {}

    def _get_cell_key(self, point):
        """Return the integer ``(ix, iy, iz)`` index of the cell holding *point*."""
        x, y, z = point
        size = self.cell_size
        # floor (not int) so negative coordinates get their own cells
        # instead of being folded into cell 0.
        return (
            math.floor(x / size),
            math.floor(y / size),
            math.floor(z / size),
        )

    def insert(self, point):
        """Store *point* (an ``(x, y, z)`` sequence) in its cell's bucket."""
        self.hash_table.setdefault(self._get_cell_key(point), []).append(point)

    def query(self, point):
        """Return the points stored in the same cell as *point*.

        Only the single cell containing *point* is inspected; points in
        adjacent cells are not returned. The result is a new list, so
        mutating it does not affect the index.
        """
        return list(self.hash_table.get(self._get_cell_key(point), ()))
2. 空间填充曲线哈希 (Z-order/Morton Code)
原理:使用空间填充曲线将三维坐标映射为一维哈希值。
Python 实现示例:
def interleave_bits(x, y, z):
    """Interleave the bits of three non-negative integers into a Morton code.

    Bit ``i`` of ``x`` ends up at bit ``3 * i`` of the result, bit ``i`` of
    ``y`` at ``3 * i + 1`` and bit ``i`` of ``z`` at ``3 * i + 2``.

    Args:
        x, y, z: Non-negative integers of any bit width.

    Returns:
        The interleaved integer key.

    Raises:
        ValueError: If any coordinate is negative.
    """
    if x < 0 or y < 0 or z < 0:
        # A negative int has no finite bit pattern to interleave; the mask
        # trick would silently alias it onto a positive value.
        raise ValueError("interleave_bits requires non-negative integers")

    def spread_10_bits(n):
        # Insert two zero bits between each of the low 10 bits of n.
        n = (n | (n << 16)) & 0x030000FF
        n = (n | (n << 8)) & 0x0300F00F
        n = (n | (n << 4)) & 0x030C30C3
        n = (n | (n << 2)) & 0x09249249
        return n

    def spread_bits(n):
        # The mask sequence above is only valid for 10-bit inputs, so wider
        # values are processed 10 bits at a time; each chunk occupies 30
        # bits of the spread result.
        spread = 0
        shift = 0
        while n:
            spread |= spread_10_bits(n & 0x3FF) << shift
            n >>= 10
            shift += 30
        return spread

    return spread_bits(x) | (spread_bits(y) << 1) | (spread_bits(z) << 2)
class MortonHash:
    """Hash table for 3-D points keyed by a Morton (Z-order) code.

    Each coordinate is quantized to an integer cell index of width
    ``precision``; the three indices are then combined into one integer
    key with ``interleave_bits``. Points sharing a key share a bucket.
    """

    def __init__(self, precision=1e-6):
        """Create an empty table.

        Args:
            precision: Width of one quantization cell; must be positive.

        Raises:
            ValueError: If ``precision`` is not a positive number.
        """
        if not precision > 0:
            raise ValueError("precision must be a positive number")
        self.precision = precision
        self.hash_table = {}

    def _quantize(self, coord):
        """Return the signed integer cell index of a single coordinate."""
        # floor rather than int(): truncation toward zero would merge the
        # cells on both sides of 0 into one double-width cell.
        return math.floor(coord / self.precision)

    @staticmethod
    def _to_unsigned(index):
        """Map a signed cell index to a unique non-negative integer.

        Zigzag encoding: 0, -1, 1, -2, 2, ... -> 0, 1, 2, 3, 4, ...
        Bit interleaving is only meaningful for non-negative values.
        """
        return index << 1 if index >= 0 else (-index << 1) - 1

    def _morton_key(self, point):
        """Return the bucket key for an ``(x, y, z)`` point."""
        x, y, z = (self._to_unsigned(self._quantize(c)) for c in point)
        return interleave_bits(x, y, z)

    def insert(self, point):
        """Store *point* in the bucket of its quantization cell."""
        self.hash_table.setdefault(self._morton_key(point), []).append(point)

    def query(self, point):
        """Return the points stored under the same key as *point*.

        The result is a new list; mutating it does not affect the table.
        """
        return list(self.hash_table.get(self._morton_key(point), ()))
3. 近似最近邻哈希 (Locality Sensitive Hashing, LSH)
原理:使用随机投影,使相近的点以更高的概率被映射到相同的哈希桶。
Python 实现示例:
import numpy as np
class LSHHash:
    """Random-projection locality-sensitive hash over several tables.

    Every table owns a random projection matrix and a random bias. A point
    is hashed by projecting it, adding the bias, dividing by the bucket
    width ``w`` and flooring, which yields one integer per hash component.
    """

    def __init__(self, dim=3, num_tables=5, hash_size=10, w=4.0, seed=None):
        """Create the tables and draw their random projections.

        Args:
            dim: Number of coordinates per point.
            num_tables: Number of independent hash tables.
            hash_size: Number of projections (key components) per table.
            w: Bucket width applied to each projection; must be positive.
            seed: Optional seed for a private ``numpy.random.RandomState``.
                When ``None`` the global NumPy random state is used.

        Raises:
            ValueError: If ``w`` is not a positive number.
        """
        if not w > 0:
            raise ValueError("w must be a positive number")
        self.num_tables = num_tables
        self.hash_size = hash_size
        self.w = w
        # Both the numpy.random module and a RandomState expose randn/uniform,
        # so the default path keeps drawing from the global state.
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Draw one random projection matrix and bias vector per table.
        self.hash_tables = []
        for _ in range(num_tables):
            self.hash_tables.append({
                'projection': rng.randn(dim, hash_size),
                'bias': rng.uniform(0, w, size=hash_size),
                'table': {},
            })

    def _hash(self, point, table):
        """Return the bucket key (a tuple of integers) of *point* in *table*."""
        projections = np.dot(point, table['projection']) + table['bias']
        return tuple(np.floor(projections / self.w).astype(int))

    def insert(self, point):
        """Add *point* to its bucket in every table (stored as an array copy)."""
        point = np.array(point)
        for table in self.hash_tables:
            table['table'].setdefault(self._hash(point, table), []).append(point)

    def query(self, point):
        """Return candidate neighbours of *point*.

        A stored point is a candidate if it shares a bucket with *point* in
        at least one table. Candidates are returned as tuples, each distinct
        point once, in no particular order.
        """
        point = np.array(point)
        results = set()
        for table in self.hash_tables:
            bucket = table['table'].get(self._hash(point, table), ())
            results.update(tuple(p) for p in bucket)
        return list(results)
4. 八叉树 (Octree) 索引
虽然不是严格的哈希表,但八叉树也能提供快速的空间查找:
Python 实现示例:
class OctreeNode:
    """Node of a point octree supporting insertion and radius queries.

    A leaf stores points directly. Once a leaf holds more than ``capacity``
    points (and is shallower than ``max_depth``) it splits into eight
    children, one per octant around ``center``. Points are routed purely by
    comparing against ``center``; they are not checked against the cube's
    bounds, so each child effectively owns a whole octant.
    """

    def __init__(self, center, size, capacity=10, max_depth=5):
        """Create an empty leaf.

        Args:
            center: ``(x, y, z)`` centre of the node.
            size: Treated as the cube's half-extent: children are centred
                ``size / 2`` away from ``center`` and get ``size / 2`` as
                their own size.
            capacity: Points a leaf may hold before it tries to split.
            max_depth: Depth at which leaves stop splitting.
        """
        self.center = center
        self.size = size
        self.capacity = capacity
        self.max_depth = max_depth
        self.points = []
        self.children = None

    def insert(self, point, depth=0):
        """Insert *point*; ``depth`` is this node's depth (0 for the root)."""
        if self.children is not None:
            child = self.children[self._get_child_index(point)]
            child.insert(point, depth + 1)
            return
        self.points.append(point)
        if len(self.points) > self.capacity and depth < self.max_depth:
            self._split(depth)

    def _get_child_index(self, point):
        """Return the octant index (0-7) of *point*.

        Bit 0 is set when x > center x, bit 1 for y, bit 2 for z.
        """
        index = 0
        for axis, (coord, mid) in enumerate(zip(point, self.center)):
            if coord > mid:
                index |= 1 << axis
        return index

    def _split(self, depth=0):
        """Turn this leaf (located at *depth*) into an internal node."""
        half = self.size / 2
        self.children = []
        for i in range(8):
            child_center = [
                mid + (half if (i >> axis) & 1 else -half)
                for axis, mid in enumerate(self.center)
            ]
            self.children.append(
                OctreeNode(child_center, half, self.capacity, self.max_depth)
            )
        pending, self.points = self.points, []
        for point in pending:
            # Pass the real child depth: restarting at 0 would disable the
            # max_depth limit and recurse without bound on duplicate points.
            child = self.children[self._get_child_index(point)]
            child.insert(point, depth + 1)

    def _check_overlap(self, child_index, point, radius):
        """Return True if the sphere around *point* can reach child *child_index*.

        The child is treated as the octant it is routed by. Along every axis
        where *point* lies on the other side of ``center``, the gap to the
        splitting plane contributes to the distance.
        """
        dist_sq = 0.0
        for axis, (coord, mid) in enumerate(zip(point, self.center)):
            child_is_high = bool((child_index >> axis) & 1)
            if (coord > mid) != child_is_high:
                gap = coord - mid
                dist_sq += gap * gap
        return dist_sq <= radius * radius

    def query(self, point, radius):
        """Return all stored points within *radius* of *point* (inclusive)."""
        if self.children is None:
            target = np.array(point)
            return [
                p for p in self.points
                if np.linalg.norm(np.array(p) - target) <= radius
            ]
        own = self._get_child_index(point)
        results = list(self.children[own].query(point, radius))
        # Also visit every other octant that the query sphere reaches.
        for i, child in enumerate(self.children):
            if i != own and self._check_overlap(i, point, radius):
                results.extend(child.query(point, radius))
        return results
选择建议
- 网格哈希:最简单直接,适合均匀分布的点云
- Morton哈希:对空间局部性有更好保留,适合范围查询(需要将键按顺序存储才能利用这一特性;上文基于字典的实现只返回与查询点落在同一量化单元内的点)
- LSH:适合高维数据和近似最近邻搜索
- 八叉树:适合非均匀分布的点云,动态更新效率高
对于浮点坐标,关键是要处理好精度问题,通常需要将浮点数量化到适当的精度后再进行哈希。