三维点云数据的哈希快速查找方法

对于三维空间中的浮点坐标点云数据，实现快速查找需要考虑浮点数的精度问题以及空间分布特性。以下是几种有效的哈希查找方法：

1. 网格哈希 (Grid Hashing)

原理：将三维空间划分为均匀的网格，每个点根据坐标分配到对应的网格单元。

示例代码（Python）：

import math

class GridHash:
    """Uniform-grid spatial hash for 3D points.

    Space is divided into axis-aligned cubes whose edge length is
    ``cell_size``; each inserted point is stored in the bucket of the cube
    that contains it.
    """

    def __init__(self, cell_size=0.1):
        # Maps an integer (ix, iy, iz) cell index to the list of points
        # that were inserted into that cell.
        self.hash_table = {}
        self.cell_size = cell_size

    def _get_cell_key(self, point):
        """Return the integer (ix, iy, iz) index of the cell holding ``point``."""
        x, y, z = point
        # math.floor (rather than int()) keeps negative coordinates in their
        # own cells instead of folding them toward zero.
        return tuple(math.floor(coord / self.cell_size) for coord in (x, y, z))

    def insert(self, point):
        """Store ``point`` in the bucket of its cell, creating it on demand."""
        bucket = self.hash_table.setdefault(self._get_cell_key(point), [])
        bucket.append(point)

    def query(self, point):
        """Return the points stored in the same cell as ``point``.

        Only that one cell is inspected. An empty list is returned when the
        cell holds no points.
        """
        cell = self._get_cell_key(point)
        if cell in self.hash_table:
            return self.hash_table[cell]
        return []

2. 空间填充曲线哈希 (Z-order/Morton Code)

原理：使用空间填充曲线将三维坐标映射为一维哈希值。

示例代码（Python）：

def interleave_bits(x, y, z):
    """Interleave the bits of three non-negative integers into a Morton code.

    Bit ``i`` of ``x``, ``y`` and ``z`` is placed at bit ``3*i``, ``3*i + 1``
    and ``3*i + 2`` of the result. Inputs of any magnitude are supported:
    they are processed in 10-bit chunks, so distinct triples always produce
    distinct codes. For inputs below 1024 the result is identical to the
    classic single-pass magic-number implementation.

    Args:
        x, y, z: Non-negative integers.

    Returns:
        The interleaved (Z-order) integer.

    Raises:
        ValueError: If any input is negative. Negative values have no
            finite bit pattern to interleave; map them to non-negative
            integers first.
    """
    if x < 0 or y < 0 or z < 0:
        raise ValueError("interleave_bits requires non-negative integers")

    def spread_chunk(n):
        # Magic-number expansion of a 10-bit value: bit i moves to bit 3*i.
        # Only valid for n < 1024, which the caller guarantees by masking.
        n = (n | (n << 16)) & 0x030000FF
        n = (n | (n << 8)) & 0x0300F00F
        n = (n | (n << 4)) & 0x030C30C3
        n = (n | (n << 2)) & 0x09249249
        return n

    def spread_bits(n):
        # Expand 10 input bits at a time; each chunk occupies 30 output bits.
        spread = 0
        shift = 0
        while n:
            spread |= spread_chunk(n & 0x3FF) << shift
            n >>= 10
            shift += 30
        return spread

    return spread_bits(x) | (spread_bits(y) << 1) | (spread_bits(z) << 2)


class MortonHash:
    """Hash table for 3D points keyed by the Morton code of their grid cell.

    Each coordinate is quantized to an integer cell index of width
    ``precision``; the three indices are then bit-interleaved into a single
    integer key. Points that fall in the same cell share a bucket.
    """

    def __init__(self, precision=1e-6):
        self.precision = precision
        # Maps a Morton key to the list of points inserted into that cell.
        self.hash_table = {}

    def _quantize(self, coord):
        """Return the signed integer cell index that contains ``coord``."""
        # Floor, not int(): truncation toward zero would merge the cells on
        # both sides of the origin into one double-width cell.
        return math.floor(coord / self.precision)

    @staticmethod
    def _to_unsigned(index):
        """Map a signed cell index to a unique non-negative integer (zigzag)."""
        # 0, -1, 1, -2, 2, ... -> 0, 1, 2, 3, 4, ...
        return (index << 1) if index >= 0 else ((-index << 1) - 1)

    def _get_key(self, point):
        """Return the Morton key of the cell containing ``point``."""
        x, y, z = (self._to_unsigned(self._quantize(c)) for c in point)
        return interleave_bits(x, y, z)

    def insert(self, point):
        """Store ``point`` in the bucket of its cell."""
        self.hash_table.setdefault(self._get_key(point), []).append(point)

    def query(self, point):
        """Return the points stored in the same cell as ``point``.

        An empty list is returned when the cell holds no points.
        """
        return self.hash_table.get(self._get_key(point), [])

3. 近似最近邻哈希 (Locality Sensitive Hashing, LSH)

原理：使用随机投影将相近的点映射到相同哈希桶的概率更高。

示例代码（Python）：

import numpy as np

class LSHHash:
    """Locality-sensitive hash index built from random projections.

    ``num_tables`` independent tables are kept. Each table projects a point
    onto ``hash_size`` random directions, adds a random offset and buckets
    the result in steps of ``w``; the resulting integer tuple is the key.
    """

    def __init__(self, dim=3, num_tables=5, hash_size=10, w=4.0):
        self.num_tables = num_tables
        self.hash_size = hash_size
        self.w = w

        # One entry per table: its random projection matrix, its random
        # offsets and the bucket dictionary itself.
        self.hash_tables = []
        for _ in range(num_tables):
            projection = np.random.randn(dim, hash_size)
            bias = np.random.uniform(0, w, size=hash_size)
            self.hash_tables.append(
                {'projection': projection, 'bias': bias, 'table': {}}
            )

    def _hash(self, point, table):
        """Return the bucket key (a tuple of integers) of ``point`` in ``table``."""
        shifted = np.dot(point, table['projection']) + table['bias']
        buckets = np.floor(shifted / self.w).astype(int)
        return tuple(buckets)

    def insert(self, point):
        """Add ``point`` (converted to a NumPy array) to every table."""
        point = np.array(point)
        for table in self.hash_tables:
            bucket = table['table'].setdefault(self._hash(point, table), [])
            bucket.append(point)

    def query(self, point):
        """Return candidate points sharing a bucket with ``point`` in any table.

        Candidates are returned as tuples with duplicates removed; the order
        of the returned list is unspecified.
        """
        point = np.array(point)
        candidates = set()
        for table in self.hash_tables:
            bucket = table['table'].get(self._hash(point, table), ())
            candidates.update(tuple(stored) for stored in bucket)
        return list(candidates)

4. 八叉树 (Octree) 索引

虽然不是严格的哈希表，但八叉树也能提供快速的空间查找：

示例代码（Python）：

class OctreeNode:
    """Node of a point octree supporting insertion and radius queries.

    A leaf stores points directly. Once it holds more than ``capacity``
    points (and is shallower than ``max_depth``) it splits into eight
    children and pushes its points down.

    ``size`` is treated as the half-extent of the node: children are centred
    ``size / 2`` away from ``center`` along every axis and get ``size / 2``
    as their own size.
    """

    def __init__(self, center, size, capacity=10, max_depth=5):
        self.center = center
        self.size = size
        self.capacity = capacity
        self.max_depth = max_depth
        self.points = []
        # None while this node is a leaf; a list of 8 children after a split.
        self.children = None

    def insert(self, point, depth=0):
        """Insert ``point`` into the subtree rooted at this node.

        Args:
            point: Sequence of three coordinates.
            depth: Depth of this node in the tree (0 for the root).
        """
        if self.children is not None:
            child = self.children[self._get_child_index(point)]
            child.insert(point, depth + 1)
            return
        self.points.append(point)
        if len(self.points) > self.capacity and depth < self.max_depth:
            self._split(depth)

    def _get_child_index(self, point):
        """Return the octant index (0-7) of ``point`` relative to the centre.

        Bit 0/1/2 is set when the x/y/z coordinate is strictly greater than
        the centre; coordinates equal to the centre go to the low side.
        """
        x, y, z = point
        cx, cy, cz = self.center
        index = 0
        if x > cx:
            index |= 1
        if y > cy:
            index |= 2
        if z > cz:
            index |= 4
        return index

    def _split(self, depth=0):
        """Turn this leaf into an inner node and redistribute its points.

        Args:
            depth: Depth of this node. Children receive the points at
                ``depth + 1`` so that ``max_depth`` stays enforced; without
                it, more than ``capacity`` coincident points would split
                forever.
        """
        child_size = self.size / 2
        self.children = []
        for i in range(8):
            child_center = [
                self.center[axis] + (child_size if i & (1 << axis) else -child_size)
                for axis in range(3)
            ]
            self.children.append(
                OctreeNode(child_center, child_size, self.capacity, self.max_depth)
            )

        pending, self.points = self.points, []
        for point in pending:
            child = self.children[self._get_child_index(point)]
            child.insert(point, depth + 1)

    def _check_overlap(self, child_index, point, radius):
        """Return True if the query sphere can reach the given child octant.

        The child is treated as the whole octant on its side of this node's
        centre planes, not as a bounded cube, because ``insert`` routes
        points purely by comparison with the centre and may therefore store
        points that lie outside the nominal cube.
        """
        gap_sq = 0.0
        for axis in range(3):
            delta = point[axis] - self.center[axis]
            if child_index & (1 << axis):
                # Child holds coordinates above the centre plane.
                gap = -delta if delta < 0 else 0.0
            else:
                # Child holds coordinates at or below the centre plane.
                gap = delta if delta > 0 else 0.0
            gap_sq += gap * gap
        return gap_sq <= radius * radius

    def query(self, point, radius):
        """Return all stored points within ``radius`` of ``point``.

        Results from the octant containing ``point`` come first, followed by
        those from every other octant the query sphere can reach.
        """
        if self.children is None:
            target = np.array(point)
            return [
                p for p in self.points
                if np.linalg.norm(np.array(p) - target) <= radius
            ]

        home = self._get_child_index(point)
        results = list(self.children[home].query(point, radius))
        for i, child in enumerate(self.children):
            if i != home and self._check_overlap(i, point, radius):
                results.extend(child.query(point, radius))
        return results

选择建议

网格哈希：最简单直接，适合均匀分布的点云；注意 query 只返回与查询点位于同一网格单元内的点，若要做邻域搜索，还需同时检查周围相邻的 26 个单元
Morton哈希：对空间局部性有更好保留，适合范围查询
LSH：适合高维数据和近似最近邻搜索
八叉树：适合非均匀分布的点云，动态更新效率高

对于浮点坐标，关键是要处理好精度问题，通常需要将浮点数量化到适当的精度后再进行哈希。