java udf 实现经纬度匹配pg数据库public.geometry地理位置

背景:

数仓中有经纬度数据,需要匹配城市编码,但相关数据存放在 GP(Greenplum)数据库中,并且存储在该 PG 系数据库的 public.geometry 类型字段里,无法直接导入 DataWorks 使用。

因此先做数据清洗,再在数仓内完成匹配,

使用 H3和空间网格两种方式实现

1. 将 pg 数据库中的 geometry 转成 Base64 编码的二进制(WKB)字符串

复制代码
-- Export every row of public.base_geography with its geometry serialised as
-- Base64-encoded WKB text, so it can be loaded into the warehouse as a string.
-- encode(..., 'base64') breaks its output into 76-character lines; the line
-- feeds (chr(10)) are stripped so each exported value stays on a single line.
SELECT
    code,
    name,
    full_name,
    replace(encode(ST_AsBinary(geom), 'base64'), chr(10), '') AS geom_base64
FROM
    public.base_geography;

2.数据入仓

复制代码
-- Overwrite dwd.test with a single sample boundary row used by the examples
-- that follow. The last value is the Base64 text of the geometry's WKB; the
-- literal spans multiple lines, so it contains embedded line breaks.
-- NOTE(review): values are positional — presumably (ad_code, name, full_name,
-- geom_base64), going by the columns referenced later; confirm against the
-- dwd.test table definition.
insert overwrite table dwd.test
select '230307001','麻山街道','黑龙江省鸡西市麻山区麻山街道',
"AQYAAAABAAAAAQMAAAABAAAAbgAAAN1c/G1PUGBAOiF00CWcRkAGTODWXVBgQHbfMTz2m0ZAD52e
d2NQYECKyRtg5ptGQBnJHqFmUGBACrsoeuCbRkAYldQJaFBgQDMxXYjVm0ZAGVbxRmZQYEAomgew
yJtGQCQp6WFoUGBAEolCy7qbRkAXoG01a1BgQIF38umxm0ZAOQ68Wm5QYEAr3zMSoZtGQDblCu9y
UGBAM0+uKZCbRkBWRE30eVBgQPeuQV96m0ZAU82spYBQYED2DOGYZZtGQGfXvRWJUGBAj3HFxVGb
RkB15bM8j1BgQKjknNhDm0ZAdPBMaJJQYEBmu0IfLJtGQIjVH2GYUGBA8wNXeQKbRkCxxAPKplBg
QPVIg9vamkZA4JwRpb1QYEAIkQw5tppGQADICRPGUGBAAOFDiZaaRkDzy2CMyFBgQIyGjEepmkZA
8rG7QMlQYEBAaD18mZpGQN7M6EfDUGBAY9AJoYOaRkDBi76CtFBgQLEWnwJgmkZAyk+qfbpQYECk
+s4vSppGQN5Zu+3CUGBA0Jfe/lyaRkDoa5bLxlBgQCcwndZtmkZA8gpET8pQYEBoPBHEeZpGQPsC
euHOUGBAdNNmnIaaRkD52ch101BgQCBig4WTmkZAGcbdINpQYECr6uV3mppGQGpq2VrfUGBAqI/A
H36aRkD3zf3V41BgQG06ArhZmkZAbeS6KeVQYECMahFRTJpGQExUbw3sUGBASN+kaVCaRkBaLhud
81BgQNkkP+JXmkZA2XvxRftQYECkNJvHYZpGQARVo1cDUWBAOGdEaW+aRkCVgJiEC1FgQNJWJZF9
mkZAN+M0RBVRYEBwz/OnjZpGQMo329wYUWBADcNHxJSaRkBPdjOjH1FgQLYwC+2cmkZA521sdiRR
YEAU0a+tn5pGQG6hKxEoUWBADhR4J5+aRkAQroBCPVFgQIy61t6nmkZARKSmXUxRYEDxnC0gtJpG
QML8FTJXUWBAGt8Xl6qaRkB+Ab1wZ1FgQAfvq3KhmkZAufqxSX5RYEA17s1vmJpGQNv4E5WNUWBA
WcSww5iaRkBhcTjzq1FgQPoMqDejmkZAkX2QZcFRYEAlWYejq5pGQEFHq1rSUWBAbw9CQL6aRkAE
5Euo4FFgQP8gkiHHmkZA/pyC/OxRYEA7NgLxuppGQBU7Gof6UWBARdrGn6iaRkBUxVT6CVJgQP/m
xYmvmkZA6UXtfhVSYEAiOC7jpppGQADkhAkjUmBAIoleRrGaRkBgqwSLQ1JgQIkI/yJomkZA5Ga4
AR9SYEApsACmDJpGQAwepn3zUWBAs1w2OueZRkCg/rPmx1FgQMDQI0bPmUZAIsK/CJpRYEAGD9O+
uZlGQBzO/GqOUWBApTDvcaaZRkB/Tdaoh1FgQEAxsmSOmUZA9gfKbXtRYEC4BOCfUplGQGqHvyZr
UWBAumbyzTaZRkBM++b+alFgQGCt2jUhmUZAO8Q/bGlRYEDmllZD4phGQDY7Un1nUWBAj1a1pKOY
RkA8+IkDaFFgQO52vTRFmEZA4UIewQ1RYEAVqpuLv5dGQIdT5uabUGBAlIYahSSXRkCZ1qaxPVBg
QBMNUvAUlkZAiJy+ni9QYEBQcodNZJZGQKeTbHU5UGBA34eDhCiXRkB1sWmlkFBgQAspP6n2l0ZA
ahK8IY1QYEALz0vFxphGQNVcbjBUUGBAQ+bKoNqYRkAHmPkO/k9gQDpY/+cwmUZAkQ2ki81PYEAc
l3FTA5lGQOmBj8GKT2BAuECC4seYRkBTspyEUk9gQEewcf27mEZALjpZaj1PYEDiHeBJC5lGQPq2
YKkuT2BAxSCwcmiZRkDoFORnI09gQPD9DdqrmUZA1ArT9xpPYED61LFK6ZlGQMEAwocST2BANDDy
siaaRkCiCRSxCE9gQNUmTu53mkZAiX5t/fROYEDxhF5/EptGQB2vQPQkT2BAd4NorWibRkDFVPoJ
Z09gQMmvH2KDm0ZAEwoRcIhPYECvmXyzzZtGQIEExY+xT2BABI9v7xqcRkCdK0oJwU9gQObN4Vrt
m0ZATUusjMZPYEBJZYo5CJxGQKxwy0fST2BAl8eakUGcRkD7c9GQ8U9gQG8RGOsbnEZALo81IwNQ
YEBp5V5gVpxGQERpb/AFUGBA2NKjqZ6cRkBPIsK/CFBgQJDaxMn9nEZAVG8NbBVQYEByGMxfIZ1G
QD9wlScQUGBAdT+nID+dRkBc5QmEHVBgQJ6Xio15nUZAfR8OEiJQYECBeF2/YJ1GQI5zm3AvUGBA
z77yID2dRkCXkA96NlBgQJlKP+HsnEZAtqFinD9QYEBsy4CzlJxGQN1c/G1PUGBAOiF00CWcRkDd
XPxtT1BgQDohdNAlnEZA"

3.udf编写

方式一:空间网格方式

PolygonToRowGrids 网格生成主键 udtf

复制代码
import com.aliyun.odps.udf.ExecutionContext;
import com.aliyun.odps.udf.UDFException;
import com.aliyun.odps.udf.UDTF;
import com.aliyun.odps.udf.annotation.Resolve;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.io.WKBReader;
import org.apache.commons.codec.binary.Base64;
import com.aliyun.odps.data.Binary;


@Resolve("string,string -> string,string,binary,double,double,double,double")
public class PolygonToRowGrids extends UDTF {

    private WKBReader wkbReader = new WKBReader();
    private double gridSize = 0.1; // 可调整

    @Override
    public void setup(ExecutionContext ctx) {}

    @Override
    public void process(Object[] args) throws UDFException {
        String geomBase64 = (String) args[0];
        String spatialId = (String) args[1];
        if (geomBase64 == null || spatialId == null) return;

        try {
            byte[] wkbBytes = Base64.decodeBase64(geomBase64);
            Geometry geom = wkbReader.read(wkbBytes);

            double minX = geom.getEnvelopeInternal().getMinX();
            double maxX = geom.getEnvelopeInternal().getMaxX();
            double minY = geom.getEnvelopeInternal().getMinY();
            double maxY = geom.getEnvelopeInternal().getMaxY();

            int minCol = (int) Math.floor((minX + 180) / gridSize);
            int maxCol = (int) Math.floor((maxX + 180) / gridSize);
            int minRow = (int) Math.floor((minY + 90) / gridSize);
            int maxRow = (int) Math.floor((maxY + 90) / gridSize);

            for (int row = minRow; row <= maxRow; row++) {
                for (int col = minCol; col <= maxCol; col++) {
                    // 关键修改:使用整数行列拼接,确保唯一性
                    String gridId = row + "_" + col;
                    forward(gridId, spatialId, new Binary(wkbBytes), minX, maxX, minY, maxY);
                }
            }
        } catch (Exception e) {
            throw new UDFException("Error processing polygon: " + e.getMessage());
        }
    }

    @Override
    public void close() {}
}

PointToRowGrid 经纬度生成主键 udf

复制代码
import com.aliyun.odps.udf.UDF;

/**
 * UDF that maps a longitude/latitude pair onto a fixed-size grid and returns
 * the key of the cell as "row_col".
 */
public class PointToRowGrid extends UDF {
    // Cell edge length used to bucket the shifted coordinates.
    private double gridSize = 0.1;

    /**
     * @param lon longitude of the point
     * @param lat latitude of the point
     * @return the cell key "row_col", or null when either argument is null
     */
    public String evaluate(Double lon, Double lat) {
        if (lon != null && lat != null) {
            // Shift into a non-negative range, then bucket by gridSize.
            final int rowIndex = (int) Math.floor((lat + 90) / gridSize);
            final int colIndex = (int) Math.floor((lon + 180) / gridSize);
            return new StringBuilder()
                    .append(rowIndex)
                    .append('_')
                    .append(colIndex)
                    .toString();
        }
        return null;
    }
}

上传部署

复制代码
-- Upload the jar
ADD JAR target/geometry-1.0-SNAPSHOT-jar-with-dependencies.jar;

-- Create the functions
-- NOTE(review): the UDTF package is written in lowercase ('com.udtf') to match
-- the polygon_to_h3 registration; confirm against the package declaration of
-- the Java sources, since class names are case-sensitive.
CREATE FUNCTION polygon_to_rowgrids AS 'com.udtf.PolygonToRowGrids' USING 'geometry-1.0-SNAPSHOT-jar-with-dependencies.jar';

CREATE FUNCTION point_to_rowgrid AS 'com.udf.PointToRowGrid' USING 'geometry-1.0-SNAPSHOT-jar-with-dependencies.jar';

测试

复制代码
-- Expand each boundary in dwd.test into the grid cells covering its envelope.
-- ad_code is passed as STRING because polygon_to_rowgrids is declared with a
-- (string, string) signature; a BIGINT cast would also drop leading zeros.
SELECT
    a.*,
    t.grid_id,
    t.spatial_id,
    t.geom_wkb,
    t.min_x,
    t.max_x,
    t.min_y,
    t.max_y
FROM dwd.test a
LATERAL VIEW polygon_to_rowgrids(a.geom_base64, CAST(a.ad_code AS STRING)) t
    AS grid_id, spatial_id, geom_wkb, min_x, max_x, min_y, max_y;

-- Grid cell key of a single sample point (lon, lat).
SELECT point_to_rowgrid(130.511909, 45.20249) AS grid_id;

方式二:使用H3方式

PolygonToH3 H3生成主键 udtf

复制代码
import com.aliyun.odps.data.Binary;
import com.aliyun.odps.udf.ExecutionContext;
import com.aliyun.odps.udf.UDFException;
import com.aliyun.odps.udf.UDTF;
import com.aliyun.odps.udf.annotation.Resolve;
import com.uber.h3core.H3Core;
import com.uber.h3core.util.LatLng;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.Polygon;
import org.locationtech.jts.geom.MultiPolygon;
import org.locationtech.jts.io.WKBReader;
import org.apache.commons.codec.binary.Base64;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

@Resolve("string,string -> string,string,binary,double,double,double,double")
public class PolygonToH3 extends UDTF {
    private WKBReader wkbReader = new WKBReader();
    private H3Core h3;
    private int resolution = 8; // H3分辨率

    @Override
    public void setup(ExecutionContext ctx) throws UDFException {
        try {
            h3 = H3Core.newInstance();
        } catch (IOException e) {
            throw new UDFException("Failed to init H3"+e.getMessage());
        }
    }

    @Override
    public void process(Object[] args) throws UDFException {
        String geomBase64 = (String) args[0];
        String spatialId = (String) args[1];

        if (geomBase64 == null || spatialId == null) return;

        try {
            byte[] wkbBytes = Base64.decodeBase64(geomBase64);
            Geometry geom = wkbReader.read(wkbBytes);

            double minX = geom.getEnvelopeInternal().getMinX();
            double maxX = geom.getEnvelopeInternal().getMaxX();
            double minY = geom.getEnvelopeInternal().getMinY();
            double maxY = geom.getEnvelopeInternal().getMaxY();

            List<LatLng> outerCoords = extractOuterCoords(geom);
            if (outerCoords.isEmpty()) return;

            // 使用返回字符串的方法
            List<String> cells = h3.polygonToCellAddresses(outerCoords, null, resolution);

            for (String cell : cells) {
                forward(cell, spatialId, new Binary(wkbBytes), minX, maxX, minY, maxY);
            }
        } catch (Exception e) {
            throw new UDFException("Error processing polygon: " + e.getMessage());
        }
    }

    private List<LatLng> extractOuterCoords(Geometry geom) {
        List<LatLng> outerCoords = new ArrayList<>();
        Polygon polygon = null;

        if (geom instanceof Polygon) {
            polygon = (Polygon) geom;
        } else if (geom instanceof MultiPolygon) {
            polygon = (Polygon) ((MultiPolygon) geom).getGeometryN(0);
        } else {
            return outerCoords;
        }

        org.locationtech.jts.geom.Coordinate[] coords = polygon.getExteriorRing().getCoordinates();
        for (org.locationtech.jts.geom.Coordinate c : coords) {
            outerCoords.add(new LatLng(c.y, c.x));
        }
        return outerCoords;
    }

    @Override
    public void close() {}
}

PointToH3 经纬度生成主键

复制代码
import com.aliyun.odps.udf.UDF;
import com.uber.h3core.H3Core;
import java.io.IOException;
import com.aliyun.odps.udf.UDFException;


/**
 * UDF that returns the address of the H3 cell containing a
 * longitude/latitude point.
 */
public class PointToH3 extends UDF {
    private H3Core h3;
    // Must equal the resolution used when polygons are converted to cells,
    // otherwise the resulting cell addresses cannot be joined.
    private int resolution = 8;

    /**
     * Creates the H3 instance used by evaluate().
     *
     * @throws UDFException if the H3 library cannot be initialised; the
     *                      IOException is kept as the cause
     */
    @Override
    public void setup(com.aliyun.odps.udf.ExecutionContext ctx) throws UDFException {
        try {
            h3 = H3Core.newInstance();
        } catch (IOException e) {
            UDFException wrapped = new UDFException("Failed to init H3: " + e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * @param lon longitude of the point
     * @param lat latitude of the point
     * @return the H3 cell address at the configured resolution, or null when
     *         either argument is null
     */
    public String evaluate(Double lon, Double lat) {
        if (lon == null || lat == null) return null;
        // H3 takes latitude first, then longitude.
        return h3.latLngToCellAddress(lat, lon, resolution);
    }
}

上传部署

-- Upload the jar that bundles the H3 UDTF/UDF classes.
ADD JAR /target/geometry-1.0-SNAPSHOT-jar-with-dependencies.jar;

-- Register the polygon-to-cells UDTF.
CREATE FUNCTION polygon_to_h3
    AS 'com.udtf.PolygonToH3'
    USING 'geometry-1.0-SNAPSHOT-jar-with-dependencies.jar';

-- Register the point-to-cell UDF.
CREATE FUNCTION point_to_h3
    AS 'com.udf.PointToH3'
    USING 'geometry-1.0-SNAPSHOT-jar-with-dependencies.jar';

4.udf实现经纬度包含

直接替代pg库的功能

-- PostGIS predicate that the UDF below stands in for: tests whether geom
-- contains the point built from x = 116.4074, y = 39.9042 with SRID 4490.
ST_Contains(geom,ST_SetSRID(ST_MakePoint(116.4074, 39.9042), 4490))

复制代码
/**
 * Tests whether a point lies inside a geometry (the geometry is received as a Binary value holding BINARY data).
 */
import com.aliyun.odps.data.Binary;
import com.aliyun.odps.udf.UDF;
import org.apache.commons.codec.binary.Base64;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.geom.Point;
import org.locationtech.jts.io.ParseException;
import org.locationtech.jts.io.WKBReader;

/**
 * 判断一个点(经度,纬度)是否在给定的几何图形内部。
 * 支持两种输入方式:
 * 1. Binary 类型:直接传入 WKB 字节数组(对应 MaxCompute BINARY 列)
 * 2. String 类型:传入 Base64 编码的 WKB 字符串(对应从 Greenplum 导出的 geom_base64 列)
 */
public class STContainsPointBinary extends UDF {

    private final WKBReader wkbReader = new WKBReader();
    private final GeometryFactory geometryFactory = new GeometryFactory();

    /**
     * 处理 BINARY 类型输入(优化方案)
     *
     * @param geomWkb 几何图形的 WKB 字节数组(Binary 包装类)
     * @param lon     经度
     * @param lat     纬度
     * @return true: 点在图形内;false: 点不在图形内;null: 输入无效或解析失败
     */
    public Boolean evaluate(Binary geomWkb, Double lon, Double lat) {
        if (geomWkb == null || lon == null || lat == null) {
            return null;
        }
        try {
            // 从 Binary 对象中获取原始字节数组
            byte[] wkbBytes = geomWkb.data();
            return evaluateInternal(wkbBytes, lon, lat);
        } catch (Exception e) {
            // 记录日志或直接返回 null(根据业务需求)
            return null;
        }
    }

    /**
     * 处理 STRING 类型输入(Base64 编码的 WKB,兼容原始导入数据)
     *
     * @param geomBase64 Base64 编码的 WKB 字符串
     * @param lon        经度
     * @param lat        纬度
     * @return true: 点在图形内;false: 点不在图形内;null: 输入无效或解析失败
     */
    /*public Boolean evaluate(String geomBase64, Double lon, Double lat) {
        if (geomBase64 == null || lon == null || lat == null) {
            return null;
        }
        try {
            // Base64 解码
            byte[] wkbBytes = Base64.decodeBase64(geomBase64);
            return evaluateInternal(wkbBytes, lon, lat);
        } catch (Exception e) {
            return null;
        }
    }*/

    /**
     * 内部方法:使用 JTS 进行几何包含判断
     *
     * @param wkbBytes WKB 字节数组
     * @param lon      经度
     * @param lat      纬度
     * @return 布尔值或 null
     */
    private Boolean evaluateInternal(byte[] wkbBytes, Double lon, Double lat) {
        try {
            // 解析 WKB 为 JTS Geometry 对象
            Geometry targetGeom = wkbReader.read(wkbBytes);

            // 创建点
            Point point = geometryFactory.createPoint(new org.locationtech.jts.geom.Coordinate(lon, lat));

            // 注意:此处假设目标几何图形与点使用相同的坐标系(均为地理坐标系,例如 4490)。
            // 如果需要严格的坐标系转换,需引入 GeoTools 等库,但会增加 UDF 复杂度。
            // 大多数场景下,只要数据源统一,平面包含判断即可满足需求。

            return targetGeom.contains(point);
        } catch (ParseException e) {
            // WKB 解析失败,返回 null 表示无效数据
            return null;
        }
    }

上传部署

-- Register the point-in-geometry UDF.
CREATE FUNCTION st_contains_point_binary
    AS 'com.udf.STContainsPointBinary'
    USING 'geometry-1.0-SNAPSHOT-jar-with-dependencies.jar';

5.H3方式测试

复制代码
-- H3 round trip: expand each boundary into H3 cells, compute the sample
-- point's cell, and run the exact point-in-geometry check on matching rows.
WITH polygon_cells AS (
    -- One row per (boundary, H3 cell). ad_code is passed as STRING because
    -- polygon_to_h3 is declared with a (string, string) signature; a BIGINT
    -- cast would also drop leading zeros.
    SELECT
        a.*,
        t.grid_id,
        t.spatial_id,
        t.geom_wkb,
        t.min_x,
        t.max_x,
        t.min_y,
        t.max_y
    FROM dwd.test a
    LATERAL VIEW polygon_to_h3(a.geom_base64, CAST(a.ad_code AS STRING)) t
        AS grid_id, spatial_id, geom_wkb, min_x, max_x, min_y, max_y
),

sample_point AS (
    -- lon/lat are typed as DOUBLE to match the UDF parameters instead of
    -- relying on implicit string conversion.
    SELECT
        CAST(130.511909 AS DOUBLE) AS lon,
        CAST(45.20249 AS DOUBLE)   AS lat,
        point_to_h3(130.511909, 45.20249) AS grid_id
)

-- LEFT JOIN keeps every cell; rows whose cell differs from the sample point's
-- cell get NULL lon/lat and therefore a NULL is_contained.
SELECT
    c.*,
    st_contains_point_binary(c.geom_wkb, p.lon, p.lat) AS is_contained
FROM polygon_cells c
LEFT JOIN sample_point p
    ON c.grid_id = p.grid_id;

相关推荐
@insist1232 小时前
软件设计师-数据库核心:事务 ACID 特性、并发控制与备份恢复技术全解
数据库·oracle·软考·软件设计师·软件水平考试
正在走向自律2 小时前
Oracle替换工程实践深度解析——从技术落地到成本优化的全维度攻坚
数据库·oracle·kingbasees·数据库替换
杨云龙UP2 小时前
Oracle DG / ADG日常巡检操作指南
linux·运维·服务器·数据库·ubuntu·oracle
执笔画流年呀2 小时前
简单使用MySQL
数据库·mysql·oracle
qq_334903152 小时前
Python单元测试(unittest)实战指南
jvm·数据库·python
marsh02062 小时前
13 openclaw数据验证与过滤:确保应用安全性的第一道防线
网络·数据库·ai·编程·技术
JavaGuide2 小时前
美团面试:为什么要用分布式缓存?本地缓存呢?多级缓存一致性如何保证?
数据库·redis·后端·缓存·大厂面试
一个有温度的技术博主2 小时前
Redis系列四:redis的启动配置
数据库·redis·缓存
小尔¥2 小时前
MySQL数据库认知与安装
运维·数据库·mysql