nc数据补值

nc数据补值

我的NC数据的右上角缺失值,准备使用IDW插值进行补充。上图为补值前,下图为补值后:

整体逻辑为:

代码:

python 复制代码
import numpy as np
from netCDF4 import Dataset
from scipy.spatial import cKDTree
import os

def fill_missing_idw(variable, lat, lon, power=2, max_neighbors=8):
    """
    使用IDW方法填补二维数组中的缺失值。
    
    参数:
    - variable: 2D numpy数组,可能包含缺失值 (masked array)
    - lat, lon: 2D numpy数组,与variable形状相同的纬度和经度
    - power: IDW的幂参数
    - max_neighbors: 使用的最大邻居数量
    
    返回:
    - filled_variable: 填补后的二维numpy数组
    """
    filled_variable = variable.copy()
    ny, nx = variable.shape

    if isinstance(variable, np.ma.MaskedArray):
        known_mask = ~variable.mask
        missing_mask = variable.mask
        known_values = variable.data[known_mask]
    else:
        known_mask = ~np.isnan(variable)
        missing_mask = np.isnan(variable)
        known_values = variable[known_mask]
    
    known_points = np.column_stack((lon[known_mask], lat[known_mask]))
    missing_points = np.column_stack((lon[missing_mask], lat[missing_mask]))
    
    if len(known_points) == 0:
        print("没有可用的已知点进行插值。")
        return filled_variable

    tree = cKDTree(known_points)
    distances, indexes = tree.query(missing_points, k=max_neighbors, n_jobs=-1)
    
    if max_neighbors == 1:
        distances = distances[:, np.newaxis]
        indexes = indexes[:, np.newaxis]
    
    with np.errstate(divide='ignore'):
        weights = 1 / distances**power
    weights[~np.isfinite(weights)] = 0
    
    interpolated_values = np.sum(weights * known_values[indexes], axis=1) / np.sum(weights, axis=1)
    exact_matches = distances[:,0] == 0
    interpolated_values[exact_matches] = known_values[indexes[exact_matches, 0]]
    
    filled_variable[missing_mask] = interpolated_values
    
    # 调试信息
    n_filled = np.sum(~filled_variable[missing_mask].mask) if isinstance(filled_variable, np.ma.MaskedArray) else np.sum(~np.isnan(filled_variable[missing_mask]))
    print(f"尝试填补了 {len(missing_points)} 个缺失点,成功填补了 {n_filled} 个。")
    
    return filled_variable

def fill_missing_idw(variable, lat, lon, power=2, max_neighbors=8):
    """
    使用IDW方法填补二维数组中的缺失值。
    
    参数:
    - variable: 2D numpy数组,可能包含缺失值 (masked array)
    - lat, lon: 2D numpy数组,与variable形状相同的纬度和经度
    - power: IDW的幂参数
    - max_neighbors: 使用的最大邻居数量
    
    返回:
    - filled_variable: 填补后的二维numpy数组
    """
    filled_variable = variable.copy()
    ny, nx = variable.shape

    if isinstance(variable, np.ma.MaskedArray):
        known_mask = ~variable.mask
        missing_mask = variable.mask
        known_values = variable.data[known_mask]
    else:
        known_mask = ~np.isnan(variable)
        missing_mask = np.isnan(variable)
        known_values = variable[known_mask]
    
    known_points = np.column_stack((lon[known_mask], lat[known_mask]))
    missing_points = np.column_stack((lon[missing_mask], lat[missing_mask]))
    
    if len(known_points) == 0:
        print("没有可用的已知点进行插值。")
        return filled_variable

    tree = cKDTree(known_points)
    distances, indexes = tree.query(missing_points, k=max_neighbors)
    
    if max_neighbors == 1:
        distances = distances[:, np.newaxis]
        indexes = indexes[:, np.newaxis]
    
    with np.errstate(divide='ignore'):
        weights = 1 / distances**power
    weights[~np.isfinite(weights)] = 0  # 处理距离为0的情况
    
    # 计算权重和
    weight_sums = np.sum(weights, axis=1)
    # 避免除以零
    weight_sums[weight_sums == 0] = 1
    
    interpolated_values = np.sum(weights * known_values[indexes], axis=1) / weight_sums
    
    # 处理距离为0的情况,直接赋值
    exact_matches = distances[:,0] == 0
    if np.any(exact_matches):
        interpolated_values[exact_matches] = known_values[indexes[exact_matches, 0]]
    
    filled_variable[missing_mask] = interpolated_values
    
    # 调试信息
    n_filled = np.sum(~filled_variable[missing_mask].mask) if isinstance(filled_variable, np.ma.MaskedArray) else np.sum(~np.isnan(filled_variable[missing_mask]))
    print(f"尝试填补了 {len(missing_points)} 个缺失点,成功填补了 {n_filled} 个。")
    
    return filled_variable

def process_nc_file(input_path, output_path):
    # 打开 NetCDF 文件
    with Dataset(input_path, 'r') as src:
        # 创建新的 NetCDF 文件
        with Dataset(output_path, 'w', format=src.file_format) as dst:
            # 复制全局属性
            dst.setncatts({attr: src.getncattr(attr) for attr in src.ncattrs()})
    
            # 复制维度
            for dim_name, dim in src.dimensions.items():
                dst.createDimension(dim_name, (len(dim) if not dim.isunlimited() else None))
    
            # 复制变量并处理数据
            for var_name, var in src.variables.items():
                # 复制变量定义
                fill_value = getattr(var, '_FillValue', None)
                dst_var = dst.createVariable(var_name, var.datatype, var.dimensions, fill_value=fill_value)
                dst_var.setncatts({attr: var.getncattr(attr) for attr in var.ncattrs()})
    
                # 读取数据
                data = var[:]
    
                # 检查是否是需要插值的变量
                if var_name in ['T2D', 'Q2D', 'U2D', 'V2D', 'PSFC', 'RAINRATE', 'SWDOWN', 'LWDOWN']:
                    # 只处理数据维度为 [Time, south_north, west_east]
                    if data.ndim == 3:
                        filled_data = np.ma.array(data)  # 确保是masked array
                        lat = src.variables['lat'][:]
                        lon = src.variables['lon'][:]
                        
                        for t in range(data.shape[0]):
                            print(f"Processing time index {t} for variable {var_name}")
                            var_data = data[t, :, :]
                            
                            # 确保 var_data 是 masked array
                            if not isinstance(var_data, np.ma.MaskedArray):
                                var_data = np.ma.masked_where(np.isnan(var_data), var_data)
                            
                            # 填补缺失值
                            filled_var = fill_missing_idw(var_data, lat, lon)
                            filled_data[t, :, :] = filled_var
                        
                        # 将填补后的数据写入目标文件,填补剩余的 NaN 为 _FillValue
                        fill_val = getattr(var, '_FillValue', np.nan)
                        dst_var[:] = filled_data.filled(fill_val)
                    else:
                        raise ValueError(f"Variable {var_name} 有意外的维度。")
                else:
                    # 其他变量直接复制
                    dst_var[:] = data

    print(f"填补后的数据已保存到 {output_path}")

if __name__ == "__main__":
    input_nc = r"aaa.nc"
    output_nc = r"aaa_filled.nc"
    
    # 检查输入文件是否存在
    if not os.path.exists(input_nc):
        print(f"输入文件 {input_nc} 不存在。")
    else:
        process_nc_file(input_nc, output_nc)