逐时nc数据批量处理为日平均

现有一系列的 nc 数据,每个数据格式如下:

需要进行批量处理将数据处理为日平均的tif,可使用下列脚本:

python
import os
import xarray as xr
import pandas as pd
import multiprocessing as mul
import numpy as np
from rasterio.transform import from_origin
import rasterio
from datetime import datetime

def save_as_geotiff(data_array, lons, lats, filename):
    """Write a 2-D array to a single-band WGS84 GeoTIFF.

    Parameters
    ----------
    data_array : 2-D numpy array of shape (len(lats), len(lons)).
        Assumed ordered north-to-south (descending latitude), matching
        the default ERA5 layout — TODO confirm against the input files.
    lons, lats : 1-D arrays of cell-center coordinates, in degrees.
    filename : str, output path for the GeoTIFF.
    """
    # lons/lats are cell centers, so the pixel size is the grid spacing
    # range/(n-1), and the raster origin is the outer *edge* of the first
    # cell — half a pixel beyond the extreme centers.  The original code
    # used range/n and the first cell center as the origin, which shifted
    # the whole raster by roughly half a pixel.
    xres = (lons.max() - lons.min()) / max(len(lons) - 1, 1)
    yres = (lats.max() - lats.min()) / max(len(lats) - 1, 1)
    transform = from_origin(
        lons.min() - xres / 2.0,  # west edge of the westernmost cell
        lats.max() + yres / 2.0,  # north edge of the northernmost cell
        xres,
        yres,
    )

    with rasterio.open(
        filename,
        'w',
        driver='GTiff',
        height=len(lats),
        width=len(lons),
        count=1,
        dtype=data_array.dtype,
        crs='EPSG:4326',  # WGS84 坐标系
        transform=transform,
    ) as dst:
        dst.write(data_array, 1)

def process_snow_daily_average(args):
    """Convert monthly hourly snow-depth NetCDF files into daily-mean GeoTIFFs.

    Parameters
    ----------
    args : tuple (inpath, outpath, start_year, end_year)
        inpath      directory containing files named ``snow_YYYY_MM.nc``
        outpath     root output directory; tifs go to ``outpath/yYYYY/``
        start_year, end_year : inclusive year range handled by this worker.

    Each input file is expected to hold an ``sde`` variable dimensioned
    (valid_time, latitude, longitude); one ``snow_YYYYMMDD.tif`` is
    written per calendar day found in the file.
    """
    inpath, outpath, start_year, end_year = args

    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            file_path = os.path.join(inpath, f"snow_{year}_{month:02d}.nc")

            if not os.path.exists(file_path):
                print(f"Warning: File {file_path} does not exist, skipping...")
                continue

            try:
                # Context manager guarantees the dataset is closed even if
                # a day fails mid-loop (the original only closed on success).
                with xr.open_dataset(file_path) as ds:
                    snow_data = ds['sde']

                    time_values = pd.to_datetime(snow_data.valid_time.values)
                    # Calendar date of every hourly timestep; numpy array so
                    # it can be compared element-wise below.
                    dates = np.array([t.date() for t in time_values])

                    # Coordinates are identical for every day in the file;
                    # read them once instead of inside the per-day loop.
                    lons = snow_data.longitude.values
                    lats = snow_data.latitude.values

                    output_year_path = os.path.join(outpath, f"y{year}")
                    os.makedirs(output_year_path, exist_ok=True)

                    for current_date in sorted(set(dates)):
                        # All hourly slices belonging to this calendar day.
                        daily_data = snow_data.isel(valid_time=(dates == current_date))
                        daily_avg = daily_data.mean(dim='valid_time', skipna=True)

                        outname = os.path.join(
                            output_year_path,
                            f"snow_{current_date.strftime('%Y%m%d')}.tif"
                        )
                        save_as_geotiff(daily_avg.values, lons, lats, outname)
                        print(f"{outname} has been converted!")

            except Exception as e:
                # Best-effort batch job: report the failure and keep going
                # with the next monthly file.
                print(f"Error processing file {file_path}: {str(e)}")
                continue

if __name__ == "__main__":
    inpath = r"data/era5_sd_2025/1981_1989"          # directory of snow_YYYY_MM.nc files
    outpath = r"data/era5_sd_2025/daily_1981_1989"   # daily tifs written to outpath/yYYYY/
    start_year = 1981
    end_year = 1989

    # os.cpu_count() is documented to return None when it cannot determine
    # the count; `or 1` keeps min() from raising TypeError in that case.
    num_processes = min(8, os.cpu_count() or 1)  # 使用较少的进程

    total_years = end_year - start_year + 1
    years_per_process = max(1, total_years // num_processes)

    # Split the year range into contiguous chunks, one per worker.  The
    # last worker absorbs any remainder so every year is covered exactly once.
    args_list = []
    for i in range(num_processes):
        chunk_start = start_year + i * years_per_process
        if chunk_start > end_year:
            break  # more workers than chunks: stop early
        chunk_end = min(chunk_start + years_per_process - 1, end_year)
        if i == num_processes - 1:
            chunk_end = end_year
        args_list.append((inpath, outpath, chunk_start, chunk_end))

    with mul.Pool(processes=num_processes) as pool:
        pool.map(process_snow_daily_average, args_list)

    print('--' * 50)
    print('all jobs have finished!!!')
相关推荐
Dxy123931021611 分钟前
Python基于BERT的上下文纠错详解
开发语言·python·bert
SiYuanFeng1 小时前
Colab复现 NanoChat:从 Tokenizer(CPU)、Base Train(CPU) 到 SFT(GPU) 的完整踩坑实录
python·colab
炸炸鱼.2 小时前
Python 操作 MySQL 数据库
android·数据库·python·adb
_深海凉_3 小时前
LeetCode热题100-颜色分类
python·算法·leetcode
AC赳赳老秦3 小时前
OpenClaw email技能:批量发送邮件、自动回复,高效处理工作邮件
运维·人工智能·python·django·自动化·deepseek·openclaw
zhaoshuzhaoshu3 小时前
Python 语法之数据结构详细解析
python
AI问答工程师4 小时前
Meta Muse Spark 的"思维压缩"到底是什么?我用 Python 复现了核心思路(附代码)
人工智能·python
zfan5205 小时前
python对Excel数据处理(1)
python·excel·pandas
小饕5 小时前
我从零搭建 RAG 学到的 10 件事
python
老歌老听老掉牙5 小时前
PyQt5+Qt Designer实战:可视化设计智能参数配置界面,告别手动布局时代!
python·qt