Dynamic Delivery Route Optimization for a Logistics Company (Q-Learning)

Business pain points: a regional logistics company (10 cities, 500 delivery vehicles, roughly 20,000 orders per day) faces three problems:

  • Inefficient static routing: routes are planned manually and fixed, so during morning and evening rush hours the detour rate on congested segments reaches 25%; average delivery time is 120 minutes against an industry benchmark of 90
  • Slow dynamic response: when sudden traffic controls or add-on orders occur, drivers reroute by gut feeling; the on-time rate is only 75% (target: 90%)
  • High costs: wasted fuel from detours plus late-delivery penalties exceed 50 million CNY per year, and customer satisfaction is slipping (12% complaint rate)

Algorithm team: data cleaning (Spark), feature engineering (state/action/reward features), Q-Learning model training (distributed with Ray), simulation environment (Gym), policy storage (Feast)

Business team: API gateway, path optimization service (consumes the Q-Learning policy), dispatch system integration (driver app / console), monitoring and alerting

Logistics operations team: drivers receive real-time routes through the driver app; dispatchers intervene on abnormal orders from the console

Data Preparation and Feature Evolution

(1) Source data structure (multi-source heterogeneous data from the logistics systems)

① Order system (delivery_orders, Hive table)
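
The post doesn't show the order table's DDL; judging from the columns the cleaning job selects below, a representative row would look like this (illustrative values):

json

// One delivery order (fields match the cleaning job's select list)
{ "order_id": "OD20231001-0001", "origin_lat": 39.9151, "origin_lon": 116.4039, "dest_lat": 39.9823, "dest_lon": 116.3066, "weight_kg": 12.5, "time_window_start": "2023-10-01T09:00:00Z", "time_window_end": "2023-10-01T11:00:00Z", "priority": 3 }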

② Vehicle GPS (vehicle_gps, Kafka topic)

json

// Real-time vehicle status (reported every 30 seconds)
{ "vehicle_id": "VH001", "lat": 39.9042, "lon": 116.4074, "speed_kmh": 45, "load_kg": 120, "timestamp": "2023-10-01T09:30:00Z" }

③ Traffic data (traffic_status, API endpoint)
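
The traffic API's payload isn't shown either; the feature code later reads road_id and congestion_index, so a response element presumably resembles this (illustrative values):

json

// Live status of one road segment (fields inferred from the feature code)
{ "road_id": "RD-1163", "congestion_index": 0.72, "timestamp": "2023-10-01T09:30:00Z" }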

(2) Data cleaning (full code, owned by the algorithm team)

Goal: handle missing values, spatio-temporal misalignment, and noise; output standardized order, vehicle, and traffic data

Code file: data_processing/src/main/python/data_cleaning.py

python
import logging

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, count, avg

logging.basicConfig(level=logging.INFO)  
logger = logging.getLogger(__name__) 

def clean_order_data(spark, input_path: str, output_path: str):
    """Clean order data: missing values, abnormal weights, time-window validation"""
    df = spark.read.parquet(input_path)
    logger.info(f"Raw order count: {df.count()}; per-column null counts:")
    df.select([count(when(col(c).isNull(), c)).alias(c) for c in df.columns]).show()

    # 1. Missing values
    # Fill missing weight_kg with the mean of comparable orders (same region, same priority)
    peer_mean_weight = df.filter(
        col("origin_lat").between(39.9, 40.0) &  # latitude between 39.9 and 40.0
        (col("priority") == 3)                   # priority equals 3 (parentheses required: == binds looser than &)
    ).agg(avg("weight_kg")).first()[0]
    df = df.withColumn(
        "weight_kg",
        when(col("weight_kg").isNull(), peer_mean_weight).otherwise(col("weight_kg"))
    )
    # Extreme case (region has no comparable orders): fall back to the global mean
    global_mean_weight = df.agg(avg("weight_kg")).first()[0]
    df = df.fillna({"weight_kg": global_mean_weight})

    # 2. Outliers
    # weight_kg > 500 kg (oversized cargo, handled separately) or < 0.1 kg (invalid) -> flag as abnormal
    df = df.withColumn(
        "is_abnormal_weight",
        when((col("weight_kg") > 500) | (col("weight_kg") < 0.1), 1).otherwise(0)
    )
    # Time-window validation: start >= end -> swap the two timestamps.
    # Swap via a temp column: two sequential withColumns would read the already-overwritten start.
    df = (df
          .withColumn("tw_start", when(col("time_window_start") > col("time_window_end"),
                                       col("time_window_end")).otherwise(col("time_window_start")))
          .withColumn("time_window_end", when(col("time_window_start") > col("time_window_end"),
                                              col("time_window_start")).otherwise(col("time_window_end")))
          .withColumn("time_window_start", col("tw_start"))
          .drop("tw_start"))

    # 3. Deduplicate and save
    df_clean = df.dropDuplicates(["order_id"]).select(
        "order_id", "origin_lat", "origin_lon", "dest_lat", "dest_lon",
        "weight_kg", "time_window_start", "time_window_end", "priority"
    )
    df_clean.write.mode("overwrite").parquet(output_path)
    logger.info(f"Cleaned order count: {df_clean.count()}; saved to {output_path}")
    return df_clean

if __name__ == "__main__":
    spark = SparkSession.builder.appName("LogisticsDataCleaning").getOrCreate()
    # Input/output paths (data-lake storage)
    order_input = "s3://logistics-data-lake/raw/delivery_orders.parquet"
    order_output = "s3://logistics-data-lake/cleaned/orders_cleaned.parquet"
    clean_order_data(spark, order_input, order_output)
    spark.stop()

(3) Feature engineering and feature data generation (explicit feature_path / label_path)

Convert the cleaned data into a state feature matrix Q-Learning can consume (feature_path) and a Q-table policy model (label_path); the core work is designing the state, action, and reward features.

① State feature generation (state_feature_generator.py)

python
import logging

import pandas as pd

logging.basicConfig(level=logging.INFO)  
logger = logging.getLogger(__name__)  

def calculate_distance(lat1, lon1, lat2, lon2):
    """Euclidean distance between two points (simplified; use the Haversine formula in production)"""
    return ((lat1 - lat2)**2 + (lon1 - lon2)**2)**0.5 * 111  # 1 degree ≈ 111 km

def generate_state_features(cleaned_orders: pd.DataFrame, vehicle_gps: pd.DataFrame, traffic_data: pd.DataFrame) -> pd.DataFrame:
    """Generate Q-Learning state features (vehicle position + order + traffic)"""
    # 1. Join orders to vehicles (simplified scenario: one vehicle, one order)
    merged = pd.merge(vehicle_gps, cleaned_orders, left_on="current_order_id", right_on="order_id", how="left")

    # 2. Compute state features
    state_features = []
    for _, row in merged.iterrows():
        # Base state: position, destination, load
        state = {
            "vehicle_id": row["vehicle_id"],
            "order_id": row["order_id"],
            "current_lat": row["lat"], "current_lon": row["lon"],
            "dest_lat": row["dest_lat"], "dest_lon": row["dest_lon"],
            "load_kg": row["load_kg"],
            "remaining_time": (row["time_window_end"] - pd.Timestamp.now()).total_seconds() / 60  # minutes left in the window
        }

        # Traffic feature: congestion index of the current road (join GPS position to traffic data)
        current_road = traffic_data[traffic_data["road_id"] == row["current_road_id"]]  # assumes GPS records carry a road ID
        state["congestion_index"] = current_road["congestion_index"].iloc[0] if not current_road.empty else 0.5

        # Distance feature: current position to destination (km); note the GPS columns are lat/lon
        state["distance_to_dest"] = calculate_distance(
            row["lat"], row["lon"], row["dest_lat"], row["dest_lon"]
        )

        state_features.append(state)

    state_df = pd.DataFrame(state_features)
    logger.info(f"Generated {len(state_df)} state-feature rows; sample:\n{state_df.head(2)}")
    return state_df
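
The distance helper above notes that production should use the Haversine formula; a drop-in replacement (the standard great-circle formula, not from the original post) would be:

python

import math

def haversine_distance(lat1, lon1, lat2, lon2):
    """Great-circle distance in km between two (lat, lon) points."""
    earth_radius_km = 6371.0
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    d_phi = math.radians(lat2 - lat1)
    d_lam = math.radians(lon2 - lon1)
    a = math.sin(d_phi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(d_lam / 2)**2
    return 2 * earth_radius_km * math.asin(math.sqrt(a))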

② Feature data generation (generate_feature_data.py, with explicit feature_path / label_path)

python
import logging

import pandas as pd

from state_feature_generator import generate_state_features

logging.basicConfig(level=logging.INFO)  
logger = logging.getLogger(__name__)  

def generate_feature_data(cleaned_orders_path: str, vehicle_gps_path: str, traffic_data_path: str) -> tuple:
    """Generate the state feature matrix (feature_path) and Q-table policy (label_path)"""
    # 1. Load cleaned data
    orders = pd.read_parquet(cleaned_orders_path)
    gps = pd.read_parquet(vehicle_gps_path)
    traffic = pd.read_parquet(traffic_data_path)

    # 2. Generate the state feature matrix (the file feature_path points to)
    state_df = generate_state_features(orders, gps, traffic)
    feature_path = "s3://logistics-data-lake/processed/qlearning_state_features.parquet"
    state_df.to_parquet(feature_path, index=False)

    # 3. Initialize label_path (Q-table policy file, written after training)
    label_path = "s3://logistics-data-lake/processed/q_table_policy.joblib"

    # Note the doubled braces below: inside an f-string, literal {} must be escaped
    logger.info(f"""
    [Algorithm team: feature data storage]
    - feature_path: {feature_path}
      Contents: Q-Learning state feature matrix (Parquet), columns:
        vehicle_id, order_id, current_lat/lon (current position), dest_lat/lon (destination),
        load_kg (load), remaining_time (time left in window), congestion_index, distance_to_dest
      Sample (first 2 rows):\n{state_df.head(2)}

    - label_path: {label_path}
      Contents: Q-table policy model (Joblib), a nested dict {{state_tuple: {{action: q_value}}}}
        state_tuple: (current_lat, current_lon, dest_lat, dest_lon, congestion_index, ...)
        action: 0-5 (six actions: 0=straight, 1=left, 2=right, 3=speed up, 4=slow down, 5=wait)
        q_value: expected value of the state-action pair
      Written by train_qlearning.py once Q-Learning training completes
    """)

    return feature_path, label_path, state_df
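
For reference, a downstream consumer would load and query the saved policy roughly like this (a minimal sketch assuming the nested-dict structure described above; the state tuple values are illustrative):

python

import joblib

q_table = joblib.load("q_table_policy.joblib")  # local copy of the label_path file
state = (39.9042, 116.4074, 39.9823, 116.3066, 0.72, 45.0, 120.0)  # illustrative encoded state
action_values = q_table.get(state, {})
# Highest-Q action; fall back to action 0 (straight) for unseen states
best_action = max(action_values, key=action_values.get) if action_values else 0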

Feature evolution at a glance (raw data → cleaned → state features → Q-table policy):

  • Raw data: delivery_orders (Hive), vehicle_gps (Kafka, position/speed/load every 30 s), traffic_status (API); heterogeneous, with nulls and noise
  • Cleaned data (s3://logistics-data-lake/cleaned/): standardized Parquet; orders keep order_id, origin/dest coordinates, weight_kg, time windows, priority
  • State features (feature_path): one row per vehicle-order pair with current/dest position, load_kg, remaining_time, congestion_index, distance_to_dest
  • Q-table policy (label_path): Joblib dict {state_tuple: {action: q_value}} over the 6 actions

Code Structure

Algorithm team repository (algorithm-path-optimization)

text
algorithm-path-optimization/
├── data_processing/                 # Data cleaning (PySpark)
│   └── src/main/python/data_cleaning.py  # Cleaning code (orders/vehicles/traffic)
├── feature_engineering/             # Feature engineering (state/action/reward)
│   ├── state_feature_generator.py   # State feature generation (distance/traffic features)
│   ├── reward_calculator.py         # Reward function: timeliness + cost + experience (sketch below)
│   ├── generate_feature_data.py     # Feature data generation (feature_path/label_path notes)
│   └── requirements.txt             # Deps: pandas
├── model_training/                  # Q-Learning model training (core)
│   ├── q_learning.py                # Q-Learning implementation (with inline theory comments)
│   ├── train_qlearning.py           # Training entry point (Ray + simulated environment)
│   ├── simulate_environment.py      # Custom Gym logistics simulator (sketched after train_qlearning.py)
│   └── qlearning_params.yaml        # Tuning record (epsilon=0.1, alpha=0.5, gamma=0.9; example below)
├── feature_store/                   # Feast feature store (dynamic feature serving)
│   ├── feature_repo/                # Feast feature repository
│   │   ├── features.py              # Entities (vehicle_id), feature views (live position/traffic)
│   │   └── feature_store.yaml       # Feast config (online: Redis, offline: Parquet)
│   └── deploy_feast.sh              # Deploys Feast to K8s
└── mlflow_tracking/                 # MLflow experiment tracking
    └── run_qlearning_experiment.py  # Logs epsilon/alpha/gamma, reward curves, convergence
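
The tree references reward_calculator.py (timeliness + cost + experience) without showing it; a minimal sketch under those three headings (the weights are illustrative, not from the original):

python

def calculate_reward(arrived_on_time: bool, minutes_late: float,
                     distance_km: float, congestion_index: float) -> float:
    """Composite reward: timeliness + cost + customer experience (illustrative weights)."""
    timeliness = 10.0 if arrived_on_time else -0.5 * minutes_late  # per-minute lateness penalty
    cost = -0.2 * distance_km * (1 + congestion_index)             # fuel cost grows with detours and congestion
    experience = 2.0 if arrived_on_time else -5.0                  # proxy for complaints / satisfaction
    return timeliness + cost + experience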
Q-Learning core implementation (model_training/q_learning.py):

python
import logging
from collections import defaultdict

import numpy as np

logging.basicConfig(level=logging.INFO)  
logger = logging.getLogger(__name__)  

class QLearningAgent:
    def __init__(self, state_dim, action_dim, epsilon=0.1, alpha=0.5, gamma=0.9):
        """
        Q-Learning agent.
        :param state_dim: size of the state space (number of features)
        :param action_dim: size of the action space (6 actions)
        :param epsilon: exploration rate of the epsilon-greedy policy (default 10% exploration)
        :param alpha: learning rate (Q-value update step size, default 0.5)
        :param gamma: discount factor (weight of future rewards, default 0.9)
        """
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.epsilon = epsilon  # exploration rate (epsilon-greedy)
        self.alpha = alpha      # learning rate (alpha)
        self.gamma = gamma      # discount factor (gamma)
        # Q-table: nested dict {state: {action: q_value}}, Q-values initialized to 0
        self.q_table = defaultdict(lambda: defaultdict(float))

    def choose_action(self, state):
        """Epsilon-greedy: explore (random action) with probability epsilon, otherwise exploit (max-Q action)"""
        if np.random.uniform(0, 1) < self.epsilon:
            # Explore: pick a random action (0-5)
            return np.random.randint(0, self.action_dim)
        else:
            # Exploit: pick the action with the highest Q-value in this state
            state_actions = self.q_table[state]
            if not state_actions:  # unseen state: pick randomly
                return np.random.randint(0, self.action_dim)
            return max(state_actions, key=state_actions.get)

    def update_q_value(self, state, action, reward, next_state):
        """Q-value update (core formula): Q(s,a) ← Q(s,a) + α[r + γ·max_a' Q(s',a') − Q(s,a)]"""
        current_q = self.q_table[state][action]
        # Max Q-value of the next state (0 if next_state is unseen)
        max_next_q = max(self.q_table[next_state].values()) if self.q_table[next_state] else 0
        # Apply the update
        new_q = current_q + self.alpha * (reward + self.gamma * max_next_q - current_q)
        self.q_table[state][action] = new_q

    def train(self, env, episodes=1000):
        """Train the agent by interacting with the environment for `episodes` rounds"""
        for episode in range(episodes):
            state = env.reset()  # reset the environment, get the initial state
            total_reward = 0
            done = False

            while not done:
                # 1. Choose an action (epsilon-greedy)
                action = self.choose_action(state)
                # 2. Execute it; the environment returns (next_state, reward, done, info)
                next_state, reward, done, info = env.step(action)
                # 3. Update the Q-value
                self.update_q_value(state, action, reward, next_state)
                # 4. Move to the next state
                state = next_state
                total_reward += reward

            # Decay the exploration rate each episode (explore less as training progresses)
            self.epsilon = max(0.01, self.epsilon * 0.995)

            # Log progress every 100 episodes
            if episode % 100 == 0:
                logger.info(f"Episode {episode}, Total Reward: {total_reward:.2f}, Epsilon: {self.epsilon:.3f}")

        logger.info("Q-Learning training finished; Q-table converged")
        return self.q_table

# Example: encode a logistics state (tuple-ize the features so they can key the Q-table)
def encode_state(state_features: dict) -> tuple:
    """Encode a state-feature dict into a Q-table key (tuple); rounding discretizes the state space"""
    return (
        round(state_features["current_lat"], 4), round(state_features["current_lon"], 4),
        round(state_features["dest_lat"], 4), round(state_features["dest_lon"], 4),
        round(state_features["congestion_index"], 2), round(state_features["remaining_time"], 1),
        round(state_features["load_kg"], 1)
    )
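
The tuning record qlearning_params.yaml from the repository tree might look like this (the layout is a sketch; the values are the ones stated in this post):

yaml

# model_training/qlearning_params.yaml -- tuning record
epsilon: 0.1      # initial exploration rate, decayed x0.995 per episode, floor 0.01
alpha: 0.5        # learning rate
gamma: 0.9        # discount factor
episodes: 2000    # training rounds (see train_qlearning.py)
action_dim: 6     # 0=straight, 1=left, 2=right, 3=speed up, 4=slow down, 5=wait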
Training entry point (model_training/train_qlearning.py):

python
import logging

import joblib
import mlflow
import ray

from q_learning import QLearningAgent, encode_state
from simulate_environment import LogisticsEnv
from feature_engineering.generate_feature_data import generate_feature_data

logging.basicConfig(level=logging.INFO)  
logger = logging.getLogger(__name__)  

def train_and_save_policy():
    # 1. Initialize Ray for distributed training
    ray.init(num_cpus=4)

    # 2. Generate feature data (returns feature_path and label_path)
    feature_path, label_path, state_df = generate_feature_data(
        cleaned_orders_path="s3://logistics-data-lake/cleaned/orders_cleaned.parquet",
        vehicle_gps_path="s3://logistics-data-lake/cleaned/gps_cleaned.parquet",
        traffic_data_path="s3://logistics-data-lake/cleaned/traffic_cleaned.parquet"
    )

    # 3. Load state features and build the simulated environment
    env = LogisticsEnv(state_df)  # custom logistics simulator (road network + orders + traffic)
    state_dim = len(env.state_features[0])  # state feature dimensionality
    action_dim = 6  # six actions (0-5)

    # 4. Train the Q-Learning agent
    agent = QLearningAgent(
        state_dim=state_dim, action_dim=action_dim,
        epsilon=0.1, alpha=0.5, gamma=0.9
    )
    q_table = agent.train(env, episodes=2000)  # 2,000 training episodes

    # 5. Save the Q-table policy to label_path (Joblib file)
    # Convert the defaultdict first: its lambda factory cannot be pickled.
    # (joblib.dump expects a local/FUSE path; writing straight to s3:// assumes an S3 mount.)
    q_table_plain = {state: dict(actions) for state, actions in q_table.items()}
    joblib.dump(q_table_plain, label_path)
    logger.info(f"Q-table policy saved to {label_path}; {len(q_table_plain)} states")

    # 6. Log the MLflow experiment
    with mlflow.start_run(run_name="q_learning_path_optimization"):
        mlflow.log_param("epsilon", agent.epsilon)
        mlflow.log_param("alpha", agent.alpha)
        mlflow.log_param("gamma", agent.gamma)
        mlflow.log_metric("q_table_size", len(q_table_plain))
        mlflow.log_artifact(label_path)  # upload the Q-table file

if __name__ == "__main__":  
    train_and_save_policy()
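
simulate_environment.py is referenced throughout but never shown. A minimal sketch of the Gym-style interface the training loop depends on (reset/step only; the placeholder dynamics below are assumptions, not the original simulator):

python

from q_learning import encode_state

class LogisticsEnv:
    """Skeleton logistics simulator exposing the classic Gym 4-tuple step() contract."""

    def __init__(self, state_df):
        self.state_features = state_df.to_dict("records")  # one dict per vehicle-order pair
        self._episode = 0

    def reset(self):
        """Start an episode from the next vehicle-order state (cyclic); copy so steps don't mutate the source."""
        self._row = dict(self.state_features[self._episode % len(self.state_features)])
        self._episode += 1
        return encode_state(self._row)

    def step(self, action):
        """Apply an action; return (next_state, reward, done, info). Dynamics are placeholders."""
        self._row["distance_to_dest"] = max(0.0, self._row["distance_to_dest"] - 1.0)  # ~1 km per step
        self._row["remaining_time"] -= 1.0              # one simulated minute per step
        reward = -1.0 - self._row["congestion_index"]   # cost per move, worse in congestion
        done = self._row["distance_to_dest"] <= 0.0
        return encode_state(self._row), reward, done, {}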

Business team repository (business-path-optimization)

text
business-path-optimization/
├── api_gateway/                    # API gateway (Kong config)
├── path_optimization_service/      # Path optimization service (Go)
│   ├── main.go                     # Go HTTP service (queries the Q-table policy + Feast features)
│   └── Dockerfile                  # Container build
├── dispatch_system/                # Delivery dispatch system (Java + React)
│   ├── backend/                    # Spring Boot backend (order management / route rendering)
│   ├── frontend/                   # React frontend (driver app / console)
│   └── sql/                        # PostgreSQL schemas (route history / policy versions)
├── monitoring/                     # Monitoring and alerting (Prometheus + Grafana)
└── deployment/                     # K8s manifests (services / Ingress)
Path optimization service (path_optimization_service/main.go). As in the original, the joblib and feast imports are pseudocode placeholders: Joblib files are Python pickles, so in practice the Q-table would first be exported to a Go-readable format such as JSON.

go
package main

import (
	"encoding/json"
	"log"
	"net/http"

	"path/to/feast"  // pseudocode: Feast client
	"path/to/joblib" // pseudocode: Q-table loader
)

// Request/response structs
type PathRequest struct {
	VehicleID  string  `json:"vehicle_id"`
	OrderID    string  `json:"order_id"`
	CurrentLat float64 `json:"current_lat"`
	CurrentLon float64 `json:"current_lon"`
}

type PathResponse struct {
	VehicleID     string `json:"vehicle_id"`
	OptimalAction int    `json:"optimal_action"` // action 0-5
	NextWaypoint  struct {
		Lat float64 `json:"lat"`
		Lon float64 `json:"lon"`
	} `json:"next_waypoint"`
	ETA string `json:"eta"` // estimated time of arrival
}

// Globals: Q-table policy and Feast client
var qTable map[string]map[int]float64 // pseudocode: Q-table structure
var feastClient *feast.Client

func init() {
	// Load the Q-table policy (the label_path file)
	qTable = joblib.Load("s3://logistics-data-lake/processed/q_table_policy.joblib")
	// Initialize the Feast client (the algorithm team's feature service)
	feastClient = feast.NewClient("feast-feature-service:6566") // K8s internal service name
}

// Path optimization API handler
func optimizePathHandler(w http.ResponseWriter, r *http.Request) {
	var req PathRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "Invalid request", http.StatusBadRequest)
		return
	}

	// 1. Fetch live features from Feast (congestion / remaining time);
	// the typed accessors used below are pseudocode
	features, err := feastClient.GetOnlineFeatures(
		[]string{"congestion_index", "remaining_time"},
		map[string]interface{}{"vehicle_id": req.VehicleID},
	)
	if err != nil {
		http.Error(w, "Failed to get features", http.StatusInternalServerError)
		return
	}

	// 2. Build the state (encoded as a Q-table key)
	state := encodeState(req.CurrentLat, req.CurrentLon, features.DestLat, features.DestLon,
		features.CongestionIndex, features.RemainingTime, 0) // load simplified to 0

	// 3. Look up the Q-table and pick the best action (max Q-value)
	actionQValues := qTable[state]
	if actionQValues == nil {
		// Unseen state: all-zero Q-values; since Go randomizes map iteration order,
		// the argmax below effectively picks a random action (exploration)
		actionQValues = make(map[int]float64)
		for i := 0; i < 6; i++ {
			actionQValues[i] = 0.0
		}
	}
	optimalAction := 0
	maxQ := -1e9
	for action, q := range actionQValues {
		if q > maxQ {
			maxQ = q
			optimalAction = action
		}
	}

	// 4. Build the response (action -> next waypoint);
	// calculateNextWaypoint / calculateETA are routing helpers not shown in the original
	resp := PathResponse{
		VehicleID:     req.VehicleID,
		OptimalAction: optimalAction,
		NextWaypoint:  calculateNextWaypoint(req.CurrentLat, req.CurrentLon, optimalAction),
		ETA:           calculateETA(req.CurrentLat, req.CurrentLon, features.DestLat, features.DestLon),
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(resp)
}

func main() {
	http.HandleFunc("/api/optimize-path", optimizePathHandler)
	log.Println("Path optimization service started on :8080")
	log.Fatal(http.ListenAndServe(":8080", nil))
}
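
A sample exchange with the service (illustrative values; the response shape follows PathResponse above):

json

// Request: POST /api/optimize-path
{ "vehicle_id": "VH001", "order_id": "OD20231001-0001", "current_lat": 39.9042, "current_lon": 116.4074 }

// Response
{ "vehicle_id": "VH001", "optimal_action": 2, "next_waypoint": { "lat": 39.9105, "lon": 116.4121 }, "eta": "2023-10-01T10:42:00Z" }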