一. TensorFlow 官方实现
TensorFlow 官方(TensorFlow Recommenders,即 `tfrs`)的实现已经是 DCN-V2 版本。
python
class Cross(tf.keras.layers.Layer):
    """Cross layer of the Deep & Cross Network, learning explicit feature crosses.

    For inputs ``x0`` and ``x`` the layer returns
    ``x0 * (act(W x + b) + diag_scale * x) + x`` where ``*`` is element-wise,
    ``act`` is the optional ``preactivation`` and ``W`` is either one full
    ``input_dim x input_dim`` kernel or, when ``projection_dim`` is set, the
    product of two kernels (``input_dim -> projection_dim -> input_dim``).

    Args:
        projection_dim: int, rank of the low-rank kernel. ``None`` (default)
            selects the full kernel. The original note says it should be
            smaller than ``input_dim / 2`` and that upstream suggests
            ``input_dim / 4``.
        diag_scale: non-negative float added to the kernel diagonal
            (implemented as adding ``diag_scale * x`` to ``W x``); the original
            note says it mainly stabilises the low-rank variant. ``None`` is
            treated as ``0.0``.
        use_bias: whether the layer producing ``W x`` adds a bias term.
        preactivation: activation applied to ``W x + b`` before the
            element-wise multiplication with ``x0``.
        kernel_initializer: Initializer to use on the kernel matrix.
        bias_initializer: Initializer to use on the bias vector.
        kernel_regularizer: Regularizer to use on the kernel matrix.
        bias_regularizer: Regularizer to use on bias vector.

    Input shape: A tuple of 2 (batch_size, `input_dim`) dimensional inputs.
    Output shape: A single (batch_size, `input_dim`) dimensional output.

    Raises:
        ValueError: if ``diag_scale`` is negative.
    """

    def __init__(
        self,
        projection_dim: Optional[int] = None,
        diag_scale: Optional[float] = 0.0,
        use_bias: bool = True,
        preactivation: Optional[Union[str, tf.keras.layers.Activation]] = None,
        kernel_initializer: Union[
            Text, tf.keras.initializers.Initializer] = "truncated_normal",
        bias_initializer: Union[Text,
                                tf.keras.initializers.Initializer] = "zeros",
        kernel_regularizer: Union[Text, None,
                                  tf.keras.regularizers.Regularizer] = None,
        bias_regularizer: Union[Text, None,
                                tf.keras.regularizers.Regularizer] = None,
        **kwargs):
        super(Cross, self).__init__(**kwargs)

        # `diag_scale` is annotated Optional and `call` already treats a falsy
        # value as "no diagonal scaling"; normalise None here so the range
        # check below does not fail on `None < 0`.
        if diag_scale is None:
            diag_scale = 0.0
        if diag_scale < 0:
            raise ValueError(
                "`diag_scale` should be non-negative. Got `diag_scale` = {}".format(
                    diag_scale))

        self._projection_dim = projection_dim
        self._diag_scale = diag_scale
        self._use_bias = use_bias
        self._preactivation = tf.keras.activations.get(preactivation)
        self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
        self._bias_initializer = tf.keras.initializers.get(bias_initializer)
        self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
        self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
        self._input_dim = None
        self._supports_masking = True

    def _make_output_dense(self, units):
        """Build the Dense layer that emits ``act(W x + b)`` with `units` outputs."""
        return tf.keras.layers.Dense(
            units,
            kernel_initializer=_clone_initializer(self._kernel_initializer),
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            use_bias=self._use_bias,
            dtype=self.dtype,
            activation=self._preactivation,
        )

    def build(self, input_shape):
        """Create the kernel layer(s) sized from the last dimension of the input."""
        last_dim = input_shape[-1]  # feature dimension, `input_dim`

        if self._projection_dim is None:
            # Full-kernel mode: one Dense layer computes W x, with the output
            # width equal to the input width.
            self._dense = self._make_output_dense(last_dim)
        else:
            # Low-rank mode: U projects the input down to `projection_dim`
            # (no bias, no activation), V maps it back to `input_dim`.
            self._dense_u = tf.keras.layers.Dense(
                self._projection_dim,
                kernel_initializer=_clone_initializer(self._kernel_initializer),
                kernel_regularizer=self._kernel_regularizer,
                use_bias=False,
                dtype=self.dtype,
            )
            self._dense_v = self._make_output_dense(last_dim)
        self.built = True

    def call(self, x0: tf.Tensor, x: Optional[tf.Tensor] = None) -> tf.Tensor:
        """Computes the feature cross.

        Args:
            x0: The input tensor
            x: Optional second input tensor. If provided, the layer will compute
                crosses between x0 and x; if not provided, the layer will compute
                crosses between x0 and itself.

        Returns:
            Tensor of crosses.

        Raises:
            ValueError: if the last dimensions of `x0` and `x` differ.
        """
        if not self.built:
            self.build(x0.shape)

        if x is None:  # no second input: cross x0 with itself
            x = x0

        if x0.shape[-1] != x.shape[-1]:
            raise ValueError("`x0` and `x` dimension mismatch! Got `x0` dimension {}, and x "
                             "dimension {}. This case is not supported yet.".format(x0.shape[-1], x.shape[-1]))

        # W * x
        if self._projection_dim is None:
            prod_output = self._dense(x)
        else:
            prod_output = self._dense_v(self._dense_u(x))

        # Keep the product in the layer's compute dtype.
        prod_output = tf.cast(prod_output, self.compute_dtype)

        # Optional diagonal scaling: equivalent to adding diag_scale * I to W.
        if self._diag_scale:
            prod_output = prod_output + self._diag_scale * x

        return x0 * prod_output + x
class DCN(tfrs.Model):
    """Rating-prediction model: embeddings -> optional Cross layer -> deep layers -> logit.

    The cross and deep parts are stacked (serial structure). Vocabularies are
    read from the module-level ``vocabularies`` mapping, which is defined
    outside this block.

    Args:
        use_cross_layer: whether to insert a ``tfrs.layers.dcn.Cross`` layer
            between the concatenated embeddings and the deep layers.
        deep_layer_sizes: iterable of hidden-layer widths for the deep part.
        projection_dim: forwarded to the Cross layer (``None`` = full kernel).
    """

    def __init__(self, use_cross_layer, deep_layer_sizes, projection_dim=None):
        super().__init__()

        self.embedding_dimension = 32  # width of every feature embedding

        str_features = ["movie_id", "user_id", "user_zip_code",
                        "user_occupation_text"]
        int_features = ["user_gender", "bucketized_user_age"]

        self._all_features = str_features + int_features
        self._embeddings = {}

        # Compute embeddings for string features (categorical values).
        for feature_name in str_features:
            vocabulary = vocabularies[feature_name]
            self._embeddings[feature_name] = tf.keras.Sequential(
                [tf.keras.layers.StringLookup(vocabulary=vocabulary, mask_token=None),
                 tf.keras.layers.Embedding(len(vocabulary) + 1, self.embedding_dimension)
                 ])

        # Compute embeddings for int features. These are integer-valued, so a
        # lookup + embedding works; per the original note this would not be
        # appropriate for float features.
        for feature_name in int_features:
            vocabulary = vocabularies[feature_name]
            self._embeddings[feature_name] = tf.keras.Sequential(
                [tf.keras.layers.IntegerLookup(vocabulary=vocabulary, mask_value=None),
                 tf.keras.layers.Embedding(len(vocabulary) + 1, self.embedding_dimension)
                 ])

        # Cross module from the paper.
        if use_cross_layer:
            self._cross_layer = tfrs.layers.dcn.Cross(
                projection_dim=projection_dim,
                kernel_initializer="glorot_uniform")
        else:
            self._cross_layer = None

        # Deep (DNN) module.
        self._deep_layers = [tf.keras.layers.Dense(layer_size, activation="relu")
                             for layer_size in deep_layer_sizes]

        self._logit_layer = tf.keras.layers.Dense(1)

        self.task = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError("RMSE")]
        )

    def call(self, features):
        """Return the predicted score for a batch of feature tensors.

        Official implementation, following DCN-V2, which discusses stacked,
        parallel and mixed cross/deep structures; this model uses the stacked
        one.

        Args:
            features: mapping holding one entry per name in
                ``self._all_features``; other keys are ignored.
        """
        # Concatenate embeddings
        embeddings = []
        for feature_name in self._all_features:
            embedding_fn = self._embeddings[feature_name]
            embeddings.append(embedding_fn(features[feature_name]))

        x = tf.concat(embeddings, axis=1)

        # Build Cross Network
        if self._cross_layer is not None:
            x = self._cross_layer(x)

        # Build Deep Network (stacked on top of the cross output)
        for deep_layer in self._deep_layers:
            x = deep_layer(x)

        return self._logit_layer(x)

    def compute_loss(self, features, training=False):
        """Compute the ranking-task loss for one batch.

        Args:
            features: mapping with the model inputs plus the ``"user_rating"``
                label.
            training: accepted for interface compatibility; not forwarded.
        """
        # Pop the label from a shallow copy so the caller's batch dict is left
        # intact and can be reused (popping in place would make a second call
        # on the same dict fail with KeyError).
        features = dict(features)
        labels = features.pop("user_rating")

        scores = self(features)

        return self.task(
            labels=labels,
            predictions=scores,
        )
# Usage: batched and cached datasets shared by every run.
# NOTE(review): `cache()` sits after `shuffle()`, which presumably freezes the
# shuffled order after the first pass — confirm this is intended.
cached_train = (
    train
    .shuffle(100_000)
    .batch(8192)
    .cache()
)
cached_test = (
    test
    .batch(4096)
    .cache()
)
def run_models(use_cross_layer, deep_layer_sizes, projection_dim=None, num_runs=5):
    """Train `num_runs` fresh DCN models and summarise their test RMSE.

    Each model is compiled with Adam, fitted on ``cached_train`` and evaluated
    on ``cached_test``. ``learning_rate`` and ``epochs`` are read from module
    scope (defined outside this block).

    Args:
        use_cross_layer: forwarded to ``DCN``.
        deep_layer_sizes: forwarded to ``DCN``.
        projection_dim: forwarded to ``DCN``.
        num_runs: number of independent models to train.

    Returns:
        dict with ``"model"`` (list of the trained models), ``"mean"`` and
        ``"stdv"`` (average and standard deviation of the per-run RMSE).
    """
    trained_models = []
    test_rmses = []

    for _ in range(num_runs):
        dcn = DCN(
            use_cross_layer=use_cross_layer,
            deep_layer_sizes=deep_layer_sizes,
            projection_dim=projection_dim,
        )
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        dcn.compile(optimizer=optimizer)
        trained_models.append(dcn)

        dcn.fit(cached_train, epochs=epochs, verbose=False)
        eval_metrics = dcn.evaluate(cached_test, return_dict=True)
        test_rmses.append(eval_metrics["RMSE"])

    return {
        "model": trained_models,
        "mean": np.average(test_rmses),
        "stdv": np.std(test_rmses),
    }
二. torch实现
代码摘录于deepctr-torch
python
import torch
import torch.nn as nn
from .basemodel import BaseModel
from ..inputs import combined_dnn_input
from ..layers import CrossNet, DNN
class CrossNet(nn.Module):
    """Cross network of Deep & Cross Network: a stack of explicit cross layers.

    Each layer updates ``x_l`` from the original input ``x_0``:

    - ``"vector"``: ``x_{l+1} = x_0 * (x_l^T w_l) + b_l + x_l``
    - ``"matrix"``: ``x_{l+1} = x_0 * (W_l x_l + b_l) + x_l`` (element-wise ``*``)

    Input shape
        - 2D tensor with shape: ``(batch_size, units)``.
    Output shape
        - 2D tensor with shape: ``(batch_size, units)``.
    Arguments
        - **in_features**: positive integer, size of the last input dimension.
        - **layer_num**: positive integer, number of cross layers.
        - **parameterization**: ``"vector"`` or ``"matrix"``, how each layer's
          kernel is shaped.
        - **seed**: accepted in the signature but not read by this class.
        - **device**: device the module is moved to after construction.
    Raises
        - ``ValueError`` when ``parameterization`` is neither ``"vector"`` nor
          ``"matrix"``.
    """

    def __init__(self, in_features, layer_num=2, parameterization='vector', seed=1024, device='cpu'):
        super(CrossNet, self).__init__()
        self.layer_num = layer_num
        self.parameterization = parameterization

        # Width of each layer's kernel: a column vector (DCN) or a square
        # matrix (DCN-M).
        if self.parameterization == 'vector':
            kernel_width = 1
        elif self.parameterization == 'matrix':
            kernel_width = in_features
        else:
            raise ValueError("parameterization should be 'vector' or 'matrix'")

        self.kernels = nn.Parameter(torch.empty(self.layer_num, in_features, kernel_width))
        self.bias = nn.Parameter(torch.zeros(self.layer_num, in_features, 1))

        # Xavier-initialise each layer's kernel on its own 2D slice.
        for layer_idx in range(self.layer_num):
            nn.init.xavier_normal_(self.kernels[layer_idx])

        self.to(device)

    def forward(self, inputs):
        """Apply ``layer_num`` cross layers to ``inputs`` of shape (batch, in_features)."""
        # Trailing singleton axis so every sample is a column vector.
        x_0 = inputs.unsqueeze(2)  # (batch, in_features, 1)
        # Unlike the TF layer, there is no separate second input: the features
        # are always crossed with themselves.
        x_l = x_0

        for layer_idx in range(self.layer_num):
            weight = self.kernels[layer_idx]
            offset = self.bias[layer_idx]
            if self.parameterization == 'vector':
                # Contract the feature axis of x_l with w: one scalar per
                # sample, shape (batch, 1, 1).
                projection = torch.tensordot(x_l, weight, dims=([1], [0]))
                # Scale x_0 by that scalar -> (batch, in_features, 1).
                x_l = torch.matmul(x_0, projection) + offset + x_l
            elif self.parameterization == 'matrix':
                # Hadamard product of x_0 with (W x_l + b), plus the residual.
                x_l = x_0 * (torch.matmul(weight, x_l) + offset) + x_l
            else:  # only reachable if the attribute was changed after __init__
                raise ValueError("parameterization should be 'vector' or 'matrix'")

        # Drop the helper axis again.
        return torch.squeeze(x_l, dim=2)
class DCN(BaseModel):
    """Instantiates the Deep&Cross Network architecture. Including DCN-V (parameterization='vector')
    and DCN-M (parameterization='matrix').

    The deep tower and the cross network both read the same input (parallel
    structure); their outputs are concatenated and mapped to one logit, which
    is added to the linear-part logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param cross_num: positive integer, cross layer number
    :param cross_parameterization: str, ``"vector"`` or ``"matrix"``, how to parameterize the cross network.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to the final ``dnn_linear`` weight
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_cross: float. L2 regularizer strength applied to cross net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param device: str, ``"cpu"`` or ``"cuda:0"``
    :param gpus: list of int or torch.device for multiple gpus. If None, run on `device`. `gpus[0]` should be the same gpu with `device`.
    :raises ValueError: if ``dnn_hidden_units`` is empty and ``cross_num`` is not positive.
    :return: A PyTorch model instance.
    """

    def __init__(self, linear_feature_columns,
                 dnn_feature_columns,
                 cross_num=2,
                 cross_parameterization='vector',
                 dnn_hidden_units=(128, 128),
                 l2_reg_linear=0.00001,
                 l2_reg_embedding=0.00001,
                 l2_reg_cross=0.00001,
                 l2_reg_dnn=0,
                 init_std=0.0001,
                 seed=1024,
                 dnn_dropout=0,
                 dnn_activation='relu',
                 dnn_use_bn=False,
                 task='binary', device='cpu', gpus=None):
        super(DCN, self).__init__(linear_feature_columns=linear_feature_columns,
                                  dnn_feature_columns=dnn_feature_columns,
                                  l2_reg_embedding=l2_reg_embedding,
                                  init_std=init_std, seed=seed, task=task,
                                  device=device, gpus=gpus)
        self.dnn_hidden_units = dnn_hidden_units  # hidden-layer widths of the deep tower
        self.cross_num = cross_num  # number of cross layers (order of the explicit crosses)

        # Width of the shared input to both the deep tower and the cross net.
        input_dim = self.compute_input_dim(dnn_feature_columns)

        self.dnn = DNN(input_dim,
                       dnn_hidden_units,
                       activation=dnn_activation,
                       use_bn=dnn_use_bn,
                       l2_reg=l2_reg_dnn,
                       dropout_rate=dnn_dropout,
                       init_std=init_std,
                       device=device)

        # Input width of the final linear layer depends on which towers feed it.
        has_deep = len(self.dnn_hidden_units) > 0
        has_cross = self.cross_num > 0
        if has_deep and has_cross:
            dnn_linear_in_feature = input_dim + dnn_hidden_units[-1]
        elif has_deep:
            dnn_linear_in_feature = dnn_hidden_units[-1]
        elif has_cross:
            dnn_linear_in_feature = input_dim
        else:
            # Previously this fell through and crashed on an unbound local.
            raise ValueError(
                "DCN needs a non-empty `dnn_hidden_units` or a positive `cross_num`")

        self.dnn_linear = nn.Linear(dnn_linear_in_feature, 1, bias=False).to(device)

        # Explicit cross network.
        self.crossnet = CrossNet(in_features=input_dim,
                                 layer_num=cross_num,
                                 parameterization=cross_parameterization,
                                 device=device)

        # L2 regularisation: DNN weights (batch-norm parameters excluded), the
        # final linear weight, and the cross kernels.
        self.add_regularization_weight(
            filter(lambda x: 'weight' in x[0] and 'bn' not in x[0],
                   self.dnn.named_parameters()),
            l2=l2_reg_dnn)
        self.add_regularization_weight(self.dnn_linear.weight, l2=l2_reg_linear)
        self.add_regularization_weight(self.crossnet.kernels, l2=l2_reg_cross)
        self.to(device)

    def forward(self, X):
        """Return the prediction for the input batch ``X``."""
        logit = self.linear_model(X)
        sparse_embedding_list, dense_value_list = self.input_from_feature_columns(X,
                                                                                  self.dnn_feature_columns,
                                                                                  self.embedding_dict)

        dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

        if len(self.dnn_hidden_units) > 0 and self.cross_num > 0:  # Deep & Cross
            deep_out = self.dnn(dnn_input)
            cross_out = self.crossnet(dnn_input)
            stack_out = torch.cat((cross_out, deep_out), dim=-1)
            logit += self.dnn_linear(stack_out)
        elif len(self.dnn_hidden_units) > 0:  # Only Deep
            deep_out = self.dnn(dnn_input)
            logit += self.dnn_linear(deep_out)
        elif self.cross_num > 0:  # Only Cross
            cross_out = self.crossnet(dnn_input)
            logit += self.dnn_linear(cross_out)

        y_pred = self.out(logit)
        return y_pred
Cross 和 Deep 的串联与并联结构比较
对比维度 | 串联结构 | 并联结构 |
---|---|---|
设计复杂度 | 简单,直观,易实现 | 较复杂,需要对特征维度对齐和拼接有更多设计 |
特征交互建模 | 逐层提取,显式交互优先 | 并行建模,显式和隐式交互并重 |
计算效率 | 更高效,计算开销小 | 计算开销大,特别是高维稀疏特征 |
特征信息保留 | 特征在 Cross Network 后可能丢失部分信息 | 输入特征直接进入两条路径,信息无损 |
模型表现 | 适合低阶显式交互为主的任务 | 适合需要复杂高阶交互的任务 |
适用数据规模 | 小规模特征或低维度特征 | 大规模高维稀疏特征 |
鲁棒性 | 难以避免特征交互部分对后续网络的影响 | 路径独立,干扰小,更鲁棒 |
Reference:
1. DCN-V2论文
2. DCN论文地址
3. 视频介绍-wangshusheng
4. tensorflow实现-官方
5. tensorflow实现-官方
6. pytorch实现,deepctr-torch
7. torchrec实现