Fisher_Score分数计算

Fisher_Score 计算

自己实现的代码

matlab 复制代码
function W = fsFisher(data)
	% FSFISHER  Fisher Score of every feature in a labelled data matrix.
	%
	% Input:
	%	data: matrix whose last column holds the class label of each row;
	%	      every preceding column is one feature.
	% Output:
	%	W: 1-by-numF row vector, W(i) is the Fisher Score of the i-th
	%	   feature, i.e. the count-weighted squared deviation of the class
	%	   means from the overall mean, divided by the count-weighted
	%	   within-class (population) variance.

	labels = data(:, end);          % label column
	features = data(:, 1:end-1);    % sample data without the label column
	classes = unique(labels);       % distinct class labels actually present
	nClasses = length(classes);

	[~, nFeatures] = size(features);

	% Row indices and sample count of every class
	members = cell(nClasses, 1);
	counts = zeros(nClasses, 1);
	for c = 1:nClasses
		members{c} = find(labels(:) == classes(c));
		counts(c) = length(members{c});
	end

	% Scores default to 0, which is also the value used when the class
	% means do not differ at all.
	W = zeros(1, nFeatures);

	for k = 1:nFeatures
		column = features(:, k);
		overallMean = mean(column);

		between = 0;    % between-class scatter
		within = 0;     % within-class scatter
		for c = 1:nClasses
			classValues = column(members{c});
			between = between + counts(c) * (mean(classValues) - overallMean)^2;
			% second argument 1 -> variance normalised by N, not N-1
			within = within + counts(c) * var(classValues, 1);
		end

		if between == 0
			W(k) = 0;
		elseif within == 0
			% classes differ but have no internal spread: the ratio would
			% be infinite, so a fixed score of 100 is used instead
			W(k) = 100;
		else
			W(k) = between / within;
		end
	end
end

Feature Selection Package 中的原始 MATLAB 代码如下

matlab 复制代码
function [out] = fsFisher(X,Y)
%Fisher Score, use the N var formulation
%   X, the data, each row is an instance
%   Y, the class label of each instance. Labels may be any distinct
%      numeric values (1 2 3 ..., 0/1, -1/+1, non-contiguous ids);
%      NaN entries are not treated as a class.
%
%   out.W     - 1-by-numF vector, out.W(i) is the Fisher Score of feature i
%   out.fList - feature indices ordered by descending score
%   out.prf   - always 1 (NOTE(review): presumably tells callers that a
%               larger score means a more relevant feature - confirm)

% Take the classes from the labels actually present instead of assuming
% they are exactly 1..max(Y): an absent id would give mean([]) = NaN and
% turn every score into NaN, and ids below 1 would be silently ignored.
classLabels = unique(Y(:));
classLabels = classLabels(~isnan(classLabels));
numC = numel(classLabels);
[~, numF] = size(X);
out.W = zeros(1,numF);

% statistic for classes
cIDX = cell(numC,1);
n_i = zeros(numC,1);
for j = 1:numC
    cIDX{j} = find(Y(:)==classLabels(j));
    n_i(j) = length(cIDX{j});
end

% calculate score for each features
for i = 1:numF
    temp1 = 0;  % between-class scatter: sum_j n_j * (mean_j - mean)^2
    temp2 = 0;  % within-class scatter:  sum_j n_j * var_j
    f_i = X(:,i);
    u_i = mean(f_i);

    for j = 1:numC
        u_cj = mean(f_i(cIDX{j}));
        var_cj = var(f_i(cIDX{j}),1);   % weight 1 -> normalise by N
        temp1 = temp1 + n_i(j) * (u_cj-u_i)^2;
        temp2 = temp2 + n_i(j) * var_cj;
    end

    if temp1 == 0
        % class means coincide: the feature does not separate the classes
        out.W(i) = 0;
    else
        if temp2 == 0
            % no within-class spread: ratio is unbounded, use a fixed score
            out.W(i) = 100;
        else
            out.W(i) = temp1/temp2;
        end
    end
end

[~, out.fList] = sort(out.W, 'descend');
out.prf = 1;

Bibtex 引用

复制代码
@BOOK{Duda-etal01,
   title = {Pattern Classification},
   publisher = {John Wiley \& Sons, New York},
   year = {2001},
   author = {Duda, R.O. and Hart, P.E. and Stork, D.G.},
   edition = {2},
}

来源:Feature Selection Package - Algorithms - Fisher Score

相关推荐
一条破秋裤13 小时前
一份多光谱数据分析
笔记·数据挖掘·数据分析
cal_14 小时前
数据分析中的拉链表解析
大数据·数据挖掘·数据分析
九章云极AladdinEdu15 小时前
华为昇腾NPU与NVIDIA CUDA生态兼容层开发实录:手写算子自动转换工具链(AST级代码迁移方案)
人工智能·深度学习·opencv·机器学习·华为·数据挖掘·gpu算力
王小王-12316 小时前
基于Python的程序员数据分析与可视化系统的设计与实现
python·数据挖掘·数据分析·招聘数据分析·程序员数据分析·招聘薪资数据分析·智联招聘可视化
Deng94520131416 小时前
基于数据挖掘的课程推荐系统研究
人工智能·数据挖掘·数据预处理·基于用户的协同过滤·文本特征提取
kngines1 天前
【力扣(LeetCode)】数据挖掘面试题0002:当面对实时数据流时您如何设计和实现机器学习模型?
机器学习·数据挖掘·面试题·实时数据
HyperAI超神经1 天前
OmniGen2 多模态推理×自我纠正双引擎,引领图像生成新范式;95 万分类标签!TreeOfLife-200M 解锁物种认知新维度
人工智能·数据挖掘·数据集·图像生成·医疗健康·在线教程·数学代码
DesolateGIS1 天前
数学建模:非线性规划:凸规划问题
数学建模·matlab
Monkey的自我迭代1 天前
Python标准库:时间与随机数全解析
前端·python·数据挖掘
kngines1 天前
【力扣(LeetCode)】数据挖掘面试题0003: 356. 直线镜像
leetcode·数据挖掘·直线镜像·对称轴