typedef struct s_NBFeatureNode
{
//特征取值
int nbf_val;
//特征值概率P(Fn|C)
double nbf_pfc;
//特征值出现的个数
int nbf_count;
//下一个特征取值
struct s_NBFeatureNode *next;
} s_NBFeatureNode;
typedef struct s_NBFeature
{
struct s_NBFeatureNode *header;
} s_NBFeature;
typedef struct s_NBayes
{
//样本分类取值
int y;
//样本分类概率
double pc;
//所有特征列表
s_NBFeature *nbfeatures;
} s_NBayes;
s_NBayes *nbayes = NULL;
int nbayes_count = 0;
int nbayes_countfeature = 0;
//计算特征值出现于某分类中的概率
int nb_feature_pc_insert(s_NBFeature *nbf, int x)
{
if (nbf == NULL)
{
return -1;
}
//如果没有取值则作为头节点
if (nbf->header == NULL)
{
s_NBFeatureNode *pnew = malloc(sizeof(s_NBFeatureNode));
//概率值
pnew->nbf_pfc = 0;
//特征取值
pnew->nbf_val = x;
//出现次数
pnew->nbf_count = 1;
pnew->next = NULL;
nbf->header = pnew;
return 0;
}
s_NBFeatureNode *p = nbf->header;
//如果特征值相同
if (p->nbf_val == x)
{
//出现次数加1
p->nbf_count++;
return 0;
}
while (p->next != NULL)
{
//如果特征值相同
if (p->next->nbf_val == x)
{
//出现次数加1
p->next->nbf_count++;
return 0;
}
p = p->next;
}
//没有找到特征值,创建新节点到链表尾
s_NBFeatureNode *pnew = malloc(sizeof(s_NBFeatureNode));
//概率值
pnew->nbf_pfc = 0;
//特征值
pnew->nbf_val = x;
//出现次数
pnew->nbf_count = 1;
pnew->next = NULL;
p->next = pnew;
return 0;
}
//为每个分类的特征计算不同取值的概率
int nb_feature_pc(s_NBFeature *nbf, int y, int ycount, int *sample_x, int csample, int cfeature, int feature)
{
if (nbf == NULL)
{
return -1;
}
if (sample_x == NULL)
{
return -1;
}
//在所有训练样本中计算
for (int i = 0; i < csample; i++)
{
//找到分类值相同的样本
if (sample_x[i * cfeature + (cfeature - 1)] == y)
{
//计算此取值的个数
int x = sample_x[i * cfeature + feature];
nb_feature_pc_insert(nbf, x);
}
}
//通过取值的个数计算概率
s_NBFeatureNode *p = nbf->header;
while (p != NULL)
{
//计算概率
p->nbf_pfc = (double) p->nbf_count / (double) ycount;
p = p->next;
}
return 0;
}
//相互贝叶斯分类器
int nb_probability(int *sample_x, int csample, int cfeature)
{
if (sample_x == NULL)
{
return -1;
}
//计算每一种取值有多少个样本
s_List list;
list_init(&list);
for (int i = 0; i < csample; i++)
{
list_insert(&list, sample_x[i * cfeature + (cfeature - 1)]);
}
s_ListNode *p = list.header;
//计算出有多少种分类
while (p != NULL)
{
nbayes_count++;
p = p->next;
}
//特征数
nbayes_countfeature = cfeature;
//多种分类取值
nbayes = malloc(sizeof(s_NBayes) * nbayes_count);
//计算每种分类概率P(C)
p = list.header;
for (int i = 0; i < nbayes_count && p != NULL; i++)
{
//分类取值
nbayes[i].y = p->key;
//概率P(C)为相同取值样本数除以样本总数
nbayes[i].pc = (double) p->count / (double) csample;
//构建每一个分类的特征取值概率
nbayes[i].nbfeatures = malloc(sizeof(s_NBFeature) * (cfeature - 1));
//计算此分类的每一个特征的每一种取值数及概率
for (int j = 0; j < cfeature - 1; j++)
{
nbayes[i].nbfeatures[j].header = NULL;
nb_feature_pc(&nbayes[i].nbfeatures[j], p->key, p->count, sample_x, csample, cfeature, j);
}
p = p->next;
}
list_destroy(&list);
return 0;
}
//释放资源内存
int nb_distory()
{
if (nbayes == NULL)
{
return -1;
}
for (int i = 0; i < nbayes_count; i++)
{
if (nbayes[i].nbfeatures != NULL)
{
for (int j = 0; j < nbayes_countfeature - 1; j++)
{
if (nbayes[i].nbfeatures[j].header != NULL)
{
s_NBFeatureNode *p = nbayes[i].nbfeatures[j].header;
while (p != NULL)
{
s_NBFeatureNode *pdel = p;
p = p->next;
free(pdel);
}
}
}
free(nbayes[i].nbfeatures);
}
}
free(nbayes);
}
//计算目标样本的概率
double nb_probability_px(int *x, int cfeature)
{
if (x == NULL)
{
return -1;
}
//从所有分类中找出目标分类,并计算概率
for (int i = 0; i < nbayes_count; i++)
{
//计算分类概率
if (x[cfeature - 1] == nbayes[i].y)
{
//分类概率
double px = nbayes[i].pc;
//处理每一个特征
for (int j = 0; j < cfeature - 1; j++)
{
//处理特征的某一个取值
s_NBFeatureNode *p = nbayes[i].nbfeatures[j].header;
int find = 0;
while (p != NULL)
{
//找出特征值并计算概率
if (p->nbf_val == x[j])
{
px *= p->nbf_pfc;
find = 1;
break;
}
p = p->next;
}
//如果没找到概率为0
if (!find)
{
px *= 0;
}
}
return px;
}
}
return -1;
}
Copyright © 2015-2023 问渠网 辽ICP备15013245号