202403-02-相似度计算 csp认证

其实这个问题就是求两篇文章的词汇的交集和并集，首先一说到并集，我就想到了set集合数据结构，set中的元素必须唯一。
STL之set的基本使用--博客参考

所以将两个文章的词汇全部加入set中，并求出set的大小，即为并集的大小。

cpp 复制代码

#include <iostream>
#include <string>
#include <set>

using namespace std;

void toupper(string &str)
{
    for (int i = 0; i < str.size(); i++)
    {
        if (str[i] >= 'a' && str[i] <= 'z')
        {
            str[i] = str[i] - ('a' - 'A');
        }
    }
}

int main() {
    int n, m;
    cin >> n >> m;
    
    string word;
    set<string> first_set;  // 存储第一篇文章的单词
    set<string> union_set;  // 存储并集
    
    // 读取第一篇文章
    for(int i = 0; i < n; i++){
        cin >> word;
        toupper(word);
        first_set.insert(word);
        union_set.insert(word);
    } 
    
    int intersection = 0;  // 交集数量
    
    // 读取第二篇文章
    for(int i = 0; i < m; i++){
        cin >> word;
        toupper(word);
        
        // 判断是否在第一篇文章中出现过
        if(first_set.find(word) != first_set.end()){
            intersection++;
            first_set.erase(first_set.find(word)); // 在第一篇文章的set中删除，这样第二篇文章中出现 连续两个the的时候只会统计一次 
        }
        
        // 加入并集
        union_set.insert(word);
    } 
    
    // 输出结果
    cout << "交集数量: " << intersection << endl;
    cout << "并集数量: " << union_set.size() << endl;
    
    system("pause");  // 仅用于调试环境，提交代码时建议移除
    return 0;
}

但是我一开始选用的是unordered_map。。。我也不知道为什么、

cpp 复制代码

#include <iostream>
#include <string>
#include <unordered_map>
using namespace std;
void toUpperCase(string &str)
{
    for (int i = 0; i < str.size(); i++)
    {
        if (char(str[i]) >= 'a' && char(str[i])  'z')
        {
            str[i] = str[i] - (char('a') - 'A');
        }
    }
}

void PrintMap(const unordered_map<string, int>& rd)
{
	cout << "------------------" << endl;
    for (auto i : word)
    {
        cout << i.first << " " << i.second << dl;
    }
    
}
int main()
{
    int n, m; // 两篇文章的单词个数
    cin >> n >> m;
    string word;
    unordered_map<string, int> nword;
    unordered_map<string, int> mword;
    
    unordered_map<string, int> mixed;
    for (int i = 0; i < n; i++)
    {
        cin >> word;
        toUpperCase(word);
        nword[word] = 1;
        mixed[word]++;
    }
    
    int sum = 0; // 并集数量 
    for (int i = 0; i < m; i++)
    {
        cin >> word;
        toUpperCase(word);
        mixed[word]++;
        if(nword[word] > 0 && mword[word] == 0){ // 如果在第一篇文章已经存在 而且是第二篇文章第一次读取到 
        	sum++;
		}
		mword[word] = 1;
    }
    PrintMap(nword), PrintMap(mword);
    
    PrintMap(mixed);
    cout << sum << endl;
    cout << mixed.size() << endl;
    
    system("pause");
    return 0;
}

//更加节省空间的方法 
#include <iostream>
#include <string>
#include <unordered_map>
using namespace std;
void toUpperCase(string &str)
{
    for (int i = 0; i < str.size(); i++)
    {
        if (char(str[i]) >= 'a' && char(str[i]) <= 'z')
        {
            str[i] = str[i] - (char('a') - 'A');
        }
    }
}

void PrintMap(const unordered_map<string, int>& word)
{
	cout << "------------------" << endl;
    for (auto i : word)
    {
        cout << i.first << " " << i.second << endl;
    }
    
}
int main()
{
    int n, m; // 两篇文章的单词个数
    cin >> n >> m;
    string word;    
    unordered_map<string, int> mixed;
    for (int i = 0; i < n; i++)
    {
        cin >> word;
        toUpperCase(word);
        mixed[word] = 1; // 表示在两篇文章中第一篇出现 
    }
    
    int sum = 0; // 并集数量 
    for (int i = 0; i < m; i++)
    {
        cin >> word;
        toUpperCase(word);
        if(mixed[word] == 1){
        	sum++;
        	mixed[word] = 2; // 表现在第二篇出现 
		} 
    }
    
    PrintMap(mixed);
    cout << sum << endl;
    cout << mixed.size() << endl;
    
    return 0;
}