import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from flask import Flask, render_template, jsonify
import json
# Download the NLTK resources this module relies on: the tokenizer models for
# word_tokenize, the stop-word lists, and the WordNet data for lemmatization.
# NOTE(review): recent NLTK releases may additionally require the 'punkt_tab'
# resource for word_tokenize — confirm against the installed version.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
# Trained models; both stay None until train_classifier() / train_scorer() run.
classifier = None
scorer = None

# Activity categories and, per category, the points awarded for each keyword
# found in an activity description.
categories = ['工作', '生活']
score_criteria = {
    '工作': {
        '完成任务': 10,
        '解决问题': 5,
        '其他': 1,
    },
    '生活': {
        '锻炼': 5,
        '学习': 3,
        '其他': 1,
    },
}

# Running score total and the level derived from it.
base_score = 0
level = '初级'

# In-memory log of (activity, category, score) entries and the list of scores.
daily_log = []
scores = []
# Shared TF-IDF vectorizer: fitted by the training functions and reused by
# classify_activity() to transform new activities.
# NOTE(review): the default word-level analyzer will presumably treat an
# unsegmented Chinese phrase as a single token — confirm this suits the data.
vectorizer = TfidfVectorizer()

# WordNet lemmatizer used by preprocess_text().
lemmatizer = WordNetLemmatizer()
def train_classifier() -> None:
    """Train the activity classifier from the per-category log files.

    For every entry in ``categories`` the file ``<category>_log.txt`` is read
    and each non-blank line becomes one training activity labelled with that
    category. The activities are preprocessed, vectorized with the shared
    ``vectorizer`` (which is fitted here) and used to fit a ``MultinomialNB``
    model, which is then stored in the module-level ``classifier``.

    Raises:
        OSError: If one of the category log files cannot be opened.
    """
    global classifier

    training_data = []
    training_labels = []
    for category in categories:
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(f'{category}_log.txt', 'r', encoding='utf-8') as file:
            # Skip blank lines: they would otherwise become empty samples.
            activities = [line.strip() for line in file if line.strip()]
        training_data.extend(activities)
        training_labels.extend([category] * len(activities))

    processed_data = preprocess_text(training_data)

    # MultinomialNB accepts the sparse TF-IDF matrix directly, so there is no
    # need to densify it.
    features = vectorizer.fit_transform(processed_data)

    model = MultinomialNB()
    model.fit(features, training_labels)
    # Publish the model only after fitting succeeded.
    classifier = model
def train_scorer() -> None:
    """Train the scoring model from the per-category log files.

    For every entry in ``categories`` the file ``<category>_log.txt`` is read
    and each non-blank line becomes one training activity whose target is the
    value returned by ``score_activity`` for that line and category. The
    fitted ``MultinomialNB`` model is stored in the module-level ``scorer``.

    Raises:
        OSError: If one of the category log files cannot be opened.
    """
    global scorer

    training_data = []
    training_scores = []
    for category in categories:
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(f'{category}_log.txt', 'r', encoding='utf-8') as file:
            # Skip blank lines: they would otherwise become empty samples.
            activities = [line.strip() for line in file if line.strip()]
        training_data.extend(activities)
        training_scores.extend(
            score_activity(activity, category) for activity in activities
        )

    processed_data = preprocess_text(training_data)

    # NOTE(review): this re-fits the shared vectorizer that train_classifier()
    # also fits; the two presumably stay compatible only while both read the
    # same log files — confirm, or give the scorer its own vectorizer.
    # MultinomialNB accepts the sparse TF-IDF matrix directly.
    features = vectorizer.fit_transform(processed_data)

    model = MultinomialNB()
    model.fit(features, training_scores)
    # Publish the model only after fitting succeeded.
    scorer = model
def preprocess_text(data):
    """Normalize raw texts before vectorization.

    Each text is lower-cased and tokenized; tokens that are not alphanumeric
    or that are English stop words are dropped; the remaining tokens are
    lemmatized and re-joined with single spaces.

    Args:
        data: Iterable of strings to process.

    Returns:
        A list containing one processed string per input text, in input order.
    """
    # Build the stop-word set once instead of once per text.
    stop_words = set(stopwords.words('english'))

    processed_data = []
    for text in data:
        tokens = word_tokenize(text.lower())
        kept_tokens = [
            token for token in tokens
            if token.isalnum() and token not in stop_words
        ]
        processed_data.append(
            ' '.join(lemmatizer.lemmatize(token) for token in kept_tokens)
        )
    return processed_data
def classify_activity(activity: str) -> str:
    """Predict the category of a single activity description.

    Args:
        activity: Free-text description of the activity.

    Returns:
        The label predicted by the trained ``classifier``.

    Raises:
        RuntimeError: If the classifier has not been trained yet.
    """
    if classifier is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError(
            'classifier is not trained; call train_classifier() first'
        )

    processed_activity = preprocess_text([activity])
    # The classifier accepts the sparse matrix directly; no densifying needed.
    features = vectorizer.transform(processed_activity)
    return classifier.predict(features)[0]
def score_activity(activity, category, criteria=None):
    """Compute the score of an activity from keyword matches.

    Every keyword of the given category that occurs as a substring of
    ``activity`` contributes its points; the contributions are summed.

    Args:
        activity: Free-text description of the activity.
        category: Category whose keyword table is applied.
        criteria: Optional mapping of category -> {keyword: points}. Defaults
            to the module-level ``score_criteria``.

    Returns:
        The total points of all matching keywords (0 when none match).

    Raises:
        KeyError: If ``category`` is not present in the criteria mapping.
    """
    table = score_criteria if criteria is None else criteria
    return sum(
        points
        for word, points in table[category].items()
        if word in activity
    )
def log_daily_activity(activity: str) -> None:
    """Classify, score and record one activity.

    The activity is classified with ``classify_activity`` and scored with
    ``score_activity``. The score is added to the module-level ``base_score``
    and ``level`` is raised to '中级' at 50 points or '高级' at 100 points;
    below 50 the level is left as it is. Finally the entry is appended to
    ``daily_log`` and its score to ``scores``.

    Args:
        activity: Free-text description of the activity.
    """
    global base_score, level

    category = classify_activity(activity)
    score = score_activity(activity, category)

    base_score += score
    # Check the higher threshold first so 100+ is not reported as '中级'.
    if base_score >= 100:
        level = '高级'
    elif base_score >= 50:
        level = '中级'

    daily_log.append((activity, category, score))
    scores.append(score)
# Create the Flask application. Flask expects the import name of the module,
# i.e. __name__, which it uses to locate templates and static files.
app = Flask(__name__)
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    return render_template('index.html')
@app.route('/simulate')
def simulate():
    """Log a fixed set of sample activities and report the running totals.

    Each request logs the same five activities again via
    ``log_daily_activity``, so the score accumulates across requests.

    Returns:
        A JSON response with the current ``base_score`` and ``level``.
    """
    sample_activities = [
        '完成工作任务',
        '锻炼身体',
        '解决问题',
        '学习新知识',
        '其他活动',
    ]
    for activity in sample_activities:
        log_daily_activity(activity)

    # The globals are only read here; log_daily_activity() performs the
    # updates, so no ``global`` declaration is needed.
    return jsonify({'base_score': base_score, 'level': level})
if __name__ == '__main__':
    # Train both models before serving requests; classify_activity() needs
    # the fitted vectorizer and classifier.
    train_classifier()
    train_scorer()

    # NOTE(review): debug=True enables Flask's interactive debugger and
    # reloader; keep it for local development only.
    app.run(debug=True)