【小白量化机器人】环境提供了全球市场的实时行情和交易接口。我们利用本地大模型进行分析。
一般趋势行情选择趋势策略,震荡行情选择震荡策略。
我们利用爬虫,爬取最近2天的财经新闻。
再利用大模型对每条新闻进行评分。如果新闻整体偏向看多,就使用趋势交易策略。
一、爬取新闻程序演示代码。
python
import requests
from bs4 import BeautifulSoup
import os
from datetime import datetime, timedelta
import re
import sys
import io
# Force UTF-8 on stdout/stderr so Chinese text prints correctly even when
# the console's default encoding is not UTF-8 (e.g. GBK on Windows).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
# Prefer a UTF-8 Chinese locale for file-path handling.
import locale
try:
    locale.setlocale(locale.LC_ALL, 'zh_CN.UTF-8')
except locale.Error:
    # The locale name is platform-dependent and absent on stock Windows;
    # the original call crashed there at import time. File I/O below still
    # passes encoding='utf-8' explicitly, so falling back is safe.
    pass
def create_directory():
    """Create (if needed) and return a directory named after today's date.

    Returns:
        str: path of a ``YYYY-MM-DD`` directory under the current
        working directory.
    """
    today = datetime.now().strftime('%Y-%m-%d')
    directory = os.path.join(os.getcwd(), today)
    # exist_ok avoids the check-then-create race of the original
    # os.path.exists()/os.makedirs() pair.
    os.makedirs(directory, exist_ok=True)
    return directory
def clean_filename(filename):
    """Strip characters illegal in file names and cap the length.

    Args:
        filename: proposed file name (typically a news headline).

    Returns:
        str: the name with Windows-illegal characters removed,
        truncated to at most 100 characters.
    """
    # The original character class missed the double quote, which is
    # also illegal in Windows file names.
    filename = re.sub(r'[<>:"/\\|?*]', '', filename)
    # Keep names well under OS path-length limits.
    return filename[:100]
def _resolve_encoding(response):
    """Best-effort character encoding for an HTTP response.

    Sina pages are often GBK; prefer an explicit ``charset`` header and
    fall back to requests' content-based detection.
    """
    if 'charset' in response.headers:
        return response.headers['charset']
    return response.apparent_encoding


def _extract_title(news_soup, index):
    """Return the article headline (h1, then h2), or a numbered fallback."""
    title = news_soup.find('h1') or news_soup.find('h2')
    if title:
        return title.get_text(strip=True)
    return f"新闻{index}"


def _extract_publish_time(news_soup, news_url):
    """Find a publish timestamp from meta tags, time/span tags, or the URL.

    Returns:
        str | None: text containing a YYYY-MM-DD date, or None.
    """
    meta_time = news_soup.find('meta', {'name': 'publishdate'})
    if meta_time and meta_time.get('content'):
        return meta_time.get('content')
    for tag in news_soup.find_all(['time', 'span'],
                                  class_=['time', 'pubtime', 'publish_time']):
        time_text = tag.get_text(strip=True)
        if re.search(r'\d{4}-\d{2}-\d{2}', time_text):
            return time_text
    date_match = re.search(r'/(\d{4}-\d{2}-\d{2})/', news_url)
    if date_match:
        return date_match.group(1)
    return None


def _extract_content(news_soup):
    """Collect article paragraphs; fall back to any long <p> on the page."""
    content = []
    content_div = news_soup.find('div', class_=['article', 'content', 'main-content'])
    if content_div:
        for p in content_div.find_all('p'):
            text = p.get_text(strip=True)
            if text:
                content.append(text)
    if not content:
        for p in news_soup.find_all('p'):
            text = p.get_text(strip=True)
            if len(text) > 50:  # skip navigation / boilerplate fragments
                content.append(text)
    return content


def _save_article(directory, title_text, news_url, publish_time, content):
    """Write one article to <directory>/<title>.txt and return the filename."""
    filename = clean_filename(title_text) + '.txt'
    file_path = os.path.join(directory, filename)
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(f"标题: {title_text}\n")
        f.write(f"链接: {news_url}\n")
        f.write(f"发布时间: {publish_time}\n")
        f.write(f"爬取时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        f.write("内容:\n")
        f.write('\n'.join(content))
    return filename


def crawl_financial_news():
    """Crawl Sina Finance and keep articles published within the last 2 days.

    Each kept article is saved as a UTF-8 ``.txt`` file in a directory
    named after today's date (see ``create_directory``). Network and
    parse errors are reported per-article and do not abort the crawl.
    """
    url = "https://finance.sina.com.cn/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        response.encoding = _resolve_encoding(response)
        soup = BeautifulSoup(response.text, 'html.parser')
        directory = create_directory()
        # Collect candidate finance-article links (absolute https only).
        news_links = []
        for link in soup.find_all('a', href=True):
            href = link['href']
            if ('/finance/' in href or '/money/' in href) and href.startswith('https://'):
                news_links.append(href)
        news_links = list(set(news_links))  # de-duplicate
        print(f"找到 {len(news_links)} 条新闻链接")
        two_days_ago = (datetime.now() - timedelta(days=2)).date()
        saved_count = 0
        for i, news_url in enumerate(news_links):  # no cap: crawl every link
            try:
                news_response = requests.get(news_url, headers=headers, timeout=10)
                news_response.raise_for_status()
                news_response.encoding = _resolve_encoding(news_response)
                news_soup = BeautifulSoup(news_response.text, 'html.parser')
                title_text = _extract_title(news_soup, i)
                publish_time = _extract_publish_time(news_soup, news_url)
                if not publish_time:
                    continue  # no recognizable date -> cannot apply the 2-day filter
                try:
                    date_str = re.search(r'\d{4}-\d{2}-\d{2}', str(publish_time))
                    if not date_str:
                        continue
                    publish_date = datetime.strptime(date_str.group(0), '%Y-%m-%d').date()
                    if publish_date < two_days_ago:
                        continue  # older than the 2-day window
                    content = _extract_content(news_soup)
                    if title_text and content:
                        filename = _save_article(directory, title_text, news_url,
                                                 publish_time, content)
                        # BUGFIX: the original log line printed a literal
                        # "(unknown)" placeholder instead of the file name.
                        print(f"已保存: {filename} (发布时间: {publish_time})")
                        saved_count += 1
                except Exception as e:
                    print(f"解析日期失败: {publish_time}, 错误: {str(e)}")
            except Exception as e:
                print(f"爬取新闻失败: {news_url}, 错误: {str(e)}")
        print(f"爬取完成,共保存 {saved_count} 条最近2天内的新闻")
    except Exception as e:
        print(f"爬取失败: {str(e)}")
# Script entry point: run the crawler when executed directly.
if __name__ == "__main__":
    crawl_financial_news()
二、本地大模型分析代码。
python
##连接本地deepseek
##先安装ollama模块:pip install ollama
# ollama run deepseek-coder-v2:latest
import ollama
from ollama import chat
import os
import re
from datetime import datetime, timedelta
# Scoring prompt sent to the local LLM (runtime text, kept in Chinese):
# asks for a single 1-10 bullish/bearish score for tomorrow's market,
# >= 6 bullish, <= 5 bearish, with no reasoning in the reply.
规则='''
帮我分析下面新闻,对明天股市有什么影响。对股市多空评分为1-10,6以上看多,5以下看空。只要一个评分,你打多少分?不要思考过程,只要评分。
'''
def extract_score_from_response(response):
    """Extract a 1-10 score from the model's reply text.

    Args:
        response: raw text returned by the LLM.

    Returns:
        int | None: the first standalone number in [1, 10], or None
        when no such number is present.
    """
    # Use digit lookarounds instead of \b: in Unicode mode CJK characters
    # count as word characters, so the original \b pattern failed on
    # replies like "评分:8分" (no word boundary between "8" and "分").
    # "10" is tried before single digits so it is not read as "1".
    match = re.search(r'(?<!\d)(10|[1-9])(?!\d)', response)
    if match:
        return int(match.group(1))
    return None
def analyze_news_content(content):
    """Score one news article with the local LLM.

    Args:
        content: full text of the news article.

    Returns:
        tuple: ``(score, reply)`` where ``score`` is an int in 1-10 or
        None, and ``reply`` is the model's stripped response text
        (both None when the model call fails).
    """
    prompt = 规则 + ' ' + content
    try:
        # Stream the reply and concatenate the chunks into one string.
        stream = chat(
            model='qwen3:8b',
            messages=[{'role': 'user', 'content': prompt}],
            stream=True,
        )
        reply = ''.join(chunk['message']['content'] for chunk in stream)
        return extract_score_from_response(reply), reply.strip()
    except Exception as e:
        print(f"分析新闻时出错: {e}")
        return None, None
def process_news_directory(directory_path):
    """Analyze every ``.txt`` news file in *directory_path* with the LLM.

    Args:
        directory_path: folder containing crawled news files.

    Returns:
        tuple: ``(scores, file_results)`` -- ``scores`` is the list of
        valid int scores; ``file_results`` holds one dict per file with
        keys ``'filename'``, ``'score'`` (int or None) and ``'response'``.
        ``(None, None)`` when the directory is missing or contains no
        ``.txt`` files.
    """
    if not os.path.exists(directory_path):
        print(f"目录不存在: {directory_path}")
        return None, None

    txt_files = [f for f in os.listdir(directory_path) if f.endswith('.txt')]
    print(f"找到 {len(txt_files)} 个新闻文件")
    if not txt_files:
        print("目录中没有找到txt文件")
        return None, None

    scores = []
    file_results = []
    for filename in txt_files:
        file_path = os.path.join(directory_path, filename)
        # BUGFIX: the original printed a literal "(unknown)" placeholder
        # here and in the error branch instead of the file name.
        print(f"\n正在分析: {filename}")
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            score, response = analyze_news_content(content)
            if score is not None:
                scores.append(score)
                print(f"评分: {score}")
            else:
                print(f"无法提取评分,响应: {response}")
            # A single append covers both branches (score may be None).
            file_results.append({
                'filename': filename,
                'score': score,
                'response': response,
            })
        except Exception as e:
            print(f"处理文件 {filename} 时出错: {e}")
            file_results.append({
                'filename': filename,
                'score': None,
                'response': f"错误: {e}",
            })
    return scores, file_results
def calculate_prediction(scores):
    """Average the per-article scores and map them to a market call.

    Args:
        scores: list of int scores in 1-10.

    Returns:
        tuple: ``(average, label)`` -- label is "看多" (avg >= 6),
        "看空" (avg <= 5) or "中性" (in between); ``(None, message)``
        for empty input.
    """
    if not scores:
        return None, "没有有效的评分数据"
    avg_score = sum(scores) / len(scores)
    if avg_score >= 6:
        return avg_score, "看多"
    if avg_score <= 5:
        return avg_score, "看空"
    return avg_score, "中性"
def save_results_to_file(file_results, avg_score, prediction, output_file="分析结果.txt"):
    """Persist the average score for the downstream strategy picker.

    Only the average score is written: the strategy selector parses a
    single float from this file. ``file_results`` and ``prediction``
    are accepted for interface compatibility but not stored.

    Returns:
        bool: True when the file was written, False on error.
    """
    try:
        with open(output_file, 'w', encoding='utf-8') as out:
            out.write(f"{avg_score:.2f}")
        print(f"\n分析结果已保存到: {output_file}")
        return True
    except Exception as e:
        print(f" 保存结果到文件时出错: {e}")
        return False
#微:17578755056
def main():
    """Entry point: analyze today's crawled news and report a forecast."""
    print("开始分析新闻对股市的影响...")
    # News files live in a directory named after today's date
    # (the crawler script creates it).
    news_directory = datetime.now().strftime('%Y-%m-%d')
    scores, file_results = process_news_directory(news_directory)
    if not scores:
        print("\n未能获取有效的评分数据")
        return

    avg_score, prediction = calculate_prediction(scores)
    banner = "=" * 50
    print("\n" + banner)
    print("分析结果汇总:")
    print(banner)
    # Per-file breakdown.
    for result in file_results:
        print(f"\n文件: {result['filename']}")
        if result['score'] is None:
            print("评分: 无法获取")
        else:
            print(f"评分: {result['score']}")
        if result['response']:
            print(f"响应: {result['response'][:100]}...")
    print("\n" + banner)
    print(f"平均评分: {avg_score:.2f}")
    print(f"多空预测: {prediction}")
    print(banner)
    # Human-readable explanation of the call.
    explanations = {
        "看多": "\n预测说明: 多数新闻对股市有积极影响,建议关注投资机会",
        "看空": "\n预测说明: 多数新闻对股市有负面影响,建议谨慎操作",
    }
    print(explanations.get(prediction, "\n预测说明: 新闻影响中性,市场可能维持震荡格局"))
    save_results_to_file(file_results, avg_score, prediction)
# Run the full analysis pipeline when executed as a script.
if __name__ == "__main__":
    main()
程序分析的最终结果,保存到文件【分析结果.txt】中,后面我们根据这个文件的评分,自动选择合适的交易策略。
超越自己是我的每一步!我的进步就是你的进步!