游戏对战数据分析

下载和数据整合部分已经完毕

源数据和说明的链接如下(如需提取码,请输入 9527):

百度网盘 请输入提取码

先放一部分代码,尚未全部写完。

复制代码
import requests
from lxml import etree
from urllib import parse
import json
import time
import math
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import logging
import sys
import os
# Stage 1: data acquisition
# 1.1 Logging
# Listing endpoint for page 0; its response also reports the overall totals
# (see get_total_page).
first_url = (
    "https://score.09game.com/MOBA/BasicDataList"
    "?UserID=1477944&GameTypeID=21&CurrentSeason=0"
    "&GameSource=0&Time=-1&PageIndex=0&PageSize=13"
)

# Every data file and the log file live under this folder, so create it up
# front when it is not there yet.
if not os.path.exists('d:/goushi'):
    os.makedirs('d:/goushi')
def create_logger(logger_name='zhanji09_logger'):
    """
    Return the logger called *logger_name*, configuring it on first use.

    The logger level is INFO and propagation to the root logger is disabled.
    Two handlers are attached the first time a given name is requested:

    * a file handler appending to ``d:/goushi/zhanji.log`` (UTF-8) that keeps
      ERROR and above, formatted as ``time|level|message|thread`` -- this
      pipe-separated layout is what ``check_data_failure`` reads back;
    * a console handler on ``sys.stdout`` that shows WARNING and above.

    Calling the function again with the same name returns the same logger
    without creating any new handler.
    """
    logger = logging.getLogger(name=logger_name)
    logger.setLevel(logging.INFO)
    logger.propagate = False  # do not pass records up to the root logger

    # Already configured: stop here. Building the handlers first and only then
    # checking would open the log file again on every call and leave that
    # extra handle unclosed.
    if logger.handlers:
        return logger

    # Persistent record of failures.
    handler_file = logging.FileHandler('d:/goushi/zhanji.log', mode='a', encoding='utf-8')
    handler_file.setLevel(logging.ERROR)
    handler_file.setFormatter(
        logging.Formatter('%(asctime)s|%(levelname)s|%(message)s|%(thread)d',
                          datefmt='%Y-%m-%d %H:%M:%S')
    )

    # Live feedback on the console.
    handler_console = logging.StreamHandler(sys.stdout)
    handler_console.setLevel(logging.WARNING)
    handler_console.setFormatter(logging.Formatter('%(message)s|%(asctime)s|%(thread)d'))

    logger.addHandler(handler_file)
    logger.addHandler(handler_console)
    return logger

# Module-level logger; the download helpers below call logger.error() on it
# to record games whose data could not be fetched.
logger = create_logger()

# 1.2 Work out how many pages and how many records exist in total
def get_total_page(url):
    """Return ``(page_num, pagesize)`` for the paginated listing at *url*.

    The JSON response is expected to carry ``data.pageTotal`` (used here as
    the total number of records) and ``data.pageSize`` (records per page).
    The page count is ``pageTotal / pageSize`` rounded up.

    The request uses the same 5-second timeout as the other download helpers
    in this file, so a stalled connection raises a ``requests`` exception
    instead of blocking forever.
    """
    resp = requests.get(url, timeout=5)
    payload = json.loads(resp.text)['data']
    pagetotal = payload['pageTotal']
    pagesize = payload['pageSize']
    page_num = math.ceil(pagetotal / pagesize)
    print('总共有%d页共计%d条数据'%(page_num,pagetotal))
    return page_num, pagesize
# page_num,pagesize = get_total_page(first_url)


# 1.3 Collect the g_id values of every page (13 per page when a page is full)
page_data_url = "https://score.09game.com/MOBA/BasicDataList?UserID=1477944&GameTypeID=21&CurrentSeason=0&GameSource=0&Time=-1&PageIndex=0&PageSize=13"
file_path = "d:/goushi/data09_less.txt"
def get_gid_base(url, page_num):
    """
    Worker for the thread pool: download one listing page and append it to
    ``file_path``.

    For every entry in ``data.listEntity`` one line ``g_id<TAB>create_time``
    is appended (UTF-8). The entries are written in reverse response order;
    the intent is oldest-first storage, which assumes the API lists the
    newest game first -- TODO confirm.

    url: full listing URL of the page to fetch.
    page_num: page index, used only for the progress message.

    The request has a 5-second timeout (matching the other download helpers)
    so a stalled connection cannot occupy a pool worker indefinitely.
    """
    resp = requests.get(url, timeout=5)
    entries = json.loads(resp.text)['data']['listEntity']
    lines = [f"{entry['g_id']}\t{entry['create_time']}\n" for entry in reversed(entries)]
    with open(file_path, mode='a', encoding='utf-8') as f:
        f.writelines(lines)
    print(f"第{page_num}页数据已存")

def multi_thread_get_gid(page_num):
    """
    Download all listing pages with a pool of 4 threads.

    Pages are submitted from the last index (``page_num - 1``) down to 0.
    Because they run concurrently, the order in which pages reach the output
    file is not guaranteed.

    A page whose download raised would otherwise disappear silently, since an
    exception inside a submitted task is only stored on its future. After the
    pool has finished, every future is checked and each failed page index is
    printed together with its error.
    """
    with ThreadPoolExecutor(max_workers=4) as pool:
        submitted = [
            (
                i,
                pool.submit(
                    get_gid_base,
                    page_data_url.replace("PageIndex=0", "PageIndex={}".format(i)),
                    i,
                ),
            )
            for i in range(page_num - 1, -1, -1)
        ]

    # Leaving the ``with`` block waits for all tasks, so every future is done.
    for page_index, future in submitted:
        error = future.exception()
        if error is not None:
            print(f"第{page_index}页数据获取失败: {error}")

# multi_thread_get_gid(page_num)

# 1.4 Fetch the "bureau" record of every game
bureau_url_none = "https://score.09game.com/MOBA/GameBureauMessage?GameTypeID=21&GameID="
def get_bureau_base(url,g_id):
    """
    Download the bureau record of one game and append it to
    ``d:/goushi/data_09_bureau.txt`` as ``g_id<TAB>win_id``.

    url: bureau endpoint URL with the game id already filled in.
    g_id: the requested game id; used in the failure log entry.

    Never raises. Any failure -- network error or timeout, a non-200 status,
    malformed JSON, an empty ``data`` list -- is printed and logged as
    ``"<g_id>的bureau数据获取失败"``. The request itself sits inside the
    ``try`` so that timeouts are logged rather than lost inside the thread
    pool.
    """
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code != 200:
            # Route unexpected statuses through the common failure path
            # instead of dropping them without a trace.
            raise ValueError(f"HTTP {resp.status_code}")
        record = json.loads(resp.text)['data'][0]
        # Separate local names keep the requested g_id intact for the log.
        fetched_gid = record['g_id']
        win_id = record['win_id']
        with open('d:/goushi/data_09_bureau.txt', mode='a', encoding='utf-8') as f:
            f.write(f"{fetched_gid}\t{win_id}\n")
        print(f"{fetched_gid}的bureau数据已存")
    except Exception as e:  # worker boundary: report and keep the pool going
        print(e)
        logger.error(f"{g_id}的bureau数据获取失败")

def multi_thread_get_bureau():
    """
    Fetch the bureau record of every game listed in ``file_path``.

    The first tab-separated field of each line is taken as the game id, and
    one ``get_bureau_base`` task per line is handed to a pool of 8 threads.
    """
    with open(file_path,mode='r') as f:
        rows = list(f)
    game_ids = [row.split("\t")[0] for row in rows]
    with ThreadPoolExecutor(max_workers=8) as pool:
        for game_id in game_ids:
            target = bureau_url_none.replace("GameID=", f"GameID={game_id}")
            pool.submit(get_bureau_base, target, game_id)

# multi_thread_get_bureau()
# 1.5 Verify that the two data files have the same number of rows
def check_data_length():
    """
    Compare the g_id list with the bureau file and re-fetch what is missing.

    Both files are read as header-less, tab-separated tables. When the g_id
    list has more rows than the bureau file, the two are left-joined on their
    first column; every game id without a bureau match is downloaded again
    through ``get_bureau_base``. Otherwise a "complete" message is printed.
    """
    gid_table = pd.read_csv(file_path, header=None, sep='\t')
    bureau_table = pd.read_csv('d:/goushi/data_09_bureau.txt', header=None, sep='\t')

    if len(gid_table) <= len(bureau_table):
        print("bureau数据完整",f"总共{len(gid_table)}条数据")
        return

    print("bureau数据存在缺失")
    joined = pd.merge(gid_table, bureau_table, how='left', left_on=0, right_on=0)
    # '1_y' is the bureau file's second column after the join; it is null
    # exactly for the rows that found no bureau match.
    unmatched = joined['1_y'].isnull()
    for missing_id in joined[unmatched][0].values:
        retry_url = bureau_url_none.replace("GameID=", "GameID={}".format(missing_id))
        get_bureau_base(retry_url, missing_id)

# check_data_length()

# 1.6 Fetch the detailed per-player statistics of every game
correlation_url_none = "https://score.09game.com/MOBA/CorrelationPlayerMilitaryExploit?GameTypeID=21&GameID=&GameSource=0&CurrentSeason=0"

# Per-player columns in output order (g_id is prepended when a row is built).
_CORRELATION_COLUMNS = (
    'user_id', 'user_name', 'hero_id', 'hero_name', 'hero_level',
    'kill_count', 'killed_count', 'assist_count', 'title', 'dust_count',
    'eye_count', 'gem_count', 'smoke_count', 'creep_kill', 'creep_denies',
    'total_money', 'hurt_value', 'team_id', 'neutral_kill',
)
# Columns that are converted with int() before being written.
_CORRELATION_INT_COLUMNS = frozenset((
    'hero_level', 'kill_count', 'killed_count', 'assist_count', 'dust_count',
    'eye_count', 'gem_count', 'smoke_count', 'creep_kill', 'creep_denies',
    'total_money', 'hurt_value', 'neutral_kill',
))


def _format_correlation_row(g_id, info):
    """Return one tab-separated, newline-terminated line for a player record."""
    cells = [str(g_id)]
    for column in _CORRELATION_COLUMNS:
        value = info[column]
        if column in _CORRELATION_INT_COLUMNS:
            value = int(value)
        cells.append(str(value))
    return "\t".join(cells) + "\n"


def get_correlation_base(url, g_id):
    """
    Download the per-player statistics of one game and append them to
    ``d:/goushi/data_09_correlation.txt``.

    One line is written per entry of the response's ``data`` list. Each line
    starts with *g_id* (needed later to join with the bureau table), followed
    by the columns in ``_CORRELATION_COLUMNS``. The ``items`` and ``skills``
    fields of the response are not stored.

    url: correlation endpoint URL with the game id already filled in.
    g_id: game id written into every row and used in the failure log entry.

    Never raises. Any failure -- network error or timeout, a non-200 status,
    malformed JSON, a missing or non-numeric field -- is printed and logged
    as ``"<g_id>的correlation数据获取失败"`` so that ``check_data_failure``
    can retry it.
    """
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code != 200:
            # Send unexpected statuses down the failure path so the game is
            # logged (and can be retried) rather than skipped silently.
            raise ValueError(f"HTTP {resp.status_code}")
        players = json.loads(resp.text)['data']
        # Build every row before touching the file: a malformed record then
        # fails the whole game instead of leaving part of it on disk, which a
        # later retry would duplicate.
        rows = [_format_correlation_row(g_id, info) for info in players]
        # UTF-8 is pinned (as for the g_id list file) because the rows carry
        # player and hero names; the platform default encoding may not be
        # able to represent them.
        with open('d:/goushi/data_09_correlation.txt', mode='a', encoding='utf-8') as f:
            f.write("".join(rows))
        print(f"{g_id}的correlation数据已存")
    except Exception as e:  # worker boundary: report and keep the pool going
        print(e)
        logger.error(f"{g_id}的correlation数据获取失败")


def multi_thread_get_correlation():
    """
    Fetch the per-player statistics of every game in the bureau file.

    Each line of ``d:/goushi/data_09_bureau.txt`` contributes its first
    tab-separated field as the game id; one ``get_correlation_base`` task per
    line is submitted to a pool of 6 threads.
    """
    with open('d:/goushi/data_09_bureau.txt', mode='r') as f:
        bureau_rows = f.readlines()
    with ThreadPoolExecutor(max_workers=6) as pool:
        for row in bureau_rows:
            game_id = row.partition("\t")[0]
            target = correlation_url_none.replace("GameID=", f"GameID={game_id}")
            pool.submit(get_correlation_base, target, game_id)

# multi_thread_get_correlation()

# 1.7 Retry failed downloads; in practice a fetch fails every now and then
def check_data_failure(since='2025-03-10 18:40:00'):
    """
    Re-download the correlation data of games whose download was logged as
    failed.

    The log file is read as a ``|``-separated table with the columns
    ``time``, ``type``, ``message`` and ``other``. Rows newer than *since*
    whose message is a correlation failure are selected; the game id is the
    part of the message before the first ``的``. Each distinct game id is
    retried once through ``get_correlation_base``.

    since: only log rows with a timestamp later than this are considered.
        Anything comparable with a pandas datetime column is accepted; the
        default is the previously hard-coded cut-off.

    Bureau failures live in the same log file but are skipped here, because
    retrying them against the correlation endpoint is not the matching fix.
    """
    # Same file the logger writes to (path casing aligned with the writer).
    log_rows = pd.read_csv('d:/goushi/zhanji.log',
                           sep='|',
                           names=['time', 'type', 'message', 'other'])
    log_rows['time'] = pd.to_datetime(log_rows['time'])

    recent = log_rows[log_rows['time'] > since]
    # astype(str) keeps a malformed row (missing message) from breaking the
    # string handling below; such a row simply does not match.
    messages = recent['message'].astype(str)
    correlation_failures = messages[messages.str.contains('的correlation', regex=False)]
    # A game can fail more than once; retry it a single time.
    missed_ids = correlation_failures.map(lambda x: x.split('的')[0]).drop_duplicates()

    if len(missed_ids) > 0:
        print('存在下载失败的数据%d个' % len(missed_ids))
    for g_id in missed_ids:
        url = correlation_url_none.replace("GameID=", "GameID={}".format(g_id))
        get_correlation_base(url, g_id)
相关推荐
xiezhr3 天前
米哈游36岁程序员被曝复工当晚猝死出租屋内
游戏·程序员·游戏开发
爱搞虚幻的阿恺6 天前
Niagara粒子系统-超炫酷的闪电特效(加餐 纸牌螺旋上升效果)
游戏·游戏引擎
智算菩萨7 天前
儿童游乐空间的双维建构:室内淘气堡与室外亲子乐园的发展学理、功能分野与协同育人机制研究
游戏·游戏策划
marteker7 天前
房地产市场平台Zillow与《魔兽世界》合作展示游戏内房屋
游戏
wanhengidc7 天前
云手机 打造云端算力
运维·服务器·网络·游戏·智能手机
henry1010107 天前
DeepSeek生成的HTML5小游戏 -- 黑8台球
前端·javascript·css·游戏·html
yuanmenghao7 天前
从零开始:使用 Claude Code 打造字母消除游戏
游戏·glm·claudecode
阿甘编程点滴8 天前
2026年推荐以下5款游戏直播提词器
游戏
PieroPc8 天前
HTML5 Canvas 平台跳跃游戏
前端·游戏·html5
Swift社区8 天前
LeetCode 390 消除游戏 - Swift 题解
leetcode·游戏·swift