游戏对战数据分析

下载和数据整合部分已经完毕

源数据和说明链接如下:如有密码 9527

百度网盘 请输入提取码

放一部分代码,还没完全写完

复制代码
import requests
from lxml import etree
from urllib import parse
import json
import time
import math
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import logging
import sys
import os
# 1.获取信息阶段
# 1.1 日志
first_url ="https://score.09game.com/MOBA/BasicDataList?UserID=1477944&GameTypeID=21&CurrentSeason=0&GameSource=0&Time=-1&PageIndex=0&PageSize=13"

# 先将本地数据创建一个文件夹存档
if not os.path.exists('d:/goushi'):
    os.makedirs('d:/goushi')
def create_logger(logger_name='zhanji09_logger'):
    """
    日志功能,记录相关信息
    """
    logger = logging.getLogger(name=logger_name)
    logger.setLevel(logging.INFO)
    logger.propagate=False  # 不向上传递
    # 存储用
    handler_file = logging.FileHandler('d:/goushi/zhanji.log',mode='a',encoding='utf-8')
    format_file = logging.Formatter('%(asctime)s|%(levelname)s|%(message)s|%(thread)d'
                                  ,datefmt='%Y-%m-%d %H:%M:%S')
    handler_file.setLevel(logging.ERROR)
    handler_file.setFormatter(format_file)
    # 输出用
    handler_console = logging.StreamHandler(sys.stdout)
    handler_console.setLevel(logging.WARNING)
    format_console = logging.Formatter('%(message)s|%(asctime)s|%(thread)d')
    handler_console.setFormatter(format_console)
    # 要避免重复添加
    if not logger.handlers:
        logger.addHandler(handler_file)
        logger.addHandler(handler_console)
    return logger

logger = create_logger()

# 1.2 获取总共有多少页,多少条数据
def get_total_page(url):
    """获取总共有多少页,向上取证"""
    resp = requests.get(url)
    data = json.loads(resp.text)
    pagetotal = data['data']['pageTotal']
    pagesize = data['data']['pageSize']
    page_num = math.ceil(pagetotal/pagesize)
    print('总共有%d页共计%d条数据'%(page_num,pagetotal))
    return page_num,pagesize
# page_num,pagesize = get_total_page(first_url)


# 1.3 获取每一页的(全满则13个)g_id
page_data_url = "https://score.09game.com/MOBA/BasicDataList?UserID=1477944&GameTypeID=21&CurrentSeason=0&GameSource=0&Time=-1&PageIndex=0&PageSize=13"
file_path = "d:/goushi/data09_less.txt"
def get_gid_base(url, page_num):
    """
    多线程的基本功能,每一页是先存最早的
    每一页如果满的话,有13个数据
    """
    resp = requests.get(url)
    cur_data = json.loads(resp.text)
    g_id = [x['g_id'] for x in cur_data['data']['listEntity']]
    create_time = [x['create_time'] for x in cur_data['data']['listEntity']]
    with open(file_path, mode='a', encoding='utf-8') as f:
        # 按照最早的到最新的顺序存储
        for a, b in zip(g_id[::-1], create_time[::-1]):
            f.write(f"{a}\t{b}\n")
    print(f"第{page_num}页数据已存")

def multi_thread_get_gid(page_num):
    """由于使用多线程,故每一页的顺序不会完全一致"""
    with ThreadPoolExecutor(max_workers=4) as pool:
        for i in range(page_num - 1, -1, -1):
            url = page_data_url.replace("PageIndex=0", "PageIndex={}".format(i))
            pool.submit(get_gid_base, url, i)

# multi_thread_get_gid(page_num)

# 1.4 获取每局bureau表信息
bureau_url_none = "https://score.09game.com/MOBA/GameBureauMessage?GameTypeID=21&GameID="
def get_bureau_base(url,g_id):
    resp = requests.get(url,timeout=5)
    try:
        if resp.status_code == 200:
            cur_data = json.loads(resp.text)
            g_id = cur_data['data'][0]['g_id']
            win_id = cur_data['data'][0]['win_id']
            with open('d:/goushi/data_09_bureau.txt',mode='a') as f:
                f.write(f"{g_id}\t{win_id}\n")
            print(f"{g_id}的bureau数据已存")
    except Exception as e:
        print(e)
        logger.error(f"{g_id}的bureau数据获取失败")
    # finally:
    #     time.sleep(0.5)

def multi_thread_get_bureau():
    with open(file_path,mode='r') as f:
        datas = f.readlines()
    with ThreadPoolExecutor(max_workers=8) as pool:
        for i in datas:
            g_id = i.split("\t")[0]
            url = bureau_url_none.replace("GameID=","GameID={}".format(g_id))
            pool.submit(get_bureau_base, url,g_id)

# multi_thread_get_bureau()
# 1.5 验证两份数据是否等长
def check_data_length():
    df_1 = pd.read_csv(file_path, header=None, sep='\t')
    df_2 = pd.read_csv('d:/goushi/data_09_bureau.txt', header=None, sep='\t')
    if len(df_1)>len(df_2):
        print("bureau数据存在缺失")
        merge_df = pd.merge(df_1, df_2,how='left',left_on=0,right_on=0)
        missed_ids = merge_df[merge_df['1_y'].isnull()][0].values
        for i in missed_ids:
            url = bureau_url_none.replace("GameID=","GameID={}".format(i))
            get_bureau_base(url, i)
    else:
        print("bureau数据完整",f"总共{len(df_1)}条数据")

# check_data_length()

# 1.6获取每局详细数据
correlation_url_none = "https://score.09game.com/MOBA/CorrelationPlayerMilitaryExploit?GameTypeID=21&GameID=&GameSource=0&CurrentSeason=0"


def get_correlation_base(url, g_id):
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code == 200:
            cur_data = json.loads(resp.text)
            with open('d:/goushi/data_09_correlation.txt', mode='a') as f:
                for info in cur_data['data']:
                    user_id = str(info['user_id'])
                    user_name = info['user_name']
                    hero_id = info['hero_id']
                    hero_name = info['hero_name']
                    hero_level = int(info['hero_level'])
                    kill_count = int(info['kill_count'])
                    killed_count = int(info['killed_count'])
                    assist_count = int(info['assist_count'])
                    title = str(info['title'])
                    dust_count = int(info['dust_count'])
                    eye_count = int(info['eye_count'])
                    gem_count = int(info['gem_count'])
                    smoke_count = int(info['smoke_count'])
                    creep_kill = int(info['creep_kill'])
                    creep_denies = int(info['creep_denies'])
                    total_money = int(info['total_money'])
                    hurt_value = int(info['hurt_value'])
                    team_id = str(info['team_id'])
                    neutral_kill = int(info['neutral_kill'])
                    #     items = info['items'] # 装备信息,暂不存储
                    #     skills = info['skills'][:-3] # 去掉最后两个0,暂不存储
                    # 此时一定要将g_id写入,用于两表合并
                    f.write(
                        f"{g_id}\t{user_id}\t{user_name}\t{hero_id}\t{hero_name}\t{hero_level}\t{kill_count}\t{killed_count}\t{assist_count}\t{title}\t{dust_count}\t{eye_count}\t{gem_count}\t{smoke_count}\t{creep_kill}\t{creep_denies}\t{total_money}\t{hurt_value}\t{team_id}\t{neutral_kill}\n")
            print(f"{g_id}的correlation数据已存")
    except Exception as e:
        print(e)
        logger.error(f"{g_id}的correlation数据获取失败")


def multi_thread_get_correlation():
    with open('d:/goushi/data_09_bureau.txt', mode='r') as f:
        datas = f.readlines()
    with ThreadPoolExecutor(max_workers=6) as pool:
        for i in datas:
            g_id = i.split("\t")[0]
            url = correlation_url_none.replace("GameID=", "GameID={}".format(g_id))
            pool.submit(get_correlation_base, url, g_id)

# multi_thread_get_correlation()

# 1.7 补上获取失败的,实测发现偶尔会有数据获取失败的
def check_data_failure():
    missed_datas = pd.read_csv("D:/goushi/zhanji.log"
                               , sep='|'
                               , names=['time', 'type', 'message', 'other'])

    # 用于指定时间范围,需要先将时间列设为索引
    missed_datas['time'] = pd.to_datetime(missed_datas['time'])
    missed_datas.set_index(missed_datas['time'])
    # 输入比较时间范围
    need = missed_datas[missed_datas['time'] > '2025-03-10 18:40:00']
    #     missed_ids = missed_datas.loc[:,'message'].map(lambda x:x.split('的')[0])
    missed_ids = need.loc[:, 'message'].map(lambda x: x.split('的')[0])
    if len(missed_ids) > 0:
        print('存在下载失败的数据%d个' % len(missed_ids))
    for g_id in missed_ids:
        url = correlation_url_none.replace("GameID=", "GameID={}".format(g_id))
        get_correlation_base(url, g_id)
相关推荐
NRatel17 小时前
亚马逊S3的使用简记(游戏资源发布更新)
游戏·unity·amazon s3
天波信息技术分享1 天前
AI 云电竞游戏盒子:从“盒子”到“云-端-芯”一体化竞技平台的架构实践
人工智能·游戏·架构
小杨 想拼1 天前
使用js完成抽奖项目 效果和内容自定义,可以模仿游戏抽奖页面
前端·游戏
qq_546937272 天前
一款开源的远程桌面软件,旨在为用户提供流畅的游戏体验,支持 2K 分辨率、60 FPS,延迟仅为 40ms。
游戏
How_doyou_do2 天前
关于截屏时实现游戏暂停以及本地和上线不同步问题
游戏
李昕壑2 天前
CS2服务器是何方神圣
游戏
wanhengidc2 天前
云手机选哪个比较好用?
服务器·网络·安全·游戏·智能手机
蛊明2 天前
Steam修改游戏安装目录
游戏·steam
李昕壑3 天前
FPS游戏时,你的电脑都在干什么(CS2)
游戏
zaiyang遇见3 天前
【递归完全搜索】CCC 2008 - 24点游戏Twenty-four
算法·游戏·c/c++·全排列·信息学奥赛