游戏对战数据分析

下载和数据整合部分已经完毕

源数据和说明的链接如下(如需提取码,请输入 9527):

百度网盘 请输入提取码

先放一部分代码,还没有全部写完。

复制代码
import requests
from lxml import etree
from urllib import parse
import json
import time
import math
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import logging
import sys
import os
# 1. Data acquisition stage
# 1.1 Logging
# URL of the first list page (PageIndex=0); the commented-out call below
# get_total_page passes it in to read the paging information.
first_url = "https://score.09game.com/MOBA/BasicDataList?UserID=1477944&GameTypeID=21&CurrentSeason=0&GameSource=0&Time=-1&PageIndex=0&PageSize=13"

# Create the local folder that stores the downloaded data and the log file.
# exist_ok=True makes this a no-op when the folder is already present and
# avoids the check-then-create race of os.path.exists() + os.makedirs().
os.makedirs('d:/goushi', exist_ok=True)
def create_logger(logger_name='zhanji09_logger'):
    """
    Return the named logger, attaching its file and console handlers once.

    Records at ERROR and above are appended to d:/goushi/zhanji.log as
    ``time|level|message|thread``; records at WARNING and above are echoed to
    stdout. Calling the function again with the same name returns the same
    logger without creating new handlers.

    Args:
        logger_name: Name passed to ``logging.getLogger``.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(name=logger_name)
    logger.setLevel(logging.INFO)
    logger.propagate = False  # do not pass records up to ancestor loggers

    # Handlers are only built when the logger has none. Building them first
    # and discarding them afterwards would open the log file on every call
    # and leave that extra file object unclosed.
    if logger.handlers:
        return logger

    # File handler: persistent record of failures.
    handler_file = logging.FileHandler('d:/goushi/zhanji.log', mode='a', encoding='utf-8')
    format_file = logging.Formatter('%(asctime)s|%(levelname)s|%(message)s|%(thread)d',
                                    datefmt='%Y-%m-%d %H:%M:%S')
    handler_file.setLevel(logging.ERROR)
    handler_file.setFormatter(format_file)

    # Console handler: live feedback on stdout.
    handler_console = logging.StreamHandler(sys.stdout)
    handler_console.setLevel(logging.WARNING)
    format_console = logging.Formatter('%(message)s|%(asctime)s|%(thread)d')
    handler_console.setFormatter(format_console)

    logger.addHandler(handler_file)
    logger.addHandler(handler_console)
    return logger

# Module-level logger used by get_bureau_base and get_correlation_base to
# record failed downloads.
logger = create_logger()

# 1.2 Find out how many pages and how many records there are in total
def get_total_page(url):
    """Return the number of list pages (rounded up) and the page size.

    Args:
        url: List endpoint URL whose JSON response carries
            ``data.pageTotal`` and ``data.pageSize``.

    Returns:
        Tuple ``(page_num, pagesize)`` where ``page_num`` is
        ``ceil(pageTotal / pageSize)``.

    Raises:
        requests.RequestException: on timeout, connection failure or an HTTP
            error status.
    """
    # Same 5 s timeout as the other fetchers in this file, so a stalled
    # connection cannot block the script indefinitely.
    resp = requests.get(url, timeout=5)
    resp.raise_for_status()
    data = json.loads(resp.text)
    # NOTE(review): pageTotal is treated as the total record count here (see
    # the printed message) - confirm against the API.
    pagetotal = data['data']['pageTotal']
    pagesize = data['data']['pageSize']
    page_num = math.ceil(pagetotal / pagesize)
    print('总共有%d页共计%d条数据' % (page_num, pagetotal))
    return page_num, pagesize
# page_num,pagesize = get_total_page(first_url)


# 1.3 Fetch the g_id values of every page (13 per page when the page is full)
# URL template for one list page; multi_thread_get_gid substitutes the page
# index for "PageIndex=0".
page_data_url = "https://score.09game.com/MOBA/BasicDataList?UserID=1477944&GameTypeID=21&CurrentSeason=0&GameSource=0&Time=-1&PageIndex=0&PageSize=13"
# File that receives the tab-separated g_id / create_time pairs.
file_path = "d:/goushi/data09_less.txt"
def get_gid_base(url, page_num):
    """
    Download one list page and append its g_id / create_time pairs to file_path.

    This is the unit of work run by each worker thread. A full page holds 13
    entries. Entries are written in the reverse of the order in which the
    response lists them (the original note says this stores the earliest
    first - presumably the API returns newest first; confirm).

    Args:
        url: URL of the list page to download.
        page_num: Page index, used only in the progress message.

    Raises:
        requests.RequestException: on timeout or connection failure.
        KeyError / ValueError: when the response is not the expected JSON.
    """
    # Timeout added so a stalled connection cannot hang a worker thread (and
    # with it the pool shutdown) forever.
    resp = requests.get(url, timeout=5)
    entries = json.loads(resp.text)['data']['listEntity']
    # Build every line before touching the file so that a malformed entry
    # cannot leave a partially written page behind.
    lines = [f"{entry['g_id']}\t{entry['create_time']}\n" for entry in reversed(entries)]
    with open(file_path, mode='a', encoding='utf-8') as f:
        f.writelines(lines)
    print(f"第{page_num}页数据已存")

def multi_thread_get_gid(page_num):
    """Download every list page with a pool of 4 worker threads.

    Pages are submitted from the last index down to 0. Because they are
    fetched concurrently, the order in which pages reach the output file is
    not guaranteed to match the submission order.

    Args:
        page_num: Total number of pages; indices ``page_num - 1`` .. ``0``
            are downloaded.
    """
    futures = {}
    with ThreadPoolExecutor(max_workers=4) as pool:
        for i in range(page_num - 1, -1, -1):
            url = page_data_url.replace("PageIndex=0", "PageIndex={}".format(i))
            futures[i] = pool.submit(get_gid_base, url, i)
    # Leaving the with-block waits for all tasks. An exception raised inside
    # a worker is stored on its future and would otherwise vanish silently,
    # so report each failed page here.
    for page, future in futures.items():
        error = future.exception()
        if error is not None:
            print(f"第{page}页数据获取失败: {error}")

# multi_thread_get_gid(page_num)

# 1.4 Fetch the bureau record of every game
# Bureau endpoint URL with an empty GameID; callers insert the game id
# after "GameID=".
bureau_url_none = "https://score.09game.com/MOBA/GameBureauMessage?GameTypeID=21&GameID="
def get_bureau_base(url, g_id):
    """Download the bureau record of one game and append it to the bureau file.

    On success one ``g_id<TAB>win_id`` line (taken from the first element of
    the response's ``data`` list) is appended to
    d:/goushi/data_09_bureau.txt. Any failure - network error, timeout,
    non-200 status or unexpected payload - is logged through ``logger`` and
    does not propagate, so one bad game cannot stop a batch.

    Args:
        url: Bureau endpoint URL for this game.
        g_id: Game id, used in the failure log message.
    """
    try:
        # The request sits inside the try block so that timeouts and
        # connection errors are logged like every other failure.
        resp = requests.get(url, timeout=5)
        if resp.status_code != 200:
            print(f"{g_id}: HTTP {resp.status_code}")
            logger.error(f"{g_id}的bureau数据获取失败")
            return
        record = json.loads(resp.text)['data'][0]
        saved_id = record['g_id']
        win_id = record['win_id']
        with open('d:/goushi/data_09_bureau.txt', mode='a') as f:
            f.write(f"{saved_id}\t{win_id}\n")
        print(f"{saved_id}的bureau数据已存")
    except Exception as e:
        print(e)
        # The message format is parsed later by check_data_failure (text
        # before "的" is the game id), so keep it unchanged.
        logger.error(f"{g_id}的bureau数据获取失败")

def multi_thread_get_bureau():
    """Fetch the bureau record of every game listed in file_path.

    Each line of file_path contributes the text before its first tab as the
    game id; the downloads run on a pool of 8 worker threads.
    """
    with open(file_path, mode='r') as source:
        rows = list(source)
    with ThreadPoolExecutor(max_workers=8) as executor:
        for row in rows:
            game_id = row.partition("\t")[0]
            target = bureau_url_none.replace("GameID=", f"GameID={game_id}")
            executor.submit(get_bureau_base, target, game_id)

# multi_thread_get_bureau()
# 1.5 Verify that every downloaded g_id has a bureau record
def check_data_length():
    """Re-download the bureau record of every game that has none yet.

    Reads the g_id list (file_path) and the bureau file, both tab-separated
    without a header, and calls get_bureau_base for each g_id that does not
    appear in the bureau file. Prints whether anything was missing.
    """
    df_1 = pd.read_csv(file_path, header=None, sep='\t')
    df_2 = pd.read_csv('d:/goushi/data_09_bureau.txt', header=None, sep='\t')
    # Compare by membership rather than by row count: the bureau file is
    # append-only, so retries and repeated runs can leave duplicate rows that
    # make the two lengths match even though some games are still missing.
    missed_ids = df_1.loc[~df_1[0].isin(df_2[0]), 0].drop_duplicates().values
    if len(missed_ids) > 0:
        print("bureau数据存在缺失")
        for i in missed_ids:
            url = bureau_url_none.replace("GameID=", "GameID={}".format(i))
            get_bureau_base(url, i)
    else:
        print("bureau数据完整", f"总共{len(df_1)}条数据")

# check_data_length()

# 1.6 Fetch the detailed per-player data of every game
# Correlation endpoint URL with an empty GameID; callers insert the game id
# after "GameID=".
correlation_url_none = "https://score.09game.com/MOBA/CorrelationPlayerMilitaryExploit?GameTypeID=21&GameID=&GameSource=0&CurrentSeason=0"


# Output columns after the leading g_id, in file order. The second item is
# the conversion applied to the raw JSON value (None = written as received).
_CORRELATION_COLUMNS = (
    ('user_id', str),
    ('user_name', None),
    ('hero_id', None),
    ('hero_name', None),
    ('hero_level', int),
    ('kill_count', int),
    ('killed_count', int),
    ('assist_count', int),
    ('title', str),
    ('dust_count', int),
    ('eye_count', int),
    ('gem_count', int),
    ('smoke_count', int),
    ('creep_kill', int),
    ('creep_denies', int),
    ('total_money', int),
    ('hurt_value', int),
    ('team_id', str),
    ('neutral_kill', int),
)


def get_correlation_base(url, g_id):
    """Download the per-player records of one game and append them to disk.

    For every element of the response's ``data`` list one tab-separated line
    is appended to d:/goushi/data_09_correlation.txt. Each line starts with
    ``g_id`` so the file can be joined with the bureau data. The ``items``
    and ``skills`` fields of the response are not stored. Any failure -
    network error, timeout, non-200 status or unexpected payload - is logged
    through ``logger`` and does not propagate.

    Args:
        url: Correlation endpoint URL for this game.
        g_id: Game id; written as the first column and used in log messages.
    """
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code != 200:
            # A non-200 answer used to be ignored silently; record it so the
            # retry step can pick the game up again.
            print(f"{g_id}: HTTP {resp.status_code}")
            logger.error(f"{g_id}的correlation数据获取失败")
            return
        cur_data = json.loads(resp.text)
        # Build every row first: if one player record is malformed the whole
        # game fails and nothing is written, instead of leaving a partial
        # game in the file that a later retry would duplicate.
        rows = []
        for info in cur_data['data']:
            values = [g_id]
            for key, convert in _CORRELATION_COLUMNS:
                raw = info[key]
                values.append(raw if convert is None else convert(raw))
            rows.append("\t".join(str(value) for value in values) + "\n")
        # Explicit UTF-8: the name columns are free text, and the platform
        # default encoding may be unable to represent them.
        # NOTE(review): a file produced earlier with the platform default
        # encoding should be converted or regenerated before appending.
        with open('d:/goushi/data_09_correlation.txt', mode='a', encoding='utf-8') as f:
            f.writelines(rows)
        print(f"{g_id}的correlation数据已存")
    except Exception as e:
        print(e)
        # The message format is parsed later by check_data_failure (text
        # before "的" is the game id), so keep it unchanged.
        logger.error(f"{g_id}的correlation数据获取失败")


def multi_thread_get_correlation():
    """Fetch the per-player data of every game listed in the bureau file.

    Each line of d:/goushi/data_09_bureau.txt contributes the text before its
    first tab as the game id; the downloads run on a pool of 6 worker threads.
    """
    with open('d:/goushi/data_09_bureau.txt', mode='r') as source:
        rows = list(source)
    with ThreadPoolExecutor(max_workers=6) as executor:
        for row in rows:
            game_id = row.partition("\t")[0]
            target = correlation_url_none.replace("GameID=", f"GameID={game_id}")
            executor.submit(get_correlation_base, target, game_id)

# multi_thread_get_correlation()

# 1.7 Retry the downloads that failed; in practice a request fails now and then
def check_data_failure(since='2025-03-10 18:40:00'):
    """Re-download the correlation data of every game logged as failed.

    Parses the pipe-separated log file written by the file handler of
    ``create_logger`` (time|level|message|thread), keeps the entries logged
    after ``since``, takes the text before "的" in each message as the game
    id and calls get_correlation_base for each distinct id.

    Args:
        since: Only log entries strictly later than this moment are retried.
            Anything accepted by ``pandas.to_datetime`` works; the default is
            the cutoff that used to be hard-coded.
    """
    # Same file the logger writes to ('d:/goushi/zhanji.log'); the drive
    # letter case is aligned so the path also matches on case-sensitive
    # file systems.
    missed_datas = pd.read_csv("d:/goushi/zhanji.log",
                               sep='|',
                               names=['time', 'type', 'message', 'other'])

    missed_datas['time'] = pd.to_datetime(missed_datas['time'])
    need = missed_datas[missed_datas['time'] > pd.to_datetime(since)]
    # A game that failed several times appears several times in the log;
    # retry it once so its rows are not appended repeatedly.
    missed_ids = (need['message']
                  .dropna()
                  .map(lambda x: x.split('的')[0])
                  .drop_duplicates())
    if len(missed_ids) > 0:
        print('存在下载失败的数据%d个' % len(missed_ids))
    for g_id in missed_ids:
        url = correlation_url_none.replace("GameID=", "GameID={}".format(g_id))
        get_correlation_base(url, g_id)
相关推荐
上海云盾-小余11 小时前
BGP 高防与普通高防差异解析:游戏与政企业务该如何选型
游戏
_Evan_Yao12 小时前
当 if 成为性能判官:分支预测、流水线冲刷与 Java 中的“猜谜游戏”
人工智能·游戏
魔士于安12 小时前
Unity windows 同步 异步 打开文件文件夹工具
游戏·unity·游戏引擎·贴图·模型
魔士于安12 小时前
unity lowpoly 风格 城市 建筑 道路 交通标志
游戏·unity·游戏引擎·贴图·模型
^—app56686612 小时前
短剧游戏APP广告联盟SDK类型模块调用源代码
游戏
chushiyunen13 小时前
pygame实现射击游戏
python·游戏·pygame
开开心心就好13 小时前
支持批量添加水印的实用工具推荐
人工智能·游戏·ci/cd·docker·音视频·语音识别·媒体
黑客说14 小时前
2026 AI 游戏热度排行榜|游戏推荐
人工智能·科技·游戏·娱乐
前端不太难14 小时前
鸿蒙游戏的“帧”到底是什么?
游戏·状态模式·harmonyos
penngo1 天前
# 使用Claude Code开发植物大战僵尸游戏(pygame,附源码)
python·游戏·pygame