游戏对战数据分析

下载和数据整合部分已经完毕

源数据和说明链接如下:如有密码 9527

百度网盘 请输入提取码

放一部分代码,还没完全写完

复制代码
import requests
from lxml import etree
from urllib import parse
import json
import time
import math
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import logging
import sys
import os
# 1.获取信息阶段
# 1.1 日志
first_url ="https://score.09game.com/MOBA/BasicDataList?UserID=1477944&GameTypeID=21&CurrentSeason=0&GameSource=0&Time=-1&PageIndex=0&PageSize=13"

# 先将本地数据创建一个文件夹存档
if not os.path.exists('d:/goushi'):
    os.makedirs('d:/goushi')
def create_logger(logger_name='zhanji09_logger'):
    """
    日志功能,记录相关信息
    """
    logger = logging.getLogger(name=logger_name)
    logger.setLevel(logging.INFO)
    logger.propagate=False  # 不向上传递
    # 存储用
    handler_file = logging.FileHandler('d:/goushi/zhanji.log',mode='a',encoding='utf-8')
    format_file = logging.Formatter('%(asctime)s|%(levelname)s|%(message)s|%(thread)d'
                                  ,datefmt='%Y-%m-%d %H:%M:%S')
    handler_file.setLevel(logging.ERROR)
    handler_file.setFormatter(format_file)
    # 输出用
    handler_console = logging.StreamHandler(sys.stdout)
    handler_console.setLevel(logging.WARNING)
    format_console = logging.Formatter('%(message)s|%(asctime)s|%(thread)d')
    handler_console.setFormatter(format_console)
    # 要避免重复添加
    if not logger.handlers:
        logger.addHandler(handler_file)
        logger.addHandler(handler_console)
    return logger

logger = create_logger()

# 1.2 获取总共有多少页,多少条数据
def get_total_page(url):
    """获取总共有多少页,向上取证"""
    resp = requests.get(url)
    data = json.loads(resp.text)
    pagetotal = data['data']['pageTotal']
    pagesize = data['data']['pageSize']
    page_num = math.ceil(pagetotal/pagesize)
    print('总共有%d页共计%d条数据'%(page_num,pagetotal))
    return page_num,pagesize
# page_num,pagesize = get_total_page(first_url)


# 1.3 获取每一页的(全满则13个)g_id
page_data_url = "https://score.09game.com/MOBA/BasicDataList?UserID=1477944&GameTypeID=21&CurrentSeason=0&GameSource=0&Time=-1&PageIndex=0&PageSize=13"
file_path = "d:/goushi/data09_less.txt"
def get_gid_base(url, page_num):
    """
    多线程的基本功能,每一页是先存最早的
    每一页如果满的话,有13个数据
    """
    resp = requests.get(url)
    cur_data = json.loads(resp.text)
    g_id = [x['g_id'] for x in cur_data['data']['listEntity']]
    create_time = [x['create_time'] for x in cur_data['data']['listEntity']]
    with open(file_path, mode='a', encoding='utf-8') as f:
        # 按照最早的到最新的顺序存储
        for a, b in zip(g_id[::-1], create_time[::-1]):
            f.write(f"{a}\t{b}\n")
    print(f"第{page_num}页数据已存")

def multi_thread_get_gid(page_num):
    """由于使用多线程,故每一页的顺序不会完全一致"""
    with ThreadPoolExecutor(max_workers=4) as pool:
        for i in range(page_num - 1, -1, -1):
            url = page_data_url.replace("PageIndex=0", "PageIndex={}".format(i))
            pool.submit(get_gid_base, url, i)

# multi_thread_get_gid(page_num)

# 1.4 获取每局bureau表信息
bureau_url_none = "https://score.09game.com/MOBA/GameBureauMessage?GameTypeID=21&GameID="
def get_bureau_base(url,g_id):
    resp = requests.get(url,timeout=5)
    try:
        if resp.status_code == 200:
            cur_data = json.loads(resp.text)
            g_id = cur_data['data'][0]['g_id']
            win_id = cur_data['data'][0]['win_id']
            with open('d:/goushi/data_09_bureau.txt',mode='a') as f:
                f.write(f"{g_id}\t{win_id}\n")
            print(f"{g_id}的bureau数据已存")
    except Exception as e:
        print(e)
        logger.error(f"{g_id}的bureau数据获取失败")
    # finally:
    #     time.sleep(0.5)

def multi_thread_get_bureau():
    with open(file_path,mode='r') as f:
        datas = f.readlines()
    with ThreadPoolExecutor(max_workers=8) as pool:
        for i in datas:
            g_id = i.split("\t")[0]
            url = bureau_url_none.replace("GameID=","GameID={}".format(g_id))
            pool.submit(get_bureau_base, url,g_id)

# multi_thread_get_bureau()
# 1.5 验证两份数据是否等长
def check_data_length():
    df_1 = pd.read_csv(file_path, header=None, sep='\t')
    df_2 = pd.read_csv('d:/goushi/data_09_bureau.txt', header=None, sep='\t')
    if len(df_1)>len(df_2):
        print("bureau数据存在缺失")
        merge_df = pd.merge(df_1, df_2,how='left',left_on=0,right_on=0)
        missed_ids = merge_df[merge_df['1_y'].isnull()][0].values
        for i in missed_ids:
            url = bureau_url_none.replace("GameID=","GameID={}".format(i))
            get_bureau_base(url, i)
    else:
        print("bureau数据完整",f"总共{len(df_1)}条数据")

# check_data_length()

# 1.6获取每局详细数据
correlation_url_none = "https://score.09game.com/MOBA/CorrelationPlayerMilitaryExploit?GameTypeID=21&GameID=&GameSource=0&CurrentSeason=0"


def get_correlation_base(url, g_id):
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code == 200:
            cur_data = json.loads(resp.text)
            with open('d:/goushi/data_09_correlation.txt', mode='a') as f:
                for info in cur_data['data']:
                    user_id = str(info['user_id'])
                    user_name = info['user_name']
                    hero_id = info['hero_id']
                    hero_name = info['hero_name']
                    hero_level = int(info['hero_level'])
                    kill_count = int(info['kill_count'])
                    killed_count = int(info['killed_count'])
                    assist_count = int(info['assist_count'])
                    title = str(info['title'])
                    dust_count = int(info['dust_count'])
                    eye_count = int(info['eye_count'])
                    gem_count = int(info['gem_count'])
                    smoke_count = int(info['smoke_count'])
                    creep_kill = int(info['creep_kill'])
                    creep_denies = int(info['creep_denies'])
                    total_money = int(info['total_money'])
                    hurt_value = int(info['hurt_value'])
                    team_id = str(info['team_id'])
                    neutral_kill = int(info['neutral_kill'])
                    #     items = info['items'] # 装备信息,暂不存储
                    #     skills = info['skills'][:-3] # 去掉最后两个0,暂不存储
                    # 此时一定要将g_id写入,用于两表合并
                    f.write(
                        f"{g_id}\t{user_id}\t{user_name}\t{hero_id}\t{hero_name}\t{hero_level}\t{kill_count}\t{killed_count}\t{assist_count}\t{title}\t{dust_count}\t{eye_count}\t{gem_count}\t{smoke_count}\t{creep_kill}\t{creep_denies}\t{total_money}\t{hurt_value}\t{team_id}\t{neutral_kill}\n")
            print(f"{g_id}的correlation数据已存")
    except Exception as e:
        print(e)
        logger.error(f"{g_id}的correlation数据获取失败")


def multi_thread_get_correlation():
    with open('d:/goushi/data_09_bureau.txt', mode='r') as f:
        datas = f.readlines()
    with ThreadPoolExecutor(max_workers=6) as pool:
        for i in datas:
            g_id = i.split("\t")[0]
            url = correlation_url_none.replace("GameID=", "GameID={}".format(g_id))
            pool.submit(get_correlation_base, url, g_id)

# multi_thread_get_correlation()

# 1.7 补上获取失败的,实测发现偶尔会有数据获取失败的
def check_data_failure():
    missed_datas = pd.read_csv("D:/goushi/zhanji.log"
                               , sep='|'
                               , names=['time', 'type', 'message', 'other'])

    # 用于指定时间范围,需要先将时间列设为索引
    missed_datas['time'] = pd.to_datetime(missed_datas['time'])
    missed_datas.set_index(missed_datas['time'])
    # 输入比较时间范围
    need = missed_datas[missed_datas['time'] > '2025-03-10 18:40:00']
    #     missed_ids = missed_datas.loc[:,'message'].map(lambda x:x.split('的')[0])
    missed_ids = need.loc[:, 'message'].map(lambda x: x.split('的')[0])
    if len(missed_ids) > 0:
        print('存在下载失败的数据%d个' % len(missed_ids))
    for g_id in missed_ids:
        url = correlation_url_none.replace("GameID=", "GameID={}".format(g_id))
        get_correlation_base(url, g_id)
相关推荐
远程软件小帮手10 小时前
好用的云电脑!手机怎么用UU远程云电脑玩电脑游戏?
运维·服务器·游戏·电脑
yingxiao88811 小时前
挖掘百亿“数字热土”!解读印度游戏与媒体娱乐的高速增长
游戏·娱乐·媒体
一个小狼娃18 小时前
Android集成Unity避坑指南
android·游戏·unity
ii_best1 天前
安卓/IOS工具开发基础教程:按键精灵一个简单的文字识别游戏验证
android·开发语言·游戏·ios·编辑器
wanhengidc1 天前
云手机的基本原理
运维·服务器·游戏·智能手机·云计算
闲人编程1 天前
Python游戏开发入门:Pygame实战
开发语言·python·游戏·pygame·毕设·codecapsule
AA陈超2 天前
虚幻引擎5 GAS开发俯视角RPG游戏 P07-06 能力输入的回调
c++·游戏·ue5·游戏引擎·虚幻
IT教程资源C2 天前
(N_152)基于java贪吃蛇游戏5
游戏
Arenaschi2 天前
AI对未来游戏模式与游戏开发的助力
网络·人工智能·游戏·ai
todoitbo2 天前
游戏远程操控性能横评:ToDesk、Parsec、UU远程深度对比
游戏·todesk·远程·游戏远程