一道 python 数据分析的题目

python 数据分析的题目。

做题方法:使用 pandas 读取数据,然后分析。

知识点:pandas,正则表达式,py知识。

过程:不断使用 GPT,遇到有问题的地方自己分析,把分析的结果告诉 GPT,注意要使用 Data Analysis,中文效果不是很好。

python 复制代码
import pandas as pd
import hashlib
import re
from datetime import datetime

def load_csv(file_path):
    return pd.read_csv(file_path)

def preprocess_a_table(a_table):
    account_table_map = {}
    for _, row in a_table.iterrows():
        account = row['账号']
        table = row['操作表']
        if account not in account_table_map:
            account_table_map[account] = set()
        account_table_map[account].add(table)
    return account_table_map

#  row 是b表
def is_valid_operation(row, a_table, account_table_map):
    account = row['账号']
    operation = row['执行操作'].lower()

    # Extracting table name and operation type
    table_name_match = re.search(r'from (\w+)', operation) or re.search(r'into (\w+)', operation) or re.search(r'update (\w+)', operation)
    table_name = table_name_match.group(1) if table_name_match else None

    operation_type_match = re.match(r'(\w+)', operation)
    operation_type = operation_type_match.group(1) if operation_type_match else None

    # Check if account is not in account_table_map or if the table_name is not in the account's permissible tables
    if account not in account_table_map or (table_name and table_name not in account_table_map[account]):
        return '0_' + str(row['编号'])

    # Check for time and permission violations
    for _, account_row in a_table[a_table['账号'] == account].iterrows():
        if account_row['操作表'] == table_name:
            # Time check
            operation_time = datetime.strptime(row['操作时间'], '%Y/%m/%d %H:%M:%S').time()
            start_time, end_time = account_row['可操作时间段(时:分:秒)'].split('~')
            start_time = datetime.strptime(start_time, '%H:%M:%S').time()
            end_time = datetime.strptime(end_time, '%H:%M:%S').time()

            if not (start_time <= operation_time <= end_time):
                return str(account_row['编号']) + '_' + str(row['编号'])  # Time violation

            # Permission check
            permissions = account_row['权限'].split(',')
            if operation_type and operation_type not in permissions:
                return str(account_row['编号']) + '_' + str(row['编号'])  # Permission violation

    return None

def custom_sort(violation):
    parts = violation.split('_')
    return (int(parts[0]), int(parts[1]))

def main():
    a_table_path = '../iris_data/a_table_test.csv'
    b_table_path = '../iris_data/b_table_test.csv'

    a_table = load_csv(a_table_path)
    b_table = load_csv(b_table_path)

    account_table_map = preprocess_a_table(a_table)

    violations = []
    for _, row in b_table.iterrows():
        violation = is_valid_operation(row, a_table, account_table_map)
        if violation:
            violations.append(violation)

    # Sort the violations based on the numerical part after the underscore
    violations_sorted = sorted(violations, key=custom_sort)

    # Join the sorted list into a string
    violations_joined = ','.join(violations_sorted)

    # Calculate MD5 hash
    md5_hash = hashlib.md5(violations_joined.encode()).hexdigest()

    print(f"Sorted Violations: {violations_joined}")
    print(f"MD5 Hash: {md5_hash}")


if __name__ == "__main__":
    main()
相关推荐
databook41 分钟前
Manim实现闪光轨迹特效
后端·python·动效
Juchecar2 小时前
解惑:NumPy 中 ndarray.ndim 到底是什么?
python
用户8356290780512 小时前
Python 删除 Excel 工作表中的空白行列
后端·python
Json_2 小时前
使用python-fastApi框架开发一个学校宿舍管理系统-前后端分离项目
后端·python·fastapi
RestCloud2 小时前
数据传输中的三大难题,ETL 平台是如何解决的?
数据分析·api
数据智能老司机9 小时前
精通 Python 设计模式——分布式系统模式
python·设计模式·架构
数据智能老司机10 小时前
精通 Python 设计模式——并发与异步模式
python·设计模式·编程语言
数据智能老司机10 小时前
精通 Python 设计模式——测试模式
python·设计模式·架构
数据智能老司机10 小时前
精通 Python 设计模式——性能模式
python·设计模式·架构
c8i10 小时前
drf初步梳理
python·django