一道 python 数据分析的题目

python 数据分析的题目。

做题方法:使用 pandas 读取数据,然后分析。

知识点:pandas,正则表达式,py知识。

过程:不断使用 GPT,遇到有问题的地方自己分析,把分析的结果告诉 GPT,注意要使用 Data Analysis,中文效果不是很好。

python 复制代码
import pandas as pd
import hashlib
import re
from datetime import datetime

def load_csv(file_path):
    return pd.read_csv(file_path)

def preprocess_a_table(a_table):
    account_table_map = {}
    for _, row in a_table.iterrows():
        account = row['账号']
        table = row['操作表']
        if account not in account_table_map:
            account_table_map[account] = set()
        account_table_map[account].add(table)
    return account_table_map

#  row 是b表
def is_valid_operation(row, a_table, account_table_map):
    account = row['账号']
    operation = row['执行操作'].lower()

    # Extracting table name and operation type
    table_name_match = re.search(r'from (\w+)', operation) or re.search(r'into (\w+)', operation) or re.search(r'update (\w+)', operation)
    table_name = table_name_match.group(1) if table_name_match else None

    operation_type_match = re.match(r'(\w+)', operation)
    operation_type = operation_type_match.group(1) if operation_type_match else None

    # Check if account is not in account_table_map or if the table_name is not in the account's permissible tables
    if account not in account_table_map or (table_name and table_name not in account_table_map[account]):
        return '0_' + str(row['编号'])

    # Check for time and permission violations
    for _, account_row in a_table[a_table['账号'] == account].iterrows():
        if account_row['操作表'] == table_name:
            # Time check
            operation_time = datetime.strptime(row['操作时间'], '%Y/%m/%d %H:%M:%S').time()
            start_time, end_time = account_row['可操作时间段(时:分:秒)'].split('~')
            start_time = datetime.strptime(start_time, '%H:%M:%S').time()
            end_time = datetime.strptime(end_time, '%H:%M:%S').time()

            if not (start_time <= operation_time <= end_time):
                return str(account_row['编号']) + '_' + str(row['编号'])  # Time violation

            # Permission check
            permissions = account_row['权限'].split(',')
            if operation_type and operation_type not in permissions:
                return str(account_row['编号']) + '_' + str(row['编号'])  # Permission violation

    return None

def custom_sort(violation):
    parts = violation.split('_')
    return (int(parts[0]), int(parts[1]))

def main():
    a_table_path = '../iris_data/a_table_test.csv'
    b_table_path = '../iris_data/b_table_test.csv'

    a_table = load_csv(a_table_path)
    b_table = load_csv(b_table_path)

    account_table_map = preprocess_a_table(a_table)

    violations = []
    for _, row in b_table.iterrows():
        violation = is_valid_operation(row, a_table, account_table_map)
        if violation:
            violations.append(violation)

    # Sort the violations based on the numerical part after the underscore
    violations_sorted = sorted(violations, key=custom_sort)

    # Join the sorted list into a string
    violations_joined = ','.join(violations_sorted)

    # Calculate MD5 hash
    md5_hash = hashlib.md5(violations_joined.encode()).hexdigest()

    print(f"Sorted Violations: {violations_joined}")
    print(f"MD5 Hash: {md5_hash}")


if __name__ == "__main__":
    main()
相关推荐
lozhyf11 分钟前
Go语言-学习一
开发语言·学习·golang
dujunqiu21 分钟前
bash: ./xxx: No such file or directory
开发语言·bash
爱偷懒的程序源23 分钟前
解决go.mod文件中replace不生效的问题
开发语言·golang
日月星宿~24 分钟前
【JVM】调优
java·开发语言·jvm
加德霍克31 分钟前
【机器学习】使用scikit-learn中的KNN包实现对鸢尾花数据集或者自定义数据集的的预测
人工智能·python·学习·机器学习·作业
2401_8437852333 分钟前
C语言 指针_野指针 指针运算
c语言·开发语言
matlabgoodboy36 分钟前
代码编写java代做matlab程序代编Python接单c++代写web系统设计
java·python·matlab
l1x1n01 小时前
No.37 笔记 | Python面向对象编程学习笔记:探索代码世界的奇妙之旅
笔记·python·学习
Jacob程序员1 小时前
leaflet绘制室内平面图
android·开发语言·javascript
wanfeng_091 小时前
视频m3u8形式播放 -- python and html
python·html·video·hls·m3u8