识别SQL里的列名

需求：根据生成的SQL语句，识别出要查询的列名。

思路：

bash 复制代码

1、定位与提取：找到SELECT子句的文本区域
2、将字段定义字符串拆分为独立单元
3、从每个字段单元中剥离出别名
4、处理边界情况并输出结果

代码：

python 复制代码

import sqlparse
from sqlparse.sql import TokenList, Identifier, Function
from sqlparse.tokens import Keyword, Name, Punctuation, Whitespace

def extract_select_columns_simple(sql_text):
    """
    简化版本：专门处理标准格式的 SELECT 字段（都有明确的AS别名）
    """
    parsed = sqlparse.parse(sql_text)[0]

    # 找到 SELECT 关键字
    for i, token in enumerate(parsed.tokens):
        if token.is_keyword and token.normalized == 'SELECT':
            # 收集 SELECT 之后的所有字段定义
            select_tokens = []
            for j in range(i + 1, len(parsed.tokens)):
                next_token = parsed.tokens[j]
                if next_token.is_keyword and next_token.normalized == 'FROM':
                    break
                select_tokens.append(str(next_token))

            select_str = ''.join(select_tokens)

            # 按逗号分割，但需要处理 CAST 函数中的逗号
            # 简单方法：先按逗号分割，然后合并被括号包围的片段
            parts = select_str.split(',')
            columns = []

            i = 0
            while i < len(parts):
                part = parts[i].strip()
                if not part:
                    i += 1
                    continue

                # 检查括号是否配对
                open_paren = part.count('(')
                close_paren = part.count(')')

                # 如果括号未配对，需要合并后续部分
                while open_paren != close_paren and i + 1 < len(parts):
                    i += 1
                    part += ', ' + parts[i]
                    open_paren = part.count('(')
                    close_paren = part.count(')')

                # 提取别名
                alias = None
                expr = part

                # 查找 AS 关键字
                as_index = part.upper().rfind(' AS ')
                if as_index != -1:
                    expr = part[:as_index].strip()
                    alias = part[as_index + 4:].strip().strip(' "\'`')
                else:
                    # 检查是否有引号包裹的别名
                    words = part.split()
                    if len(words) >= 2:
                        last_word = words[-1]
                        if (last_word.startswith('"') and last_word.endswith('"')) or \
                                (last_word.startswith("'") and last_word.endswith("'")):
                            alias = last_word.strip(' "\'`')
                            expr = ' '.join(words[:-1])

                if alias:
                    columns.append(alias)
                else:
                    columns.append(expr)

                i += 1

            return columns

    return []

# 使用示例
if __name__ == '__main__':
    your_sql = """
               select  to_char(dt :: date, 'yyyy-mm') as "数据日期"
                    , jine::numeric(18,2) as "实际值", name "姓名"
, cast((jine/yswcjd) as numeric(18,2)) as "目标值"
                    , cast((yswcjd*100) as numeric(18,2))||'%' as "进度"
               from ads_sjmh_cwzb
               where cwzb='净值' and dt=(select max(dt) from tb_zb where cwzb is not null and dt > 0)
                 and  dt ='日期'

               """

    print("=" * 60)
    simple_columns = extract_select_columns_simple(your_sql)
    for idx, col in enumerate(simple_columns, 1):
        print(f"字段 {idx}: {col}")

结果：

bash 复制代码

字段 1: 数据日期
字段 2: 实际值
字段 3: 姓名
字段 4: 目标值
字段 5: 进度