需求: 根据生成的SQL语句,识别出要查询的列名。
思路:
1、定位与提取:找到 SELECT 子句的文本区域(SELECT 关键字之后、顶层 FROM 关键字之前)
2、拆分:将字段定义字符串按括号外的逗号拆分为独立单元
3、剥离别名:从每个字段单元中取出别名(AS 之后的名称,或末尾用引号包裹的名称)
4、处理边界情况(如字段没有别名时保留表达式本身)并输出结果
代码(Python,依赖第三方库 sqlparse):
import sqlparse
from sqlparse.sql import TokenList, Identifier, Function
from sqlparse.tokens import Keyword, Name, Punctuation, Whitespace
# Characters that open/close a quoted region (string literal or quoted identifier).
_QUOTE_CHARS = '"\'`'
# If the expression ends with one of these, a trailing quoted token is an
# operand of that operator (e.g. ``a || 'x'``), not an alias.
_OPERATOR_TAIL = '|+-*/%=<>'


def extract_select_columns_simple(sql_text):
    """Return the output column names of the first top-level SELECT clause.

    The statement is tokenized with ``sqlparse`` only to locate the text
    between the first top-level ``SELECT`` keyword and the following
    top-level ``FROM`` keyword. That text is then split into fields on
    commas that sit outside parentheses and quotes, and for each field the
    alias is returned when one is present, otherwise the field expression
    itself (with its original spelling).

    Recognized alias forms:
      * ``expr AS alias`` (any letter case, any whitespace around ``AS``);
        an ``AS`` inside parentheses or quotes, such as the one in
        ``CAST(x AS type)``, is never taken for the alias keyword;
      * ``expr "alias"`` / ``expr 'alias'`` / ``expr `alias``` -- a trailing
        quoted token separated from the expression by whitespace.

    An unquoted alias written without ``AS`` (``name xm``) is NOT detected;
    such a field is returned unchanged.

    Args:
        sql_text: SQL text; only the first statement is inspected.

    Returns:
        A list of strings, one per select-list field, in source order.
        Empty when ``sql_text`` is empty/None or contains no top-level
        SELECT.
    """
    if not sql_text:
        return []
    statements = sqlparse.parse(sql_text)
    # parse() yields no statement for empty input; indexing [0] would raise.
    if not statements:
        return []

    select_clause = _select_clause_text(statements[0])
    if select_clause is None:
        return []

    columns = []
    for field in _split_top_level_commas(select_clause):
        field = field.strip()
        if not field:
            continue
        expr, alias = _split_alias(field)
        # An empty alias (e.g. ``x AS ""``) falls back to the expression.
        columns.append(alias or expr)
    return columns


def _select_clause_text(statement):
    """Return the text between top-level SELECT and FROM, or None if no SELECT."""
    collecting = False
    pieces = []
    for token in statement.tokens:
        if not collecting:
            if token.is_keyword and token.normalized == 'SELECT':
                collecting = True
            continue
        if token.is_keyword and token.normalized == 'FROM':
            break
        pieces.append(str(token))
    return ''.join(pieces) if collecting else None


def _top_level_mask(text):
    """Return a list of bools: True where text[i] is outside quotes and parentheses.

    Quote characters and parentheses themselves are marked False. A doubled
    quote inside a quoted region (``'it''s'``) closes and immediately
    reopens the region, so the mask stays correct without special handling.
    """
    mask = []
    depth = 0
    quote = None
    for ch in text:
        if quote is not None:
            mask.append(False)
            if ch == quote:
                quote = None
        elif ch in _QUOTE_CHARS:
            quote = ch
            mask.append(False)
        elif ch == '(':
            depth += 1
            mask.append(False)
        elif ch == ')':
            # Tolerate unbalanced input instead of going negative.
            depth = max(depth - 1, 0)
            mask.append(False)
        else:
            mask.append(depth == 0)
    return mask


def _split_top_level_commas(text):
    """Split text on commas that are outside parentheses and quotes."""
    top_level = _top_level_mask(text)
    fields = []
    start = 0
    for pos, ch in enumerate(text):
        if ch == ',' and top_level[pos]:
            fields.append(text[start:pos])
            start = pos + 1
    fields.append(text[start:])
    return fields


def _is_word_char(ch):
    """Return True if ch can be part of an unquoted identifier or keyword."""
    return ch.isalnum() or ch == '_'


def _split_alias(field):
    """Split one stripped, non-empty select-list field into (expression, alias).

    ``alias`` is None when no alias form is recognized; in that case the
    expression is the field itself.
    """
    top_level = _top_level_mask(field)

    # Explicit alias: the last standalone AS at top level. Characters are
    # compared individually rather than searching field.upper(), because
    # upper-casing can change the string length and shift the offsets.
    for pos in range(len(field) - 3, 0, -1):
        if not top_level[pos]:
            continue
        if field[pos] not in 'aA' or field[pos + 1] not in 'sS':
            continue
        if _is_word_char(field[pos - 1]) or _is_word_char(field[pos + 2]):
            continue
        return field[:pos].strip(), field[pos + 2:].strip().strip(' "\'`')

    # Implicit alias: a trailing quoted token preceded by top-level whitespace.
    closing = field[-1]
    if closing in _QUOTE_CHARS:
        opening = field.rfind(closing, 0, len(field) - 1)
        if opening > 0 and field[opening - 1].isspace() and top_level[opening - 1]:
            expr = field[:opening].strip()
            if expr and expr[-1] not in _OPERATOR_TAIL:
                return expr, field[opening:].strip(' "\'`')

    return field, None
# Usage example: run the extractor on a sample query and list the columns found.
if __name__ == '__main__':
    sample_sql = """
select to_char(dt :: date, 'yyyy-mm') as "数据日期"
, jine::numeric(18,2) as "实际值", name "姓名"
, cast((jine/yswcjd) as numeric(18,2)) as "目标值"
, cast((yswcjd*100) as numeric(18,2))||'%' as "进度"
from ads_sjmh_cwzb
where cwzb='净值' and dt=(select max(dt) from tb_zb where cwzb is not null and dt > 0)
and dt ='日期'
"""
    # Separator line printed before the numbered column listing.
    separator = "=" * 60
    print(separator)
    found_columns = extract_select_columns_simple(sample_sql)
    for position, column_name in enumerate(found_columns, start=1):
        print(f"字段 {position}: {column_name}")
结果(运行上述示例的输出,已省略开头的分隔线):
字段 1: 数据日期
字段 2: 实际值
字段 3: 姓名
字段 4: 目标值
字段 5: 进度