数据库:Hive转Presto(二)

继续上节代码,补充了replace_func函数,

python 复制代码
import re
import os
from tkinter import *


class Hive2Presto:
    def __int__(self):
        self.t_funcs = ['substr', 'nvl', 'substring', 'unix_timestamp'] + \
                       ['to_date', 'concat', 'sum', 'avg', 'abs', 'year', 'month', 'ceiling', 'floor']
        self.time_funcs = ['date_add', 'datediff', 'add_months']
        self.funcs = self.t_funcs + self.time_funcs
        self.current_path = os.path.abspath(__file__)
        self.dir = os.path.dirname(self.current_path)
        self.result = []
        self.error = []
        self.filename = ''

    def main(self):
        self.root = Tk()
        self.root.config(bg='#ff741d')  # 背景颜色设置为公司主题色^_^
        self.root.title('Hive转Presto')
        self.win_width = 550
        self.win_height = 500
        self.screen_width = self.root.winfo_screenwidth()
        self.screen_height = self.root.winfo_screenheight()
        self.x = (self.screen_width - self.win_width) // 2
        self.y = (self.screen_height - self.win_height) // 2
        self.root.geometry(f'{self.win_width}x{self.win_height}+{self.x}+{self.y}')

        font = ('楷体', 11)

        self.button = Button(self.root, text='转换', command=self.trans, bg='#ffcc8c', font=font, anchor='e')
        self.button.grid(row=0, column=0, padx=100, pady=10, sticky=W)

        self.file_button = Button(self.root, text='选择文件', command=self.choose_file, bg='#ffcc8c', font=font,
                                  anchor='e')
        self.file_button.grid(row=0, column=1, padx=0, pady=10, sticky=W)

        self.entry = Entry(self.root, width=65, font=font)
        self.entry.insert(0, '输入Hive代码')
        self.entry.grid(row=1, column=0, padx=10, pady=10, columnspan=2)
        self.entry.bind('<Button-1>', self.delete_text)

        self.text = Text(self.root, width=75, height=20)
        self.text.grid(row=2, column=0, padx=10, pady=10, columnspan=2)

        self.des_label = Label(self.root, text='可以复制结果,也有生成的文件,与选取的文件同文件夹', bg='#ffcc8c',
                               font=('楷体', 10))
        self.des_label.grid(row=3, column=0, padx=10, pady=10, columnspan=2)

        s = ''
        for i in range(0, (n := len(self.funcs)), 4):
            if i + 4 <= n:
                s += ','.join(self.funcs[i:i + 4]) + '\n'
            else:
                s += ','.join(self.funcs[i:]) + '\n'
        s = s[:-1]
        self.des_label1 = Label(self.root, text=s, bg='#ffcc8c',
                                font=('楷体', 10))
        self.des_label1.grid(row=4, column=0, padx=10, pady=10, columnspan=2)

        self.root.columnconfigure(0, minsize=10)
        self.root.columnconfigure(1, minsize=10)
        self.root.columnconfigure(0, pad=5)

        self.root.mainloop()

    def replace_func(self, s, res):
        """
        把搜索到函数整体取出来,处理括号中的参数
        :param s: 
        :param res: 
        :return: 
        """
        for f in res:
            f1 = f.replace('\n', '').strip()
            f1 = re.sub(r'(\s*)', '(', f1)
            # 搜索括号里的字符串
            if re.findall(r'(\w*)\(', f1):
                func_name = re.findall(r'(\w*)\(', f1)[0].strip()
            else:
                continue
            try:
                if 'date_add' == func_name.lower():
                    date, date_num = self.extact_func(f1, func_name)
                    s_n = f"date_add('day',{date_num},cast(substr(cast{date} as varchar,1,10) as date))"
                    s = s.replace(f, s_n)
                elif 'datediff' == func_name.lower():
                    date1, date2 = self.extact_func(f1, func_name)
                    s_n = f"date_add('day',{date2},cast(substr(cast{date} as varchar,1,10) as date),cast(substr(cast{date1} as varchar),1,10) as date))"
                    s = s.replace(f, s_n)
                elif 'nvl' == func_name.lower():
                    s1, s2 = self.extact_func(f1, func_name)
                    s_n = f"coalesce({s1},{s2})"
                    s = s.replace(f, s_n)
                elif 'substr' == func_name.lower():
                    date, start, end = self.extact_func(f1, func_name)
                    s_n = f"substr(cast({date} as varchar),{start},{end}"
                    s = s.replace(f, s_n)
                elif 'substring' == func_name.lower():
                    date, start, end = self.extact_func(f1, func_name)
                    s_n = f"substring(cast({date} as varchar),{start},{end}"
                    s = s.replace(f, s_n)
                elif 'unit_timestamp' == func_name.lower():
                    date = self.extact_func(f1, func_name)[0]
                    s_n = f"to_unixtime(cast({date} as timestanp))"
                    s = s.replace(f, s_n)
                elif 'to_date' == func_name.lower():
                    date = self.extact_func(f1, func_name)[0]
                    s_n = f"cast({date} as date)"
                    s = s.replace(f, s_n)
                elif 'concat' == func_name.lower():
                    res = self.extact_func(f1, func_name)[0]
                    s_n = f'concat('
                    for r in res:
                        r = r.strip().replace('\n', '')
                        s_n += f"cast({r} as varchar),"
                    s_n = s_n[:-1] + ')'
                    s = s.replace(f, s_n)
                elif 'sum' == func_name.lower():
                    if 'unix_timestamp' in f1 or 'to_unixtime' in f1:
                        continue
                    ss = self.extact_func(f1, func_name)[0]
                    if 'if(' in ss.replace(' ', ''):
                        continue
                    s = self.func_trans(f, f1, func_name, ss, s)
                elif 'avg' == func_name.lower():
                    if 'unix_timestamp' in f1 or 'to_unixtime' in f1:
                        continue
                    ss = self.extact_func(f1, func_name)[0]
                    if 'if(' in ss.replace(' ', ''):
                        continue
                    s = self.func_trans(f, f1, func_name, ss, s)
                elif 'abs' == func_name.lower():
                    if 'unix_timestamp' in f1 or 'to_unixtime' in f1:
                        continue
                    ss = self.extact_func(f1, func_name)[0]
                    if 'if(' in ss.replace(' ', ''):
                        continue
                    s = self.func_trans(f, f1, func_name, ss, s)
                elif 'ceiling' == func_name.lower():
                    if 'unix_timestamp' in f1 or 'to_unixtime' in f1:
                        continue
                    ss = self.extact_func(f1, func_name)[0]
                    if 'if(' in ss.replace(' ', ''):
                        continue
                    s = self.func_trans(f, f1, func_name, ss, s)
                elif 'floor' == func_name.lower():
                    if 'unix_timestamp' in f1 or 'to_unixtime' in f1:
                        continue
                    ss = self.extact_func(f1, func_name)[0]
                    if 'if(' in ss.replace(' ', ''):
                        continue
                    s = self.func_trans(f, f1, func_name, ss, s)
                elif 'year' == func_name.lower():
                    date = self.extact_func(f1, func_name)[0]
                    s_n = f"year(cast(substr(cast({date} as varchar,1,10) as date))"
                    s = s.replace(f, s_n)
                elif 'month' == func_name.lower():
                    date = self.extact_func(f1, func_name)[0]
                    s_n = f"month(cast(substr(cast({date} as varchar,1,10) as date))"
                    s = s.replace(f, s_n)
            except:
                self.error.append(f"源代码中{func_name}函数参数输入可能有错误,具体为:{f1}")
                continue
        if self.error:
            self.entry.delete(0,END)
            self.text.delete("1.0",END)
            self.text.insert("end",f"{s}")
            self.error.insert(0,'转换失败,有部分没有转成功\n')
            
            root_ex = Tk()
            root_ex.title('错误')
            win_width = 600
            win_height = 200
            screen_width = root_ex.winfo_screenwidth()
            screen_height = root_ex.winfo_screenheight()
            x = (screen_width - win_width) // 2
            y = (screen_height - win_height) // 2
            root_ex.geometry(f'{win_width}x{win_height}+{x}+{y}')
            label_ex = Label(root_ex,text="\n".join(self.error),font=("楷体",10))
            label_ex.pack()
            root_ex.mainloop()
            
            return s

    def func_trans(self, f, f1, func_name, ss, s):
        pass

    def choose_file(self):
        """
        如果代码太多,从text中输入会很卡,直接选择代码文件输入会很快
        :return:
        """
        pass

    def findvar(self, ss):
        """
        搜索与计算有关的字段
        :param ss:
        :return:
        """
        pass

    def mysplit(self, s):
        """
        分割字段
        :param s:
        :return:
        """
        pass

    def extact_func(self, s, func_name):
        pass

    def delete_text(self, event):
        pass

    def trans(self):
        pass


if __name__ == '__main__':
    pro = Hive2Presto()
    pro.__int__()
    pro.main()
相关推荐
数据智能老司机1 小时前
CockroachDB权威指南——SQL调优
数据库·分布式·架构
数据智能老司机1 小时前
CockroachDB权威指南——应用设计与实现
数据库·分布式·架构
数据智能老司机1 小时前
CockroachDB权威指南——CockroachDB 模式设计
数据库·分布式·架构
数据智能老司机20 小时前
CockroachDB权威指南——CockroachDB SQL
数据库·分布式·架构
数据智能老司机20 小时前
CockroachDB权威指南——开始使用
数据库·分布式·架构
松果猿21 小时前
空间数据库学习(二)—— PostgreSQL数据库的备份转储和导入恢复
数据库
无名之逆21 小时前
Rust 开发提效神器:lombok-macros 宏库
服务器·开发语言·前端·数据库·后端·python·rust
s91236010121 小时前
rust 同时处理多个异步任务
java·数据库·rust
数据智能老司机21 小时前
CockroachDB权威指南——CockroachDB 架构
数据库·分布式·架构
hzulwy21 小时前
Redis常用的数据结构及其使用场景
数据库·redis