Python词云生成工具3:定制更多参数

文章目录

添加整型参数

我们所有的设置都放在了wcDct中,所以若想用更多的参数来定制词云,那么只需在wcDct中添加内容,例如下面这些整型参数

其次,WordCloud中有很多参数的数据类型都是整型,这些适用于Spinbox

参数 说明 合适的范围 步长
width 词云宽度 100-2000 10
height 词云高度 100-2000 10
min_font_size 最小文字尺寸 1-50 1
max_font_size 最大文字尺寸 10-1000 10
font_step 字体步长 1-20 1
max_words 最大单词数 10-500 10
min_word_length 最短单词长度 0-10 1
scale 图像缩放 默认是1

下面就是要向wcDct中添加的内容。

python 复制代码
wcDct = {
    "最小文字" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":1, "to":50, "increment":1},
        "default":4,
        "call" : "min_font_size"},
    "最大文字" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":10, "to":1e3, "increment":10},
        "default":400,
        "call" : "max_font_size"},
    "字体步长" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":1, "to":20, "increment":1},
        "default":10,
        "call" : "font_step"},
    "最短词长" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":0, "to":10, "increment":1},
        "default":1,
        "call" : "min_word_length"},
    "最多词数" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":10, "to":500, "increment":10},
        "default":200,
        "call" : "max_words"},
    "图像缩放" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":0.5, "to":5, "increment":0.1},
        "default":1,
        "call" : "scale"},
}

布尔型参数

然后是一些布尔类型的参数,适合用Checkbutton

参数 说明 类型 适用组件
repeat 是否重复单词 默认False Checkbutton
include_numbers 是否包含数字 默认False
normalize_plurals 是否去掉词尾的s 默认True
python 复制代码
wcDct = {
    "单词重复" : {
        "ctrl": ttk.Checkbutton, 
        "paras" : {"width":10},
        "default": False,
        "call" : "repeat"},
    "包含数字" : {
        "ctrl": ttk.Checkbutton, 
        "paras" : {"width":10},
        "default": False,
        "call" : "include_numbers"},
    "去词尾s" : {
        "ctrl": ttk.Checkbutton, 
        "paras" : {"width":10},
        "default": False,
        "call" : "normalize_plurals"},
}

背景颜色

最后,还有一个背景颜色对话框

参数 说明 对话框类型 说明
background_color 背景色 颜色对话框 默认"black"
python 复制代码
wcDct = {
    "背景颜色" : {
        "ctrl": DialogButton,
        "paras" : {"height":5, "widthL":22, "widthR":8, "logtype":"颜色"},
        "call" : "background_color",
        "default" : "balck"},
}

更改之后的界面如下

词云生成逻辑

有了这些之后,还要修改词云生成逻辑,即调用这些参数所获得的值,最后根据上图中的参数,得到点云如下

源代码

所有源代码如下

python 复制代码
import tkinter as tk 
import tkinter.ttk as ttk
from tkinter.filedialog import (askopenfilename,
    askopenfilenames, askdirectory, asksaveasfilename)
from tkinter.colorchooser import askcolor

from threading import Thread

import numpy as np
import re
import csv
import jieba
from wordcloud import WordCloud

import os

class DialogButton(ttk.Frame):
    def __init__(self, master, 
        height, widthL, widthR, logtype, label=None, text=None, 
        frmDct={}, btnDct={}, enyDct={}, logDct={}):
        w = widthL + widthR
        super().__init__(master, 
            height=height, width = w, **frmDct)
        self.pack(fill=tk.X)

        self.text = tk.StringVar() if not text else text
        ttk.Entry(self, width=widthL, textvariable=self.text, 
            **enyDct).pack(side=tk.LEFT, fill = tk.X, padx=5)
        
        ttk.Button(self, width=widthR, 
            text=self.setLabel(logtype, label),
            command = self.Click, **btnDct).pack(side=tk.RIGHT)
        self.logtype = logtype
        self.logDct = logDct

    def setLabel(self, key, label=None):
        if label:
            return label
        labelDct = {
            "文件"   : "选择文件",
            "文件夹" : "选择路径",
            "多文件" : "选择多个文件",
            "保存" : "存储路径",
            "颜色"   : "选择颜色",
        }
        return labelDct[key]

    def Click(self):
        typeDct = {
            "文件"  : askopenfilename,
            "文件夹": askdirectory,
            "多文件": askopenfilenames,
            "保存"  : asksaveasfilename,
            "颜色"  : askcolor,
        }
        text = typeDct[self.logtype](**self.logDct)
        if self.logtype == "颜色":
            text = text[1]
        self.text.set(text)

    def get(self):
        return self.text.get()

    def set(self, txt):
        self.text.set(txt)

wcDct = {
    "词云宽度" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":100, "to":2000, "increment":10},
        "default":800,
        "call" : "width"},
    "词云高度" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":100, "to":2000, "increment":10},
        "default":450,
        "call" : "height"},
    "图像缩放" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":0.5, "to":10, "increment":0.1},
        "default":1,
        "call" : "scale"},
    "最小文字" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":1, "to":50, "increment":1},
        "default":4,
        "call" : "min_font_size"},
    "最大文字" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":10, "to":1e3, "increment":10},
        "default":400,
        "call" : "max_font_size"},
    "字体步长" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":1, "to":20, "increment":1},
        "default":10,
        "call" : "font_step"},
    "最短词长" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":0, "to":10, "increment":1},
        "default":1,
        "call" : "min_word_length"},
    "最多词数" : {
        "ctrl": ttk.Spinbox, 
        "paras" : {"width":10, "from_":10, "to":500, "increment":10},
        "default":200,
        "call" : "max_words"},
    "字体路径" : {"ctrl": DialogButton,
                 "paras" : {"height":5, "widthL":22, "widthR":8, "logtype":"文件"},
                 "call" : "font_path",
                 "default" : r"C:\Windows\Fonts\simhei.ttf"},
    "输入路径" : {"ctrl": DialogButton, "paras" : {"width":25},
                "paras" : {"height":5, "widthL":22, "widthR":8, "logtype":"文件"},},
    "输出路径" : {"ctrl": DialogButton, "paras" : {"width":25},
                "paras" : {"height":5, "widthL":22, "widthR":8, "logtype":"保存"},},
    "背景颜色" : {
        "ctrl": DialogButton,
        "paras" : {"height":5, "widthL":22, "widthR":8, "logtype":"颜色"},
        "call" : "background_color",
        "default" : "balck"},
    "单词重复" : {
        "ctrl": ttk.Checkbutton, 
        "paras" : {"width":10},
        "default": False,
        "call" : "repeat"},
    "包含数字" : {
        "ctrl": ttk.Checkbutton, 
        "paras" : {"width":10},
        "default": False,
        "call" : "include_numbers"},
    "去词尾s" : {
        "ctrl": ttk.Checkbutton, 
        "paras" : {"width":10},
        "default": False,
        "call" : "normalize_plurals"},
}


class DrawWords(ttk.Frame):
    def __init__(self, master, **options):
        super().__init__(master, **options)
        self.pack()
        self.words = None

        self.initWidgets()

    
    def initWidgets(self):
        frm = ttk.Frame(self)
        frm.pack(side=tk.LEFT, fill=tk.Y)
        self.initPara(frm)

        frm = ttk.LabelFrame(self, text="分词结果")
        frm.pack(fill=tk.BOTH, expand=True)
        self.txtSplit = tk.Text(frm)
        self.txtSplit.pack(side=tk.LEFT, fill=tk.BOTH, padx=5, pady=5, expand=True)
        self.addScroll(frm, self.txtSplit)
    
    def addScroll(self, frm, txt):
        scroll = ttk.Scrollbar(frm)
        scroll.pack(side=tk.RIGHT,fill=tk.Y)
        txt.config(yscrollcommand=scroll.set)
        scroll.config(command=txt.yview)

    def setOneCheck(self, frm, key):
        v = wcDct[key]      # 组件参数
        n = v["call"]       # 调用名
        self.vars[n] = tk.BooleanVar()
        self.vars[n].set(v["default"])
        self.checks[n] = v["ctrl"](frm, text=key,
            variable=self.vars[n], **v["paras"])
        self.checks[n].pack(side=tk.LEFT)
        

    def setOneSpinBox(self, frm, key):
        ttk.Label(frm, width=8, text=key).pack(side=tk.LEFT)
        v = wcDct[key]      # 组件参数
        n = v["call"]       # 调用名
        self.spins[n] = v["ctrl"](frm, **v["paras"])
        self.spins[n].set(v["default"])
        self.spins[n].pack(side=tk.LEFT)
    
    def setOneDiaButton(self, frmPara, key):
        frm = ttk.Frame(frmPara)
        frm.pack(side=tk.TOP, fill=tk.X)
        ttk.Label(frm, width=8, text=key).pack(side=tk.LEFT)
        v = wcDct[key]
        n = v["call"] if 'call' in v else key
        self.paths[n] = v["ctrl"](frm, **v['paras'])
        self.paths[n].pack(side=tk.LEFT)
        if 'default' in v:
            self.paths[n].set(v['default'])

    def setOneColButton(self, frm, key):
        frm = ttk.Frame(frmPara)
        frm.pack(side=tk.TOP, fill=tk.X)
        ttk.Label(frm, width=8, text=key).pack(side=tk.LEFT)
        v = wcDct[key]
        n = v["call"] if 'call' in v else key
        self.paths[n] = v["ctrl"](frm, **v['paras'])
        self.paths[n].pack(side=tk.LEFT)
        if 'default' in v:
            self.paths[n].set(v['default'])


    def initPara(self, frmPara):
        self.spins = {}
        self.checks = {}
        self.vars = {}
        keys = ["词云宽度", "词云高度", "最小文字", "最大文字", 
            "字体步长", "图像缩放", "最短词长", "最多词数"]
        for i,key in enumerate(keys):
            if i%2==0:
                frm = ttk.Frame(frmPara)
                frm.pack(side=tk.TOP, fill=tk.X, pady=2)
            self.setOneSpinBox(frm, key)
        
        keys = ["单词重复", "包含数字", "去词尾s"]
        for i,key in enumerate(keys):
            if i%4==0:
                frm = ttk.Frame(frmPara)
                frm.pack(side=tk.TOP, fill=tk.X, pady=2)
            self.setOneCheck(frm, key)

        self.paths = {}
        for key in ["背景颜色", "输入路径", "输出路径", "字体路径"]:
            self.setOneDiaButton(frmPara, key)
                
        frm = ttk.Frame(frmPara)
        frm.pack(side=tk.TOP, fill=tk.X)
        ttk.Button(frm, text="分词预览", 
            command=self.splitWords).pack(side=tk.LEFT)
        ttk.Button(frm, text="分词保存", 
            command=self.saveWords).pack(side=tk.LEFT)
        ttk.Button(frm, text="输出词云", 
            command=self.genWordCloud).pack(side=tk.LEFT)

    def splitWords(self):
        p = self.paths["输入路径"].get()
        with open(p, encoding='utf8') as f:
            text = f.read()
        words = jieba.lcut(text)
        self.words = [w for w in words if len(w)>1] # 取出长度大于1的词
        self.setSplit("\n".join(self.words))

    def saveWords(self):
        path = asksaveasfilename()
        with open(path) as f:
            f.write(self.txtSplit.get(1.0, 'end'))

    def genWordCloud(self):
        dct = {}
        keys = ['width', 'height', 'font_path', 'scale',
            'min_font_size', 'max_font_size', 'font_step', 
            'max_words', 'min_word_length', 'background_color',
            'repeat', 'include_numbers', 'normalize_plurals']
        for key in keys:
            if key in self.spins:
                dct[key] = int(self.spins[key].get())
            if key in self.paths:
                dct[key] = self.paths[key].get()
            if key in self.checks:
                dct[key] = self.vars[key].get()=='1'  
        print(dct)
        try:
            cloud = WordCloud(**dct)
        except Exception as e:
            print(e)
        txt = self.txtSplit.get(1.0, "end")
        txt = " ".join(txt.split("\n"))
        cloud.generate(txt)
        p = self.paths["输出路径"].get()
        if not (p.endswith('.png') or p.endswith('.svg')):
            p = p+".png"
        cloud.to_file(p)
    
    def setSplit(self, txt):
        self.txtSplit.delete(1.0, "end")
        self.txtSplit.insert("end", txt)
        self.txtSplit.see("end")

if __name__ == "__main__":
    root = tk.Tk()
    DrawWords(root).pack(side=tk.TOP, fill=tk.BOTH)
    root.mainloop()
相关推荐
阿华的代码王国1 分钟前
【JavaEE】——文件IO的应用
开发语言·python
电饭叔39 分钟前
《python语言程序设计》2018版第8章19题几何Rectangle2D类(下)-头疼的几何和数学
开发语言·python
程序猿小D1 小时前
第二百六十七节 JPA教程 - JPA查询AND条件示例
java·开发语言·前端·数据库·windows·python·jpa
杰哥在此2 小时前
Python知识点:如何使用Multiprocessing进行并行任务管理
linux·开发语言·python·面试·编程
zaim15 小时前
计算机的错误计算(一百一十四)
java·c++·python·rust·go·c·多项式
PythonFun9 小时前
Python批量下载PPT模块并实现自动解压
开发语言·python·powerpoint
炼丹师小米9 小时前
Ubuntu24.04.1系统下VideoMamba环境配置
python·环境配置·videomamba
GFCGUO9 小时前
ubuntu18.04运行OpenPCDet出现的问题
linux·python·学习·ubuntu·conda·pip
985小水博一枚呀11 小时前
【深度学习基础模型】神经图灵机(Neural Turing Machines, NTM)详细理解并附实现代码。
人工智能·python·rnn·深度学习·lstm·ntm
萧鼎12 小时前
Python调试技巧:高效定位与修复问题
服务器·开发语言·python