Gradio 案例——将文本文件转为词云图

文章目录

Gradio 案例——将文本文件转为词云图

界面截图

依赖安装

  • 新建一个虚拟环境 Python 3.9.16
  • 依赖
    • $ pip install gradio==4.29 -i "https://pypi.doubanio.com/simple/"
    • $ pip install wordcloud==1.9.3 -i "https://pypi.doubanio.com/simple/"
    • $ pip install jieba==0.42.1 -i "https://pypi.doubanio.com/simple/"

项目目录结构

wordcloud-webui         # 目录
--/resources             # 资源目录
--/consts.py             # py文件,常量
--/gradio_interfaces.py  # py文件,Gradio视图
--/jieba_util.py         # py文件,工具库文件
--/lib_word_cloud.py     # py文件,工具库文件
--/main.py               # py文件,入口

代码

  • main.py
python 复制代码
from gradio_interfaces import iface

def main():
    """Start the Gradio web UI exposed as ``iface`` by ``gradio_interfaces``."""
    iface.launch()


if __name__ == "__main__":
    # Launch only when run as a script, not when this module is imported.
    main()
  • lib_word_cloud.py
python 复制代码
from wordcloud import WordCloud, ImageColorGenerator
import numpy as np
from PIL import Image

from consts import *

def text2wordcount_normal(
    text: str,
    background_color: str = "white",
    margin = 2,
    min_font_size = 4,
    max_font_size = 200,
    font_path = None,
    width: int = 400,
    height: int = 200,
):
    """Render ``text`` as a rectangular word cloud and return it as an image.

    Missing or out-of-range settings are replaced before the cloud is built:
    a blank background colour becomes "white", a minimum font size below 1
    becomes 4, a maximum font size below 4 becomes 200, a blank font path
    becomes ``DEFAULT_FONT_PATH``, and a width/height below 1 becomes
    400/200. ``margin`` is forwarded unchanged.

    Returns:
        The value of ``WordCloud.to_image()`` for the generated cloud.
    """
    # String settings: None, empty and whitespace-only all fall back.
    if not (background_color and str(background_color).strip()):
        background_color = "white"
    if not (font_path and str(font_path).strip()):
        font_path = DEFAULT_FONT_PATH

    # Numeric settings: falsy (None / 0) or below the lower bound fall back.
    min_font_size = 4 if (not min_font_size or min_font_size < 1) else min_font_size
    max_font_size = 200 if (not max_font_size or max_font_size < 4) else max_font_size
    width = 400 if (not width or width < 1) else width
    height = 200 if (not height or height < 1) else height

    settings = dict(
        font_path=font_path,
        width=width,
        height=height,
        background_color=background_color,
        max_words=2000,
        margin=margin,
        min_font_size=min_font_size,
        max_font_size=max_font_size,
        random_state=42,  # fixed random_state, same value on every call
    )
    cloud = WordCloud(**settings).generate(text)
    return cloud.to_image()

def text2wordcount_mask(
    text: str,
    background_color: str = "white",
    margin = 2,
    min_font_size = 4,
    max_font_size = 200,
    font_path = None,
    mask_image = None,
    mask_color = None,
    contour_width=3,
    contour_color="steelblue",
):
    """Render ``text`` as a word cloud shaped and coloured by an image.

    Args:
        text: Text to turn into a word cloud.
        background_color: Background colour; blank values become "white".
        margin: Forwarded unchanged to ``WordCloud``.
        min_font_size: Values below 1 (or missing) become 4.
        max_font_size: Values below 4 (or missing) become 200.
        font_path: Font file; blank values become ``DEFAULT_FONT_PATH``.
        mask_image: Image passed as ``mask`` to ``WordCloud``; also the colour
            source when ``mask_color`` is not given. Presumably a NumPy image
            array as delivered by the UI -- TODO confirm against the caller.
        mask_color: Optional image whose colours are used instead of those of
            ``mask_image``.
        contour_width: Outline width; ``0`` is kept (no outline), while
            ``None`` or a negative value becomes 3.
        contour_color: Outline colour; blank values become "steelblue".

    Returns:
        The value of ``WordCloud.to_image()`` for the generated cloud.
    """
    if not background_color or not str(background_color).strip():
        background_color = "white"
    if not min_font_size or min_font_size < 1:
        min_font_size = 4
    if not max_font_size or max_font_size < 4:
        max_font_size = 200
    if not font_path or not str(font_path).strip():
        font_path = DEFAULT_FONT_PATH
    # 0 is a valid request for "no contour", so test for None explicitly
    # instead of relying on truthiness (which would turn 0 into 3).
    if contour_width is None or contour_width < 0:
        contour_width = 3
    if not contour_color or not str(contour_color).strip():
        contour_color = "steelblue"

    # Colours come from the dedicated colour image when supplied, otherwise
    # from the mask itself. ``default_color`` must be an RGB tuple: it is used
    # for words that fall outside the colour image (e.g. a colour image
    # smaller than the mask); black is the fallback chosen here.
    color_source = mask_color if mask_color is not None else mask_image
    image_colors = ImageColorGenerator(color_source, default_color=(0, 0, 0))

    # Generate a word cloud image
    wordcloud = WordCloud(
        font_path=font_path,
        mask=mask_image,
        background_color=background_color,
        color_func=image_colors,
        contour_width=contour_width,
        contour_color=contour_color,
        max_words=2000,
        margin=margin,
        min_font_size=min_font_size,
        max_font_size=max_font_size,
        random_state=42,
    ).generate(text)

    return wordcloud.to_image()
  • jieba_util.py
python 复制代码
import jieba
# jieba.enable_parallel(4)

from consts import *

# The function for processing text with Jieba
def jieba_processing_txt(text, userdict_list=('阿Q', '孔乙己', '单四嫂子')):
    """Segment ``text`` with jieba and drop stop words and short tokens.

    Args:
        text: Raw text to segment.
        userdict_list: Extra words registered through ``jieba.add_word``
            before segmentation. ``None`` or an empty sequence registers
            nothing; blank entries are skipped.

    Returns:
        The kept tokens joined by single spaces. A token is kept when, after
        stripping whitespace, it is longer than one character and is not a
        line of the stop-word file at ``STOPWORDS_PATH``.
    """
    for word in userdict_list or ():
        # Skip blank entries such as the "" produced by splitting an empty
        # dictionary string.
        if word and word.strip():
            jieba.add_word(word)

    # A set gives constant-time stop-word lookups.
    with open(STOPWORDS_PATH, encoding='utf-8') as f_stop:
        stopwords = set(f_stop.read().splitlines())

    # Iterate the segments directly rather than joining them with "/" and
    # splitting again, so kept tokens carry no stray whitespace.
    tokens = (segment.strip() for segment in jieba.cut(text, cut_all=False))
    return ' '.join(
        token for token in tokens if len(token) > 1 and token not in stopwords
    )
  • gradio_interfaces.py
python 复制代码
import gradio as gr

import lib_word_cloud
import jieba_util

from consts import *

def service_text2wc(
    text_file,
    text_lang,
    text_dict: str,
    background_color,
    margin,
    max_font_size,
    min_font_size,
    font_file,
    width,
    height,
    mask_image,
    mask_color,
    contour_width,
    contour_color,
):
    """Gradio click handler: turn an uploaded text file into a word-cloud image.

    Invalid input (no file, negative or missing margin, inconsistent or
    missing font sizes) shows a warning and returns ``None``. When
    ``text_lang`` is '中文' the text is first segmented with
    ``jieba_util.jieba_processing_txt``, using the comma-separated words of
    ``text_dict`` as a user dictionary. If ``mask_image`` is given the
    mask-shaped cloud is produced, otherwise the rectangular one.

    Note that ``max_font_size`` precedes ``min_font_size`` in this signature.

    Returns:
        The image returned by the ``lib_word_cloud`` helper, or ``None`` when
        validation fails.

    Raises:
        gr.Error: Wraps any exception raised while reading the file or
            building the cloud.
    """
    if not text_file:
        gr.Warning("请传入正确的文本文件!")
        return None
    # NOTE(review): a cleared number field presumably arrives as None --
    # confirm. Reject it here instead of failing on `None < 0` with a
    # TypeError.
    if margin is None or margin < 0:
        gr.Warning("字体间隔配置不合法!")
        return None
    if (
        min_font_size is None
        or max_font_size is None
        or min_font_size < 0
        or max_font_size < 0
        or min_font_size > max_font_size
    ):
        gr.Warning("字体大小配置不合法!")
        return None

    try:
        with open(file=text_file.name, encoding="utf-8") as file:
            text = file.read()

        if text_lang == '中文':
            gr.Info("选择了中文,将使用Jieba库解析文本!")
            userdict_list = []
            if text_dict is not None:
                # Drop blank entries: splitting an empty textbox value
                # yields [""], which is not a usable dictionary word.
                userdict_list = [
                    word.strip() for word in text_dict.split(",") if word.strip()
                ]
            text = jieba_util.jieba_processing_txt(text, userdict_list)

        font_path = font_file.name if font_file else None

        if mask_image is not None:
            return lib_word_cloud.text2wordcount_mask(
                text,
                background_color,
                margin,
                min_font_size,
                max_font_size,
                font_path,
                mask_image,
                mask_color,
                contour_width,
                contour_color,
            )
        return lib_word_cloud.text2wordcount_normal(
            text,
            background_color,
            margin,
            min_font_size,
            max_font_size,
            font_path,
            width,
            height,
        )
    except Exception as e:
        print(e)
        # Chain the cause so the original traceback stays available.
        raise gr.Error("文本转词云图时,发生异常:" + str(e)) from e

# JavaScript source handed to ``gr.Blocks(js=...)`` below.
# It defines ``createGradioAnimation``, which builds a centred, bold <div>,
# appends the welcome text one character at a time (one <span> every 200 ms,
# each fading in through an opacity transition), and inserts that <div> as the
# first child of the ``.gradio-container`` element.
js = """
function createGradioAnimation() {
    var container = document.createElement('div');
    container.id = 'gradio-animation';
    container.style.fontSize = '2em';
    container.style.fontWeight = 'bold';
    container.style.textAlign = 'center';
    container.style.marginBottom = '20px';

    var text = '欢迎使用"词云转换器"!';
    for (var i = 0; i < text.length; i++) {
        (function(i){
            setTimeout(function(){
                var letter = document.createElement('span');
                letter.style.opacity = '0';
                letter.style.transition = 'opacity 0.5s';
                letter.innerText = text[i];

                container.appendChild(letter);

                setTimeout(function() {
                    letter.style.opacity = '1';
                }, 50);
            }, i * 200);
        })(i);
    }

    var gradioContainer = document.querySelector('.gradio-container');
    gradioContainer.insertBefore(container, gradioContainer.firstChild);

    return 'Animation created';
}
"""

# UI definition: inputs are grouped in two columns, and the "开始处理" button
# sends them to ``service_text2wc``, whose result is shown in ``output_image``.
with gr.Blocks(title="词云转换器", js=js) as iface:
    with gr.Row():
        # Left column: text source, language / dictionary and the two modes.
        with gr.Column():
            with gr.Group():
                with gr.Row():
                    input_text_file = gr.File(label="待处理的文本文件(必填)")
                    with gr.Column():
                        gr.Label(label="Tips", value="请传入正常可读的文本文件,如以.txt结尾的文档", color="#fee2e2")
                        gr.File(value=EXAMPLE_TEXT_FILE, label="文本文件的样例")
                        input_text_lang = gr.Radio(label="文本语言模式", choices=["中文", "英文"], value="中文")
                input_text_dict = gr.Textbox(label="自定义分词词典(可选)", info="中文模式使用,多个词之间用英文逗号分隔,例如'阿Q, 孔乙己, 单四嫂子'")
            with gr.Tab("普通模式"):
                with gr.Row():
                    input_width = gr.Number(value=400, label="生成图像的宽", minimum=1)
                    input_height = gr.Number(value=200, label="生成图像的高", minimum=1)
                # Single-quoted literal: the text itself contains double
                # quotes, which would end a double-quoted string early and
                # make the module fail to parse.
                gr.Label(label="Tips", value='使用该模式时,记得清理掉"Mask模式"下的"Mask图像"', color="#fee2e2")
            with gr.Tab("Mask模式"):
                with gr.Row():
                    input_contour_width = gr.Number(value=3, label="轮廓线的粗细", minimum=0)
                    input_contour_color = gr.Textbox(value="steelblue", label="轮廓线的颜色")
                with gr.Row():
                    input_mask_image = gr.Image(label="Mask图像(决定词云的形状、颜色、宽高)")
                    input_mask_color = gr.Image(label="若传入该图,则词云的颜色由该图决定")
                gr.Gallery(value=[EXAMPLE_MASK_IMAGE_PATH, EXAMPLE_MASK_IMAGE_PATH, EXAMPLE_MASK_IMAGE_PATH], label="Mask图像的样例", interactive=False)
        # Right column: appearance settings, submit button and result image.
        with gr.Column():
            with gr.Group():
                with gr.Row():
                    with gr.Group():
                        input_bg_color = gr.Textbox(value="white", label="词云图的背景色(默认为'white')")
                        input_margin = gr.Number(value=2, label="字体间隔(默认为'2')", minimum=0)
                        with gr.Row():
                            input_min_font_size = gr.Number(value=4, label="字体大小-最小值", minimum=1)
                            input_max_font_size = gr.Number(value=200, label="字体大小-最大值", minimum=4)
                    input_font_file = gr.File(label="词云图的字体文件(可选,如otf文件)")
                format_radio = gr.Radio(choices=["png", "jpeg", "webp", "bmp", "tiff"], label="词云图像格式", value="png")
            submit_button = gr.Button("开始处理", variant="primary")
            output_image = gr.Image(label="词云图", format="png")

    def fix_format(x):
        """Store the chosen image format on the output component."""
        # NOTE(review): this mutates the one ``output_image`` object created
        # above, so the setting is presumably shared by every session of the
        # app -- confirm this is acceptable.
        output_image.format = x
        return None

    format_radio.change(fn=fix_format, inputs=format_radio)

    # The order of ``inputs`` must match the parameter order of
    # ``service_text2wc`` (max font size comes before min font size there).
    submit_button.click(
        fn=service_text2wc,
        inputs=[
            input_text_file,
            input_text_lang,
            input_text_dict,
            input_bg_color,
            input_margin,
            input_max_font_size,
            input_min_font_size,
            input_font_file,
            input_width,
            input_height,
            input_mask_image,
            input_mask_color,
            input_contour_width,
            input_contour_color,
        ],
        outputs=output_image,
    )
  • consts.py,记得根据实际情况修改下面各文件的路径,使其与 resources 目录对应
python 复制代码
from pathlib import Path

# Resource files live in the "resources" directory next to this module.
# Resolving them from this file's location (rather than a relative Windows
# path) keeps them valid from any working directory and on any OS.
_RESOURCES_DIR = Path(__file__).resolve().parent / "resources"

# Sample text file
EXAMPLE_TEXT_FILE = str(_RESOURCES_DIR / "CalltoArms.txt")
# Sample mask image (the original literal ended with a stray space)
EXAMPLE_MASK_IMAGE_PATH = str(_RESOURCES_DIR / "parrot_mask.png")
# Stop-word list used by the tokenizer
STOPWORDS_PATH = str(_RESOURCES_DIR / "stopwords_cn_en.txt")
# Default font for word-cloud images
DEFAULT_FONT_PATH = str(_RESOURCES_DIR / "SourceHanSerifK-Light.otf")
相关推荐
觅远1 小时前
python实现word转html
python·html·word
养个小橘猫1 小时前
Word使用分隔符实现页面部分分栏
word
SimonLiu0093 小时前
[AI]30分钟用cursor开发一个chrome插件
chrome·ai·ai编程
伯牙碎琴3 小时前
智能体实战(需求分析助手)二、需求分析助手第一版实现(支持需求提取、整理、痛点分析、需求分类、优先级分析、需求文档生成等功能)
ai·大模型·agent·需求分析·智能体
SEO-狼术15 小时前
Document Solutions for Word CRACK
word
卓琢19 小时前
2024 年 IA 技术大爆发深度解析
深度学习·ai·论文笔记
zaim11 天前
计算机的错误计算(一百八十七)
人工智能·ai·大模型·llm·错误·正弦/sin·误差/error
m0_748237151 天前
前端:纯前端快速实现html导出word和pdf
前端·html·word
m0_748246871 天前
前端实现读取word文件,并将其进行原样式展示的几种方案
前端·word
不坑老师1 天前
不坑盒子2024.1218更新了,模板库上线、一键添加拼音、一键翻译……支持Word、Excel、PPT、WPS
microsoft·word·powerpoint·excel·wps