Gradio 案例——将文本文件转为词云图

文章目录

Gradio 案例——将文本文件转为词云图

界面截图

依赖安装

  • 新建一个虚拟环境 Python 3.9.16
  • 依赖
    • $ pip install gradio==4.29 -i "https://pypi.doubanio.com/simple/"
    • $ pip install wordcloud==1.9.3 -i "https://pypi.doubanio.com/simple/"
    • $ pip install jieba==0.42.1 -i "https://pypi.doubanio.com/simple/"

项目目录结构

复制代码
wordcloud-webui         # 目录
--/resources             # 资源目录
--/consts.py             # py文件,常量
--/gradio_interfaces.py  # py文件,Gradio视图
--/jieba_util.py         # py文件,工具库文件
--/lib_word_cloud.py     # py文件,工具库文件
--/main.py               # py文件,入口

代码

  • main.py
python 复制代码
from gradio_interfaces import iface

# Launch the Gradio app only when this file is run as a script, so that
# importing the module does not start it as a side effect.
if __name__ == "__main__":
    iface.launch()
  • lib_word_cloud.py
python 复制代码
from wordcloud import WordCloud, ImageColorGenerator
import numpy as np
from PIL import Image

from consts import *

def text2wordcount_normal(
    text: str,
    background_color: str = "white",
    margin = 2,
    min_font_size = 4,
    max_font_size = 200,
    font_path = None,
    width: int = 400,
    height: int = 200,
):
    """Render ``text`` as a rectangular word cloud image.

    Every option is normalised first: a missing or out-of-range value falls
    back to the same default as in the signature, so callers may forward raw
    UI values (``None``, empty strings, floats) unchanged.

    Args:
        text: Whitespace-separated text to visualise.
        background_color: Colour name for the canvas; blank means "white".
        margin: Gap between words; ``None`` or a negative value means 2.
        min_font_size: Smallest font size; values below 1 mean 4.
        max_font_size: Largest font size; values below 4 mean 200.
        font_path: Font file to use; blank means ``DEFAULT_FONT_PATH``.
        width: Canvas width in pixels; values below 1 mean 400.
        height: Canvas height in pixels; values below 1 mean 200.

    Returns:
        The image produced by ``WordCloud.to_image()``.
    """
    if not background_color or not str(background_color).strip():
        background_color = "white"
    # 0 is a legitimate margin, so only None / negative values fall back.
    if margin is None or margin < 0:
        margin = 2
    if not min_font_size or min_font_size < 1:
        min_font_size = 4
    if not max_font_size or max_font_size < 4:
        max_font_size = 200
    if not font_path or not str(font_path).strip():
        font_path = DEFAULT_FONT_PATH
    if not width or width < 1:
        width = 400
    if not height or height < 1:
        height = 200

    # Numeric UI fields can deliver floats (e.g. 400.0); the canvas size and
    # the margin are used as pixel counts, so coerce them to integers.
    wordcloud = WordCloud(
        font_path=font_path,
        width=int(width),
        height=int(height),
        background_color=background_color,
        max_words=2000,
        margin=int(margin),
        min_font_size=min_font_size,
        max_font_size=max_font_size,
        random_state=42,  # fixed seed: same text -> same layout
    ).generate(text)
    return wordcloud.to_image()

def text2wordcount_mask(
    text: str,
    background_color: str = "white",
    margin = 2,
    min_font_size = 4,
    max_font_size = 200,
    font_path = None,
    mask_image = None,
    mask_color = None,
    contour_width=3,
    contour_color="steelblue",
):
    """Render ``text`` as a word cloud shaped and coloured by an image.

    ``mask_image`` is passed to ``WordCloud`` as the mask. Word colours are
    sampled from ``mask_color`` when it is given, otherwise from
    ``mask_image`` itself.

    Args:
        text: Whitespace-separated text to visualise.
        background_color: Colour name for the canvas; blank means "white".
        margin: Gap between words; ``None`` or a negative value means 2.
        min_font_size: Smallest font size; values below 1 mean 4.
        max_font_size: Largest font size; values below 4 mean 200.
        font_path: Font file to use; blank means ``DEFAULT_FONT_PATH``.
        mask_image: Image array used as the mask.
        mask_color: Optional image array used only as the colour source.
        contour_width: Outline thickness; 0 disables the outline, ``None``
            or a negative value means 3.
        contour_color: Outline colour; blank means "steelblue".

    Returns:
        The image produced by ``WordCloud.to_image()``.
    """
    if not background_color or not str(background_color).strip():
        background_color = "white"
    # 0 is a legitimate margin, so only None / negative values fall back.
    if margin is None or margin < 0:
        margin = 2
    if not min_font_size or min_font_size < 1:
        min_font_size = 4
    if not max_font_size or max_font_size < 4:
        max_font_size = 200
    if not font_path or not str(font_path).strip():
        font_path = DEFAULT_FONT_PATH
    # A plain truthiness test would turn an explicit 0 ("no contour") into 3,
    # so test for None explicitly.
    if contour_width is None or contour_width < 0:
        contour_width = 3
    if not contour_color or not str(contour_color).strip():
        contour_color = "steelblue"

    # The second argument of ImageColorGenerator is ``default_color``, an
    # (r, g, b) fallback used for words that land outside the colour image.
    # The previous value ``True`` is not a colour and fails as soon as the
    # fallback is needed (e.g. the colour image is smaller than the mask).
    fallback_rgb = (0, 0, 0)
    color_source = mask_color if mask_color is not None else mask_image
    image_colors = ImageColorGenerator(color_source, default_color=fallback_rgb)

    wordcloud = WordCloud(
        font_path=font_path,
        mask=mask_image,
        background_color=background_color,
        color_func=image_colors,
        contour_width=contour_width,
        contour_color=contour_color,
        max_words=2000,
        margin=int(margin),  # numeric UI fields can deliver floats
        min_font_size=min_font_size,
        max_font_size=max_font_size,
        random_state=42,  # fixed seed: same text -> same layout
    ).generate(text)

    return wordcloud.to_image()
  • jieba_util.py
python 复制代码
import jieba
# jieba.enable_parallel(4)

from consts import *

# The function for processing text with Jieba
def jieba_processing_txt(text, userdict_list=('阿Q', '孔乙己', '单四嫂子')):
    """Segment ``text`` with Jieba and drop stop words and one-character tokens.

    Args:
        text: Raw text to segment.
        userdict_list: Extra words registered with Jieba before cutting so
            they are kept as single tokens. ``None`` registers nothing.
            The default is a tuple rather than a list so that the default
            object cannot be mutated between calls.

    Returns:
        The surviving tokens joined by single spaces.
    """
    if userdict_list is not None:
        for word in userdict_list:
            # Skip blank entries such as those produced by splitting an
            # empty string on commas.
            if word and word.strip():
                jieba.add_word(word.strip())

    with open(STOPWORDS_PATH, encoding='utf-8') as f_stop:
        # A set gives constant-time membership tests for every token.
        stopwords = set(f_stop.read().splitlines())

    # Iterate the tokens directly instead of joining them with "/ " and
    # splitting on "/" again, which left a stray leading space on every
    # token after the first.
    tokens = (token.strip() for token in jieba.cut(text, cut_all=False))
    return ' '.join(
        token for token in tokens
        if len(token) > 1 and token not in stopwords
    )
  • gradio_interfaces.py
python 复制代码
import gradio as gr

import lib_word_cloud
import jieba_util

from consts import *

def service_text2wc(
    text_file,
    text_lang,
    text_dict: str,
    background_color,
    margin,
    max_font_size,
    min_font_size,
    font_file,
    width,
    height,
    mask_image,
    mask_color,
    contour_width,
    contour_color,
):
    """Gradio click handler: turn an uploaded text file into a word cloud.

    Validates the form values, reads the file as UTF-8, optionally segments
    Chinese text with Jieba, then renders in mask mode when ``mask_image`` is
    given and in plain rectangular mode otherwise.

    Args:
        text_file: Uploaded text file; its ``.name`` is used as the path.
        text_lang: Language mode; '中文' enables Jieba segmentation.
        text_dict: Comma-separated custom words for Jieba (may be empty).
        background_color, margin, max_font_size, min_font_size: Rendering
            options forwarded to the renderer.
        font_file: Optional uploaded font; its ``.name`` is used as the path.
        width, height: Canvas size, used only in plain mode.
        mask_image, mask_color, contour_width, contour_color: Mask-mode
            options; ``mask_color`` is ignored when ``mask_image`` is None.

    Returns:
        The rendered image, or ``None`` after a validation warning.

    Raises:
        gr.Error: If reading, segmenting or rendering fails.
    """
    if not text_file:
        gr.Warning("请传入正确的文本文件!")
        return
    # A cleared numeric field arrives as None; skip the range checks for it
    # instead of failing on the comparison.
    if margin is not None and margin < 0:
        gr.Warning("字体间隔配置不合法!")
        return
    font_sizes = [size for size in (min_font_size, max_font_size) if size is not None]
    if any(size < 0 for size in font_sizes) or (
        len(font_sizes) == 2 and min_font_size > max_font_size
    ):
        gr.Warning("字体大小配置不合法!")
        return

    try:
        with open(file=text_file.name, encoding="utf-8") as file:
            text = file.read()

        if text_lang == '中文':
            gr.Info("选择了中文,将使用Jieba库解析文本!")
            # Drop blank entries: splitting an empty textbox value on commas
            # would otherwise yield [""].
            userdict_list = [
                word.strip()
                for word in (text_dict or "").split(",")
                if word.strip()
            ]
            text = jieba_util.jieba_processing_txt(text, userdict_list)

        font_path = font_file.name if font_file else None

        if mask_image is not None:
            return lib_word_cloud.text2wordcount_mask(
                text,
                background_color,
                margin,
                min_font_size,
                max_font_size,
                font_path,
                mask_image,
                mask_color,
                contour_width,
                contour_color,
            )
        return lib_word_cloud.text2wordcount_normal(
            text,
            background_color,
            margin,
            min_font_size,
            max_font_size,
            font_path,
            width,
            height,
        )
    except Exception as e:
        print(e)
        # Chain the cause so the original traceback is kept alongside the
        # user-facing error.
        raise gr.Error("文本转词云图时,发生异常:" + str(e)) from e

# JavaScript source passed to gr.Blocks(js=...) below. The function builds a
# centred, bold banner <div>, appends the welcome text one character at a time
# (one <span> every 200 ms, each fading in via an opacity transition), and
# inserts the banner as the first child of the '.gradio-container' element.
js = """
function createGradioAnimation() {
    var container = document.createElement('div');
    container.id = 'gradio-animation';
    container.style.fontSize = '2em';
    container.style.fontWeight = 'bold';
    container.style.textAlign = 'center';
    container.style.marginBottom = '20px';

    var text = '欢迎使用"词云转换器"!';
    for (var i = 0; i < text.length; i++) {
        (function(i){
            setTimeout(function(){
                var letter = document.createElement('span');
                letter.style.opacity = '0';
                letter.style.transition = 'opacity 0.5s';
                letter.innerText = text[i];

                container.appendChild(letter);

                setTimeout(function() {
                    letter.style.opacity = '1';
                }, 50);
            }, i * 200);
        })(i);
    }

    var gradioContainer = document.querySelector('.gradio-container');
    gradioContainer.insertBefore(container, gradioContainer.firstChild);

    return 'Animation created';
}
"""

# Page layout: the left column holds the text input and the two mode tabs,
# the right column holds the shared rendering options, the submit button and
# the resulting image.
with gr.Blocks(title="词云转换器", js=js) as iface:
    with gr.Row():
        with gr.Column():
            with gr.Group():
                with gr.Row():
                    input_text_file = gr.File(label="待处理的文本文件(必填)")
                    with gr.Column():
                        gr.Label(label="Tips", value="请传入正常可读的文本文件,如以.txt结尾的文档", color="#fee2e2")
                        gr.File(value=EXAMPLE_TEXT_FILE, label="文本文件的样例")
                        input_text_lang = gr.Radio(label="文本语言模式", choices=["中文", "英文"], value="中文")
                input_text_dict = gr.Textbox(label="自定义分词词典(可选)", info="中文模式使用,多个词之间用英文逗号分隔,例如'阿Q, 孔乙己, 单四嫂子'")
            with gr.Tab("普通模式"):
                with gr.Row():
                    input_width = gr.Number(value=400, label="生成图像的宽", minimum=1)
                    input_height = gr.Number(value=200, label="生成图像的高", minimum=1)
                # The text contains double quotes, so the literal is delimited
                # with single quotes; nesting them unescaped inside a
                # double-quoted literal is a SyntaxError.
                gr.Label(label="Tips", value='使用该模式时,记得清理掉"Mask模式"下的"Mask图像"', color="#fee2e2")
            with gr.Tab("Mask模式"):
                with gr.Row():
                    input_contour_width = gr.Number(value=3, label="轮廓线的粗细", minimum=0)
                    input_contour_color = gr.Textbox(value="steelblue", label="轮廓线的颜色")
                with gr.Row():
                    input_mask_image = gr.Image(label="Mask图像(决定词云的形状、颜色、宽高)")
                    input_mask_color = gr.Image(label="若传入该图,则词云的颜色由该图决定")
                # Sample mask shown in a gallery (the same sample three times).
                gr.Gallery(value=[EXAMPLE_MASK_IMAGE_PATH, EXAMPLE_MASK_IMAGE_PATH, EXAMPLE_MASK_IMAGE_PATH], label="Mask图像的样例", interactive=False)
        with gr.Column():
            with gr.Group():
                with gr.Row():
                    with gr.Group():
                        input_bg_color = gr.Textbox(value="white", label="词云图的背景色(默认为'white')")
                        input_margin = gr.Number(value=2, label="字体间隔(默认为'2')", minimum=0)
                        with gr.Row():
                            input_min_font_size = gr.Number(value=4, label="字体大小-最小值", minimum=1)
                            input_max_font_size = gr.Number(value=200, label="字体大小-最大值", minimum=4)
                    input_font_file = gr.File(label="词云图的字体文件(可选,如otf文件)")
                format_radio = gr.Radio(choices=["png", "jpeg", "webp", "bmp", "tiff"], label="词云图像格式", value="png")
            submit_button = gr.Button("开始处理", variant="primary")
            output_image = gr.Image(label="词云图", format="png")

    def fix_format(x):
        """Store the selected image format on the output component."""
        # NOTE(review): this mutates the single shared component object, so
        # the choice presumably applies to every user of this process; confirm.
        output_image.format = x
        return None

    format_radio.change(fn=fix_format, inputs=format_radio)

    # The order of ``inputs`` must match the positional parameters of
    # service_text2wc (note: max font size comes before min font size).
    submit_button.click(
        fn=service_text2wc,
        inputs=[
            input_text_file,
            input_text_lang,
            input_text_dict,
            input_bg_color,
            input_margin,
            input_max_font_size,
            input_min_font_size,
            input_font_file,
            input_width,
            input_height,
            input_mask_image,
            input_mask_color,
            input_contour_width,
            input_contour_color,
        ],
        outputs=output_image,
    )
  • consts.py(记得把下面各文件的路径修改为与 resources 目录对应的实际路径)
python 复制代码
# The underscore alias keeps the module out of ``from consts import *``.
import os as _os

# Resolve every resource relative to this file's own ``resources`` directory
# so the paths work on any OS and from any working directory.
_RESOURCES_DIR = _os.path.join(_os.path.dirname(_os.path.abspath(__file__)), "resources")

# Sample text file
EXAMPLE_TEXT_FILE = _os.path.join(_RESOURCES_DIR, "CalltoArms.txt")
# Sample mask image (the original literal ended with a stray trailing space)
EXAMPLE_MASK_IMAGE_PATH = _os.path.join(_RESOURCES_DIR, "parrot_mask.png")
# Stop-word list used by the tokenizer
STOPWORDS_PATH = _os.path.join(_RESOURCES_DIR, "stopwords_cn_en.txt")
# Default font for the word cloud
DEFAULT_FONT_PATH = _os.path.join(_RESOURCES_DIR, "SourceHanSerifK-Light.otf")
相关推荐
m0_603888711 小时前
FineInstructions Scaling Synthetic Instructions to Pre-Training Scale
人工智能·深度学习·机器学习·ai·论文速览
爬台阶的蚂蚁1 小时前
RAG概念和使用
ai·rag
undsky_1 小时前
【RuoYi-SpringBoot3-Pro】:将 AI 编程融入传统 java 开发
java·人工智能·spring boot·ai·ai编程
AI应用开发实战派1 小时前
AI人工智能中Bard的智能电子商务优化
人工智能·ai·bard
AI原生应用开发2 小时前
AIGC领域Bard在通信领域的内容创作
ai·aigc·bard
唐诺2 小时前
深入了解AI
人工智能·ai
ZEGO即构开发者2 小时前
如何用一句话让AI集成 ZEGO 产品
ai·实时互动·实时音视频·rtc
阿杰学AI2 小时前
AI核心知识76——大语言模型之RAG 2.0(简洁且通俗易懂版)
人工智能·ai·语言模型·自然语言处理·rag·检索增强生成·rag2.0
GuoDongOrange2 小时前
智能体来了从 0 到 1:工作流在智能体系统中的真实作用
ai·智能体·从0到1·智能体来了·智能体来了从0到1
爱吃涮肉3 小时前
# 第二章:ClaudeCode核心功能(详细版)
ai