文本生成图片效果
输入文本为一句古诗:"孤帆远影碧空尽,唯见长江天际流。"以下是不同风格下生成的图片。
模型地址
初始化pipeline
python
# Task type and model id handed to the ModelScope pipeline factory.
task = Tasks.text_to_image_synthesis
model_id = 'damo/multi-modal_chinese_stable_diffusion_v1.0'

# Pick float16 when CUDA is available, otherwise fall back to float32.
if torch.cuda.is_available():
    torch_dtype = torch.float16
else:
    torch_dtype = torch.float32

pipe = pipeline(task=task, model=model_id, torch_dtype=torch_dtype)
生成图片
python
# Negative prompt
negative_prompt = (
    "blood, gore, violence, murder, kill, dead, corpse, "
    "horrible, frightening, scary, monster, ghost, skeleton, zombie, "
    "sex, nudity, pornography, adult, erotic, mature, "
    "drugs, alcohol, smoking, tobacco, illegal, "
    "dark, night, storm, thunder, lightning, apocalypse, disaster, "
    "gun, knife, sword, bomb, explosion, firearm, "
    "mean, angry, sadistic, hostile, aggressive, bullying, "
    "dangerous, unsafe, hazardous, poison, toxic, pollution"
)

# Collect every pipeline argument in one request dict before the call.
inputs = {
    'text': '孤帆远影碧空尽,唯见长江天际流。中国画',
    'num_inference_steps': 120,
    'guidance_scale': 11,
    'negative_prompt': negative_prompt,
}
output = pipe(inputs)

# Take the first generated image and write it directly with OpenCV.
img = output['output_imgs'][0]
cv2.imwrite('result1.png', img)

# The output is an OpenCV-style numpy array; reverse the channel axis
# and wrap it as a PIL.Image before saving.
img = Image.fromarray(img[:, :, ::-1])
img.save('result1.png')
封装为http接口的完整代码
python
from flask import Flask, request, send_file
import io
import torch
import cv2
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from PIL import Image
app = Flask(__name__)

# Initialize the pipeline once at import time so every request reuses it.
task = Tasks.text_to_image_synthesis
model_id = 'damo/multi-modal_chinese_stable_diffusion_v1.0'

# Pick float16 when CUDA is available, otherwise fall back to float32.
if torch.cuda.is_available():
    torch_dtype = torch.float16
else:
    torch_dtype = torch.float32

pipe = pipeline(task=task, model=model_id, torch_dtype=torch_dtype)
@app.route('/generate', methods=['POST'])
def generate_image():
    """Generate an image from the posted text and return it as a PNG.

    The request body must be a JSON object with:
      * ``text`` (str, required): prompt passed to the pipeline.
      * ``guidance_scale`` (number, optional): defaults to 9.

    Returns:
        An ``image/png`` response on success, or a JSON error payload with
        status 400 when the body is not a JSON object, ``text`` is missing
        or not a string, or ``guidance_scale`` is not a number.
    """
    # silent=True yields None instead of raising when the body is missing,
    # has the wrong content type, or is not valid JSON.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Covers None as well as valid-but-unusable JSON such as lists,
        # which would otherwise fail on ``.get`` with an AttributeError.
        return {'error': 'Request body must be a JSON object'}, 400

    text = data.get('text', '')
    if not isinstance(text, str) or not text:
        return {'error': 'No text provided'}, 400

    guidance_scale = data.get('guidance_scale', 9)
    # bool is a subclass of int, so reject it explicitly.
    if isinstance(guidance_scale, bool) or not isinstance(guidance_scale, (int, float)):
        return {'error': 'guidance_scale must be a number'}, 400

    negative_prompt = (
        "blood, gore, violence, murder, kill, dead, corpse, "
        "horrible, frightening, scary, monster, ghost, skeleton, zombie, "
        "sex, nudity, pornography, adult, erotic, mature, "
        "drugs, alcohol, smoking, tobacco, illegal, "
        "dark, night, storm, thunder, lightning, apocalypse, disaster, "
        "gun, knife, sword, bomb, explosion, firearm, "
        "mean, angry, sadistic, hostile, aggressive, bullying, "
        "dangerous, unsafe, hazardous, poison, toxic, pollution"
    )

    output = pipe(
        {
            'text': text,
            'num_inference_steps': 120,
            'guidance_scale': guidance_scale,
            'negative_prompt': negative_prompt,
        }
    )

    img = output['output_imgs'][0]
    img = Image.fromarray(img[:, :, ::-1])  # Convert BGR to RGB

    # Encode the image into an in-memory buffer and rewind it so that
    # send_file streams from the first byte.
    img_byte_arr = io.BytesIO()
    img.save(img_byte_arr, format='PNG')
    img_byte_arr.seek(0)
    return send_file(img_byte_arr, mimetype='image/png')
# Start Flask's built-in server only when this file is run as a script.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=False)
在python环境下运行代码
第一次运行会下载大模型文件,需要等待一段时间。启动成功后会有如下提示:
csharp
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:5000
* Running on http://10.10.10.132:5000