RAGFlow 主要有两个启动文件,本文从核心启动文件 ragflow_server.py 入手,讲解源码并梳理 RAGFlow 的启动逻辑。
python
if __name__ == '__main__':
    # Entry point of ragflow_server.py. The steps run strictly in this order:
    # banner/config logging -> settings -> optional debugpy -> DB schema and
    # seed data -> CLI flags -> runtime config -> plugins -> signal handlers
    # -> delayed progress thread -> HTTP server.

    # Raw string so the backslashes in the ASCII-art banner are kept verbatim.
    logging.info(r"""
        ____   ___    ______ ______ __
       / __ \ /   |  / ____// ____// /____  _      __
      / /_/ // /| | / / __ / /_   / // __ \| | /| / /
     / _, _// ___ |/ /_/ // __/  / // /_/ /| |/ |/ /
    /_/ |_|/_/  |_|\____//_/    /_/ \____/ |__/|__/
    """)
    logging.info(
        f'RAGFlow version: {get_ragflow_version()}'
    )
    logging.info(
        f'project base: {utils.file_utils.get_project_base_directory()}'
    )
    # Print configuration information such as the version number and service
    # names. Mainly involves two files: the source file constants.py and the
    # configuration file service_conf.yaml.
    show_configs()
    # Initialise the settings, including the database, ES, MinIO and LLMs.
    settings.init_settings()
    print_rag_settings()

    # Optional remote debugging: only enabled when a positive port is
    # configured. The listener binds to all interfaces (0.0.0.0).
    if RAGFLOW_DEBUGPY_LISTEN > 0:
        logging.info(f"debugpy listen on {RAGFLOW_DEBUGPY_LISTEN}")
        import debugpy
        debugpy.listen(("0.0.0.0", RAGFLOW_DEBUGPY_LISTEN))

    # init db
    # Initialise the database tables; tables that do not exist are created.
    init_web_db()
    init_web_data()

    # init runtime config
    # NOTE(review): the command line is parsed only after the database has
    # been initialised, so `--version` still runs the DB setup above before
    # printing the version and exiting.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--version", default=False, help="RAGFlow version", action="store_true"
    )
    parser.add_argument(
        "--debug", default=False, help="debug mode", action="store_true"
    )
    args = parser.parse_args()
    if args.version:
        print(get_ragflow_version())
        sys.exit(0)

    RuntimeConfig.DEBUG = args.debug
    if RuntimeConfig.DEBUG:
        logging.info("run on debug mode")

    RuntimeConfig.init_env()
    RuntimeConfig.init_config(JOB_SERVER_HOST=settings.HOST_IP, HTTP_PORT=settings.HOST_PORT)

    GlobalPluginManager.load_plugins()

    # Route Ctrl-C and termination requests to signal_handler (not shown in
    # this excerpt).
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    def delayed_start_update_progress():
        """Start ``update_progress`` on a daemon thread."""
        logging.info("Starting update_progress thread (delayed)")
        t = threading.Thread(target=update_progress, daemon=True)
        t.start()

    # update_progress handles documents whose parsing has not finished and
    # updates their status. Unfinished documents are collected into a list
    # that is stored in Redis and later read back from Redis for parsing.
    #
    # In debug mode the server below runs with the Werkzeug reloader, so the
    # timer is armed only in the process where WERKZEUG_RUN_MAIN is "true";
    # presumably this keeps the thread from being started twice (once in the
    # reloader parent, once in the child) - confirm against Werkzeug docs.
    if RuntimeConfig.DEBUG:
        if os.environ.get("WERKZEUG_RUN_MAIN") == "true":
            threading.Timer(1.0, delayed_start_update_progress).start()
    else:
        threading.Timer(1.0, delayed_start_update_progress).start()

    # start http server
    try:
        logging.info("RAGFlow HTTP server start...")
        run_simple(
            hostname=settings.HOST_IP,
            port=settings.HOST_PORT,
            application=app,
            threaded=True,
            use_reloader=RuntimeConfig.DEBUG,
            use_debugger=RuntimeConfig.DEBUG,
        )
    except Exception:
        # The server failed: print the traceback, set stop_event (presumably
        # observed by background workers - confirm), wait one second, then
        # kill this process outright.
        traceback.print_exc()
        stop_event.set()
        time.sleep(1)
        # NOTE(review): signal.SIGKILL is only defined on Unix platforms.
        os.kill(os.getpid(), signal.SIGKILL)
init_web_db() 实际上是 init_database_tables 的导入别名(from api.db.db_models import init_database_tables as init_web_db),其源码如下:
python
@DB.connection_context()
@DB.lock("init_database_tables", 60)
def init_database_tables(alter_fields=None):
    """Create every missing model table, then run the schema migrations.

    Scans the classes found in this module and, for each strict subclass of
    ``DataBaseModel``, creates its table when ``table_exists()`` reports that
    it is missing. A failed creation is logged and recorded, and the loop
    continues, so every table is attempted before the function fails.

    The decorators run the function inside ``DB.connection_context()`` and
    under ``DB.lock("init_database_tables", 60)``; the ``60`` is presumably a
    timeout in seconds - confirm against the lock implementation.

    Args:
        alter_fields: Not read by this function; kept so that existing callers
            which pass it keep working. Defaults to ``None`` instead of a
            shared mutable list.

    Raises:
        Exception: If one or more tables could not be created. The message
            lists the names of the affected model classes.
    """
    members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
    create_failed_list = []
    for _, obj in members:
        # Skip the base class itself; only concrete model subclasses map to
        # tables.
        if obj != DataBaseModel and issubclass(obj, DataBaseModel):
            if not obj.table_exists():
                logging.debug(f"start create table {obj.__name__}")
                try:
                    obj.create_table(safe=True)
                    logging.debug(f"create table success: {obj.__name__}")
                except Exception as e:
                    # Record the failure and keep going so that all failing
                    # tables are reported together below.
                    logging.exception(e)
                    create_failed_list.append(obj.__name__)
            else:
                logging.debug(f"table {obj.__name__} already exists, skip creation.")

    if create_failed_list:
        logging.error(f"create tables failed: {create_failed_list}")
        raise Exception(f"create tables failed: {create_failed_list}")

    # Add fields to the existing tables.
    migrate_db()
init_web_data()源码如下:
python
def init_web_data():
    """Seed the initial web data and log how long the seeding took.

    Calls ``init_llm_factory()`` and then ``add_graph_templates()``, and
    finally logs the elapsed wall-clock time in seconds.
    """
    start_time = time.time()

    # Initialise the LLM factories. This mainly seeds the initial data of the
    # llm, llm_factories and tenant_llm tables, and also updates the document
    # counts in the knowledge-base table.
    init_llm_factory()

    # The following code is commented out in the upstream source. It would
    # initialise the administrator account, which can be used for custom
    # extensions.
    # if not UserService.get_all().count():
    #     init_superuser()

    # Initialise the data in the Canvas_Template table.
    add_graph_templates()
    logging.info("init web data success:{}".format(time.time() - start_time))