简易的图
gRPC-gateway 调用的返回的数据流程如下:
gRPC返回的是 error 接口,通常使用 Status 结构表示错误码
go
// Status is the gRPC error model quoted here for reference: a numeric status
// code, a developer-facing message and an optional list of detail messages.
type Status struct {
// The status code, which should be an enum value of
// [google.rpc.Code][google.rpc.Code].
Code int32
// A developer-facing error message, which should be in English. Any
// user-facing error message should be localized and sent in the
// [google.rpc.Status.details][google.rpc.Status.details] field, or localized
// by the client.
Message string
// A list of messages that carry the error details. There is a common set of
// message types for APIs to use.
Details []*anypb.Any
}
而 Details 字段通常会选择 error_details.pb.go 其中一个或多个结构。
对于 HTTP 我们选择如下的结构:
json
{
"code": xxxxxx,
"message": "......",
"data": {},
"details":[]
}
对比 Status 增加一个 data 字段用于存放 gRPC 服务请求成功时返回的业务数据,当 gRPC 出错时复制 Status 同名字段即可,目的是为了少写点代码,降低 HTTP 和 gRPC 返回错误码的认知负担。
设计错误码结构
上面那两个结构是给人看的,那给牛马看的又长什么样?如果你是负责开发服务的人,当你看到服务的错误信息,你希望这信息帮你干些啥?
Go 标准包 errors ,打印只有文字信息,错在哪里全靠猜。所以最重要的一个,堆栈信息!!!
所以得设计一个出错时包含堆栈信息的结构,然后跟 Status 兼容的错误码,于是乎
-
Error:标准库或者第三方库产生的错误,会被当作原始错误封装到 CodeError
-
CodeError:业务范围使用的错误结构,包含原始错误Error,自定义错误码,错误码对应的HTTP状态码,错误信息,堆栈信息
-
Status:gRPC返回给 client 时,需要把 CodeError 转换成 Status 状态码形式
-
HTTP Response:gateway 是一个冒充的 gRPC client,也会接收到 Status,所以给 REST Client 返回时需要把 Status 转换成 Response 形式
CodeError
Code 字段会被映射成 Map,里面记录着错误码,错误码对应的 HTTP Status,默认错误信息
go
package errors
import (
"bytes"
"fmt"
"reflect"
"runtime"
"strings"
)
// MaxStackDepth is the size of the program-counter buffer allocated by New,
// i.e. the maximum number of stack frames a CodeError can record.
const MaxStackDepth = 50

// CodeError is the error type used inside business code. It bundles the
// original error, an error code, an optional message override and the call
// stack captured when the error was created.
type CodeError struct {
cause error // underlying error wrapped by this CodeError
code Code // gRPC status or error code
message string // error message; when empty, Message falls back to the code's text
stack []uintptr // program counters
}
// New creates a *CodeError for code that wraps cause and records the call
// stack starting at the caller of New.
//
// A nil cause is replaced by a placeholder error whose text is "<nil>", so the
// returned error always has a non-nil cause. Leading and trailing whitespace
// is trimmed from message; an empty message makes Message fall back to the
// text of the code.
func New(code int, cause error, message string) error {
	if cause == nil {
		// Keep the cause non-nil: Error calls cause.Error() unconditionally.
		cause = fmt.Errorf("%v", cause)
	}

	// Skip runtime.Callers itself and New, so the first recorded frame is
	// New's caller.
	pcs := make([]uintptr, MaxStackDepth)
	depth := runtime.Callers(2, pcs)

	return &CodeError{
		code:    Code(code),
		cause:   cause,
		message: strings.TrimSpace(message),
		stack:   pcs[:depth],
	}
}
// Error implements the error interface. The text contains the numeric code,
// the message returned by Message and the text of the wrapped cause.
//
// A CodeError without a cause (for example a zero value or a struct literal
// that did not go through New) is rendered with "<nil>" as its cause instead
// of panicking on a nil dereference; Stack already tolerates a nil cause, so
// Error does too.
func (ce *CodeError) Error() string {
	cause := "<nil>"
	if ce.cause != nil {
		cause = ce.cause.Error()
	}
	return fmt.Sprintf("error: code = %d, %s, cause by %v.\n", ce.code.Code(), ce.Message(), cause)
}
// Code returns the error code carried by ce.
func (ce *CodeError) Code() Code {
	return ce.code
}
// Message returns the message given when the error was created, or the text
// of the error code when no message was set.
func (ce *CodeError) Message() string {
	if ce.message == "" {
		return ce.code.Error()
	}
	return ce.message
}
// Stack renders the recorded call stack as text. When a cause is present the
// output starts with the cause's dynamic type followed by ", "; each frame is
// then written as the function name on one line and "file:line +(0xPC)" on
// the next.
func (ce *CodeError) Stack() string {
	var out bytes.Buffer
	if ce.cause != nil {
		out.WriteString(reflect.TypeOf(ce.cause).String())
		out.WriteString(", ")
	}

	frames := runtime.CallersFrames(ce.stack)
	// The frame returned together with more == false is still written, so the
	// loop condition is only evaluated after each frame has been emitted.
	for more := true; more; {
		var frame runtime.Frame
		frame, more = frames.Next()
		fmt.Fprintf(&out, "%s\n\t %s:%d +(0x%x)\n", frame.Function, frame.File, frame.Line, frame.PC)
	}
	return out.String()
}
// Unwrap returns the wrapped cause so that a CodeError takes part in error
// chains (errors.Is / errors.As / errors.Unwrap, available since Go 1.13).
func (ce *CodeError) Unwrap() error {
	return ce.cause
}
Code
go
package errors
import (
"fmt"
"net/http"
"strconv"
"sync"
)
var (
	// codes maps every registered error code to its HTTP status and fallback
	// message.
	codes = map[Code]ErrCode{}
	// mutex guards codes. register takes the write lock; lookups made by
	// Code.Status and Code.Error take the read lock, so registering a code
	// while other goroutines resolve codes is not a data race.
	mutex = new(sync.RWMutex)
)

// Code is a numeric error code. It implements the error interface, so a bare
// Code can be returned as an error.
type Code int

// Code returns the numeric value of the code.
func (code Code) Code() int { return int(code) }

// Status returns the HTTP status registered for the code, or
// http.StatusInternalServerError when the code has not been registered.
func (code Code) Status() int {
	if c, ok := lookupErrCode(code); ok {
		return c.status
	}
	return http.StatusInternalServerError
}

// Error returns the fallback message registered for the code, or the decimal
// representation of the code when it has not been registered.
func (code Code) Error() string {
	if c, ok := lookupErrCode(code); ok {
		return c.message
	}
	return strconv.FormatInt(int64(code), 10)
}

// lookupErrCode fetches the registry entry for code under the read lock.
func lookupErrCode(code Code) (ErrCode, bool) {
	mutex.RLock()
	defer mutex.RUnlock()
	c, ok := codes[code]
	return c, ok
}

// ErrCode is one registry entry: an error code together with the HTTP status
// and the fallback message that belong to it.
type ErrCode struct {
	code    Code   // gRPC status or error code
	status  int    // http status code
	message string // fallback message
}

// Register adds a custom error code with its HTTP status and fallback
// message. It panics when code is not greater than 16 or when the code has
// already been registered.
func Register(code int, status int, message string) {
	if code <= 16 {
		panic("error code must be greater then 16, compatible with gRPC status.")
	}
	register(Code(code), status, message)
}

// register stores the entry for code without range checking. It panics when
// the code is already present.
func register(code Code, status int, message string) {
	mutex.Lock()
	defer mutex.Unlock()
	if _, ok := codes[code]; ok {
		// Format the plain int so that building the panic message can never
		// re-enter Code.Error (and with it the registry lock).
		panic(fmt.Sprintf("error code %d is duplicated.", int(code)))
	}
	codes[code] = ErrCode{code: code, status: status, message: message}
}
保留 gRPC 的 17 个状态码(0~16)
go
package errors
// gRPC compatible
//
// The constants below take the values 0 through 16 via iota, in the same
// order as the gRPC status codes; Register refuses any code that is not
// greater than 16.
const (
OK Code = iota // Not an error; returned on success.
CANCELLED // The operation was cancelled, typically by the caller.
UNKNOWN // Unknown error.
INVALID_ARGUMENT // The client specified an invalid argument.
DEADLINE_EXCEEDED // The deadline expired before the operation could complete.
NOT_FOUND // Some requested entity (e.g., file or directory) was not found.
ALREADY_EXISTS // The entity that a client attempted to create (e.g., file or directory) already exists.
PERMISSION_DENIED // The caller does not have permission to execute the specified operation.
RESOURCE_EXHAUSTED // Some resource has been exhausted, perhaps a per-user quota, or perhaps the entire file system is out of space.
FAILED_PRECONDITION // The operation was rejected because the system is not in a state required for the operation's execution.
ABORTED // The operation was aborted, typically due to a concurrency issue such as a sequencer check failure or transaction abort.
OUT_OF_RANGE // The operation was attempted past the valid range.
UNIMPLEMENTED // The operation is not implemented or is not supported/enabled in this service.
INTERNAL // Internal errors. This error code is reserved for serious errors.
UNAVAILABLE // The service is currently unavailable.
DATA_LOSS // Unrecoverable data loss or corruption.
UNAUTHENTICATED // The request does not have valid authentication credentials for the operation.
)
func init() {
// refer to https://github.com/googleapis/googleapis/blob/master/google/rpc/code.proto
register(OK, 200, "Not an error; returned on success.")
register(CANCELLED, 499, "The operation was cancelled.")
register(UNKNOWN, 500, "Unknown error.")
register(INVALID_ARGUMENT, 400, "The client specified an invalid argument.")
register(DEADLINE_EXCEEDED, 504, "The deadline expired before the operation could complete.")
register(NOT_FOUND, 404, "Some requested entity (e.g., file or directory) was not found.")
register(ALREADY_EXISTS, 409, "The entity that a client attempted to create (e.g., file or directory) already exists.")
register(PERMISSION_DENIED, 403, "The caller does not have permission to execute the specified operation.")
register(RESOURCE_EXHAUSTED, 429, "Some resource has been exhausted, perhaps a per-user quota, or perhaps the entire file system is out of space.")
register(FAILED_PRECONDITION, 400, "The operation was rejected because the system is not in a state required for the operation's execution.")
register(ABORTED, 409, "The operation was aborted.")
register(OUT_OF_RANGE, 400, "The operation was attempted past the valid range.")
register(UNIMPLEMENTED, 501, "The operation is not implemented or is not supported/enabled in this service.")
register(INTERNAL, 500, "Internal errors.")
register(UNAVAILABLE, 503, "The service is currently unavailable.")
register(DATA_LOSS, 500, "Unrecoverable data loss or corruption.")
register(UNAUTHENTICATED, 401, "The request does not have valid authentication credentials for the operation.")
}
错误码转换
CodeError 转 Status
拦截器,必须得上拦截器,统一在 gRPC 拦截器里面判断 error 的具体类型,然后转成 gRPC 的 Status 表示错误码
go
// chainUnaryInterceptor returns the server option that installs the unary
// interceptor chain. The chain currently holds a single interceptor, which
// turns the error returned by a handler into a gRPC status error.
func chainUnaryInterceptor() grpc.ServerOption {
	// errorInterceptor parses code errors into gRPC statuses. (It is not
	// named "error", which would shadow the builtin type for the rest of
	// this function.)
	errorInterceptor := func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) {
		resp, err := handler(ctx, req)
		if err == nil {
			// Success: a nil error is what status.Error(codes.OK, ...) yields.
			return resp, nil
		}

		// Business error with an explicit code and message.
		var ce *errors.CodeError
		if errors.As(err, &ce) {
			return resp, status.Error(codes.Code(ce.Code()), ce.Message())
		}

		// Bare error code returned as an error.
		var c errors.Code
		if errors.As(err, &c) {
			return resp, status.Error(codes.Code(c.Code()), c.Error())
		}

		// The handler already produced a gRPC status (e.g. via status.Error):
		// keep its code instead of flattening it to Unknown.
		if st, ok := status.FromError(err); ok {
			return resp, st.Err()
		}

		return resp, status.Error(codes.Unknown, err.Error())
	}

	interceptors := []grpc.UnaryServerInterceptor{
		errorInterceptor,
	}
	return grpc.ChainUnaryInterceptor(interceptors...)
}
go
func NewGRPC(srv *service.EchoService) (grpcSvr *grpc.Server, err error) {
......
opts := []grpc.ServerOption{
chainUnaryInterceptor(),
tlsServerOption(),
}
grpcSvr = grpc.NewServer(opts...)
......
}
Status 转 HTTP Response
老实说如果不是为了兼容旧代码的返回格式,比如后端是重构的或者原来的HTTP服务叠加gRPC服务的,本人是不太愿意把返回格式弄成:
json
{
"code": xxxxxx,
"message": "......",
"data": {},
"details":[]
}
如果采用映射 HTTP 状态码的方式,那么对于带业务数据的成功返回,code 是固定值,message 可有可无,仅凭 200 状态码就能判断数据是否正确。只有当历史代码统一返回 200 状态码、再靠 code 字段区分成功与失败时,才需要把业务数据和错误码统一成一个结构。
如果能改动前端调用的代码的话,保持和 gRPC 返回的 数据 + error 也是个不错的选择。
那么对于 Status 转 HTTP Response,有很多种方式去改写
-
How to elegant rewrite/custom resp body from gRPC resp ? · Issue #1610 · grpc-ecosystem/grpc-gateway
综合一下个人选择了添加 ForwardResponseRewriter 处理器的方式
错误码转换
go
// errorHandler returns the ServeMux option that installs the gateway's error
// handler. For status codes above 16 (custom codes) the error is wrapped in a
// runtime.HTTPStatusError carrying the HTTP status that errors.Code maps the
// code to; every error is then written by runtime.DefaultHTTPErrorHandler.
func errorHandler() runtime.ServeMuxOption {
	return runtime.WithErrorHandler(func(ctx context.Context, mux *runtime.ServeMux, marshaler runtime.Marshaler, w http.ResponseWriter, r *http.Request, err error) {
		if code := status.Convert(err).Code(); code > 16 {
			err = &runtime.HTTPStatusError{
				HTTPStatus: errors.Code(code).Status(),
				Err:        err,
			}
		}
		runtime.DefaultHTTPErrorHandler(ctx, mux, marshaler, w, r, err)
	})
}
业务数据转换
go
// forwardResponseRewriter returns the ServeMux option that rewrites response
// bodies. A *statuspb.Status is forwarded unchanged; any other message is
// placed under "data" in an envelope whose "code" and "message" are those of
// errors.OK.
func forwardResponseRewriter() runtime.ServeMuxOption {
	return runtime.WithForwardResponseRewriter(func(ctx context.Context, response proto.Message) (any, error) {
		switch msg := response.(type) {
		case *statuspb.Status:
			return msg, nil
		default:
			envelope := map[string]any{
				"code":    errors.OK,
				"message": errors.OK.Error(),
				"data":    response,
			}
			return envelope, nil
		}
	})
}
go
// NewHTTP builds the gateway HTTP server: it registers the EchoService
// handlers, which dial the gRPC endpoint GRPC_ADDR over TLS, and starts
// serving HTTPS on HTTP_ADDR in a background goroutine.
//
// Failures to build the TLS dial option or to register the handlers are
// returned to the caller. A failure of the background listener other than
// http.ErrServerClosed still panics, because it happens after NewHTTP has
// returned.
func NewHTTP() (httpSvr *http.Server, err error) {
	// NOTE(review): the original discarded the second result of
	// secure.TLSDialOption; it is assumed to be an error — confirm.
	tlsOption, err := secure.TLSDialOption(configs.Path(CA_CERT), "localhost")
	if err != nil {
		return nil, err
	}
	opts := []grpc.DialOption{tlsOption}

	mux := runtime.NewServeMux(
		forwardResponseRewriter(),
		errorHandler(),
	)
	if err = pb.RegisterEchoServiceHandlerFromEndpoint(context.Background(), mux, GRPC_ADDR, opts); err != nil {
		return nil, err
	}

	srv := &http.Server{Addr: HTTP_ADDR, Handler: mux}
	go func() {
		// Use a goroutine-local error: assigning the named result from here
		// would race with the return statement below.
		serveErr := srv.ListenAndServeTLS(configs.Path(CERT), configs.Path(KEY))
		if serveErr != nil && !errors.Is(serveErr, http.ErrServerClosed) {
			panic(serveErr)
		}
	}()
	return srv, nil
}
测试一下
bash
curl --cacert ca.cert.pem --location 'https://localhost:8081/api/v1/health'
{"code":0,"data":{},"message":"Not an error; returned on success."}
bash
curl --cacert ca.cert.pem \
--request POST \
--url https://localhost:8081/api/v1/echo \
--header 'content-type: application/json' \
--data '{"message": "Hello, World!"}'
{"code":0,"data":{"message":"Hello, World!"},"message":"Not an error; returned on success."}
简单改写一下 health 服务,实例化一个 CodeError
go
// HealthCheck is temporarily rewritten for this demo: instead of reporting
// success it always returns a CodeError with the custom code 10086 and no
// underlying cause, to exercise the error-conversion path.
func (srv *EchoService) HealthCheck(context.Context, *emptypb.Empty) (*emptypb.Empty, error) {
// Original success path, disabled for the test:
// return &emptypb.Empty{}, nil
return nil, errors.New(10086, nil, "error from health check, for testing.")
}
bash
curl --cacert ca.cert.pem --location 'https://localhost:8081/api/v1/health'
{"code":10086, "message":"error from health check, for testing.", "details":[]}
至此,完成对业务内使用 CodeError,HTTP返回统一数据结构。
此后会将 HTTP 统一结构删除,客户端利用 HTTP Status 判断数据,返回如 gRPC 的 数据 + Error,即不再统一数据结构。