该工具可以将CURL脚本中的Header解析为KV Map结构;获取URL路径、请求类型;解析URL参数列表;解析Body请求体:Form表单、Raw Body、KV Body、XML/JSON/TEXT结构体等。
使用示例
获取一个http curl脚本:
bash
curl --location --request POST 'https://cainiao-inc.com?param_1=value_1&param_2=value_2' \
--header 'Cookie: USER_COOKIE' \
--header 'Content-Type: application/json' \
--data-raw '{
"appName": "link",
"apiId": "TEST_API",
"content": {
"address": "Cainiao Home",
"city": "Hangzhou"
}
}'
执行解析例子:
实现原理
实现原理很简单:基于Java正则 + 责任链设计模式,按照Curl脚本的常见语法去匹配、解析即可~
按照Curl语法结构,可以将其拆分为 5 个部分:
-
URL路径:http://cainiao.com
-
URL参数列表:?param_1=value_1&param_2=value_2
-
请求方法类型:例如 POST、GET、DELETE、PUT……需要正则匹配 `-X`、`--request` 等标识符
-
Header请求头:例如 Cookie、Token、Content-Type……需要正则匹配 `-H`、`--header` 等标识符
-
Body请求体:可以分为 `--form`/`-F`(form-data)、`--data-raw`、`--data-urlencode`、`-d`、`--data`、kvbody 等。格式可能包含JSON、XML、文本、KV键值对,二进制流(暂不支持解析)等等。
具体实现
流程简图:
类关系图:
CurlParserUtil
Curl解析工具类:
java
/**
 * Entry point for turning a cURL command line into a {@link CurlEntity}.
 */
public class CurlParserUtil {

    /**
     * Parses a cURL script by running it through the handler chain.
     *
     * @param curl the cURL command text
     * @return the entity populated by the handlers in the chain
     */
    public static CurlEntity parse(String curl) {
        // The head handler is the one produced by CurlHandlerChain.init().
        ICurlHandler<CurlEntity, String> head = CurlHandlerChain.init();

        // next(...) returns the handler that was just appended, so "tail" always
        // points at the end of the chain. Append further parsers here to extend it.
        ICurlHandler<CurlEntity, String> tail = head;
        tail = tail.next(new UrlPathHandler());
        tail = tail.next(new UrlParamsHandler());
        tail = tail.next(new HttpMethodHandler());
        tail = tail.next(new HeaderHandler());
        tail.next(new HttpBodyHandler());

        CurlEntity result = CurlEntity.builder().build();
        head.handle(result, curl);
        return result;
    }
}
CurlEntity
解析后得到的Curl实体类(这里分了5个部分)
java
/**
 * Result of parsing a cURL script: URL, HTTP method, URL parameters, headers and body.
 */
@Data
@Builder
public class CurlEntity {
/**
 * URL path.
 */
private String url;
/**
 * HTTP request method.
 */
private Method method;
/**
 * URL (query string) parameters.
 */
private Map<String, String> urlParams;
/**
 * Request header parameters.
 */
private Map<String, String> headers;
/**
 * Request body.
 */
private JSONObject body;
/**
 * HTTP request methods representable by this entity.
 */
public enum Method {
GET,
POST,
PUT,
DELETE
}
}
ICurlHandler
责任链链表结构定义:
java
/**
 * A link in a chain of handlers.
 *
 * @param <R> type of the result object that handlers populate
 * @param <S> type of the source being parsed
 */
public interface ICurlHandler<R, S> {

    /**
     * Registers the handler that follows this one.
     *
     * @param handler the successor handler
     * @return a handler on which the next link can be registered
     */
    ICurlHandler<R, S> next(ICurlHandler<R, S> handler);

    /**
     * Processes the source and records the outcome on the given result object.
     *
     * @param entity the result object to populate
     * @param curl   the source to parse
     */
    void handle(R entity, S curl);
}
CurlHandlerChain
责任链载体:
java
/**
 * Base class for the links of the cURL parsing chain. It stores the successor
 * handler and offers helpers that subclasses use to validate input, forward the
 * call and log.
 */
public abstract class CurlHandlerChain implements ICurlHandler<CurlEntity, String> {

    /** Successor handler; {@code null} until {@link #next(ICurlHandler)} is called. */
    ICurlHandler<CurlEntity, String> next;

    /**
     * Stores the successor handler.
     *
     * @param handler the handler to run after this one
     * @return the handler that was passed in, so calls can be chained
     */
    @Override
    public ICurlHandler<CurlEntity, String> next(ICurlHandler<CurlEntity, String> handler) {
        next = handler;
        return handler;
    }

    @Override
    public abstract void handle(CurlEntity entity, String curl);

    /**
     * Forwards the call to the successor handler, if one is set. Intended for subclasses.
     *
     * @param curlEntity the entity being populated
     * @param curl       the cURL text
     */
    protected void nextHandle(CurlEntity curlEntity, String curl) {
        if (next == null) {
            return;
        }
        next.handle(curlEntity, curl);
    }

    /**
     * Checks that the script is not blank and matches the basic cURL structure pattern.
     *
     * @param curl the cURL text
     * @throws IllegalArgumentException if the script is blank or does not match the pattern
     */
    protected void validate(String curl) {
        if (StringUtils.isBlank(curl)) {
            throw new IllegalArgumentException("Curl script is empty");
        }
        boolean wellFormed = CURL_BASIC_STRUCTURE_PATTERN.matcher(curl).find();
        if (!wellFormed) {
            throw new IllegalArgumentException("Curl script is invalid");
        }
    }

    /**
     * Creates the head of a chain: a handler that validates the script, removes
     * every backslash from it and passes the result to its successor.
     *
     * @return the head handler
     */
    public static CurlHandlerChain init() {
        return new CurlHandlerChain() {
            @Override
            public void handle(CurlEntity entity, String curl) {
                this.validate(curl);
                // Drop escape characters (all backslashes) that may be present in the script.
                String unescaped = curl.replace("\\", "");
                nextHandle(entity, unescaped);
            }
        };
    }

    /**
     * Logging hook; does nothing here and is meant to be overridden by subclasses.
     *
     * @param logParams values to log
     */
    public void log(Object... logParams) {
        // Write log for subclass extensions
    }
}
UrlPathHandler
URL路径解析:
java
/**
 * Chain link that extracts the URL path (without the query string) from the cURL text.
 */
public class UrlPathHandler extends CurlHandlerChain {

    @Override
    public void handle(CurlEntity entity, String curl) {
        String url = parseUrlPath(curl);
        entity.setUrl(url);
        this.log(url);
        super.nextHandle(entity, curl);
    }

    /**
     * Finds the first URL in the cURL text and returns it without its query string.
     *
     * @param curl the cURL text to search
     * @return the URL up to (not including) the {@code ?}, or {@code null} if no URL is found
     */
    private String parseUrlPath(String curl) {
        Matcher matcher = CurlPatternConstants.URL_PATH_PATTERN.matcher(curl);
        // URL_PATH_PATTERN has exactly one capturing group and it is mandatory,
        // so a successful find() always yields the URL in group 1.
        return matcher.find() ? matcher.group(1) : null;
    }

    @Override
    public void log(Object... logParams) {
        LogPrinter.info("UrlPathHandler execute: url={}", logParams);
    }
}
HttpMethodHandler
请求类型解析:
java
/**
 * Chain link that determines the HTTP request method of the cURL text.
 */
public class HttpMethodHandler extends CurlHandlerChain {

    @Override
    public void handle(CurlEntity entity, String curl) {
        CurlEntity.Method method = parseMethod(curl);
        entity.setMethod(method);
        this.log(method);
        super.nextHandle(entity, curl);
    }

    /**
     * Resolves the request method.
     *
     * <p>An explicit {@code -X}/{@code --request} value wins. Otherwise the method is
     * POST when the default-method pattern finds a data option, and GET if not.
     *
     * @param curl the cURL text
     * @return the resolved method
     * @throws IllegalArgumentException if an explicit method is not one of {@link CurlEntity.Method}
     */
    private CurlEntity.Method parseMethod(String curl) {
        Matcher explicit = CurlPatternConstants.HTTP_METHOD_PATTERN.matcher(curl);
        if (explicit.find()) {
            // The token may be quoted (-X 'POST'); Locale.ROOT keeps the upper-casing
            // independent of the JVM default locale.
            String name = stripQuotes(explicit.group(1)).toUpperCase(java.util.Locale.ROOT);
            try {
                return CurlEntity.Method.valueOf(name);
            } catch (IllegalArgumentException e) {
                throw new IllegalArgumentException("Unsupported HTTP method: " + name, e);
            }
        }
        // No explicit method: a data option implies POST.
        if (CurlPatternConstants.DEFAULT_HTTP_METHOD_PATTERN.matcher(curl).find()) {
            return CurlEntity.Method.POST;
        }
        // Neither an explicit method nor a data option: GET.
        return CurlEntity.Method.GET;
    }

    /** Removes one leading and one trailing single or double quote, if present. */
    private static String stripQuotes(String token) {
        int start = 0;
        int end = token.length();
        if (end > start && isQuote(token.charAt(start))) {
            start++;
        }
        if (end > start && isQuote(token.charAt(end - 1))) {
            end--;
        }
        return token.substring(start, end);
    }

    /** Tells whether the character is a single or double quote. */
    private static boolean isQuote(char c) {
        return c == '\'' || c == '"';
    }

    @Override
    public void log(Object... logParams) {
        LogPrinter.info("HttpMethodHandler execute: method={}", logParams);
    }
}
UrlParamsHandler
URL参数列表解析:
java
/**
 * Chain link that extracts the query-string parameters of the URL in the cURL text.
 */
public class UrlParamsHandler extends CurlHandlerChain {

    @Override
    public void handle(CurlEntity entity, String curl) {
        String url = extractUrl(curl);
        Map<String, String> urlParams = parseUrlParams(url);
        entity.setUrlParams(urlParams);
        this.log(urlParams);
        super.nextHandle(entity, curl);
    }

    /**
     * Finds the first full URL (including its query string) in the cURL text.
     *
     * @param curl the cURL text
     * @return the URL, or {@code null} if none is found
     */
    private String extractUrl(String curl) {
        Matcher matcher = CurlPatternConstants.URL_PARAMS_PATTERN.matcher(curl);
        if (matcher.find()) {
            return matcher.group(1);
        }
        return null;
    }

    /**
     * Splits the query string of a URL into key/value pairs. Values are not URL-decoded.
     *
     * <ul>
     *   <li>{@code key=value} maps {@code key} to {@code value};</li>
     *   <li>{@code key=} maps {@code key} to the empty string;</li>
     *   <li>{@code key} (no {@code =}) maps {@code key} to {@code null};</li>
     *   <li>empty segments (as in {@code a=1&&b=2}) are skipped.</li>
     * </ul>
     *
     * @param url the URL, may be blank
     * @return the parameters; empty when the URL is blank or has no query string
     */
    private Map<String, String> parseUrlParams(String url) {
        if (StringUtils.isBlank(url)) {
            return Collections.emptyMap();
        }
        Map<String, String> urlParams = new HashMap<>();
        // Only the first '?' starts the query; later ones belong to the query itself.
        int queryStart = url.indexOf('?');
        if (queryStart < 0) {
            return urlParams;
        }
        String query = url.substring(queryStart + 1);
        // A fragment after the query is not part of the last parameter value.
        int fragmentStart = query.indexOf('#');
        if (fragmentStart >= 0) {
            query = query.substring(0, fragmentStart);
        }
        for (String pair : query.split("&")) {
            if (pair.isEmpty()) {
                continue;
            }
            int idx = pair.indexOf('=');
            if (idx < 0) {
                // Parameter without a value.
                urlParams.put(pair, null);
            } else {
                urlParams.put(pair.substring(0, idx), pair.substring(idx + 1));
            }
        }
        return urlParams;
    }

    @Override
    public void log(Object... logParams) {
        LogPrinter.info("UrlParamsHandler execute: urlParams={}", logParams);
    }
}
HeaderHandler
Http Header解析:
java
/**
 * Chain link that collects the request headers found in the cURL text.
 */
public class HeaderHandler extends CurlHandlerChain {

    @Override
    public void handle(CurlEntity entity, String curl) {
        Map<String, String> parsed = parseHeaders(curl);
        entity.setHeaders(parsed);
        this.log(parsed);
        super.nextHandle(entity, curl);
    }

    /**
     * Extracts every header matched by the header pattern and splits each one at
     * its first colon into a trimmed name and a trimmed value.
     *
     * @param curl the cURL text, may be blank
     * @return header names mapped to values; empty when the text is blank
     */
    private Map<String, String> parseHeaders(String curl) {
        if (StringUtils.isBlank(curl)) {
            return Collections.emptyMap();
        }
        Map<String, String> result = new HashMap<>();
        for (Matcher m = CurlPatternConstants.CURL_HEADERS_PATTERN.matcher(curl); m.find(); ) {
            String line = m.group(1);
            int colon = line.indexOf(':');
            if (colon < 0) {
                continue;
            }
            // Strip surrounding whitespace from both the name and the value.
            String name = line.substring(0, colon).trim();
            String value = line.substring(colon + 1).trim();
            result.put(name, value);
        }
        return result;
    }

    @Override
    public void log(Object... logParams) {
        LogPrinter.info("HeaderHandler execute: headers={}", logParams);
    }
}
HttpBodyHandler
Request Body请求体解析:
- `--form`/`-F`(form-data)
- `--data-urlencode`
- `--data-raw`
- default:`-d`/`--data`
格式可能包含JSON、XML、文本、KV键值对,二进制流(暂不支持解析)等等。
java
/**
 * Chain link that parses the request body of the cURL text into a JSON object.
 *
 * <p>The body options are tried in this order: form, {@code --data-urlencode},
 * {@code --data-raw}, then {@code -d}/{@code --data}. The first one found wins.
 */
public class HttpBodyHandler extends CurlHandlerChain {

    @Override
    public void handle(CurlEntity entity, String curl) {
        JSONObject body = parseBody(curl);
        entity.setBody(body);
        this.log(body);
        super.nextHandle(entity, curl);
    }

    /**
     * Picks the body parser that matches the cURL text.
     *
     * @param curl the cURL text
     * @return the parsed body, or an empty JSON object when no body option is found
     */
    private JSONObject parseBody(String curl) {
        Matcher formMatcher = CurlPatternConstants.HTTP_FROM_BODY_PATTERN.matcher(curl);
        if (formMatcher.find()) {
            return parseFormBody(formMatcher);
        }
        Matcher urlencodeMatcher = CurlPatternConstants.HTTP_URLENCODE_BODY_PATTERN.matcher(curl);
        if (urlencodeMatcher.find()) {
            return parseUrlEncodeBody(urlencodeMatcher);
        }
        Matcher rawMatcher = CurlPatternConstants.HTTP_ROW_BODY_PATTERN.matcher(curl);
        if (rawMatcher.find()) {
            return parseRowBody(rawMatcher);
        }
        Matcher defaultMatcher = CurlPatternConstants.DEFAULT_HTTP_BODY_PATTERN.matcher(curl);
        if (defaultMatcher.find()) {
            return parseDefaultBody(defaultMatcher);
        }
        return new JSONObject();
    }

    /**
     * Parses a {@code -d}/{@code --data} body, which is either a
     * {@code key=value&key=value} string or JSON text.
     *
     * @param defaultMatcher a matcher positioned on a successful match
     * @return the parsed body
     */
    private JSONObject parseDefaultBody(Matcher defaultMatcher) {
        String bodyStr;
        if (defaultMatcher.group(1) != null) {
            // Single-quoted data.
            bodyStr = defaultMatcher.group(1);
        } else if (defaultMatcher.group(2) != null) {
            // Double-quoted data.
            bodyStr = defaultMatcher.group(2);
        } else {
            // Unquoted data.
            bodyStr = defaultMatcher.group(3);
        }
        // A JSON object such as {"q":"a=b"} also satisfies the key/value pattern,
        // so text that starts with '{' is never treated as key/value data.
        boolean looksLikeJsonObject = bodyStr.trim().startsWith("{");
        // Special case: username=test&password=secret
        if (!looksLikeJsonObject
                && CurlPatternConstants.DEFAULT_HTTP_BODY_PATTERN_KV.matcher(bodyStr).find()) {
            return parseKVBody(bodyStr);
        }
        return JSONObject.parseObject(bodyStr);
    }

    /**
     * Parses a {@code key=value&key=value} string, URL-decoding keys and values.
     *
     * @param kvBodyStr text already checked against the key/value pattern
     * @return the decoded pairs
     */
    private JSONObject parseKVBody(String kvBodyStr) {
        JSONObject json = new JSONObject();
        String[] pairs = kvBodyStr.split("&");
        for (String pair : pairs) {
            int idx = pair.indexOf("=");
            String key = URLDecoder.decode(pair.substring(0, idx), StandardCharsets.UTF_8);
            String value = URLDecoder.decode(pair.substring(idx + 1), StandardCharsets.UTF_8);
            json.put(key, value);
        }
        return json;
    }

    /**
     * Collects every form item matched by the form pattern.
     *
     * @param formMatcher matcher over the cURL text
     * @return form field names mapped to values
     */
    private JSONObject parseFormBody(Matcher formMatcher) {
        JSONObject formData = new JSONObject();
        // Rewind so the loop also sees the match the caller already consumed.
        formMatcher.reset();
        while (formMatcher.find()) {
            // Group 1 is filled by --form, group 2 by -F.
            String formItem = formMatcher.group(1) != null ? formMatcher.group(1) : formMatcher.group(2);
            String[] keyValue = formItem.split("=", 2);
            if (keyValue.length == 2) {
                String key = keyValue[0];
                String value = keyValue[1];
                if (value.startsWith("@")) {
                    // File marker: keep only the file name; the file is not read.
                    formData.put(key, value.substring(1));
                } else {
                    formData.put(key, value);
                }
            }
        }
        return formData;
    }

    /**
     * Collects every {@code --data-urlencode} item matched by the pattern.
     *
     * @param urlencodeMatcher matcher over the cURL text
     * @return field names mapped to URL-decoded values
     */
    private JSONObject parseUrlEncodeBody(Matcher urlencodeMatcher) {
        JSONObject urlEncodeData = new JSONObject();
        // Rewind so the loop also sees the match the caller already consumed.
        urlencodeMatcher.reset();
        while (urlencodeMatcher.find()) {
            String keyValueEncoded = urlencodeMatcher.group(1);
            String[] keyValue = keyValueEncoded.split("=", 2);
            if (keyValue.length == 2) {
                urlEncodeData.put(keyValue[0], decodeOrRaw(keyValue[1]));
            }
        }
        return urlEncodeData;
    }

    /**
     * URL-decodes the value, returning it unchanged when it is not valid
     * percent-encoding (for example a literal {@code 100%}).
     */
    private static String decodeOrRaw(String value) {
        try {
            return URLDecoder.decode(value, StandardCharsets.UTF_8);
        } catch (IllegalArgumentException e) {
            // --data-urlencode content may be plain text; keep it as written.
            return value;
        }
    }

    /**
     * Parses a {@code --data-raw} body, which is either XML or JSON text.
     *
     * @param rowMatcher a matcher positioned on a successful match
     * @return the parsed body
     * @throws IllegalArgumentException if the content is neither XML nor a JSON object
     */
    private JSONObject parseRowBody(Matcher rowMatcher) {
        String rawData = rowMatcher.group(1);
        if (isXML(rawData)) {
            return xml2json(rawData);
        }
        try {
            return JSON.parseObject(rawData);
        } catch (Exception e) {
            throw new IllegalArgumentException("Curl --data-raw content is not a valid JSON", e);
        }
    }

    /**
     * Tells whether the text parses as an XML document.
     *
     * <p>DOCTYPE declarations are rejected so that untrusted input cannot trigger
     * external-entity (XXE) processing; such documents are reported as not XML.
     *
     * @param xmlStr the text to test, may be {@code null}
     * @return {@code true} if the text parses as XML without a DOCTYPE
     */
    public static boolean isXML(String xmlStr) {
        // A document must begin with '<'; this skips the XML parser for JSON/text bodies.
        if (xmlStr == null || !xmlStr.trim().startsWith("<")) {
            return false;
        }
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            factory.setExpandEntityReferences(false);
            DocumentBuilder builder = factory.newDocumentBuilder();
            builder.parse(new InputSource(new StringReader(xmlStr)));
            return true;
        } catch (Exception ignored) {
            // Any configuration or parse failure means the text is not usable XML.
            return false;
        }
    }

    /**
     * Converts XML text into a JSON object.
     *
     * @param xmlStr the XML text
     * @return the JSON representation
     * @throws LinkConsoleException if the conversion fails
     */
    private JSONObject xml2json(String xmlStr) {
        try {
            org.json.JSONObject orgJsonObj = XML.toJSONObject(xmlStr);
            String jsonString = orgJsonObj.toString();
            return JSON.parseObject(jsonString);
        } catch (JSONException e) {
            throw new LinkConsoleException("Curl --data-raw content xml2json error", e);
        }
    }

    @Override
    public void log(Object... logParams) {
        LogPrinter.info("HttpBodyHandler execute: body={}", logParams);
    }
}
CurlPatternConstants
正则匹配常量定义:
java
/**
 * Regular expressions used to parse a cURL script.
 */
public interface CurlPatternConstants {

    /**
     * Basic structure check: optional leading whitespace, {@code curl}, whitespace,
     * then at least one argument (group 1).
     */
    Pattern CURL_BASIC_STRUCTURE_PATTERN = Pattern.compile("^\\s*curl\\s+(\\S+)");

    /**
     * URL path: group 1 is the URL up to, but not including, the query string.
     */
    Pattern URL_PATH_PATTERN =
        Pattern.compile("(?:\\s|^)(?:'|\")?(https?://[^?\\s'\"]*)(?:\\?[^\\s'\"]*)?(?:'|\")?(?:\\s|$)");

    /**
     * Full URL: group 1 is the URL including its query string.
     */
    Pattern URL_PARAMS_PATTERN = Pattern.compile("(?:\\s|^)(?:'|\")?(https?://[^\\s'\"]+)(?:'|\")?(?:\\s|$)");

    /**
     * Explicit HTTP method: group 1 is the token after {@code -X}/{@code --request}.
     */
    Pattern HTTP_METHOD_PATTERN = Pattern.compile("(?:-X|--request)\\s+(\\S+)");

    /**
     * Options that make curl send a POST when no method is given explicitly:
     * the {@code -d}/{@code --data*} family and {@code -F}/{@code --form}.
     */
    Pattern DEFAULT_HTTP_METHOD_PATTERN = Pattern.compile(
        ".*\\s(-d|--data|--data-binary|--data-raw|--data-urlencode|--data-ascii|-F|--form|--form-string)\\s.*");

    /**
     * Request header: group 1 is the single-quoted {@code name: value} text.
     */
    Pattern CURL_HEADERS_PATTERN = Pattern.compile("(?:-H|--header)\\s+'(.*?:.*?)'");

    /**
     * {@code -d}/{@code --data} body: group 1 single-quoted, group 2 double-quoted,
     * group 3 unquoted.
     */
    Pattern DEFAULT_HTTP_BODY_PATTERN = Pattern.compile("(?:--data|-d)\\s+(?:'([^']*)'|\"([^\"]*)\"|(\\S+))");

    /**
     * Whole-string check for {@code key=value} pairs joined by {@code &}.
     */
    Pattern DEFAULT_HTTP_BODY_PATTERN_KV = Pattern.compile("^([^=&]+=[^=&]+)(?:&[^=&]+=[^=&]+)*$");

    /**
     * {@code --data-raw} body: group 1 is the single-quoted content, which may span lines.
     */
    Pattern HTTP_ROW_BODY_PATTERN = Pattern.compile("--data-raw\\s+'(.+?)'", Pattern.DOTALL);

    /**
     * Form item: group 1 is filled by {@code --form}, group 2 by {@code -F}.
     */
    Pattern HTTP_FROM_BODY_PATTERN = Pattern.compile("--form\\s+'(.*?)'|-F\\s+'(.*?)'");

    /**
     * {@code --data-urlencode} item: group 1 is the single-quoted content.
     */
    Pattern HTTP_URLENCODE_BODY_PATTERN = Pattern.compile("--data-urlencode\\s+'(.*?)'");
}
有问题可以留言讨论,刷流量评论定期删除!