原始文本
bash
------=_Part_46705_715015081.1699589700255
Content-Type: text/html;charset=UTF-8
Content-Transfer-Encoding: base64

PGh0bWw+CiAgICA8aGVhZD4KICAgICAgICA8bWV0YSBodHRwLW
VxdWl2PSJDb250ZW50LVR5cGUiIGNvbnRlbnQ9InRleHQvaHRt
bDsgY2hhcnNldD1VVEYtOCI+CiAgICAgICAgPHRpdGxlPkpTUC
BQYWdlPC90aXRsZT4KICAgIDwvaGVhZD4KICAgIDxib2R5Pgog
ICAgICAgIDxoMT5IZWxsbyBXb3JsZCE8L2gxPgogICAgPC9ib2
R5Pgo8L2h0bWw+
------=_Part_46705_715015081.1699589700255--
Maven
xml
<!--
  The example code uses SimpleContentHandler, Header, ContentTypeFieldImpl and
  DefaultBodyDescriptorBuilder, which ship in the "dom" module rather than "core".
  apache-mime4j-dom depends on apache-mime4j-core, so the stream parser classes
  are still pulled in transitively.
-->
<dependency>
    <groupId>org.apache.james</groupId>
    <artifactId>apache-mime4j-dom</artifactId>
    <version>0.8.9</version>
</dependency>
解析方法
java
// Sample input: a boundary line, two header fields, the blank separator line,
// a base64-encoded HTML body and the closing boundary line.
String data =
        "------=_Part_46705_715015081.1699589700255\n"
        + "Content-Type: text/html;charset=UTF-8\n"
        + "Content-Transfer-Encoding: base64\n"
        + "\n"
        + "PGh0bWw+CiAgICA8aGVhZD4KICAgICAgICA8bWV0YSBodHRwLW\n"
        + "VxdWl2PSJDb250ZW50LVR5cGUiIGNvbnRlbnQ9InRleHQvaHRt\n"
        + "bDsgY2hhcnNldD1VVEYtOCI+CiAgICAgICAgPHRpdGxlPkpTUC\n"
        + "BQYWdlPC90aXRsZT4KICAgIDwvaGVhZD4KICAgIDxib2R5Pgog\n"
        + "ICAgICAgIDxoMT5IZWxsbyBXb3JsZCE8L2gxPgogICAgPC9ib2\n"
        + "R5Pgo8L2h0bWw+\n"
        + "------=_Part_46705_715015081.1699589700255--";
System.out.println(data);

// The handler receives the parser callbacks and keeps the body text it reads.
HtmContentHandler handler = new HtmContentHandler();

// Default parser configuration, the SILENT decode monitor and the default
// body-descriptor builder are passed straight to the stream parser.
MimeStreamParser parser = new MimeStreamParser(
        MimeConfig.DEFAULT, DecodeMonitor.SILENT, new DefaultBodyDescriptorBuilder());
parser.setContentHandler(handler);
// Ask the parser to undo the Content-Transfer-Encoding (base64 here) before
// handing the body stream to the handler.
parser.setContentDecoding(true);

parser.parse(new ByteArrayInputStream(data.getBytes(UTF_8)));
System.out.println(handler.getData());
HtmContentHandler
java
import org.apache.commons.io.IOUtils;
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.dom.Header;
import org.apache.james.mime4j.field.ContentTypeFieldImpl;
import org.apache.james.mime4j.message.SimpleContentHandler;
import org.apache.james.mime4j.stream.BodyDescriptor;
import org.apache.james.mime4j.stream.Field;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
/**
 * Content handler that records the text read from a MIME body together with the
 * media type and charset declared in the entity's {@code Content-Type} header.
 *
 * <p>Only the values of the most recently processed entity are kept: every call to
 * {@link #headers(Header)} replaces the stored media type and charset, and every call
 * to {@link #body(BodyDescriptor, InputStream)} replaces the stored text.
 *
 * @author zengrenyuan
 * @date 2023/11/10
 **/
public class HtmContentHandler extends SimpleContentHandler {

    /** Text of the last body that was read, or {@code null} if none has been read yet. */
    private String data;

    /** Value of the {@code charset} parameter of the last Content-Type header, or {@code null}. */
    private String charset;

    /** Media type reported by the last Content-Type header, or {@code null}. */
    private String contentType;

    /**
     * Reads the whole body stream into a string using the charset captured by
     * {@link #headers(Header)}; UTF-8 is used when no usable charset was declared.
     *
     * @param bd descriptor of the body (not used by this handler)
     * @param is stream supplying the body content
     * @throws MimeException declared by the overridden method; not thrown directly here
     * @throws IOException if reading the stream fails
     */
    @Override
    public void body(BodyDescriptor bd, InputStream is) throws MimeException, IOException {
        this.data = IOUtils.toString(is, resolveCharset(charset));
        // The text content can be processed further here.
    }

    /**
     * Extracts the media type and the {@code charset} parameter from the
     * {@code Content-Type} field of the given header.
     *
     * @param header the header of the entity that is about to be processed
     */
    @Override
    public void headers(Header header) {
        // Start from a clean slate so an entity without a (parsed) Content-Type
        // does not inherit the values of the entity processed before it.
        this.contentType = null;
        this.charset = null;

        // instanceof is false for null, so a missing field needs no separate check.
        Field field = header.getField("Content-Type");
        if (field instanceof ContentTypeFieldImpl) {
            ContentTypeFieldImpl parsed = (ContentTypeFieldImpl) field;
            this.contentType = parsed.getMimeType();
            this.charset = parsed.getParameter("charset");
        }
    }

    /**
     * Returns the text of the last body that was read.
     *
     * @return the body text, or {@code null} if no body has been read
     */
    public String getData() {
        return data;
    }

    /**
     * Returns the charset name exactly as declared in the last Content-Type header.
     *
     * @return the declared charset name, or {@code null} if none was declared
     */
    public String getCharset() {
        return charset;
    }

    /**
     * Returns the media type taken from the last Content-Type header.
     *
     * @return the media type, or {@code null} if no Content-Type header was parsed
     */
    public String getContentType() {
        return contentType;
    }

    /**
     * Maps a declared charset name to a {@link Charset}, falling back to UTF-8 when the
     * name is absent, syntactically illegal or not supported by this JVM.
     */
    private static Charset resolveCharset(String name) {
        if (name == null || name.isEmpty()) {
            return StandardCharsets.UTF_8;
        }
        try {
            return Charset.forName(name);
        } catch (IllegalArgumentException ignored) {
            // Covers both IllegalCharsetNameException and UnsupportedCharsetException:
            // a bogus label in the message should not abort the whole parse.
            return StandardCharsets.UTF_8;
        }
    }
}
参考资料
https://james.apache.org/mime4j/index.html
https://github.com/apache/james-mime4j
如果想解析一段Email数据也可以参考