现象
使用HttpClient工具上传文件时,如果文件名是中文,文件名会乱码
文件名乱码的代码:
1
2
3
4
5
6
7
8
9
10
11
12
|
private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,File fileToUpload) { MultipartEntityBuilder builder = MultipartEntityBuilder.create(); builder.addTextBody( "scenarioId" , scenarioId.toString()); for (String groupId : groupIds) { builder.addTextBody( "groupIds" , groupId); } builder.addTextBody( "extension" , extension); builder.addPart( "fileToUpload" , new FileBody(fileToUpload)); builder.addTextBody( "type" , AssetFileTypeEnum.CSV.getName()); builder.addTextBody( "isSplit" , "false" ); builder.addTextBody( "isRefresh" , "false" ); return builder.build(); |
乱码原因:
HttpClient上传文件时,会调用doWriteTo方法,把各个表单部分依次写入输出流。其中负责写入每个部分头信息(文件名就在头信息里)的formatMultipartHeader方法,底层有3种不同的实现,这3种实现采用的字符集各不相同;默认实现采用的字符集无法表示中文,所以文件名会乱码
HttpClient中的doWriteTo方法:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
void doWriteTo( final OutputStream out, final boolean writeContent) throws IOException { final ByteArrayBuffer boundaryEncoded = encode( this .charset, this .boundary); for ( final FormBodyPart part: getBodyParts()) { writeBytes(TWO_DASHES, out); writeBytes(boundaryEncoded, out); writeBytes(CR_LF, out); //此处代码主要有3种不同的实现,不同的mode,实现方式不一样,采用的字符集也不同 formatMultipartHeader(part, out); writeBytes(CR_LF, out); if (writeContent) { part.getBody().writeTo(out); } writeBytes(CR_LF, out); } writeBytes(TWO_DASHES, out); writeBytes(boundaryEncoded, out); writeBytes(TWO_DASHES, out); writeBytes(CR_LF, out); } |
其中的formatMultipartHeader方法,不同的模式有不同的实现方式
MultipartEntityBuilder中的buildEntity方法(根据mode选择具体的实现)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
MultipartFormEntity buildEntity() { String boundaryCopy = boundary; if (boundaryCopy == null && contentType != null ) { boundaryCopy = contentType.getParameter( "boundary" ); } if (boundaryCopy == null ) { boundaryCopy = generateBoundary(); } Charset charsetCopy = charset; if (charsetCopy == null && contentType != null ) { charsetCopy = contentType.getCharset(); } final List<NameValuePair> paramsList = new ArrayList<NameValuePair>( 2 ); paramsList.add( new BasicNameValuePair( "boundary" , boundaryCopy)); if (charsetCopy != null ) { paramsList.add( new BasicNameValuePair( "charset" , charsetCopy.name())); } final NameValuePair[] params = paramsList.toArray( new NameValuePair[paramsList.size()]); final ContentType contentTypeCopy = contentType != null ? contentType.withParameters(params) : ContentType.create( "multipart/" + DEFAULT_SUBTYPE, params); final List<FormBodyPart> bodyPartsCopy = bodyParts != null ? new ArrayList<FormBodyPart>(bodyParts) : Collections.<FormBodyPart>emptyList(); //此处将mode赋值给modeCopy final HttpMultipartMode modeCopy = mode != null ? mode : HttpMultipartMode.STRICT; final AbstractMultipartForm form; //此处根据modeCopy的值不同,构造3种form,每种的字符集都不一样,也是产生乱码的根源 switch (modeCopy) { case BROWSER_COMPATIBLE: form = new HttpBrowserCompatibleMultipart(charsetCopy, boundaryCopy, bodyPartsCopy); break ; case RFC6532: form = new HttpRFC6532Multipart(charsetCopy, boundaryCopy, bodyPartsCopy); break ; default : form = new HttpStrictMultipart(charsetCopy, boundaryCopy, bodyPartsCopy); } return new MultipartFormEntity(form, contentTypeCopy, form.getTotalLength()); } public HttpEntity build() { return buildEntity(); } |
BROWSER_COMPATIBLE模式中的formatMultipartHeader方法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
class HttpBrowserCompatibleMultipart extends AbstractMultipartForm { private final List<FormBodyPart> parts; public HttpBrowserCompatibleMultipart( final Charset charset, final String boundary, final List<FormBodyPart> parts) { super (charset, boundary); this .parts = parts; } @Override public List<FormBodyPart> getBodyParts() { return this .parts; } /** * Write the multipart header fields; depends on the style. */ @Override protected void formatMultipartHeader( final FormBodyPart part, final OutputStream out) throws IOException { // For browser-compatible, only write Content-Disposition // Use content charset final Header header = part.getHeader(); final MinimalField cd = header.getField(MIME.CONTENT_DISPOSITION); //可以看到此处的字符集采用的是设置的字符集 writeField(cd, this .charset, out); final String filename = part.getBody().getFilename(); if (filename != null ) { final MinimalField ct = header.getField(MIME.CONTENT_TYPE); //可以看到此处的字符集采用的也是设置的字符集 writeField(ct, this .charset, out); } } } |
RFC6532模式中的formatMultipartHeader方法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
class HttpRFC6532Multipart extends AbstractMultipartForm { private final List<FormBodyPart> parts; public HttpRFC6532Multipart( final Charset charset, final String boundary, final List<FormBodyPart> parts) { super (charset, boundary); this .parts = parts; } @Override public List<FormBodyPart> getBodyParts() { return this .parts; } @Override protected void formatMultipartHeader( final FormBodyPart part, final OutputStream out) throws IOException { // For RFC6532, we output all fields with UTF-8 encoding. final Header header = part.getHeader(); for ( final MinimalField field: header) { //可以看到此处的字符集默认采用UTF8 writeField(field, MIME.UTF8_CHARSET, out); } } } |
默认模式(STRICT)中的formatMultipartHeader方法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
class HttpStrictMultipart extends AbstractMultipartForm { private final List<FormBodyPart> parts; public HttpStrictMultipart( final Charset charset, final String boundary, final List<FormBodyPart> parts) { super (charset, boundary); this .parts = parts; } @Override public List<FormBodyPart> getBodyParts() { return this .parts; } @Override protected void formatMultipartHeader( final FormBodyPart part, final OutputStream out) throws IOException { // For strict, we output all fields with MIME-standard encoding. //从上面注释中可以看到,此处的字符集采用的是默认字符集即ASCII(下面MIME类中可以看到) final Header header = part.getHeader(); for ( final MinimalField field: header) { writeField(field, out); } } } |
MIME类
1
2
3
4
5
6
7
8
9
10
11
|
/**
 * Constants used when writing multipart bodies: header field names, transfer
 * encoding tokens and the charsets applied to part headers.
 */
public final class MIME {
    /** Name of the "Content-Type" header field. */
    public static final String CONTENT_TYPE = "Content-Type" ;
    /** Name of the "Content-Transfer-Encoding" header field. */
    public static final String CONTENT_TRANSFER_ENC = "Content-Transfer-Encoding" ;
    /** Name of the "Content-Disposition" header field. */
    public static final String CONTENT_DISPOSITION = "Content-Disposition" ;
    /** The "8bit" encoding token. */
    public static final String ENC_8BIT = "8bit" ;
    /** The "binary" encoding token. */
    public static final String ENC_BINARY = "binary" ;
    /** The default character set to be used, i.e. "US-ASCII" */
    public static final Charset DEFAULT_CHARSET = Consts.ASCII;
    /** UTF-8 is used for RFC6532 */
    public static final Charset UTF8_CHARSET = Consts.UTF_8;
}
解决方法
知道了乱码产生的根源,问题也就好解决了。解决方式有两种:
方式一:设置mode为BROWSER_COMPATIBLE,并设置字符集为UTF-8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension, File fileToUpload) { MultipartEntityBuilder builder = MultipartEntityBuilder.create(); //设置模式为BROWSER_COMPATIBLE,并设置字符集为UTF8 builder.setMode(HttpMultipartMode.BROWSER_COMPATIBLE); builder.setCharset(Charset.forName( "UTF-8" )); builder.addTextBody( "scenarioId" , scenarioId.toString()); for (String groupId : groupIds) { builder.addTextBody( "groupIds" , groupId); } builder.addTextBody( "extension" , extension); builder.addPart( "fileToUpload" , new FileBody(fileToUpload)); builder.addTextBody( "type" , AssetFileTypeEnum.CSV.getName()); builder.addTextBody( "isSplit" , "false" ); builder.addTextBody( "isRefresh" , "false" ); return builder.build(); } |
方式二:设置mode为RFC6532(该模式下头信息固定采用UTF-8编码,不需要另外设置字符集)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension, File fileToUpload) { MultipartEntityBuilder builder = MultipartEntityBuilder.create(); //设置模式为RFC6532 builder.setMode(HttpMultipartMode.RFC6532); builder.addTextBody( "scenarioId" , scenarioId.toString()); for (String groupId : groupIds) { builder.addTextBody( "groupIds" , groupId); } builder.addTextBody( "extension" , extension); builder.addPart( "fileToUpload" , new FileBody(fileToUpload)); builder.addTextBody( "type" , AssetFileTypeEnum.CSV.getName()); builder.addTextBody( "isSplit" , "false" ); builder.addTextBody( "isRefresh" , "false" ); return builder.build(); } |
以上为个人经验,希望能给大家一个参考,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/youshounianhua123/article/details/81100778