1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  package org.htmlunit.protocol.data;
16  
17  import static java.nio.charset.StandardCharsets.US_ASCII;
18  import static org.htmlunit.protocol.data.DataURLConnection.DATA_PREFIX;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.net.URL;
22  import java.nio.charset.Charset;
23  import java.nio.charset.IllegalCharsetNameException;
24  import java.nio.charset.UnsupportedCharsetException;
25  import java.util.Base64;
26  
27  import org.apache.commons.lang3.StringUtils;
28  import org.htmlunit.util.MimeType;
29  import org.htmlunit.util.UrlUtils;
30  
31  
32  
33  
34  
35  
36  
37  
38  public class DataUrlDecoder {
39      private static final Charset DEFAULT_CHARSET = US_ASCII;
40      private static final String DEFAULT_MEDIA_TYPE = MimeType.TEXT_PLAIN;
41      private final String mediaType_;
42      private final Charset charset_;
43      private final byte[] content_;
44  
45      
46  
47  
48  
49  
50  
51      protected DataUrlDecoder(final byte[] data, final String mediaType, final Charset charset) {
52          content_ = data;
53          mediaType_ = mediaType;
54          charset_ = charset;
55      }
56  
57      
58  
59  
60  
61  
62  
63      public static DataUrlDecoder decode(final URL url) throws UnsupportedEncodingException {
64          return decodeDataURL(url.toExternalForm());
65      }
66  
67      
68  
69  
70  
71  
72  
73  
74      public static DataUrlDecoder decodeDataURL(final String url) throws UnsupportedEncodingException {
75          if (!url.startsWith(DATA_PREFIX)) {
76              throw new UnsupportedEncodingException("Invalid data url: '" + url + "' (wrong prefix)");
77          }
78          final int comma = url.indexOf(',');
79          if (comma < 0) {
80              throw new UnsupportedEncodingException("Invalid data url: '" + url + "' (no data)");
81          }
82  
83          String beforeData = url.substring(DATA_PREFIX.length(), comma);
84          final boolean base64 = beforeData.endsWith(";base64");
85          if (base64) {
86              beforeData = beforeData.substring(0, beforeData.length() - 7);
87          }
88          final String mediaType = extractMediaType(beforeData);
89          final Charset charset = extractCharset(beforeData);
90  
91          try {
92              byte[] data = url.substring(comma + 1).getBytes(charset);
93              data = UrlUtils.decodeDataUrl(data, base64);
94              if (base64) {
95                  data = Base64.getDecoder().decode(data);
96              }
97              return new DataUrlDecoder(data, mediaType, charset);
98          }
99          catch (final IllegalArgumentException e) {
100             final UnsupportedEncodingException ex =
101                     new UnsupportedEncodingException("Invalid data url: '" + url + "' (data decoding failed)");
102             ex.initCause(e);
103             throw ex;
104         }
105     }
106 
107     private static Charset extractCharset(final String beforeData) {
108         if (beforeData.contains(";")) {
109             String charsetName = StringUtils.substringAfter(beforeData, ";");
110             charsetName = charsetName.trim();
111             if (charsetName.startsWith("charset=")) {
112                 charsetName = charsetName.substring(8);
113             }
114             try {
115                 return Charset.forName(charsetName);
116             }
117             catch (final UnsupportedCharsetException | IllegalCharsetNameException e) {
118                 return DEFAULT_CHARSET;
119             }
120         }
121         return DEFAULT_CHARSET;
122     }
123 
124     private static String extractMediaType(final String beforeData) {
125         if (beforeData.contains("/")) {
126             if (beforeData.contains(";")) {
127                 return org.htmlunit.util.StringUtils.substringBefore(beforeData, ";");
128             }
129             return beforeData;
130         }
131         return DEFAULT_MEDIA_TYPE;
132     }
133 
134     
135 
136 
137 
138     public String getMediaType() {
139         return mediaType_;
140     }
141 
142     
143 
144 
145 
146     public String getCharset() {
147         return charset_.name();
148     }
149 
150     
151 
152 
153 
154     public byte[] getBytes() {
155         return content_;
156     }
157 
158     
159 
160 
161 
162 
163 
164     public String getDataAsString() throws UnsupportedEncodingException {
165         return new String(content_, charset_);
166     }
167 }