View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package com.zone.weixin4j.base64;
19  
20  import com.zone.weixin4j.util.ServerToolkits;
21  
22  import java.util.Arrays;
23  
24  /**
25   * <p>
26   * <font color="red">reference of apache pivot</font>
27   * </p>
28   * 
29   * Abstract superclass for Base-N encoders and decoders.
30   *
31   * <p>
32   * This class is thread-safe.
33   * </p>
34   *
35   * @version $Id: BaseNCodec.java 1465182 2013-04-06 04:03:12Z ggregory $
36   */
37  public abstract class BaseNCodec {
38  
39  	/**
40  	 * Holds thread context so classes can be thread-safe.
41  	 *
42  	 * This class is not itself thread-safe; each thread must allocate its own
43  	 * copy.
44  	 *
45  	 * @since 1.7
46  	 */
47  	static class Context {
48  
49  		/**
50  		 * Place holder for the bytes we're dealing with for our based logic.
51  		 * Bitwise operations store and extract the encoding or decoding from
52  		 * this variable.
53  		 */
54  		int ibitWorkArea;
55  
56  		/**
57  		 * Place holder for the bytes we're dealing with for our based logic.
58  		 * Bitwise operations store and extract the encoding or decoding from
59  		 * this variable.
60  		 */
61  		long lbitWorkArea;
62  
63  		/**
64  		 * Buffer for streaming.
65  		 */
66  		byte[] buffer;
67  
68  		/**
69  		 * Position where next character should be written in the buffer.
70  		 */
71  		int pos;
72  
73  		/**
74  		 * Position where next character should be read from the buffer.
75  		 */
76  		int readPos;
77  
78  		/**
79  		 * Boolean flag to indicate the EOF has been reached. Once EOF has been
80  		 * reached, this object becomes useless, and must be thrown away.
81  		 */
82  		boolean eof;
83  
84  		/**
85  		 * Variable tracks how many characters have been written to the current
86  		 * line. Only used when encoding. We use it to make sure each encoded
87  		 * line never goes beyond lineLength (if lineLength > 0).
88  		 */
89  		int currentLinePos;
90  
91  		/**
92  		 * Writes to the buffer only occur after every 3/5 reads when encoding,
93  		 * and every 4/8 reads when decoding. This variable helps track that.
94  		 */
95  		int modulus;
96  
97  		Context() {
98  		}
99  
100 		/**
101 		 * Returns a String useful for debugging (especially within a debugger.)
102 		 *
103 		 * @return a String useful for debugging.
104 		 */
105 		@SuppressWarnings("boxing")
106 		// OK to ignore boxing here
107 		@Override
108 		public String toString() {
109 			return String.format(
110 					"%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, "
111 							+ "modulus=%s, pos=%s, readPos=%s]", this
112 							.getClass().getSimpleName(), Arrays
113 							.toString(buffer), currentLinePos, eof,
114 					ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
115 		}
116 	}
117 
118 	/**
119 	 * EOF
120 	 *
121 	 * @since 1.7
122 	 */
123 	static final int EOF = -1;
124 
125 	/**
126 	 * MIME chunk size per RFC 2045 section 6.8.
127 	 *
128 	 * <p>
129 	 * The {@value} character limit does not count the trailing CRLF, but counts
130 	 * all other characters, including any equal signs.
131 	 * </p>
132 	 *
133 	 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
134 	 *      6.8</a>
135 	 */
136 	public static final int MIME_CHUNK_SIZE = 76;
137 
138 	/**
139 	 * PEM chunk size per RFC 1421 section 4.3.2.4.
140 	 *
141 	 * <p>
142 	 * The {@value} character limit does not count the trailing CRLF, but counts
143 	 * all other characters, including any equal signs.
144 	 * </p>
145 	 *
146 	 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section
147 	 *      4.3.2.4</a>
148 	 */
149 	public static final int PEM_CHUNK_SIZE = 64;
150 
151 	private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
152 
153 	/**
154 	 * Defines the default buffer size - currently {@value} - must be large
155 	 * enough for at least one encoded block+separator
156 	 */
157 	private static final int DEFAULT_BUFFER_SIZE = 8192;
158 
159 	/** Mask used to extract 8 bits, used in decoding bytes */
160 	protected static final int MASK_8BITS = 0xff;
161 
162 	/**
163 	 * Byte used to pad output.
164 	 */
165 	protected static final byte PAD_DEFAULT = '='; // Allow static access to
166 													// default
167 
168 	protected final byte PAD = PAD_DEFAULT; // instance variable just in case it
169 											// needs to vary later
170 
171 	/**
172 	 * Number of bytes in each full block of unencoded data, e.g. 4 for Base64
173 	 * and 5 for Base32
174 	 */
175 	private final int unencodedBlockSize;
176 
177 	/**
178 	 * Number of bytes in each full block of encoded data, e.g. 3 for Base64 and
179 	 * 8 for Base32
180 	 */
181 	private final int encodedBlockSize;
182 
183 	/**
184 	 * Chunksize for encoding. Not used when decoding. A value of zero or less
185 	 * implies no chunking of the encoded data. Rounded down to nearest multiple
186 	 * of encodedBlockSize.
187 	 */
188 	protected final int lineLength;
189 
190 	/**
191 	 * Size of chunk separator. Not used unless {@link #lineLength} > 0.
192 	 */
193 	private final int chunkSeparatorLength;
194 
195 	/**
196 	 * Note <code>lineLength</code> is rounded down to the nearest multiple of
197 	 * {@link #encodedBlockSize} If <code>chunkSeparatorLength</code> is zero,
198 	 * then chunking is disabled.
199 	 * 
200 	 * @param unencodedBlockSize
201 	 *            the size of an unencoded block (e.g. Base64 = 3)
202 	 * @param encodedBlockSize
203 	 *            the size of an encoded block (e.g. Base64 = 4)
204 	 * @param lineLength
205 	 *            if &gt; 0, use chunking with a length <code>lineLength</code>
206 	 * @param chunkSeparatorLength
207 	 *            the chunk separator length, if relevant
208 	 */
209 	protected BaseNCodec(final int unencodedBlockSize,
210 			final int encodedBlockSize, final int lineLength,
211 			final int chunkSeparatorLength) {
212 		this.unencodedBlockSize = unencodedBlockSize;
213 		this.encodedBlockSize = encodedBlockSize;
214 		final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
215 		this.lineLength = useChunking ? (lineLength / encodedBlockSize)
216 				* encodedBlockSize : 0;
217 		this.chunkSeparatorLength = chunkSeparatorLength;
218 	}
219 
220 	/**
221 	 * Returns true if this object has buffered data for reading.
222 	 *
223 	 * @param context
224 	 *            the context to be used
225 	 * @return true if there is data still available for reading.
226 	 */
227 	boolean hasData(final Context context) { // package protected for access
228 												// from I/O streams
229 		return context.buffer != null;
230 	}
231 
232 	/**
233 	 * Returns the amount of buffered data available for reading.
234 	 *
235 	 * @param context
236 	 *            the context to be used
237 	 * @return The amount of buffered data available for reading.
238 	 */
239 	int available(final Context context) { // package protected for access from
240 											// I/O streams
241 		return context.buffer != null ? context.pos - context.readPos : 0;
242 	}
243 
244 	/**
245 	 * Get the default buffer size. Can be overridden.
246 	 *
247 	 * @return {@link #DEFAULT_BUFFER_SIZE}
248 	 */
249 	protected int getDefaultBufferSize() {
250 		return DEFAULT_BUFFER_SIZE;
251 	}
252 
253 	/**
254 	 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
255 	 * 
256 	 * @param context
257 	 *            the context to be used
258 	 */
259 	private byte[] resizeBuffer(final Context context) {
260 		if (context.buffer == null) {
261 			context.buffer = new byte[getDefaultBufferSize()];
262 			context.pos = 0;
263 			context.readPos = 0;
264 		} else {
265 			final byte[] b = new byte[context.buffer.length
266 					* DEFAULT_BUFFER_RESIZE_FACTOR];
267 			System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
268 			context.buffer = b;
269 		}
270 		return context.buffer;
271 	}
272 
273 	/**
274 	 * Ensure that the buffer has room for <code>size</code> bytes
275 	 *
276 	 * @param size
277 	 *            minimum spare space required
278 	 * @param context
279 	 *            the context to be used
280 	 */
281 	protected byte[] ensureBufferSize(final int size, final Context context) {
282 		if ((context.buffer == null)
283 				|| (context.buffer.length < context.pos + size)) {
284 			return resizeBuffer(context);
285 		}
286 		return context.buffer;
287 	}
288 
289 	/**
290 	 * Extracts buffered data into the provided byte[] array, starting at
291 	 * position bPos, up to a maximum of bAvail bytes. Returns how many bytes
292 	 * were actually extracted.
293 	 * <p>
294 	 * Package protected for access from I/O streams.
295 	 *
296 	 * @param b
297 	 *            byte[] array to extract the buffered data into.
298 	 * @param bPos
299 	 *            position in byte[] array to start extraction at.
300 	 * @param bAvail
301 	 *            amount of bytes we're allowed to extract. We may extract fewer
302 	 *            (if fewer are available).
303 	 * @param context
304 	 *            the context to be used
305 	 * @return The number of bytes successfully extracted into the provided
306 	 *         byte[] array.
307 	 */
308 	int readResults(final byte[] b, final int bPos, final int bAvail,
309 			final Context context) {
310 		if (context.buffer != null) {
311 			final int len = Math.min(available(context), bAvail);
312 			System.arraycopy(context.buffer, context.readPos, b, bPos, len);
313 			context.readPos += len;
314 			if (context.readPos >= context.pos) {
315 				context.buffer = null; // so hasData() will return false, and
316 										// this method can return -1
317 			}
318 			return len;
319 		}
320 		return context.eof ? EOF : 0;
321 	}
322 
323 	/**
324 	 * Checks if a byte value is whitespace or not. Whitespace is taken to mean:
325 	 * space, tab, CR, LF
326 	 * 
327 	 * @param byteToCheck
328 	 *            the byte to check
329 	 * @return true if byte is whitespace, false otherwise
330 	 */
331 	protected static boolean isWhiteSpace(final byte byteToCheck) {
332 		switch (byteToCheck) {
333 		case ' ':
334 		case '\n':
335 		case '\r':
336 		case '\t':
337 			return true;
338 		default:
339 			return false;
340 		}
341 	}
342 
343 	/**
344 	 * Encodes a byte[] containing binary data, into a String containing
345 	 * characters in the Base-N alphabet. Uses UTF8 encoding.
346 	 *
347 	 * @param pArray
348 	 *            a byte array containing binary data
349 	 * @return A String containing only Base-N character data
350 	 */
351 	public String encodeToString(final byte[] pArray) {
352 		return ServerToolkits.newStringUtf8(encode(pArray));
353 	}
354 
355 	/**
356 	 * Encodes a byte[] containing binary data, into a String containing
357 	 * characters in the appropriate alphabet. Uses UTF8 encoding.
358 	 *
359 	 * @param pArray
360 	 *            a byte array containing binary data
361 	 * @return String containing only character data in the appropriate
362 	 *         alphabet.
363 	 */
364 	public String encodeAsString(final byte[] pArray) {
365 		return ServerToolkits.newStringUtf8(encode(pArray));
366 	}
367 
368 
369 
370 	/**
371 	 * Decodes a String containing characters in the Base-N alphabet.
372 	 *
373 	 * @param pArray
374 	 *            A String containing Base-N character data
375 	 * @return a byte array containing binary data
376 	 */
377 	public byte[] decode(final String pArray) {
378 		return decode(ServerToolkits.getBytesUtf8(pArray));
379 	}
380 
381 	/**
382 	 * Decodes a byte[] containing characters in the Base-N alphabet.
383 	 *
384 	 * @param pArray
385 	 *            A byte array containing Base-N character data
386 	 * @return a byte array containing binary data
387 	 */
388 	public byte[] decode(final byte[] pArray) {
389 		if (pArray == null || pArray.length == 0) {
390 			return pArray;
391 		}
392 		final Context context = new Context();
393 		decode(pArray, 0, pArray.length, context);
394 		decode(pArray, 0, EOF, context); // Notify decoder of EOF.
395 		final byte[] result = new byte[context.pos];
396 		readResults(result, 0, result.length, context);
397 		return result;
398 	}
399 
400 	/**
401 	 * Encodes a byte[] containing binary data, into a byte[] containing
402 	 * characters in the alphabet.
403 	 *
404 	 * @param pArray
405 	 *            a byte array containing binary data
406 	 * @return A byte array containing only the basen alphabetic character data
407 	 */
408 	public byte[] encode(final byte[] pArray) {
409 		if (pArray == null || pArray.length == 0) {
410 			return pArray;
411 		}
412 		final Context context = new Context();
413 		encode(pArray, 0, pArray.length, context);
414 		encode(pArray, 0, EOF, context); // Notify encoder of EOF.
415 		final byte[] buf = new byte[context.pos - context.readPos];
416 		readResults(buf, 0, buf.length, context);
417 		return buf;
418 	}
419 
420 	// package protected for access from I/O streams
421 	abstract void encode(byte[] pArray, int i, int length, Context context);
422 
423 	// package protected for access from I/O streams
424 	abstract void decode(byte[] pArray, int i, int length, Context context);
425 
426 	/**
427 	 * Returns whether or not the <code>octet</code> is in the current alphabet.
428 	 * Does not allow whitespace or pad.
429 	 *
430 	 * @param value
431 	 *            The value to test
432 	 *
433 	 * @return {@code true} if the value is defined in the current alphabet,
434 	 *         {@code false} otherwise.
435 	 */
436 	protected abstract boolean isInAlphabet(byte value);
437 
438 	/**
439 	 * Tests a given byte array to see if it contains only valid characters
440 	 * within the alphabet. The method optionally treats whitespace and pad as
441 	 * valid.
442 	 *
443 	 * @param arrayOctet
444 	 *            byte array to test
445 	 * @param allowWSPad
446 	 *            if {@code true}, then whitespace and PAD are also allowed
447 	 *
448 	 * @return {@code true} if all bytes are valid characters in the alphabet or
449 	 *         if the byte array is empty; {@code false}, otherwise
450 	 */
451 	public boolean isInAlphabet(final byte[] arrayOctet,
452 			final boolean allowWSPad) {
453 		for (int i = 0; i < arrayOctet.length; i++) {
454 			if (!isInAlphabet(arrayOctet[i])
455 					&& (!allowWSPad || (arrayOctet[i] != PAD)
456 							&& !isWhiteSpace(arrayOctet[i]))) {
457 				return false;
458 			}
459 		}
460 		return true;
461 	}
462 
463 	/**
464 	 * Tests a given String to see if it contains only valid characters within
465 	 * the alphabet. The method treats whitespace and PAD as valid.
466 	 *
467 	 * @param basen
468 	 *            String to test
469 	 * @return {@code true} if all characters in the String are valid characters
470 	 *         in the alphabet or if the String is empty; {@code false},
471 	 *         otherwise
472 	 * @see #isInAlphabet(byte[], boolean)
473 	 */
474 	public boolean isInAlphabet(final String basen) {
475 		return isInAlphabet(ServerToolkits.getBytesUtf8(basen), true);
476 	}
477 
478 	/**
479 	 * Tests a given byte array to see if it contains any characters within the
480 	 * alphabet or PAD.
481 	 *
482 	 * Intended for use in checking line-ending arrays
483 	 *
484 	 * @param arrayOctet
485 	 *            byte array to test
486 	 * @return {@code true} if any byte is a valid character in the alphabet or
487 	 *         PAD; {@code false} otherwise
488 	 */
489 	protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
490 		if (arrayOctet == null) {
491 			return false;
492 		}
493 		for (final byte element : arrayOctet) {
494 			if (PAD == element || isInAlphabet(element)) {
495 				return true;
496 			}
497 		}
498 		return false;
499 	}
500 
501 	/**
502 	 * Calculates the amount of space needed to encode the supplied array.
503 	 *
504 	 * @param pArray
505 	 *            byte[] array which will later be encoded
506 	 *
507 	 * @return amount of space needed to encoded the supplied array. Returns a
508 	 *         long since a max-len array will require > Integer.MAX_VALUE
509 	 */
510 	public long getEncodedLength(final byte[] pArray) {
511 		// Calculate non-chunked size - rounded up to allow for padding
512 		// cast to long is needed to avoid possibility of overflow
513 		long len = ((pArray.length + unencodedBlockSize - 1) / unencodedBlockSize)
514 				* (long) encodedBlockSize;
515 		if (lineLength > 0) { // We're using chunking
516 			// Round up to nearest multiple
517 			len += ((len + lineLength - 1) / lineLength) * chunkSeparatorLength;
518 		}
519 		return len;
520 	}
521 }