1 /* Copyright 2002-2025 CS GROUP
2 * Licensed to CS GROUP (CS) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * CS licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.orekit.data;
18
19 import java.io.IOException;
20 import java.io.InputStream;
21 import java.util.Arrays;
22
23 import org.hipparchus.util.FastMath;
24 import org.orekit.errors.OrekitException;
25 import org.orekit.errors.OrekitIOException;
26 import org.orekit.errors.OrekitMessages;
27
28 /** Filter for Unix compressed data.
29 * @author Luc Maisonobe
30 * @since 9.2
31 */
32 public class UnixCompressFilter implements DataFilter {
33
34 /** Suffix for Unix compressed files. */
35 private static final String SUFFIX = ".Z";
36
37 /** Empty constructor.
38 * <p>
39 * This constructor is not strictly necessary, but it prevents spurious
40 * javadoc warnings with JDK 18 and later.
41 * </p>
42 * @since 12.0
43 */
44 public UnixCompressFilter() {
45 // nothing to do
46 }
47
48 /** {@inheritDoc} */
49 @Override
50 public DataSource filter(final DataSource original) {
51 final String oName = original.getName();
52 final DataSource.Opener oOpener = original.getOpener();
53 if (oName.endsWith(SUFFIX)) {
54 final String fName = oName.substring(0, oName.length() - SUFFIX.length());
55 final DataSource.StreamOpener fOpener = () -> new ZInputStream(oName, new Buffer(oOpener.openStreamOnce()));
56 return new DataSource(fName, fOpener);
57 } else {
58 return original;
59 }
60 }
61
62 /** Filtering of Unix compressed stream. */
63 private static class ZInputStream extends InputStream {
64
65 /** First magic header byte. */
66 private static final int MAGIC_HEADER_1 = 0x1f;
67
68 /** Second magic header byte. */
69 private static final int MAGIC_HEADER_2 = 0x9d;
70
71 /** Byte bits width. */
72 private static final int BYTE_WIDTH = 8;
73
74 /** Initial bits width. */
75 private static final int INIT_WIDTH = 9;
76
77 /** Reset table code. */
78 private static final int RESET_TABLE = 256;
79
80 /** First non-predefined entry. */
81 private static final int FIRST = 257;
82
83 /** File name. */
84 private final String name;
85
86 /** Indicator for end of input. */
87 private boolean endOfInput;
88
89 /** Common sequences table. */
90 private final UncompressedSequence[] table;
91
92 /** Next available entry in the table. */
93 private int available;
94
95 /** Flag for block mode when table is full. */
96 private final boolean blockMode;
97
98 /** Maximum width allowed. */
99 private final int maxWidth;
100
101 /** Current input width in bits. */
102 private int currentWidth;
103
104 /** Maximum key that can be encoded with current width. */
105 private int currentMaxKey;
106
107 /** Number of bits read since last reset. */
108 private int bitsRead;
109
110 /** Lookahead byte, already read but not yet used. */
111 private int lookAhead;
112
113 /** Number of bits in the lookahead byte. */
114 private int lookAheadWidth;
115
116 /** Input buffer. */
117 private Buffer input;
118
119 /** Previous uncompressed sequence output. */
120 private UncompressedSequence previousSequence;
121
122 /** Uncompressed sequence being output. */
123 private UncompressedSequence currentSequence;
124
125 /** Number of bytes of the current sequence already output. */
126 private int alreadyOutput;
127
128 /** Simple constructor.
129 * @param name file name
130 * @param input underlying compressed stream
131 * @exception IOException if first bytes cannot be read
132 */
133 ZInputStream(final String name, final Buffer input)
134 throws IOException {
135
136 this.name = name;
137 this.input = input;
138 this.endOfInput = false;
139
140 // check header
141 if (input.getByte() != MAGIC_HEADER_1 || input.getByte() != MAGIC_HEADER_2) {
142 throw new OrekitException(OrekitMessages.NOT_A_SUPPORTED_UNIX_COMPRESSED_FILE, name);
143 }
144
145 final int header3 = input.getByte();
146 this.blockMode = (header3 & 0x80) != 0;
147 this.maxWidth = header3 & 0x1f;
148
149 // set up table, with at least all entries for one byte
150 this.table = new UncompressedSequence[1 << FastMath.max(INIT_WIDTH, maxWidth)];
151 for (int i = 0; i < FIRST; ++i) {
152 table[i] = new UncompressedSequence(null, (byte) i);
153 }
154
155 // initialize decompression state
156 initialize();
157
158 }
159
160 /** Initialize compression state.
161 */
162 private void initialize() {
163 this.available = FIRST;
164 this.bitsRead = 0;
165 this.lookAhead = 0;
166 this.lookAheadWidth = 0;
167 this.currentWidth = INIT_WIDTH;
168 this.currentMaxKey = (1 << currentWidth) - 1;
169 this.previousSequence = null;
170 this.currentSequence = null;
171 this.alreadyOutput = 0;
172 }
173
174 /** Read next input key.
175 * @return next input key or -1 if end of stream is reached
176 * @exception IOException if a read error occurs
177 */
178 private int nextKey() throws IOException {
179
180 int keyMask = (1 << currentWidth) - 1;
181
182 while (true) {
183 // initialize key with the last bits remaining from previous read
184 int key = lookAhead & keyMask;
185
186 // read more bits until key is complete
187 for (int remaining = currentWidth - lookAheadWidth; remaining > 0; remaining -= BYTE_WIDTH) {
188 lookAhead = input.getByte();
189 lookAheadWidth += BYTE_WIDTH;
190 if (lookAhead < 0) {
191 if (key == 0 || key == keyMask) {
192 // the key is either a set of padding 0 bits
193 // or a full key containing -1 if read() is called several times after EOF
194 return -1;
195 } else {
196 // end of stream encountered in the middle of a read
197 throw new OrekitIOException(OrekitMessages.UNEXPECTED_END_OF_FILE, name);
198 }
199 }
200 key = (key | lookAhead << (currentWidth - remaining)) & keyMask;
201 }
202
203 // store the extra bits already read in the lookahead byte for next call
204 lookAheadWidth -= currentWidth;
205 lookAhead = lookAhead >>> (BYTE_WIDTH - lookAheadWidth);
206
207 bitsRead += currentWidth;
208
209 if (blockMode && key == RESET_TABLE) {
210
211 // skip the padding bits inserted when compressor flushed its buffer
212 final int superSize = currentWidth * 8;
213 int padding = (superSize - 1 - (bitsRead + superSize - 1) % superSize) / 8;
214 while (padding-- > 0) {
215 input.getByte();
216 }
217
218 // reset the table to handle a new block and read again next key
219 Arrays.fill(table, FIRST, table.length, null);
220 initialize();
221
222 // reset the lookahead mask as the current width has changed
223 keyMask = (1 << currentWidth) - 1;
224
225 } else {
226 // return key at current width
227 return key;
228 }
229
230 }
231
232 }
233
234 /** Select next uncompressed sequence to output.
235 * @return true if there is a next sequence
236 * @exception IOException if a read error occurs
237 */
238 private boolean selectNext() throws IOException {
239
240 // read next input key
241 final int key = nextKey();
242 if (key < 0) {
243 // end of stream reached
244 return false;
245 }
246
247 if (previousSequence != null && available < table.length) {
248 // update the table with the next uncompressed byte appended to previous sequence
249 final byte nextByte;
250 if (key == available) {
251 nextByte = previousSequence.getByte(0);
252 } else if (table[key] != null) {
253 nextByte = table[key].getByte(0);
254 } else {
255 throw new OrekitIOException(OrekitMessages.CORRUPTED_FILE, name);
256 }
257 table[available++] = new UncompressedSequence(previousSequence, nextByte);
258 if (available > currentMaxKey && currentWidth < maxWidth) {
259 // we need to increase the key size
260 currentMaxKey = (1 << ++currentWidth) - 1;
261 }
262 }
263
264 currentSequence = table[key];
265 if (currentSequence == null) {
266 // the compressed file references a non-existent table entry
267 // (this is not the well-known case of entry being used just before
268 // being defined, which is already handled above), the file is corrupted
269 throw new OrekitIOException(OrekitMessages.CORRUPTED_FILE, name);
270 }
271 alreadyOutput = 0;
272
273 return true;
274
275 }
276
277 /** {@inheritDoc} */
278 @Override
279 public int read() throws IOException {
280 final byte[] b = new byte[1];
281 return read(b, 0, 1) < 0 ? -1 : b[0];
282 }
283
284 /** {@inheritDoc} */
285 @Override
286 public int read(final byte[] b, final int offset, final int len) throws IOException {
287
288 if (currentSequence == null) {
289 if (endOfInput || !selectNext()) {
290 // we have reached end of data
291 endOfInput = true;
292 return -1;
293 }
294 }
295
296 // copy as many bytes as possible from current sequence
297 final int n = FastMath.min(len, currentSequence.length() - alreadyOutput);
298 for (int i = 0; i < n; ++i) {
299 b[offset + i] = currentSequence.getByte(alreadyOutput++);
300 }
301 if (alreadyOutput >= currentSequence.length()) {
302 // we have just exhausted the current sequence
303 previousSequence = currentSequence;
304 currentSequence = null;
305 alreadyOutput = 0;
306 }
307
308 return n;
309
310 }
311
312 /** {@inheritDoc} */
313 @Override
314 public int available() {
315 return currentSequence == null ? 0 : currentSequence.length() - alreadyOutput;
316 }
317
318 }
319
320 /** Uncompressed bits sequence. */
321 private static class UncompressedSequence {
322
323 /** Prefix sequence (null if this is a start sequence). */
324 private final UncompressedSequence prefix;
325
326 /** Last byte in the sequence. */
327 private final byte last;
328
329 /** Index of the last byte in the sequence (i.e. length - 1). */
330 private final int index;
331
332 /** Simple constructor.
333 * @param prefix prefix of the sequence (null if this is a start sequence)
334 * @param last last byte of the sequence
335 */
336 UncompressedSequence(final UncompressedSequence prefix, final byte last) {
337 this.prefix = prefix;
338 this.last = last;
339 this.index = prefix == null ? 0 : prefix.index + 1;
340 }
341
342 /** Get the length of the sequence.
343 * @return length of the sequence
344 */
345 public int length() {
346 return index + 1;
347 }
348
349 /** Get a byte from the sequence.
350 * @param outputIndex index of the byte in the sequence, counting from 0
351 * @return byte at {@code outputIndex}
352 */
353 public byte getByte(final int outputIndex) {
354 return index == outputIndex ? last : prefix.getByte(outputIndex);
355 }
356
357 }
358
359 /** Buffer for reading input data. */
360 private static class Buffer {
361
362 /** Size of input/output buffers. */
363 private static final int BUFFER_SIZE = 4096;
364
365 /** Underlying compressed stream. */
366 private final InputStream input;
367
368 /** Buffer data. */
369 private final byte[] data;
370
371 /** Start of pending data. */
372 private int start;
373
374 /** End of pending data. */
375 private int end;
376
377 /** Simple constructor.
378 * @param input input stream
379 */
380 Buffer(final InputStream input) {
381 this.input = input;
382 this.data = new byte[BUFFER_SIZE];
383 this.start = 0;
384 this.end = start;
385 }
386
387 /** Get one input byte.
388 * @return input byte, or -1 if end of input has been reached
389 * @throws IOException if input data cannot be read
390 */
391 private int getByte() throws IOException {
392
393 if (start == end) {
394 // the buffer is empty
395 start = 0;
396 end = input.read(data);
397 if (end == -1) {
398 return -1;
399 }
400 }
401
402 return data[start++] & 0xFF;
403
404 }
405
406 }
407 }