1   /* Copyright 2002-2025 CS GROUP
2    * Licensed to CS GROUP (CS) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * CS licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *   http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.orekit.data;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.util.Arrays;
22  
23  import org.hipparchus.util.FastMath;
24  import org.orekit.errors.OrekitException;
25  import org.orekit.errors.OrekitIOException;
26  import org.orekit.errors.OrekitMessages;
27  
28  /** Filter for Unix compressed data.
29   * @author Luc Maisonobe
30   * @since 9.2
31   */
32  public class UnixCompressFilter implements DataFilter {
33  
34      /** Suffix for Unix compressed files. */
35      private static final String SUFFIX = ".Z";
36  
37      /** Empty constructor.
38       * <p>
39       * This constructor is not strictly necessary, but it prevents spurious
40       * javadoc warnings with JDK 18 and later.
41       * </p>
42       * @since 12.0
43       */
44      public UnixCompressFilter() {
45          // nothing to do
46      }
47  
48      /** {@inheritDoc} */
49      @Override
50      public DataSource filter(final DataSource original) {
51          final String            oName   = original.getName();
52          final DataSource.Opener oOpener = original.getOpener();
53          if (oName.endsWith(SUFFIX)) {
54              final String                  fName   = oName.substring(0, oName.length() - SUFFIX.length());
55              final DataSource.StreamOpener fOpener = () -> new ZInputStream(oName, new Buffer(oOpener.openStreamOnce()));
56              return new DataSource(fName, fOpener);
57          } else {
58              return original;
59          }
60      }
61  
62      /** Filtering of Unix compressed stream. */
63      private static class ZInputStream extends InputStream {
64  
65          /** First magic header byte. */
66          private static final int MAGIC_HEADER_1 = 0x1f;
67  
68          /** Second magic header byte. */
69          private static final int MAGIC_HEADER_2 = 0x9d;
70  
71          /** Byte bits width. */
72          private static final int BYTE_WIDTH = 8;
73  
74          /** Initial bits width. */
75          private static final int INIT_WIDTH = 9;
76  
77          /** Reset table code. */
78          private static final int RESET_TABLE = 256;
79  
80          /** First non-predefined entry. */
81          private static final int FIRST = 257;
82  
83          /** File name. */
84          private final String name;
85  
86          /** Indicator for end of input. */
87          private boolean endOfInput;
88  
89          /** Common sequences table. */
90          private final UncompressedSequence[] table;
91  
92          /** Next available entry in the table. */
93          private int available;
94  
95          /** Flag for block mode when table is full. */
96          private final boolean blockMode;
97  
98          /** Maximum width allowed. */
99          private final int maxWidth;
100 
101         /** Current input width in bits. */
102         private int currentWidth;
103 
104         /** Maximum key that can be encoded with current width. */
105         private int currentMaxKey;
106 
107         /** Number of bits read since last reset. */
108         private int bitsRead;
109 
110         /** Lookahead byte, already read but not yet used. */
111         private int lookAhead;
112 
113         /** Number of bits in the lookahead byte. */
114         private int lookAheadWidth;
115 
116         /** Input buffer. */
117         private Buffer input;
118 
119         /** Previous uncompressed sequence output. */
120         private UncompressedSequence previousSequence;
121 
122         /** Uncompressed sequence being output. */
123         private UncompressedSequence currentSequence;
124 
125         /** Number of bytes of the current sequence already output. */
126         private int alreadyOutput;
127 
128         /** Simple constructor.
129          * @param name file name
130          * @param input underlying compressed stream
131          * @exception IOException if first bytes cannot be read
132          */
133         ZInputStream(final String name, final Buffer input)
134             throws IOException {
135 
136             this.name       = name;
137             this.input      = input;
138             this.endOfInput = false;
139 
140             // check header
141             if (input.getByte() != MAGIC_HEADER_1 || input.getByte() != MAGIC_HEADER_2) {
142                 throw new OrekitException(OrekitMessages.NOT_A_SUPPORTED_UNIX_COMPRESSED_FILE, name);
143             }
144 
145             final int header3 = input.getByte();
146             this.blockMode = (header3 & 0x80) != 0;
147             this.maxWidth  = header3 & 0x1f;
148 
149             // set up table, with at least all entries for one byte
150             this.table = new UncompressedSequence[1 << FastMath.max(INIT_WIDTH, maxWidth)];
151             for (int i = 0; i < FIRST; ++i) {
152                 table[i] = new UncompressedSequence(null, (byte) i);
153             }
154 
155             // initialize decompression state
156             initialize();
157 
158         }
159 
160         /** Initialize compression state.
161          */
162         private void initialize() {
163             this.available        = FIRST;
164             this.bitsRead         = 0;
165             this.lookAhead        = 0;
166             this.lookAheadWidth   = 0;
167             this.currentWidth     = INIT_WIDTH;
168             this.currentMaxKey    = (1 << currentWidth) - 1;
169             this.previousSequence = null;
170             this.currentSequence  = null;
171             this.alreadyOutput    = 0;
172         }
173 
174         /** Read next input key.
175          * @return next input key or -1 if end of stream is reached
176          * @exception IOException if a read error occurs
177          */
178         private int nextKey() throws IOException {
179 
180             int keyMask = (1 << currentWidth) - 1;
181 
182             while (true) {
183                 // initialize key with the last bits remaining from previous read
184                 int key = lookAhead & keyMask;
185 
186                 // read more bits until key is complete
187                 for (int remaining = currentWidth - lookAheadWidth; remaining > 0; remaining -= BYTE_WIDTH) {
188                     lookAhead       = input.getByte();
189                     lookAheadWidth += BYTE_WIDTH;
190                     if (lookAhead < 0) {
191                         if (key == 0 || key == keyMask) {
192                             // the key is either a set of padding 0 bits
193                             // or a full key containing -1 if read() is called several times after EOF
194                             return -1;
195                         } else {
196                             // end of stream encountered in the middle of a read
197                             throw new OrekitIOException(OrekitMessages.UNEXPECTED_END_OF_FILE, name);
198                         }
199                     }
200                     key = (key | lookAhead << (currentWidth - remaining)) & keyMask;
201                 }
202 
203                 // store the extra bits already read in the lookahead byte for next call
204                 lookAheadWidth -= currentWidth;
205                 lookAhead       = lookAhead >>> (BYTE_WIDTH - lookAheadWidth);
206 
207                 bitsRead += currentWidth;
208 
209                 if (blockMode && key == RESET_TABLE) {
210 
211                     // skip the padding bits inserted when compressor flushed its buffer
212                     final int superSize = currentWidth * 8;
213                     int padding = (superSize - 1 - (bitsRead + superSize - 1) % superSize) / 8;
214                     while (padding-- > 0) {
215                         input.getByte();
216                     }
217 
218                     // reset the table to handle a new block and read again next key
219                     Arrays.fill(table, FIRST, table.length, null);
220                     initialize();
221 
222                     // reset the lookahead mask as the current width has changed
223                     keyMask = (1 << currentWidth) - 1;
224 
225                 } else {
226                     // return key at current width
227                     return key;
228                 }
229 
230             }
231 
232         }
233 
234         /** Select next uncompressed sequence to output.
235          * @return true if there is a next sequence
236          * @exception IOException if a read error occurs
237          */
238         private boolean selectNext() throws IOException {
239 
240             // read next input key
241             final int key = nextKey();
242             if (key < 0) {
243                 // end of stream reached
244                 return false;
245             }
246 
247             if (previousSequence != null && available < table.length) {
248                 // update the table with the next uncompressed byte appended to previous sequence
249                 final byte nextByte;
250                 if (key == available) {
251                     nextByte = previousSequence.getByte(0);
252                 } else if (table[key] != null) {
253                     nextByte = table[key].getByte(0);
254                 } else {
255                     throw new OrekitIOException(OrekitMessages.CORRUPTED_FILE, name);
256                 }
257                 table[available++] = new UncompressedSequence(previousSequence, nextByte);
258                 if (available > currentMaxKey && currentWidth < maxWidth) {
259                     // we need to increase the key size
260                     currentMaxKey = (1 << ++currentWidth) - 1;
261                 }
262             }
263 
264             currentSequence = table[key];
265             if (currentSequence == null) {
266                 // the compressed file references a non-existent table entry
267                 // (this is not the well-known case of entry being used just before
268                 //  being defined, which is already handled above), the file is corrupted
269                 throw new OrekitIOException(OrekitMessages.CORRUPTED_FILE, name);
270             }
271             alreadyOutput   = 0;
272 
273             return true;
274 
275         }
276 
277         /** {@inheritDoc} */
278         @Override
279         public int read() throws IOException {
280             final byte[] b = new byte[1];
281             return read(b, 0, 1) < 0 ? -1 : b[0];
282         }
283 
284         /** {@inheritDoc} */
285         @Override
286         public int read(final byte[] b, final int offset, final int len) throws IOException {
287 
288             if (currentSequence == null) {
289                 if (endOfInput || !selectNext()) {
290                     // we have reached end of data
291                     endOfInput = true;
292                     return -1;
293                 }
294             }
295 
296             // copy as many bytes as possible from current sequence
297             final int n = FastMath.min(len, currentSequence.length() - alreadyOutput);
298             for (int i = 0; i < n; ++i) {
299                 b[offset + i] = currentSequence.getByte(alreadyOutput++);
300             }
301             if (alreadyOutput >= currentSequence.length()) {
302                 // we have just exhausted the current sequence
303                 previousSequence = currentSequence;
304                 currentSequence  = null;
305                 alreadyOutput    = 0;
306             }
307 
308             return n;
309 
310         }
311 
312         /** {@inheritDoc} */
313         @Override
314         public int available() {
315             return currentSequence == null ? 0 : currentSequence.length() - alreadyOutput;
316         }
317 
318     }
319 
320     /** Uncompressed bits sequence. */
321     private static class UncompressedSequence {
322 
323         /** Prefix sequence (null if this is a start sequence). */
324         private final UncompressedSequence prefix;
325 
326         /** Last byte in the sequence. */
327         private final byte last;
328 
329         /** Index of the last byte in the sequence (i.e. length - 1). */
330         private final int index;
331 
332         /** Simple constructor.
333          * @param prefix prefix of the sequence (null if this is a start sequence)
334          * @param last last byte of the sequence
335          */
336         UncompressedSequence(final UncompressedSequence prefix, final byte last) {
337             this.prefix = prefix;
338             this.last   = last;
339             this.index  = prefix == null ? 0 : prefix.index + 1;
340         }
341 
342         /** Get the length of the sequence.
343          * @return length of the sequence
344          */
345         public int length() {
346             return index + 1;
347         }
348 
349         /** Get a byte from the sequence.
350          * @param outputIndex index of the byte in the sequence, counting from 0
351          * @return byte at {@code outputIndex}
352          */
353         public byte getByte(final int outputIndex) {
354             return index == outputIndex ? last : prefix.getByte(outputIndex);
355         }
356 
357     }
358 
359     /** Buffer for reading input data. */
360     private static class Buffer {
361 
362         /** Size of input/output buffers. */
363         private static final int BUFFER_SIZE = 4096;
364 
365         /** Underlying compressed stream. */
366         private final InputStream input;
367 
368         /** Buffer data. */
369         private final byte[] data;
370 
371         /** Start of pending data. */
372         private int start;
373 
374         /** End of pending data. */
375         private int end;
376 
377         /** Simple constructor.
378          * @param input input stream
379          */
380         Buffer(final InputStream input) {
381             this.input = input;
382             this.data  = new byte[BUFFER_SIZE];
383             this.start = 0;
384             this.end   = start;
385         }
386 
387         /** Get one input byte.
388          * @return input byte, or -1 if end of input has been reached
389          * @throws IOException if input data cannot be read
390          */
391         private int getByte() throws IOException {
392 
393             if (start == end) {
394                 // the buffer is empty
395                 start = 0;
396                 end   = input.read(data);
397                 if (end == -1) {
398                     return -1;
399                 }
400             }
401 
402             return data[start++] & 0xFF;
403 
404         }
405 
406     }
407 }