UnixCompressFilter.java

  1. /* Copyright 2002-2020 CS GROUP
  2.  * Licensed to CS GROUP (CS) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * CS licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *   http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.orekit.data;

  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import java.util.Arrays;

  21. import org.hipparchus.util.FastMath;
  22. import org.orekit.errors.OrekitException;
  23. import org.orekit.errors.OrekitIOException;
  24. import org.orekit.errors.OrekitMessages;

  25. /** Filter for Unix compressed data.
  26.  * @author Luc Maisonobe
  27.  * @since 9.2
  28.  */
  29. public class UnixCompressFilter implements DataFilter {

  30.     /** Suffix for Unix compressed files. */
  31.     private static final String SUFFIX = ".Z";

  32.     /** {@inheritDoc} */
  33.     @Override
  34.     public NamedData filter(final NamedData original) {
  35.         final String                 oName   = original.getName();
  36.         final NamedData.StreamOpener oOpener = original.getStreamOpener();
  37.         if (oName.endsWith(SUFFIX)) {
  38.             final String                 fName   = oName.substring(0, oName.length() - SUFFIX.length());
  39.             final NamedData.StreamOpener fOpener = () -> new ZInputStream(oName, new Buffer(oOpener.openStream()));
  40.             return new NamedData(fName, fOpener);
  41.         } else {
  42.             return original;
  43.         }
  44.     }

  45.     /** Filtering of Unix compressed stream. */
  46.     private static class ZInputStream extends InputStream {

  47.         /** First magic header byte. */
  48.         private static final int MAGIC_HEADER_1 = 0x1f;

  49.         /** Second magic header byte. */
  50.         private static final int MAGIC_HEADER_2 = 0x9d;

  51.         /** Byte bits width. */
  52.         private static final int BYTE_WIDTH = 8;

  53.         /** Initial bits width. */
  54.         private static final int INIT_WIDTH = 9;

  55.         /** Reset table code. */
  56.         private static final int RESET_TABLE = 256;

  57.         /** First non-predefined entry. */
  58.         private static final int FIRST = 257;

  59.         /** File name. */
  60.         private final String name;

  61.         /** Indicator for end of input. */
  62.         private boolean endOfInput;

  63.         /** Common sequences table. */
  64.         private final UncompressedSequence[] table;

  65.         /** Next available entry in the table. */
  66.         private int available;

  67.         /** Flag for block mode when table is full. */
  68.         private final boolean blockMode;

  69.         /** Maximum width allowed. */
  70.         private final int maxWidth;

  71.         /** Current input width in bits. */
  72.         private int currentWidth;

  73.         /** Maximum key that can be encoded with current width. */
  74.         private int currentMaxKey;

  75.         /** Number of bits read since last reset. */
  76.         private int bitsRead;

  77.         /** Lookahead byte, already read but not yet used. */
  78.         private int lookAhead;

  79.         /** Number of bits in the lookahead byte. */
  80.         private int lookAheadWidth;

  81.         /** Input buffer. */
  82.         private Buffer input;

  83.         /** Previous uncompressed sequence output. */
  84.         private UncompressedSequence previousSequence;

  85.         /** Uncompressed sequence being output. */
  86.         private UncompressedSequence currentSequence;

  87.         /** Number of bytes of the current sequence already output. */
  88.         private int alreadyOutput;

  89.         /** Simple constructor.
  90.          * @param name file name
  91.          * @param input underlying compressed stream
  92.          * @exception IOException if first bytes cannot be read
  93.          */
  94.         ZInputStream(final String name, final Buffer input)
  95.             throws IOException {

  96.             this.name       = name;
  97.             this.input      = input;
  98.             this.endOfInput = false;

  99.             // check header
  100.             if (input.getByte() != MAGIC_HEADER_1 || input.getByte() != MAGIC_HEADER_2) {
  101.                 throw new OrekitException(OrekitMessages.NOT_A_SUPPORTED_UNIX_COMPRESSED_FILE, name);
  102.             }

  103.             final int header3 = input.getByte();
  104.             this.blockMode = (header3 & 0x80) != 0;
  105.             this.maxWidth  = header3 & 0x1f;

  106.             // set up table, with at least all entries for one byte
  107.             this.table = new UncompressedSequence[1 << FastMath.max(INIT_WIDTH, maxWidth)];
  108.             for (int i = 0; i < FIRST; ++i) {
  109.                 table[i] = new UncompressedSequence(null, (byte) i);
  110.             }

  111.             // initialize decompression state
  112.             initialize();

  113.         }

  114.         /** Initialize compression state.
  115.          */
  116.         private void initialize() {
  117.             this.available        = FIRST;
  118.             this.bitsRead         = 0;
  119.             this.lookAhead        = 0;
  120.             this.lookAheadWidth   = 0;
  121.             this.currentWidth     = INIT_WIDTH;
  122.             this.currentMaxKey    = (1 << currentWidth) - 1;
  123.             this.previousSequence = null;
  124.             this.currentSequence  = null;
  125.             this.alreadyOutput    = 0;
  126.         }

  127.         /** Read next input key.
  128.          * @return next input key or -1 if end of stream is reached
  129.          * @exception IOException if a read error occurs
  130.          */
  131.         private int nextKey() throws IOException {

  132.             int keyMask = (1 << currentWidth) - 1;

  133.             while (true) {
  134.                 // initialize key with the last bits remaining from previous read
  135.                 int key = lookAhead & keyMask;

  136.                 // read more bits until key is complete
  137.                 for (int remaining = currentWidth - lookAheadWidth; remaining > 0; remaining -= BYTE_WIDTH) {
  138.                     lookAhead       = input.getByte();
  139.                     lookAheadWidth += BYTE_WIDTH;
  140.                     if (lookAhead < 0) {
  141.                         if (key == 0 || key == keyMask) {
  142.                             // the key is either a set of padding 0 bits
  143.                             // or a full key containing -1 if read() is called several times after EOF
  144.                             return -1;
  145.                         } else {
  146.                             // end of stream encountered in the middle of a read
  147.                             throw new OrekitIOException(OrekitMessages.UNEXPECTED_END_OF_FILE, name);
  148.                         }
  149.                     }
  150.                     key = (key | lookAhead << (currentWidth - remaining)) & keyMask;
  151.                 }

  152.                 // store the extra bits already read in the lookahead byte for next call
  153.                 lookAheadWidth -= currentWidth;
  154.                 lookAhead       = lookAhead >>> (BYTE_WIDTH - lookAheadWidth);

  155.                 bitsRead += currentWidth;

  156.                 if (blockMode && key == RESET_TABLE) {

  157.                     // skip the padding bits inserted when compressor flushed its buffer
  158.                     final int superSize = currentWidth * 8;
  159.                     int padding = (superSize - 1 - (bitsRead + superSize - 1) % superSize) / 8;
  160.                     while (padding-- > 0) {
  161.                         input.getByte();
  162.                     }

  163.                     // reset the table to handle a new block and read again next key
  164.                     Arrays.fill(table, FIRST, table.length, null);
  165.                     initialize();

  166.                     // reset the lookahead mask as the current width has changed
  167.                     keyMask = (1 << currentWidth) - 1;

  168.                 } else {
  169.                     // return key at current width
  170.                     return key;
  171.                 }

  172.             }

  173.         }

  174.         /** Select next uncompressed sequence to output.
  175.          * @return true if there is a next sequence
  176.          * @exception IOException if a read error occurs
  177.          */
  178.         private boolean selectNext() throws IOException {

  179.             // read next input key
  180.             final int key = nextKey();
  181.             if (key < 0) {
  182.                 // end of stream reached
  183.                 return false;
  184.             }

  185.             if (previousSequence != null && available < table.length) {
  186.                 // update the table with the next uncompressed byte appended to previous sequence
  187.                 final byte nextByte;
  188.                 if (key == available) {
  189.                     nextByte = previousSequence.getByte(0);
  190.                 } else if (table[key] != null) {
  191.                     nextByte = table[key].getByte(0);
  192.                 } else {
  193.                     throw new OrekitIOException(OrekitMessages.CORRUPTED_FILE, name);
  194.                 }
  195.                 table[available++] = new UncompressedSequence(previousSequence, nextByte);
  196.                 if (available > currentMaxKey && currentWidth < maxWidth) {
  197.                     // we need to increase the key size
  198.                     currentMaxKey = (1 << ++currentWidth) - 1;
  199.                 }
  200.             }

  201.             currentSequence = table[key];
  202.             if (currentSequence == null) {
  203.                 // the compressed file references a non-existent table entry
  204.                 // (this is not the well-known case of entry being used just before
  205.                 //  being defined, which is already handled above), the file is corrupted
  206.                 throw new OrekitIOException(OrekitMessages.CORRUPTED_FILE, name);
  207.             }
  208.             alreadyOutput   = 0;

  209.             return true;

  210.         }

  211.         /** {@inheritDoc} */
  212.         @Override
  213.         public int read() throws IOException {
  214.             final byte[] b = new byte[1];
  215.             return read(b, 0, 1) < 0 ? -1 : b[0];
  216.         }

  217.         /** {@inheritDoc} */
  218.         @Override
  219.         public int read(final byte[] b, final int offset, final int len) throws IOException {

  220.             if (currentSequence == null) {
  221.                 if (endOfInput || !selectNext()) {
  222.                     // we have reached end of data
  223.                     endOfInput = true;
  224.                     return -1;
  225.                 }
  226.             }

  227.             // copy as many bytes as possible from current sequence
  228.             final int n = FastMath.min(len, currentSequence.length() - alreadyOutput);
  229.             for (int i = 0; i < n; ++i) {
  230.                 b[offset + i] = currentSequence.getByte(alreadyOutput++);
  231.             }
  232.             if (alreadyOutput >= currentSequence.length()) {
  233.                 // we have just exhausted the current sequence
  234.                 previousSequence = currentSequence;
  235.                 currentSequence  = null;
  236.                 alreadyOutput    = 0;
  237.             }

  238.             return n;

  239.         }

  240.         /** {@inheritDoc} */
  241.         @Override
  242.         public int available() {
  243.             return currentSequence == null ? 0 : currentSequence.length() - alreadyOutput;
  244.         }

  245.     }

  246.     /** Uncompressed bits sequence. */
  247.     private static class UncompressedSequence {

  248.         /** Prefix sequence (null if this is a start sequence). */
  249.         private final UncompressedSequence prefix;

  250.         /** Last byte in the sequence. */
  251.         private final byte last;

  252.         /** Index of the last byte in the sequence (i.e. length - 1). */
  253.         private final int index;

  254.         /** Simple constructor.
  255.          * @param prefix prefix of the sequence (null if this is a start sequence)
  256.          * @param last last byte of the sequence
  257.          */
  258.         UncompressedSequence(final UncompressedSequence prefix, final byte last) {
  259.             this.prefix = prefix;
  260.             this.last   = last;
  261.             this.index  = prefix == null ? 0 : prefix.index + 1;
  262.         }

  263.         /** Get the length of the sequence.
  264.          * @return length of the sequence
  265.          */
  266.         public int length() {
  267.             return index + 1;
  268.         }

  269.         /** Get a byte from the sequence.
  270.          * @param outputIndex index of the byte in the sequence, counting from 0
  271.          * @return byte at {@code outputIndex}
  272.          */
  273.         public byte getByte(final int outputIndex) {
  274.             return index == outputIndex ? last : prefix.getByte(outputIndex);
  275.         }

  276.     }

  277.     /** Buffer for reading input data. */
  278.     private static class Buffer {

  279.         /** Size of input/output buffers. */
  280.         private static final int BUFFER_SIZE = 4096;

  281.         /** Underlying compressed stream. */
  282.         private final InputStream input;

  283.         /** Buffer data. */
  284.         private final byte[] data;

  285.         /** Start of pending data. */
  286.         private int start;

  287.         /** End of pending data. */
  288.         private int end;

  289.         /** Simple constructor.
  290.          * @param input input stream
  291.          */
  292.         Buffer(final InputStream input) {
  293.             this.input = input;
  294.             this.data  = new byte[BUFFER_SIZE];
  295.             this.start = 0;
  296.             this.end   = start;
  297.         }

  298.         /** Get one input byte.
  299.          * @return input byte, or -1 if end of input has been reached
  300.          * @throws IOException if input data cannot be read
  301.          */
  302.         private int getByte() throws IOException {

  303.             if (start == end) {
  304.                 // the buffer is empty
  305.                 start = 0;
  306.                 end   = input.read(data);
  307.                 if (end == -1) {
  308.                     return -1;
  309.                 }
  310.             }

  311.             return data[start++] & 0xFF;

  312.         }

  313.     }
  314. }