UnixCompressFilter.java

  1. /* Copyright 2002-2019 CS Systèmes d'Information
  2.  * Licensed to CS Systèmes d'Information (CS) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * CS licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *   http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.orekit.data;

  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import java.util.Arrays;

  21. import org.hipparchus.util.FastMath;
  22. import org.orekit.errors.OrekitException;
  23. import org.orekit.errors.OrekitIOException;
  24. import org.orekit.errors.OrekitMessages;

  25. /** Filter for Unix compressed data.
  26.  * @author Luc Maisonobe
  27.  * @since 9.2
  28.  */
  29. public class UnixCompressFilter implements DataFilter {

  30.     /** Suffix for Unix compressed files. */
  31.     private static final String SUFFIX = ".Z";

  32.     /** {@inheritDoc} */
  33.     @Override
  34.     public NamedData filter(final NamedData original) {
  35.         final String                 oName   = original.getName();
  36.         final NamedData.StreamOpener oOpener = original.getStreamOpener();
  37.         if (oName.endsWith(SUFFIX)) {
  38.             final String                 fName   = oName.substring(0, oName.length() - SUFFIX.length());
  39.             final NamedData.StreamOpener fOpener = () -> new ZInputStream(oName, oOpener.openStream());
  40.             return new NamedData(fName, fOpener);
  41.         } else {
  42.             return original;
  43.         }
  44.     }

  45.     /** Filtering of Unix compressed stream. */
  46.     private static class ZInputStream extends InputStream {

  47.         /** First magic header byte. */
  48.         private static final int MAGIC_HEADER_1 = 0x1f;

  49.         /** Second magic header byte. */
  50.         private static final int MAGIC_HEADER_2 = 0x9d;

  51.         /** Byte bits width. */
  52.         private static final int BYTE_WIDTH = 8;

  53.         /** Initial bits width. */
  54.         private static final int INIT_WIDTH = 9;

  55.         /** Reset table code. */
  56.         private static final int RESET_TABLE = 256;

  57.         /** First non-predefined entry. */
  58.         private static final int FIRST = 257;

  59.         /** File name. */
  60.         private final String name;

  61.         /** Underlying compressed stream. */
  62.         private final InputStream input;

  63.         /** Common sequences table. */
  64.         private final UncompressedSequence[] table;

  65.         /** Next available entry in the table. */
  66.         private int available;

  67.         /** Flag for block mode when table is full. */
  68.         private final boolean blockMode;

  69.         /** Maximum width allowed. */
  70.         private final int maxWidth;

  71.         /** Current input width in bits. */
  72.         private int currentWidth;

  73.         /** Maximum key that can be encoded with current width. */
  74.         private int currentMaxKey;

  75.         /** Number of bits read since last reset. */
  76.         private int bitsRead;

  77.         /** Lookahead byte, already read but not yet used. */
  78.         private int lookAhead;

  79.         /** Number of bits in the lookahead byte. */
  80.         private int lookAheadWidth;

  81.         /** Previous uncompressed sequence output. */
  82.         private UncompressedSequence previousSequence;

  83.         /** Uncompressed sequence being output. */
  84.         private UncompressedSequence currentSequence;

  85.         /** Number of bytes of the current sequence already output. */
  86.         private int alreadyOutput;

  87.         /** Simple constructor.
  88.          * @param name file name
  89.          * @param input underlying compressed stream
  90.          * @exception IOException if first bytes cannot be read
  91.          */
  92.         ZInputStream(final String name, final InputStream input)
  93.             throws IOException {

  94.             this.name  = name;
  95.             this.input = input;


  96.             // check header
  97.             if (input.read() != MAGIC_HEADER_1 || input.read() != MAGIC_HEADER_2) {
  98.                 throw new OrekitException(OrekitMessages.NOT_A_SUPPORTED_UNIX_COMPRESSED_FILE, name);
  99.             }

  100.             final int header3 = input.read();
  101.             this.blockMode = (header3 & 0x80) != 0;
  102.             this.maxWidth  = header3 & 0x1f;

  103.             // set up table, with at least all entries for one byte
  104.             this.table = new UncompressedSequence[1 << FastMath.max(INIT_WIDTH, maxWidth)];
  105.             for (int i = 0; i < FIRST; ++i) {
  106.                 table[i] = new UncompressedSequence(null, (byte) i);
  107.             }

  108.             // initialize decompression state
  109.             initialize();

  110.         }

  111.         /** Initialize compression state.
  112.          */
  113.         private void initialize() {
  114.             this.available        = FIRST;
  115.             this.bitsRead         = 0;
  116.             this.lookAhead        = 0;
  117.             this.lookAheadWidth   = 0;
  118.             this.currentWidth     = INIT_WIDTH;
  119.             this.currentMaxKey    = (1 << currentWidth) - 1;
  120.             this.previousSequence = null;
  121.             this.currentSequence  = null;
  122.             this.alreadyOutput    = 0;
  123.         }

  124.         /** Read next input key.
  125.          * @return next input key or -1 if end of stream is reached
  126.          * @exception IOException if a read error occurs
  127.          */
  128.         private int nextKey() throws IOException {

  129.             int keyMask = (1 << currentWidth) - 1;

  130.             while (true) {
  131.                 // initialize key with the last bits remaining from previous read
  132.                 int key = lookAhead & keyMask;

  133.                 // read more bits until key is complete
  134.                 for (int remaining = currentWidth - lookAheadWidth; remaining > 0; remaining -= BYTE_WIDTH) {
  135.                     lookAhead       = input.read();
  136.                     lookAheadWidth += BYTE_WIDTH;
  137.                     if (lookAhead < 0) {
  138.                         if (key == 0 || key == keyMask) {
  139.                             // the key is either a set of padding 0 bits
  140.                             // or a full key containing -1 if read() is called several times after EOF
  141.                             return -1;
  142.                         } else {
  143.                             // end of stream encountered in the middle of a read
  144.                             throw new OrekitIOException(OrekitMessages.UNEXPECTED_END_OF_FILE, name);
  145.                         }
  146.                     }
  147.                     key = (key | lookAhead << (currentWidth - remaining)) & keyMask;
  148.                 }

  149.                 // store the extra bits already read in the lookahead byte for next call
  150.                 lookAheadWidth -= currentWidth;
  151.                 lookAhead       = lookAhead >>> (BYTE_WIDTH - lookAheadWidth);

  152.                 bitsRead += currentWidth;

  153.                 if (blockMode && key == RESET_TABLE) {

  154.                     // skip the padding bits inserted when compressor flushed its buffer
  155.                     final int superSize = currentWidth * 8;
  156.                     int padding = (superSize - 1 - (bitsRead + superSize - 1) % superSize) / 8;
  157.                     while (padding-- > 0) {
  158.                         input.read();
  159.                     }

  160.                     // reset the table to handle a new block and read again next key
  161.                     Arrays.fill(table, FIRST, table.length, null);
  162.                     initialize();

  163.                     // reset the lookahead mask as the current width has changed
  164.                     keyMask = (1 << currentWidth) - 1;

  165.                 } else {
  166.                     // return key at current width
  167.                     return key;
  168.                 }

  169.             }

  170.         }

  171.         /** Select next uncompressed sequence to output.
  172.          * @return true if there is a next sequence
  173.          * @exception IOException if a read error occurs
  174.          */
  175.         private boolean selectNext() throws IOException {

  176.             // read next input key
  177.             final int key = nextKey();
  178.             if (key < 0) {
  179.                 // end of stream reached
  180.                 return false;
  181.             }

  182.             if (previousSequence != null && available < table.length) {
  183.                 // update the table with the next uncompressed byte appended to previous sequence
  184.                 final byte nextByte = (key == available) ? previousSequence.getByte(0) : table[key].getByte(0);
  185.                 table[available++] = new UncompressedSequence(previousSequence, nextByte);
  186.                 if (available > currentMaxKey && currentWidth < maxWidth) {
  187.                     // we need to increase the key size
  188.                     currentMaxKey = (1 << ++currentWidth) - 1;
  189.                 }
  190.             }

  191.             currentSequence = table[key];
  192.             if (currentSequence == null) {
  193.                 // the compressed file references a non-existent table entry
  194.                 // (this is not the well-known case of entry being used just before
  195.                 //  being defined, which is already handled above), the file is corrupted
  196.                 throw new OrekitIOException(OrekitMessages.CORRUPTED_FILE, name);
  197.             }
  198.             alreadyOutput   = 0;

  199.             return true;

  200.         }

  201.         /** {@inheritDoc} */
  202.         @Override
  203.         public int read() throws IOException {

  204.             if (currentSequence == null) {
  205.                 if (!selectNext()) {
  206.                     // we have reached end of data
  207.                     return -1;
  208.                 }
  209.             }

  210.             final int value = currentSequence.getByte(alreadyOutput++);
  211.             if (alreadyOutput >= currentSequence.length()) {
  212.                 // we have just exhausted the current sequence
  213.                 previousSequence = currentSequence;
  214.                 currentSequence  = null;
  215.                 alreadyOutput    = 0;
  216.             }

  217.             return value;

  218.         }

  219.     }

  220.     /** Uncompressed bits sequence. */
  221.     private static class UncompressedSequence {

  222.         /** Prefix sequence (null if this is a start sequence). */
  223.         private final UncompressedSequence prefix;

  224.         /** Last byte in the sequence. */
  225.         private final byte last;

  226.         /** Index of the last byte in the sequence (i.e. length - 1). */
  227.         private final int index;

  228.         /** Simple constructor.
  229.          * @param prefix prefix of the sequence (null if this is a start sequence)
  230.          * @param last last byte of the sequence
  231.          */
  232.         UncompressedSequence(final UncompressedSequence prefix, final byte last) {
  233.             this.prefix = prefix;
  234.             this.last   = last;
  235.             this.index  = prefix == null ? 0 : prefix.index + 1;
  236.         }

  237.         /** Get the length of the sequence.
  238.          * @return length of the sequence
  239.          */
  240.         public int length() {
  241.             return index + 1;
  242.         }

  243.         /** Get a byte from the sequence.
  244.          * @param outputIndex index of the byte in the sequence, counting from 0
  245.          * @return byte at {@code outputIndex}
  246.          */
  247.         public byte getByte(final int outputIndex) {
  248.             return index == outputIndex ? last : prefix.getByte(outputIndex);
  249.         }

  250.     }

  251. }