1   /* Copyright 2002-2022 CS GROUP
2    * Licensed to CS GROUP (CS) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * CS licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *   http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.orekit.utils.units;
18  
19  import org.hipparchus.fraction.Fraction;
20  import org.orekit.errors.OrekitException;
21  import org.orekit.errors.OrekitMessages;
22  
23  /** Lexer for units.
24   * @author Luc Maisonobe
25   * @since 11.0
26   */
27  class Lexer {
28  
29      /** Unit specification to tokenize. */
30      private final CharSequence unitSpecification;
31  
32      /** End index. */
33      private final int end;
34  
35      /** Start index for next token. */
36      private int start;
37  
38      /** Next to last token emitted. */
39      private Token nextToLast;
40  
41      /** Last token emitted. */
42      private Token last;
43  
44      /** Upcoming token (which was pushed back). */
45      private Token upcoming;
46  
47      /** Build a lexer for a unit specification.
48       * @param unitSpecification unit specification to tokenize
49       */
50      Lexer(final CharSequence unitSpecification) {
51          this.unitSpecification = unitSpecification;
52          this.end               = unitSpecification.length();
53          this.start             = 0;
54          this.last              = null;
55      }
56  
57      /** Get the complete unit specification.
58       * @return complete unit specification
59       */
60      public String getUnitSpecification() {
61          return unitSpecification.toString();
62      }
63  
64      /** Push back last returned token.
65       * <p>
66       * This can be called only once
67       * </p>
68       */
69      public void pushBack() {
70          upcoming = last;
71          last     = nextToLast;
72      }
73  
74      /** Get next token.
75       * @return next token, or null if there are no more tokens
76       */
77      public Token next() {
78  
79          if (upcoming != null) {
80              nextToLast = last;
81              last       = upcoming;
82              upcoming   = null;
83              return last;
84          }
85  
86          // skip whitespace
87          while (start < end && Character.isWhitespace(unitSpecification.charAt(start))) {
88              ++start;
89          }
90  
91          if (start >= end) {
92              // no more characters to analyze
93              nextToLast = last;
94              last       = null;
95              return null;
96          }
97  
98          // look for prefixed units
99          int current = start;
100         while (current < end &&
101                (Character.isLowerCase(unitSpecification.charAt(current)) ||
102                 Character.isUpperCase(unitSpecification.charAt(current)) ||
103                 unitSpecification.charAt(current) == '°'  ||
104                 unitSpecification.charAt(current) == '◦'  ||
105                 unitSpecification.charAt(current) == '′'  ||
106                 unitSpecification.charAt(current) == '\'' ||
107                 unitSpecification.charAt(current) == '″'  ||
108                 unitSpecification.charAt(current) == '"'  ||
109                 unitSpecification.charAt(current) == '%'  ||
110                 unitSpecification.charAt(current) == '#')) {
111             ++current;
112         }
113         if (current > start) {
114             return emit(current, TokenType.IDENTIFIER, 0, 1);
115         }
116 
117         // look for power
118         if (start < end - 1 &&
119             unitSpecification.charAt(start)     == '*' &&
120             unitSpecification.charAt(start + 1) == '*') {
121             // power indicator as **
122             return emit(start + 2, TokenType.POWER, 0, 1);
123         } else if (unitSpecification.charAt(start) == '^') {
124             // power indicator as ^
125             return emit(start + 1, TokenType.POWER, 0, 1);
126         } else if (convertSuperscript(start) != ' ' &&
127                    last != null &&
128                    last.getType() != TokenType.POWER) {
129             // virtual power indicator as we switch to superscript characters
130             return emit(start, TokenType.POWER, 0, 1);
131         }
132 
133         // look for one character tokens
134         if (unitSpecification.charAt(start) == '*') {
135             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
136         } else if (unitSpecification.charAt(start) == '×') {
137             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
138         } else if (unitSpecification.charAt(start) == '.') {
139             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
140         } else if (unitSpecification.charAt(start) == '·') {
141             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
142         } else if (unitSpecification.charAt(start) == '/') {
143             return emit(start + 1, TokenType.DIVISION, 0, 1);
144         } else if (unitSpecification.charAt(start) == '⁄') {
145             return emit(start + 1, TokenType.DIVISION, 0, 1);
146         } else if (unitSpecification.charAt(start) == '(') {
147             return emit(start + 1, TokenType.OPEN, 0, 1);
148         } else if (unitSpecification.charAt(start) == ')') {
149             return emit(start + 1, TokenType.CLOSE, 0, 1);
150         } else if (unitSpecification.charAt(start) == '√') {
151             return emit(start + 1, TokenType.SQUARE_ROOT, 0, 1);
152         }
153 
154         // look for special case "0.5" (used by CCSDS for square roots)
155         if (start < end - 2 &&
156              unitSpecification.charAt(start)     == '0' &&
157              unitSpecification.charAt(start + 1) == '.' &&
158              unitSpecification.charAt(start + 2) == '5') {
159             // ½ written as decimal number
160             return emit(start + 3, TokenType.FRACTION, 1, 2);
161         }
162 
163         // look for unicode fractions
164         if (unitSpecification.charAt(start) == '¼') {
165             return emit(start + 1, TokenType.FRACTION, 1, 4);
166         } else if (unitSpecification.charAt(start) == '½') {
167             return emit(start + 1, TokenType.FRACTION, 1, 2);
168         } else if (unitSpecification.charAt(start) == '¾') {
169             return emit(start + 1, TokenType.FRACTION, 3, 4);
170         } else if (unitSpecification.charAt(start) == '⅐') {
171             return emit(start + 1, TokenType.FRACTION, 1, 7);
172         } else if (unitSpecification.charAt(start) == '⅑') {
173             return emit(start + 1, TokenType.FRACTION, 1, 9);
174         } else if (unitSpecification.charAt(start) == '⅒') {
175             return emit(start + 1, TokenType.FRACTION, 1, 10);
176         } else if (unitSpecification.charAt(start) == '⅓') {
177             return emit(start + 1, TokenType.FRACTION, 1, 3);
178         } else if (unitSpecification.charAt(start) == '⅔') {
179             return emit(start + 1, TokenType.FRACTION, 2, 3);
180         } else if (unitSpecification.charAt(start) == '⅕') {
181             return emit(start + 1, TokenType.FRACTION, 1, 5);
182         } else if (unitSpecification.charAt(start) == '⅖') {
183             return emit(start + 1, TokenType.FRACTION, 2, 5);
184         } else if (unitSpecification.charAt(start) == '⅗') {
185             return emit(start + 1, TokenType.FRACTION, 3, 5);
186         } else if (unitSpecification.charAt(start) == '⅘') {
187             return emit(start + 1, TokenType.FRACTION, 4, 5);
188         } else if (unitSpecification.charAt(start) == '⅙') {
189             return emit(start + 1, TokenType.FRACTION, 1, 6);
190         } else if (unitSpecification.charAt(start) == '⅚') {
191             return emit(start + 1, TokenType.FRACTION, 5, 6);
192         } else if (unitSpecification.charAt(start) == '⅛') {
193             return emit(start + 1, TokenType.FRACTION, 1, 8);
194         } else if (unitSpecification.charAt(start) == '⅜') {
195             return emit(start + 1, TokenType.FRACTION, 3, 8);
196         } else if (unitSpecification.charAt(start) == '⅝') {
197             return emit(start + 1, TokenType.FRACTION, 5, 8);
198         } else if (unitSpecification.charAt(start) == '⅞') {
199             return emit(start + 1, TokenType.FRACTION, 7, 8);
200         }
201 
202         // it must be an integer, either as regular character or as superscript
203         final Converter converter = (convertSuperscript(start) == ' ') ?
204                                     this::noConvert :
205                                     this::convertSuperscript;
206 
207         // manage sign, taking care of counting characters properly
208         final int sign;
209         final int numberStart;
210         if (converter.convert(start) == '+') {
211             sign        = +1;
212             numberStart = start + 1;
213         } else if (converter.convert(start) == '-') {
214             sign        = -1;
215             numberStart = start + 1;
216         } else {
217             sign        = 1;
218             numberStart = start;
219         }
220         current = numberStart;
221 
222         int value = 0;
223         while (current < end) {
224             final int c = converter.convert(current);
225             if (c >= '0' && c <= '9') {
226                 value = value * 10 + (c - '0');
227                 ++current;
228             } else {
229                 break;
230             }
231         }
232         if (current > numberStart) {
233             // there were some digits
234             return emit(current, TokenType.INTEGER, sign * value, 1);
235         }
236 
237         throw generateException();
238 
239     }
240 
241     /** Generate an exception.
242      * @return generated exception
243      */
244     public OrekitException generateException() {
245         return new OrekitException(OrekitMessages.UNKNOWN_UNIT, unitSpecification);
246     }
247 
248     /** Emit one token.
249      * @param after index after token
250      * @param type token type
251      * @param numerator value of the token numerator
252      * @param denominator value of the token denominator
253      * @return new token
254      */
255     private Token emit(final int after, final TokenType type, final int numerator, final int denominator) {
256         final CharSequence subString = unitSpecification.subSequence(start, after);
257         start      = after;
258         nextToLast = last;
259         last       = new Token(subString, type, numerator,
260                                denominator == 1 ? null : new Fraction(numerator, denominator));
261         return last;
262     }
263 
264     /** Convert a superscript character to regular digit or sign character.
265      * @param index character index
266      * @return regular digit or sign character, or ' ' if character is not a superscript
267      */
268     private char convertSuperscript(final int index) {
269         // we can't do fancy stuff with code points
270         // superscripts for 1, 2 and 3 are not in the same range as others
271         switch (unitSpecification.charAt(index)) {
272             case '⁰' :
273                 return '0';
274             case '¹' :
275                 return '1';
276             case '²' :
277                 return '2';
278             case '³' :
279                 return '3';
280             case '⁴' :
281                 return '4';
282             case '⁵' :
283                 return '5';
284             case '⁶' :
285                 return '6';
286             case '⁷' :
287                 return '7';
288             case '⁸' :
289                 return '8';
290             case '⁹' :
291                 return '9';
292             case '⁺' :
293                 return '+';
294             case '⁻' :
295                 return '-';
296             default :
297                 return ' ';
298         }
299 
300     }
301 
302     /** No-op converter.
303      * @param index character index
304      * @return character at index
305      */
306     private char noConvert(final int index) {
307         return unitSpecification.charAt(index);
308     }
309 
310     /** Character converter. */
311     private interface Converter {
312         /** Convert a character.
313          * @param index character index
314          * @return converted character
315          */
316         char convert(int index);
317     }
318 
319 }