1   /* Copyright 2002-2025 CS GROUP
2    * Licensed to CS GROUP (CS) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * CS licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *   http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.orekit.utils.units;
18  
19  import org.hipparchus.fraction.Fraction;
20  import org.orekit.errors.OrekitException;
21  import org.orekit.errors.OrekitMessages;
22  
23  /** Lexer for units.
24   * @author Luc Maisonobe
25   * @since 11.0
26   */
27  class Lexer {
28  
29      /** Unit specification to tokenize. */
30      private final CharSequence unitSpecification;
31  
32      /** End index. */
33      private final int end;
34  
35      /** Start index for next token. */
36      private int start;
37  
38      /** Next to last token emitted. */
39      private Token nextToLast;
40  
41      /** Last token emitted. */
42      private Token last;
43  
44      /** Upcoming token (which was pushed back). */
45      private Token upcoming;
46  
47      /** Build a lexer for a unit specification.
48       * @param unitSpecification unit specification to tokenize
49       */
50      Lexer(final CharSequence unitSpecification) {
51          this.unitSpecification = unitSpecification;
52          this.end               = unitSpecification.length();
53          this.start             = 0;
54          this.last              = null;
55      }
56  
57      /** Get the complete unit specification.
58       * @return complete unit specification
59       */
60      public String getUnitSpecification() {
61          return unitSpecification.toString();
62      }
63  
64      /** Push back last returned token.
65       * <p>
66       * This can be called only once
67       * </p>
68       */
69      public void pushBack() {
70          upcoming = last;
71          last     = nextToLast;
72      }
73  
74      /** Get next token.
75       * @return next token, or null if there are no more tokens
76       */
77      public Token next() {
78  
79          if (upcoming != null) {
80              nextToLast = last;
81              last       = upcoming;
82              upcoming   = null;
83              return last;
84          }
85  
86          // skip whitespace
87          while (start < end && Character.isWhitespace(unitSpecification.charAt(start))) {
88              ++start;
89          }
90  
91          if (start >= end) {
92              // no more characters to analyze
93              nextToLast = last;
94              last       = null;
95              return null;
96          }
97  
98          // look for prefixed units
99          int current = start;
100         while (current < end &&
101                (Character.isLowerCase(unitSpecification.charAt(current)) ||
102                 Character.isUpperCase(unitSpecification.charAt(current)) ||
103                 unitSpecification.charAt(current) == '°'  ||
104                 unitSpecification.charAt(current) == '◦'  ||
105                 unitSpecification.charAt(current) == '′'  ||
106                 unitSpecification.charAt(current) == '\'' ||
107                 unitSpecification.charAt(current) == '″'  ||
108                 unitSpecification.charAt(current) == '"'  ||
109                 unitSpecification.charAt(current) == '%'  ||
110                 unitSpecification.charAt(current) == '#')) {
111             ++current;
112         }
113         if (current > start) {
114             return emit(current, TokenType.IDENTIFIER, 0, 1);
115         }
116 
117         // look for power
118         if (start < end - 1 &&
119             unitSpecification.charAt(start)     == '*' &&
120             unitSpecification.charAt(start + 1) == '*') {
121             // power indicator as **
122             return emit(start + 2, TokenType.POWER, 0, 1);
123         } else if (unitSpecification.charAt(start) == '^') {
124             // power indicator as ^
125             return emit(start + 1, TokenType.POWER, 0, 1);
126         } else if (convertSuperscript(start) != ' ' &&
127                    last != null &&
128                    last.getType() != TokenType.POWER) {
129             // virtual power indicator as we switch to superscript characters
130             return emit(start, TokenType.POWER, 0, 1);
131         }
132 
133         // look for one character tokens
134         if (unitSpecification.charAt(start) == '*') {
135             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
136         } else if (unitSpecification.charAt(start) == '×') {
137             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
138         } else if (unitSpecification.charAt(start) == '.') {
139             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
140         } else if (unitSpecification.charAt(start) == '·') {
141             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
142         } else if (unitSpecification.charAt(start) == '/') {
143             return emit(start + 1, TokenType.DIVISION, 0, 1);
144         } else if (unitSpecification.charAt(start) == '⁄') {
145             return emit(start + 1, TokenType.DIVISION, 0, 1);
146         } else if (unitSpecification.charAt(start) == '(') {
147             return emit(start + 1, TokenType.OPEN, 0, 1);
148         } else if (unitSpecification.charAt(start) == ')') {
149             return emit(start + 1, TokenType.CLOSE, 0, 1);
150         } else if (unitSpecification.charAt(start) == '√') {
151             return emit(start + 1, TokenType.SQUARE_ROOT, 0, 1);
152         }
153 
154         // look for special case "0.5" (used by CCSDS for square roots)
155         if (start < end - 2 &&
156              unitSpecification.charAt(start)     == '0' &&
157              unitSpecification.charAt(start + 1) == '.' &&
158              unitSpecification.charAt(start + 2) == '5') {
159             // ½ written as decimal number
160             return emit(start + 3, TokenType.FRACTION, 1, 2);
161         }
162 
163         // look for special case "1.5" (used by CCSDS for power 3/2)
164         if (start < end - 2 &&
165              unitSpecification.charAt(start)     == '1' &&
166              unitSpecification.charAt(start + 1) == '.' &&
167              unitSpecification.charAt(start + 2) == '5') {
168             // 3/2 written as decimal number
169             return emit(start + 3, TokenType.FRACTION, 3, 2);
170         }
171 
172         // look for unicode fractions
173         if (unitSpecification.charAt(start) == '¼') {
174             return emit(start + 1, TokenType.FRACTION, 1, 4);
175         } else if (unitSpecification.charAt(start) == '½') {
176             return emit(start + 1, TokenType.FRACTION, 1, 2);
177         } else if (unitSpecification.charAt(start) == '¾') {
178             return emit(start + 1, TokenType.FRACTION, 3, 4);
179         } else if (unitSpecification.charAt(start) == '⅐') {
180             return emit(start + 1, TokenType.FRACTION, 1, 7);
181         } else if (unitSpecification.charAt(start) == '⅑') {
182             return emit(start + 1, TokenType.FRACTION, 1, 9);
183         } else if (unitSpecification.charAt(start) == '⅒') {
184             return emit(start + 1, TokenType.FRACTION, 1, 10);
185         } else if (unitSpecification.charAt(start) == '⅓') {
186             return emit(start + 1, TokenType.FRACTION, 1, 3);
187         } else if (unitSpecification.charAt(start) == '⅔') {
188             return emit(start + 1, TokenType.FRACTION, 2, 3);
189         } else if (unitSpecification.charAt(start) == '⅕') {
190             return emit(start + 1, TokenType.FRACTION, 1, 5);
191         } else if (unitSpecification.charAt(start) == '⅖') {
192             return emit(start + 1, TokenType.FRACTION, 2, 5);
193         } else if (unitSpecification.charAt(start) == '⅗') {
194             return emit(start + 1, TokenType.FRACTION, 3, 5);
195         } else if (unitSpecification.charAt(start) == '⅘') {
196             return emit(start + 1, TokenType.FRACTION, 4, 5);
197         } else if (unitSpecification.charAt(start) == '⅙') {
198             return emit(start + 1, TokenType.FRACTION, 1, 6);
199         } else if (unitSpecification.charAt(start) == '⅚') {
200             return emit(start + 1, TokenType.FRACTION, 5, 6);
201         } else if (unitSpecification.charAt(start) == '⅛') {
202             return emit(start + 1, TokenType.FRACTION, 1, 8);
203         } else if (unitSpecification.charAt(start) == '⅜') {
204             return emit(start + 1, TokenType.FRACTION, 3, 8);
205         } else if (unitSpecification.charAt(start) == '⅝') {
206             return emit(start + 1, TokenType.FRACTION, 5, 8);
207         } else if (unitSpecification.charAt(start) == '⅞') {
208             return emit(start + 1, TokenType.FRACTION, 7, 8);
209         }
210 
211         // it must be an integer, either as regular character or as superscript
212         final Converter converter = (convertSuperscript(start) == ' ') ?
213                                     this::noConvert :
214                                     this::convertSuperscript;
215 
216         // manage sign, taking care of counting characters properly
217         final int sign;
218         final int numberStart;
219         if (converter.convert(start) == '+') {
220             sign        = +1;
221             numberStart = start + 1;
222         } else if (converter.convert(start) == '-') {
223             sign        = -1;
224             numberStart = start + 1;
225         } else {
226             sign        = 1;
227             numberStart = start;
228         }
229         current = numberStart;
230 
231         int value = 0;
232         while (current < end) {
233             final int c = converter.convert(current);
234             if (c >= '0' && c <= '9') {
235                 value = value * 10 + (c - '0');
236                 ++current;
237             } else {
238                 break;
239             }
240         }
241         if (current > numberStart) {
242             // there were some digits
243             return emit(current, TokenType.INTEGER, sign * value, 1);
244         }
245 
246         throw generateException();
247 
248     }
249 
250     /** Generate an exception.
251      * @return generated exception
252      */
253     public OrekitException generateException() {
254         return new OrekitException(OrekitMessages.UNKNOWN_UNIT, unitSpecification);
255     }
256 
257     /** Emit one token.
258      * @param after index after token
259      * @param type token type
260      * @param numerator value of the token numerator
261      * @param denominator value of the token denominator
262      * @return new token
263      */
264     private Token emit(final int after, final TokenType type, final int numerator, final int denominator) {
265         final CharSequence subString = unitSpecification.subSequence(start, after);
266         start      = after;
267         nextToLast = last;
268         last       = new Token(subString, type, numerator,
269                                denominator == 1 ? null : new Fraction(numerator, denominator));
270         return last;
271     }
272 
273     /** Convert a superscript character to regular digit or sign character.
274      * @param index character index
275      * @return regular digit or sign character, or ' ' if character is not a superscript
276      */
277     private char convertSuperscript(final int index) {
278         // we can't do fancy stuff with code points
279         // superscripts for 1, 2 and 3 are not in the same range as others
280         switch (unitSpecification.charAt(index)) {
281             case '⁰' :
282                 return '0';
283             case '¹' :
284                 return '1';
285             case '²' :
286                 return '2';
287             case '³' :
288                 return '3';
289             case '⁴' :
290                 return '4';
291             case '⁵' :
292                 return '5';
293             case '⁶' :
294                 return '6';
295             case '⁷' :
296                 return '7';
297             case '⁸' :
298                 return '8';
299             case '⁹' :
300                 return '9';
301             case '⁺' :
302                 return '+';
303             case '⁻' :
304                 return '-';
305             default :
306                 return ' ';
307         }
308 
309     }
310 
311     /** No-op converter.
312      * @param index character index
313      * @return character at index
314      */
315     private char noConvert(final int index) {
316         return unitSpecification.charAt(index);
317     }
318 
319     /** Character converter. */
320     private interface Converter {
321         /** Convert a character.
322          * @param index character index
323          * @return converted character
324          */
325         char convert(int index);
326     }
327 
328 }