1   /* Copyright 2002-2026 CS GROUP
2    * Licensed to CS GROUP (CS) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * CS licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *   http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.orekit.utils.units;
18  
19  import org.hipparchus.fraction.Fraction;
20  import org.orekit.errors.OrekitException;
21  import org.orekit.errors.OrekitMessages;
22  
23  /** Lexer for units.
24   * @author Luc Maisonobe
25   * @since 11.0
26   */
27  class Lexer {
28  
29      /** Unit specification to tokenize. */
30      private final CharSequence unitSpecification;
31  
32      /** End index. */
33      private final int end;
34  
35      /** Start index for next token. */
36      private int start;
37  
38      /** Next to last token emitted. */
39      private Token nextToLast;
40  
41      /** Last token emitted. */
42      private Token last;
43  
44      /** Upcoming token (which was pushed back). */
45      private Token upcoming;
46  
47      /** Build a lexer for a unit specification.
48       * @param unitSpecification unit specification to tokenize
49       */
50      Lexer(final CharSequence unitSpecification) {
51          this.unitSpecification = unitSpecification;
52          this.end               = unitSpecification.length();
53          this.start             = 0;
54          this.last              = null;
55      }
56  
57      /** Get the complete unit specification.
58       * @return complete unit specification
59       */
60      public String getUnitSpecification() {
61          return unitSpecification.toString();
62      }
63  
64      /** Push back last returned token.
65       * <p>
66       * This can be called only once
67       * </p>
68       */
69      public void pushBack() {
70          upcoming = last;
71          last     = nextToLast;
72      }
73  
74      /** Get next token.
75       * @return next token, or null if there are no more tokens
76       */
77      public Token next() {
78  
79          if (upcoming != null) {
80              nextToLast = last;
81              last       = upcoming;
82              upcoming   = null;
83              return last;
84          }
85  
86          // skip whitespace
87          while (start < end && Character.isWhitespace(unitSpecification.charAt(start))) {
88              ++start;
89          }
90  
91          if (start >= end) {
92              // no more characters to analyze
93              nextToLast = last;
94              last       = null;
95              return null;
96          }
97  
98          // look for prefixed units
99          int current = start;
100         while (current < end &&
101                (Character.isLowerCase(unitSpecification.charAt(current)) ||
102                 Character.isUpperCase(unitSpecification.charAt(current)) ||
103                 unitSpecification.charAt(current) == '°'  ||
104                 unitSpecification.charAt(current) == '◦'  ||
105                 unitSpecification.charAt(current) == '′'  ||
106                 unitSpecification.charAt(current) == '\'' ||
107                 unitSpecification.charAt(current) == '″'  ||
108                 unitSpecification.charAt(current) == '"'  ||
109                 unitSpecification.charAt(current) == '%'  ||
110                 unitSpecification.charAt(current) == '#')) {
111             ++current;
112         }
113         if (current > start) {
114             return emit(current, TokenType.IDENTIFIER, 0, 1);
115         }
116 
117         // look for power
118         if (start < end - 1 &&
119             unitSpecification.charAt(start)     == '*' &&
120             unitSpecification.charAt(start + 1) == '*') {
121             // power indicator as **
122             return emit(start + 2, TokenType.POWER, 0, 1);
123         } else if (unitSpecification.charAt(start) == '^') {
124             // power indicator as ^
125             return emit(start + 1, TokenType.POWER, 0, 1);
126         } else if (convertSuperscript(start) != ' ' &&
127                    last != null &&
128                    last.getType() != TokenType.POWER) {
129             // virtual power indicator as we switch to superscript characters
130             return emit(start, TokenType.POWER, 0, 1);
131         }
132 
133         // look for one character tokens
134         if (unitSpecification.charAt(start) == '*') {
135             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
136         } else if (unitSpecification.charAt(start) == '×') {
137             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
138         } else if (unitSpecification.charAt(start) == '.') {
139             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
140         } else if (unitSpecification.charAt(start) == '·') {
141             return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
142         } else if (unitSpecification.charAt(start) == '/') {
143             return emit(start + 1, TokenType.DIVISION, 0, 1);
144         } else if (unitSpecification.charAt(start) == '⁄') {
145             return emit(start + 1, TokenType.DIVISION, 0, 1);
146         } else if (unitSpecification.charAt(start) == '(') {
147             return emit(start + 1, TokenType.OPEN, 0, 1);
148         } else if (unitSpecification.charAt(start) == ')') {
149             return emit(start + 1, TokenType.CLOSE, 0, 1);
150         } else if (unitSpecification.charAt(start) == '√') {
151             return emit(start + 1, TokenType.SQUARE_ROOT, 0, 1);
152         }
153 
154         // look for special case "0.5" (used by CCSDS for square roots)
155         if (start < end - 2 &&
156              unitSpecification.charAt(start)     == '0' &&
157              unitSpecification.charAt(start + 1) == '.' &&
158              unitSpecification.charAt(start + 2) == '5') {
159             // ½ written as decimal number
160             return emit(start + 3, TokenType.FRACTION, 1, 2);
161         }
162 
163         // look for special case "1.5" (used by CCSDS for power 3/2)
164         if (start < end - 2 &&
165              unitSpecification.charAt(start)     == '1' &&
166              unitSpecification.charAt(start + 1) == '.' &&
167              unitSpecification.charAt(start + 2) == '5') {
168             // 3/2 written as decimal number
169             return emit(start + 3, TokenType.FRACTION, 3, 2);
170         }
171 
172         // look for unicode fractions
173         if (unitSpecification.charAt(start) == '¼') {
174             return emit(start + 1, TokenType.FRACTION, 1, 4);
175         } else if (unitSpecification.charAt(start) == '½') {
176             return emit(start + 1, TokenType.FRACTION, 1, 2);
177         } else if (unitSpecification.charAt(start) == '¾') {
178             return emit(start + 1, TokenType.FRACTION, 3, 4);
179         } else if (unitSpecification.charAt(start) == '⅐') {
180             return emit(start + 1, TokenType.FRACTION, 1, 7);
181         } else if (unitSpecification.charAt(start) == '⅑') {
182             return emit(start + 1, TokenType.FRACTION, 1, 9);
183         } else if (unitSpecification.charAt(start) == '⅒') {
184             return emit(start + 1, TokenType.FRACTION, 1, 10);
185         } else if (unitSpecification.charAt(start) == '⅓') {
186             return emit(start + 1, TokenType.FRACTION, 1, 3);
187         } else if (unitSpecification.charAt(start) == '⅔') {
188             return emit(start + 1, TokenType.FRACTION, 2, 3);
189         } else if (unitSpecification.charAt(start) == '⅕') {
190             return emit(start + 1, TokenType.FRACTION, 1, 5);
191         } else if (unitSpecification.charAt(start) == '⅖') {
192             return emit(start + 1, TokenType.FRACTION, 2, 5);
193         } else if (unitSpecification.charAt(start) == '⅗') {
194             return emit(start + 1, TokenType.FRACTION, 3, 5);
195         } else if (unitSpecification.charAt(start) == '⅘') {
196             return emit(start + 1, TokenType.FRACTION, 4, 5);
197         } else if (unitSpecification.charAt(start) == '⅙') {
198             return emit(start + 1, TokenType.FRACTION, 1, 6);
199         } else if (unitSpecification.charAt(start) == '⅚') {
200             return emit(start + 1, TokenType.FRACTION, 5, 6);
201         } else if (unitSpecification.charAt(start) == '⅛') {
202             return emit(start + 1, TokenType.FRACTION, 1, 8);
203         } else if (unitSpecification.charAt(start) == '⅜') {
204             return emit(start + 1, TokenType.FRACTION, 3, 8);
205         } else if (unitSpecification.charAt(start) == '⅝') {
206             return emit(start + 1, TokenType.FRACTION, 5, 8);
207         } else if (unitSpecification.charAt(start) == '⅞') {
208             return emit(start + 1, TokenType.FRACTION, 7, 8);
209         }
210 
211         // it must be an integer, either as regular character or as superscript
212         final Converter converter = (convertSuperscript(start) == ' ') ?
213                                     this::noConvert :
214                                     this::convertSuperscript;
215 
216         // manage sign, taking care of counting characters properly
217         final int sign;
218         final int numberStart;
219         if (converter.convert(start) == '+') {
220             sign        = +1;
221             numberStart = start + 1;
222         } else if (converter.convert(start) == '-') {
223             sign        = -1;
224             numberStart = start + 1;
225         } else {
226             sign        = 1;
227             numberStart = start;
228         }
229         current = numberStart;
230 
231         int value = 0;
232         while (current < end) {
233             final int c = converter.convert(current);
234             if (c >= '0' && c <= '9') {
235                 value = value * 10 + (c - '0');
236                 ++current;
237             } else {
238                 break;
239             }
240         }
241         if (current > numberStart) {
242             // there were some digits
243             return emit(current, TokenType.INTEGER, sign * value, 1);
244         }
245 
246         throw generateException();
247 
248     }
249 
250     /** Generate an exception.
251      * @return generated exception
252      */
253     public OrekitException generateException() {
254         return new OrekitException(OrekitMessages.UNKNOWN_UNIT, unitSpecification);
255     }
256 
257     /** Emit one token.
258      * @param after index after token
259      * @param type token type
260      * @param numerator value of the token numerator
261      * @param denominator value of the token denominator
262      * @return new token
263      */
264     private Token emit(final int after, final TokenType type, final int numerator, final int denominator) {
265         final CharSequence subString = unitSpecification.subSequence(start, after);
266         start      = after;
267         nextToLast = last;
268         last       = new Token(subString, type, numerator,
269                                denominator == 1 ? null : new Fraction(numerator, denominator));
270         return last;
271     }
272 
273     /** Convert a superscript character to regular digit or sign character.
274      * @param index character index
275      * @return regular digit or sign character, or ' ' if character is not a superscript
276      */
277     private char convertSuperscript(final int index) {
278         // we can't do fancy stuff with code points
279         // superscripts for 1, 2 and 3 are not in the same range as others
280         return switch (unitSpecification.charAt(index)) {
281             case '⁰'  -> '0';
282             case '¹'  -> '1';
283             case '²'  -> '2';
284             case '³'  -> '3';
285             case '⁴'  -> '4';
286             case '⁵'  -> '5';
287             case '⁶'  -> '6';
288             case '⁷'  -> '7';
289             case '⁸'  -> '8';
290             case '⁹'  -> '9';
291             case '⁺'  -> '+';
292             case '⁻'  -> '-';
293             default  -> ' ';
294         };
295 
296     }
297 
298     /** No-op converter.
299      * @param index character index
300      * @return character at index
301      */
302     private char noConvert(final int index) {
303         return unitSpecification.charAt(index);
304     }
305 
306     /** Character converter. */
307     private interface Converter {
308         /** Convert a character.
309          * @param index character index
310          * @return converted character
311          */
312         char convert(int index);
313     }
314 
315 }