1   /* Copyright 2002-2025 CS GROUP
2    * Licensed to CS GROUP (CS) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * CS licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *   http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.orekit.files.ccsds.utils.lexical;
18  
19  import java.io.BufferedReader;
20  import java.io.IOException;
21  import java.io.Reader;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.hipparchus.exception.DummyLocalizable;
26  import org.orekit.data.DataSource;
27  import org.orekit.errors.OrekitException;
28  import org.orekit.errors.OrekitMessages;
29  import org.orekit.files.ccsds.utils.FileFormat;
30  import org.orekit.utils.units.Unit;
31  import org.orekit.utils.units.UnitsCache;
32  
33  /** Lexical analyzer for Key-Value Notation CCSDS messages.
34   * @author Luc Maisonobe
35   * @since 11.0
36   */
37  public class KvnLexicalAnalyzer implements LexicalAnalyzer {
38  
39      /** Regular expression matching blanks at start of line. */
40      private static final String LINE_START         = "^\\p{Blank}*";
41  
42      /** Regular expression matching the special COMMENT key that must be stored in the matcher. */
43      private static final String COMMENT_KEY        = "(COMMENT)\\p{Blank}*";
44  
45      /** Regular expression matching a non-comment key that must be stored in the matcher. */
46      private static final String NON_COMMENT_KEY    = "([A-Z][A-Z_0-9]*)\\p{Blank}*=\\p{Blank}*";
47  
48      /** Regular expression matching a no-value key starting a block that must be stored in the matcher. */
49      private static final String START_KEY          = "([A-Z][A-Z_0-9]*)_START";
50  
51      /** Regular expression matching a no-value key ending a block that must be stored in the matcher. */
52      private static final String STOP_KEY           = "([A-Z][A-Z_0-9]*)_STOP";
53  
54      /** Regular expression matching a value that must be stored in the matcher. */
55      private static final String OPTIONAL_VALUE     = "((?:(?:\\p{Graph}.*?)?))";
56  
57      /** Operators allowed in units specifications. */
58      private static final String UNITS_OPERATORS    = "-+*×.·/⁄^√⁺⁻";
59  
60      /** Letters allowed in units specifications. */
61      private static final String UNITS_LETTERS      = "A-Za-zµμ"; // beware µ (U+00B5) and μ (U+03BC) look similar but are different
62  
63      /** Digits allowed in units specifications. */
64      private static final String UNITS_DIGITS       = "0-9⁰¹²³⁴⁵⁶⁷⁸⁹";
65  
66      /** Fractions allowed in units specifications. */
67      private static final String UNITS_FRACTIONS    = "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞";
68  
69      /** Symbols allowed in units specifications. */
70      private static final String UNITS_SYMBOLS      = "%°◦′'″\\\"#";
71  
72      /** Parentheses allowed in units specifications. */
73      private static final String UNITS_PARENTHESES  = "()";
74  
75      /** Regular expression matching units that must be stored in the matcher. */
76      private static final String UNITS              = "(?:\\p{Blank}+\\[([" +
77                                                       UNITS_OPERATORS + UNITS_LETTERS + UNITS_DIGITS +
78                                                       UNITS_FRACTIONS + UNITS_SYMBOLS + UNITS_PARENTHESES +
79                                                      "]*)\\])?";
80  
81      /** Regular expression matching blanks at end of line. */
82      private static final String LINE_END           = "\\p{Blank}*$";
83  
84      /** Regular expression matching comment entry. */
85      private static final Pattern COMMENT_ENTRY     = Pattern.compile(LINE_START + COMMENT_KEY + OPTIONAL_VALUE + LINE_END);
86  
87      /** Regular expression matching non-comment entry with optional units.
88       * <p>
89       * Note than since 12.0, we allow empty values at lexical analysis level and detect them at parsing level
90       * </p>
91       */
92      private static final Pattern NON_COMMENT_ENTRY = Pattern.compile(LINE_START + NON_COMMENT_KEY + OPTIONAL_VALUE + UNITS + LINE_END);
93  
94      /** Regular expression matching no-value entry starting a block. */
95      private static final Pattern START_ENTRY       = Pattern.compile(LINE_START + START_KEY + LINE_END);
96  
97      /** Regular expression matching no-value entry ending a block. */
98      private static final Pattern STOP_ENTRY        = Pattern.compile(LINE_START + STOP_KEY + LINE_END);
99  
100     /** Source providing the data to analyze. */
101     private final DataSource source;
102 
103     /** Parsed units cache. */
104     private final UnitsCache cache;
105 
106     /** Simple constructor.
107      * @param source source providing the data to parse
108      */
109     public KvnLexicalAnalyzer(final DataSource source) {
110         this.source = source;
111         this.cache  = new UnitsCache();
112     }
113 
114     /** {@inheritDoc} */
115     @Override
116     public <T> T accept(final MessageParser<T> messageParser) {
117 
118         messageParser.reset(FileFormat.KVN);
119 
120         try (Reader         reader = source.getOpener().openReaderOnce();
121              BufferedReader br     = (reader == null) ? null : new BufferedReader(reader)) {
122 
123             if (br == null) {
124                 throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
125             }
126 
127             int lineNumber = 0;
128             for (String line = br.readLine(); line != null; line = br.readLine()) {
129                 ++lineNumber;
130                 if (line.trim().length() == 0) {
131                     continue;
132                 }
133 
134                 final Matcher nonComment = NON_COMMENT_ENTRY.matcher(line);
135                 if (nonComment.matches()) {
136                     // regular key=value line
137                     final Unit units = cache.getUnits(nonComment.groupCount() > 2 ? nonComment.group(3) : null);
138                     messageParser.process(new ParseToken(TokenType.ENTRY,
139                                                          nonComment.group(1), nonComment.group(2),
140                                                          units, lineNumber, source.getName()));
141                 } else {
142                     final Matcher comment = COMMENT_ENTRY.matcher(line);
143                     if (comment.matches()) {
144                         // comment line
145                         messageParser.process(new ParseToken(TokenType.ENTRY,
146                                                              comment.group(1), comment.group(2), null,
147                                                              lineNumber, source.getName()));
148                     } else {
149                         final Matcher start = START_ENTRY.matcher(line);
150                         if (start.matches()) {
151                             // block start
152                             messageParser.process(new ParseToken(TokenType.START,
153                                                                  start.group(1), null, null,
154                                                                  lineNumber, source.getName()));
155                         } else {
156                             final Matcher stop = STOP_ENTRY.matcher(line);
157                             if (stop.matches()) {
158                                 // block end
159                                 messageParser.process(new ParseToken(TokenType.STOP,
160                                                                      stop.group(1), null, null,
161                                                                      lineNumber, source.getName()));
162                             } else {
163                                 // raw data line
164                                 messageParser.process(new ParseToken(TokenType.RAW_LINE,
165                                                                      null, line, null,
166                                                                      lineNumber, source.getName()));
167                             }
168                         }
169                     }
170                 }
171 
172             }
173 
174             return messageParser.build();
175 
176         } catch (IOException ioe) {
177             throw new OrekitException(ioe, new DummyLocalizable(ioe.getMessage()));
178         }
179     }
180 
181 }