1   /* Copyright 2002-2022 CS GROUP
2    * Licensed to CS GROUP (CS) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * CS licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *   http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.orekit.files.ccsds.utils.lexical;
18  
19  import java.io.BufferedReader;
20  import java.io.IOException;
21  import java.io.Reader;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.hipparchus.exception.DummyLocalizable;
26  import org.orekit.data.DataSource;
27  import org.orekit.errors.OrekitException;
28  import org.orekit.errors.OrekitMessages;
29  import org.orekit.files.ccsds.utils.FileFormat;
30  import org.orekit.utils.units.Unit;
31  import org.orekit.utils.units.UnitsCache;
32  
33  /** Lexical analyzer for Key-Value Notation CCSDS messages.
34   * @author Luc Maisonobe
35   * @since 11.0
36   */
37  public class KvnLexicalAnalyzer implements LexicalAnalyzer {
38  
39      /** Regular expression matching blanks at start of line. */
40      private static final String LINE_START         = "^\\p{Blank}*";
41  
42      /** Regular expression matching the special COMMENT key that must be stored in the matcher. */
43      private static final String COMMENT_KEY        = "(COMMENT)\\p{Blank}*";
44  
45      /** Regular expression matching a non-comment key that must be stored in the matcher. */
46      private static final String NON_COMMENT_KEY    = "([A-Z][A-Z_0-9]*)\\p{Blank}*=\\p{Blank}*";
47  
48      /** Regular expression matching a no-value key starting a block that must be stored in the matcher. */
49      private static final String START_KEY          = "([A-Z][A-Z_0-9]*)_START";
50  
51      /** Regular expression matching a no-value key ending a block that must be stored in the matcher. */
52      private static final String STOP_KEY           = "([A-Z][A-Z_0-9]*)_STOP";
53  
54      /** Regular expression matching a value that must be stored in the matcher. */
55      private static final String VALUE              = "(\\p{Graph}.*?)";
56  
57      /** Regular expression matching a value that must be stored in the matcher. */
58      private static final String OPTIONAL_VALUE     = "((?:(\\p{Graph}.*?)?))";
59  
60      /** Operators allowed in units specifications. */
61      private static final String UNITS_OPERATORS    = "-+*×.·/⁄^√⁺⁻";
62  
63      /** Letters allowed in units specifications. */
64      private static final String UNITS_LETTERS      = "A-Za-zµμ"; // beware µ (U+00B5) and μ (U+03BC) look similar but are different
65  
66      /** Digits allowed in units specifications. */
67      private static final String UNITS_DIGITS       = "0-9⁰¹²³⁴⁵⁶⁷⁸⁹";
68  
69      /** Fractions allowed in units specifications. */
70      private static final String UNITS_FRACTIONS    = "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞";
71  
72      /** Symbols allowed in units specifications. */
73      private static final String UNITS_SYMBOLS      = "%°◦′'″\\\"#";
74  
75      /** Parentheses allowed in units specifications. */
76      private static final String UNITS_PARENTHESES  = "()";
77  
78      /** Regular expression matching units that must be stored in the matcher. */
79      private static final String UNITS              = "(?:\\p{Blank}+\\[([" +
80                                                       UNITS_OPERATORS + UNITS_LETTERS + UNITS_DIGITS +
81                                                       UNITS_FRACTIONS + UNITS_SYMBOLS + UNITS_PARENTHESES +
82                                                      "]*)\\])?";
83  
84      /** Regular expression matching blanks at end of line. */
85      private static final String LINE_END           = "\\p{Blank}*$";
86  
87      /** Regular expression matching comment entry. */
88      private static final Pattern COMMENT_ENTRY     = Pattern.compile(LINE_START + COMMENT_KEY + OPTIONAL_VALUE + LINE_END);
89  
90      /** Regular expression matching non-comment entry with optional units. */
91      private static final Pattern NON_COMMENT_ENTRY = Pattern.compile(LINE_START + NON_COMMENT_KEY + VALUE + UNITS + LINE_END);
92  
93      /** Regular expression matching no-value entry starting a block. */
94      private static final Pattern START_ENTRY       = Pattern.compile(LINE_START + START_KEY + LINE_END);
95  
96      /** Regular expression matching no-value entry ending a block. */
97      private static final Pattern STOP_ENTRY        = Pattern.compile(LINE_START + STOP_KEY + LINE_END);
98  
99      /** Source providing the data to analyze. */
100     private final DataSource source;
101 
102     /** Parsed units cache. */
103     private final UnitsCache cache;
104 
105     /** Simple constructor.
106      * @param source source providing the data to parse
107      */
108     public KvnLexicalAnalyzer(final DataSource source) {
109         this.source = source;
110         this.cache  = new UnitsCache();
111     }
112 
113     /** {@inheritDoc} */
114     @Override
115     public <T> T accept(final MessageParser<T> messageParser) {
116 
117         messageParser.reset(FileFormat.KVN);
118 
119         try (Reader         reader = source.getOpener().openReaderOnce();
120              BufferedReader br     = (reader == null) ? null : new BufferedReader(reader)) {
121 
122             if (br == null) {
123                 throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
124             }
125 
126             int lineNumber = 0;
127             for (String line = br.readLine(); line != null; line = br.readLine()) {
128                 ++lineNumber;
129                 if (line.trim().length() == 0) {
130                     continue;
131                 }
132 
133                 final Matcher nonComment = NON_COMMENT_ENTRY.matcher(line);
134                 if (nonComment.matches()) {
135                     // regular key=value line
136                     final Unit units = cache.getUnits(nonComment.groupCount() > 2 ? nonComment.group(3) : null);
137                     messageParser.process(new ParseToken(TokenType.ENTRY,
138                                                          nonComment.group(1), nonComment.group(2),
139                                                          units, lineNumber, source.getName()));
140                 } else {
141                     final Matcher comment = COMMENT_ENTRY.matcher(line);
142                     if (comment.matches()) {
143                         // comment line
144                         messageParser.process(new ParseToken(TokenType.ENTRY,
145                                                              comment.group(1), comment.group(2), null,
146                                                              lineNumber, source.getName()));
147                     } else {
148                         final Matcher start = START_ENTRY.matcher(line);
149                         if (start.matches()) {
150                             // block start
151                             messageParser.process(new ParseToken(TokenType.START,
152                                                                  start.group(1), null, null,
153                                                                  lineNumber, source.getName()));
154                         } else {
155                             final Matcher stop = STOP_ENTRY.matcher(line);
156                             if (stop.matches()) {
157                                 // block end
158                                 messageParser.process(new ParseToken(TokenType.STOP,
159                                                                      stop.group(1), null, null,
160                                                                      lineNumber, source.getName()));
161                             } else {
162                                 // raw data line
163                                 messageParser.process(new ParseToken(TokenType.RAW_LINE,
164                                                                      null, line, null,
165                                                                      lineNumber, source.getName()));
166                             }
167                         }
168                     }
169                 }
170 
171             }
172 
173             return messageParser.build();
174 
175         } catch (IOException ioe) {
176             throw new OrekitException(ioe, new DummyLocalizable(ioe.getMessage()));
177         }
178     }
179 
180 }