1 /* Copyright 2002-2022 CS GROUP
2 * Licensed to CS GROUP (CS) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * CS licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.orekit.files.ccsds.utils.lexical;
18
19 import java.io.BufferedReader;
20 import java.io.IOException;
21 import java.io.Reader;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import org.hipparchus.exception.DummyLocalizable;
26 import org.orekit.data.DataSource;
27 import org.orekit.errors.OrekitException;
28 import org.orekit.errors.OrekitMessages;
29 import org.orekit.files.ccsds.utils.FileFormat;
30 import org.orekit.utils.units.Unit;
31 import org.orekit.utils.units.UnitsCache;
32
33 /** Lexical analyzer for Key-Value Notation CCSDS messages.
34 * @author Luc Maisonobe
35 * @since 11.0
36 */
37 public class KvnLexicalAnalyzer implements LexicalAnalyzer {
38
39 /** Regular expression matching blanks at start of line. */
40 private static final String LINE_START = "^\\p{Blank}*";
41
42 /** Regular expression matching the special COMMENT key that must be stored in the matcher. */
43 private static final String COMMENT_KEY = "(COMMENT)\\p{Blank}*";
44
45 /** Regular expression matching a non-comment key that must be stored in the matcher. */
46 private static final String NON_COMMENT_KEY = "([A-Z][A-Z_0-9]*)\\p{Blank}*=\\p{Blank}*";
47
48 /** Regular expression matching a no-value key starting a block that must be stored in the matcher. */
49 private static final String START_KEY = "([A-Z][A-Z_0-9]*)_START";
50
51 /** Regular expression matching a no-value key ending a block that must be stored in the matcher. */
52 private static final String STOP_KEY = "([A-Z][A-Z_0-9]*)_STOP";
53
54 /** Regular expression matching a value that must be stored in the matcher. */
55 private static final String VALUE = "(\\p{Graph}.*?)";
56
57 /** Regular expression matching a value that must be stored in the matcher. */
58 private static final String OPTIONAL_VALUE = "((?:(\\p{Graph}.*?)?))";
59
60 /** Operators allowed in units specifications. */
61 private static final String UNITS_OPERATORS = "-+*×.·/⁄^√⁺⁻";
62
63 /** Letters allowed in units specifications. */
64 private static final String UNITS_LETTERS = "A-Za-zµμ"; // beware µ (U+00B5) and μ (U+03BC) look similar but are different
65
66 /** Digits allowed in units specifications. */
67 private static final String UNITS_DIGITS = "0-9⁰¹²³⁴⁵⁶⁷⁸⁹";
68
69 /** Fractions allowed in units specifications. */
70 private static final String UNITS_FRACTIONS = "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞";
71
72 /** Symbols allowed in units specifications. */
73 private static final String UNITS_SYMBOLS = "%°◦′'″\\\"#";
74
75 /** Parentheses allowed in units specifications. */
76 private static final String UNITS_PARENTHESES = "()";
77
78 /** Regular expression matching units that must be stored in the matcher. */
79 private static final String UNITS = "(?:\\p{Blank}+\\[([" +
80 UNITS_OPERATORS + UNITS_LETTERS + UNITS_DIGITS +
81 UNITS_FRACTIONS + UNITS_SYMBOLS + UNITS_PARENTHESES +
82 "]*)\\])?";
83
84 /** Regular expression matching blanks at end of line. */
85 private static final String LINE_END = "\\p{Blank}*$";
86
87 /** Regular expression matching comment entry. */
88 private static final Pattern COMMENT_ENTRY = Pattern.compile(LINE_START + COMMENT_KEY + OPTIONAL_VALUE + LINE_END);
89
90 /** Regular expression matching non-comment entry with optional units. */
91 private static final Pattern NON_COMMENT_ENTRY = Pattern.compile(LINE_START + NON_COMMENT_KEY + VALUE + UNITS + LINE_END);
92
93 /** Regular expression matching no-value entry starting a block. */
94 private static final Pattern START_ENTRY = Pattern.compile(LINE_START + START_KEY + LINE_END);
95
96 /** Regular expression matching no-value entry ending a block. */
97 private static final Pattern STOP_ENTRY = Pattern.compile(LINE_START + STOP_KEY + LINE_END);
98
99 /** Source providing the data to analyze. */
100 private final DataSource source;
101
102 /** Parsed units cache. */
103 private final UnitsCache cache;
104
105 /** Simple constructor.
106 * @param source source providing the data to parse
107 */
108 public KvnLexicalAnalyzer(final DataSource source) {
109 this.source = source;
110 this.cache = new UnitsCache();
111 }
112
113 /** {@inheritDoc} */
114 @Override
115 public <T> T accept(final MessageParser<T> messageParser) {
116
117 messageParser.reset(FileFormat.KVN);
118
119 try (Reader reader = source.getOpener().openReaderOnce();
120 BufferedReader br = (reader == null) ? null : new BufferedReader(reader)) {
121
122 if (br == null) {
123 throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
124 }
125
126 int lineNumber = 0;
127 for (String line = br.readLine(); line != null; line = br.readLine()) {
128 ++lineNumber;
129 if (line.trim().length() == 0) {
130 continue;
131 }
132
133 final Matcher nonComment = NON_COMMENT_ENTRY.matcher(line);
134 if (nonComment.matches()) {
135 // regular key=value line
136 final Unit units = cache.getUnits(nonComment.groupCount() > 2 ? nonComment.group(3) : null);
137 messageParser.process(new ParseToken(TokenType.ENTRY,
138 nonComment.group(1), nonComment.group(2),
139 units, lineNumber, source.getName()));
140 } else {
141 final Matcher comment = COMMENT_ENTRY.matcher(line);
142 if (comment.matches()) {
143 // comment line
144 messageParser.process(new ParseToken(TokenType.ENTRY,
145 comment.group(1), comment.group(2), null,
146 lineNumber, source.getName()));
147 } else {
148 final Matcher start = START_ENTRY.matcher(line);
149 if (start.matches()) {
150 // block start
151 messageParser.process(new ParseToken(TokenType.START,
152 start.group(1), null, null,
153 lineNumber, source.getName()));
154 } else {
155 final Matcher stop = STOP_ENTRY.matcher(line);
156 if (stop.matches()) {
157 // block end
158 messageParser.process(new ParseToken(TokenType.STOP,
159 stop.group(1), null, null,
160 lineNumber, source.getName()));
161 } else {
162 // raw data line
163 messageParser.process(new ParseToken(TokenType.RAW_LINE,
164 null, line, null,
165 lineNumber, source.getName()));
166 }
167 }
168 }
169 }
170
171 }
172
173 return messageParser.build();
174
175 } catch (IOException ioe) {
176 throw new OrekitException(ioe, new DummyLocalizable(ioe.getMessage()));
177 }
178 }
179
180 }