1   /* Copyright 2002-2025 CS GROUP
2    * Licensed to CS GROUP (CS) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * CS licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *   http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.orekit.files.ccsds.utils.lexical;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.Reader;
22  import java.util.Collections;
23  import java.util.HashMap;
24  import java.util.Map;
25  
26  import javax.xml.parsers.ParserConfigurationException;
27  import javax.xml.parsers.SAXParser;
28  import javax.xml.parsers.SAXParserFactory;
29  
30  import org.hipparchus.exception.DummyLocalizable;
31  import org.orekit.data.DataSource;
32  import org.orekit.errors.OrekitException;
33  import org.orekit.errors.OrekitMessages;
34  import org.orekit.files.ccsds.utils.FileFormat;
35  import org.xml.sax.Attributes;
36  import org.xml.sax.InputSource;
37  import org.xml.sax.Locator;
38  import org.xml.sax.SAXException;
39  import org.xml.sax.helpers.DefaultHandler;
40  
41  /** Lexical analyzer for XML CCSDS messages.
42   * @author Maxime Journot
43   * @author Luc Maisonobe
44   * @since 11.0
45   */
46  public class XmlLexicalAnalyzer implements LexicalAnalyzer {
47  
48      /** Source providing the data to analyze. */
49      private final DataSource source;
50  
51      /** Simple constructor.
52       * @param source source providing the data to parse
53       */
54      public XmlLexicalAnalyzer(final DataSource source) {
55          this.source = source;
56      }
57  
58      /** {@inheritDoc} */
59      @Override
60      public <T> T accept(final MessageParser<T> messageParser) {
61          try {
62              // Create the handler
63              final DefaultHandler handler = new XMLHandler(messageParser);
64  
65              // Create the XML SAX parser factory
66              final SAXParserFactory factory = SAXParserFactory.newInstance();
67  
68              // Build the parser
69              final SAXParser saxParser = factory.newSAXParser();
70  
71              // Read the xml file
72              messageParser.reset(FileFormat.XML);
73              final DataSource.Opener opener = source.getOpener();
74              if (opener.rawDataIsBinary()) {
75                  try (InputStream is = opener.openStreamOnce()) {
76                      if (is == null) {
77                          throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
78                      }
79                      saxParser.parse(new InputSource(is), handler);
80                  }
81              } else {
82                  try (Reader reader = opener.openReaderOnce()) {
83                      if (reader == null) {
84                          throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
85                      }
86                      saxParser.parse(new InputSource(reader), handler);
87                  }
88              }
89  
90              // Get the content of the file
91              return messageParser.build();
92  
93          } catch (SAXException | ParserConfigurationException | IOException e) {
94              // throw caught exception as an OrekitException
95              throw new OrekitException(e, new DummyLocalizable(e.getMessage()));
96          }
97      }
98  
99      /** Handler for parsing XML file formats.
100      */
101     private class XMLHandler extends DefaultHandler {
102 
103         /** CCSDS Message parser to use. */
104         private final MessageParser<?> messageParser;
105 
106         /** Builder for regular elements. */
107         private final XmlTokenBuilder regularBuilder;
108 
109         /** Builders for special elements. */
110         private Map<String, XmlTokenBuilder> specialElements;
111 
112         /** Locator used to get current line number. */
113         private Locator locator;
114 
115         /** Name of the current element. */
116         private String currentElementName;
117 
118         /** Line number of the current entry. */
119         private int currentLineNumber;
120 
121         /** Content of the current entry. */
122         private String currentContent;
123 
124         /** Attributes of the current element. */
125         private Map<String, String> currentAttributes;
126 
127         /** Last processed token qualified name.
128          * @since 12.0
129          */
130         private String lastQname;
131 
132         /** Last processed token start/end indicator.
133          * @since 12.0
134          */
135         private boolean lastWasStart;
136 
137         /** Simple constructor.
138          * @param messageParser CCSDS Message parser to use
139          */
140         XMLHandler(final MessageParser<?> messageParser) {
141             this.messageParser   = messageParser;
142             this.regularBuilder  = new RegularXmlTokenBuilder();
143             this.specialElements = messageParser.getSpecialXmlElementsBuilders();
144             this.lastQname       = "";
145             this.lastWasStart    = false;
146         }
147 
148         /** Get a builder for the current element.
149          * @param qName XML element ualified name
150          * @return builder for this element
151          */
152         private XmlTokenBuilder getBuilder(final String qName) {
153             final XmlTokenBuilder specialBuilder = specialElements.get(qName);
154             return (specialBuilder != null) ? specialBuilder : regularBuilder;
155         }
156 
157         /** {@inheritDoc} */
158         @Override
159         public void setDocumentLocator(final Locator documentLocator) {
160             this.locator = documentLocator;
161         }
162 
163         /** {@inheritDoc} */
164         @Override
165         public void characters(final char[] ch, final int start, final int length) throws SAXException {
166             // we are only interested in leaf elements between one start and one end tag
167             // when nested elements occur, this method is called with the spurious whitespace
168             // characters (space, tab, end of line) that occur between two successive start
169             // tags, two successive end tags, or one end tag and the following start tag of
170             // next element at same level.
171             // We need to identify the characters we want and the characters we drop.
172 
173             // check if we are after a start tag (thus already dropping the characters
174             // between and end tag and a following start or end tag)
175             if (currentElementName != null) {
176                 // we are after a start tag, we don't know yet if the next tag will be
177                 // another start tag (in which case we ignore the characters) or if
178                 // it is the end tag of a leaf element, so we just store the characters
179                 // and will either use them or drop them when this next tag is seen
180                 currentLineNumber = locator.getLineNumber();
181                 this.currentContent = this.currentContent + new String(ch, start, length);
182             }
183         }
184 
185         /** {@inheritDoc} */
186         @Override
187         public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) {
188 
189             currentElementName = qName;
190             currentLineNumber  = locator.getLineNumber();
191             currentContent     = "";
192 
193             // save attributes in separate map, to avoid overriding during parsing
194             if (attributes.getLength() == 0) {
195                 currentAttributes  = Collections.emptyMap();
196             } else {
197                 currentAttributes = new HashMap<>(attributes.getLength());
198                 for (int i = 0; i < attributes.getLength(); ++i) {
199                     currentAttributes.put(attributes.getQName(i), attributes.getValue(i));
200                 }
201             }
202 
203             for (final ParseToken token : getBuilder(qName).
204                                           buildTokens(true, false, qName, getContent(), currentAttributes,
205                                                       currentLineNumber, source.getName())) {
206                 messageParser.process(token);
207             }
208             lastQname    = qName;
209             lastWasStart = true;
210 
211         }
212 
213         private String getContent() {
214             return currentContent.isEmpty() ? null : currentContent;
215         }
216 
217         /** {@inheritDoc} */
218         @Override
219         public void endElement(final String uri, final String localName, final String qName) {
220 
221             if (currentContent == null || currentContent.isEmpty()) {
222                 // for an end tag without content, we keep the line number of the end tag itself
223                 currentLineNumber = locator.getLineNumber();
224             }
225 
226             // check if we are parsing the end tag of a leaf element
227             final boolean isLeaf = lastWasStart && qName.equals(lastQname);
228 
229             for (final ParseToken token : getBuilder(qName).
230                                           buildTokens(false, isLeaf, qName, getContent(), currentAttributes,
231                                                       currentLineNumber, source.getName())) {
232                 messageParser.process(token);
233             }
234             lastQname    = qName;
235             lastWasStart = true;
236 
237             currentElementName = null;
238             currentAttributes  = null;
239             currentLineNumber  = -1;
240             currentContent     = "";
241 
242         }
243 
244         /** {@inheritDoc} */
245         @Override
246         public InputSource resolveEntity(final String publicId, final String systemId) {
247             // disable external entities
248             return new InputSource();
249         }
250 
251     }
252 
253 }