KvnLexicalAnalyzer

/* Copyright 2002-2025 CS GROUP
 * Licensed to CS GROUP (CS) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * CS licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.orekit.files.ccsds.utils.lexical;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.hipparchus.exception.DummyLocalizable;
import org.orekit.data.DataSource;
import org.orekit.errors.OrekitException;
import org.orekit.errors.OrekitMessages;
import org.orekit.files.ccsds.utils.FileFormat;
import org.orekit.utils.units.Unit;
import org.orekit.utils.units.UnitsCache;

/** Lexical analyzer for Key-Value Notation CCSDS messages.
 * <p>
 * The analyzer reads its {@link DataSource} line by line and converts each
 * non-blank line into a {@link ParseToken} that is handed over to a
 * {@link MessageParser}. Lines are classified, in this order, as
 * {@code KEY = value [units]} entries, {@code COMMENT} entries,
 * {@code XXX_START} block openers, {@code XXX_STOP} block closers, and
 * finally raw data lines when nothing else matches.
 * </p>
 * @author Luc Maisonobe
 * @since 11.0
 */
public class KvnLexicalAnalyzer implements LexicalAnalyzer {

    /** Regular expression matching blanks at start of line. */
    private static final String LINE_START         = "^\\p{Blank}*";

    /** Regular expression matching the special COMMENT key that must be stored in the matcher. */
    private static final String COMMENT_KEY        = "(COMMENT)\\p{Blank}*";

    /** Regular expression matching a non-comment key that must be stored in the matcher. */
    private static final String NON_COMMENT_KEY    = "([A-Z][A-Z_0-9]*)\\p{Blank}*=\\p{Blank}*";

    /** Regular expression matching a no-value key starting a block that must be stored in the matcher. */
    private static final String START_KEY          = "([A-Z][A-Z_0-9]*)_START";

    /** Regular expression matching a no-value key ending a block that must be stored in the matcher. */
    private static final String STOP_KEY           = "([A-Z][A-Z_0-9]*)_STOP";

    /** Regular expression matching a value that must be stored in the matcher. */
    private static final String OPTIONAL_VALUE     = "((?:(?:\\p{Graph}.*?)?))";

    /** Operators allowed in units specifications. */
    private static final String UNITS_OPERATORS    = "-+*×.·/⁄^√⁺⁻";

    /** Letters allowed in units specifications. */
    private static final String UNITS_LETTERS      = "A-Za-zµμ"; // beware µ (U+00B5) and μ (U+03BC) look similar but are different

    /** Digits allowed in units specifications. */
    private static final String UNITS_DIGITS       = "0-9⁰¹²³⁴⁵⁶⁷⁸⁹";

    /** Fractions allowed in units specifications. */
    private static final String UNITS_FRACTIONS    = "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞";

    /** Symbols allowed in units specifications. */
    private static final String UNITS_SYMBOLS      = "%°◦′'″\\\"#";

    /** Parentheses allowed in units specifications. */
    private static final String UNITS_PARENTHESES  = "()";

    /** Regular expression matching units that must be stored in the matcher.
     * <p>
     * The whole bracketed part is optional; only the text between the
     * brackets is captured.
     * </p>
     */
    private static final String UNITS              = "(?:\\p{Blank}+\\[([" +
                                                     UNITS_OPERATORS + UNITS_LETTERS + UNITS_DIGITS +
                                                     UNITS_FRACTIONS + UNITS_SYMBOLS + UNITS_PARENTHESES +
                                                    "]*)\\])?";

    /** Regular expression matching blanks at end of line. */
    private static final String LINE_END           = "\\p{Blank}*$";

    /** Regular expression matching comment entry. */
    private static final Pattern COMMENT_ENTRY     = Pattern.compile(LINE_START + COMMENT_KEY + OPTIONAL_VALUE + LINE_END);

    /** Regular expression matching non-comment entry with optional units.
     * <p>
     * Note that since 12.0, we allow empty values at lexical analysis level and detect them at parsing level
     * </p>
     */
    private static final Pattern NON_COMMENT_ENTRY = Pattern.compile(LINE_START + NON_COMMENT_KEY + OPTIONAL_VALUE + UNITS + LINE_END);

    /** Regular expression matching no-value entry starting a block. */
    private static final Pattern START_ENTRY       = Pattern.compile(LINE_START + START_KEY + LINE_END);

    /** Regular expression matching no-value entry ending a block. */
    private static final Pattern STOP_ENTRY        = Pattern.compile(LINE_START + STOP_KEY + LINE_END);

    /** Source providing the data to analyze. */
    private final DataSource source;

    /** Parsed units cache. */
    private final UnitsCache cache;

    /** Simple constructor.
     * @param source source providing the data to parse
     */
    public KvnLexicalAnalyzer(final DataSource source) {
        this.source = source;
        this.cache  = new UnitsCache();
    }

    /** {@inheritDoc}
     * <p>
     * The parser is first reset for the KVN format, then fed one token per
     * non-blank line, and finally asked to build its result. Blank lines are
     * skipped but still counted, so the line numbers stored in tokens refer
     * to physical lines of the source.
     * </p>
     * <p>
     * An {@link OrekitException} is thrown if the source cannot provide a
     * reader, or if an {@link IOException} occurs while reading (the I/O
     * exception is kept as the cause).
     * </p>
     */
    @Override
    public <T> T accept(final MessageParser<T> messageParser) {

        messageParser.reset(FileFormat.KVN);

        try (Reader         reader = source.getOpener().openReaderOnce();
             BufferedReader br     = (reader == null) ? null : new BufferedReader(reader)) {

            if (br == null) {
                throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
            }

            int lineNumber = 0;
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                // count every physical line, including the blank ones skipped below
                ++lineNumber;
                if (!line.trim().isEmpty()) {
                    messageParser.process(tokenize(line, lineNumber));
                }
            }

            return messageParser.build();

        } catch (IOException ioe) {
            throw new OrekitException(ioe, new DummyLocalizable(ioe.getMessage()));
        }
    }

    /** Convert one non-blank line into a token.
     * <p>
     * Patterns are tried in a fixed order: regular entry, comment entry,
     * block start, block stop. A line matching none of them is returned
     * unchanged as a raw data line.
     * </p>
     * @param line line to analyze
     * @param lineNumber number of the line in the source
     * @return token corresponding to the line
     */
    private ParseToken tokenize(final String line, final int lineNumber) {

        final Matcher nonComment = NON_COMMENT_ENTRY.matcher(line);
        if (nonComment.matches()) {
            // regular key=value line
            // group 3 is the optional units group, it is null when the line has no units
            final Unit units = cache.getUnits(nonComment.group(3));
            return new ParseToken(TokenType.ENTRY,
                                  nonComment.group(1), nonComment.group(2),
                                  units, lineNumber, source.getName());
        }

        final Matcher comment = COMMENT_ENTRY.matcher(line);
        if (comment.matches()) {
            // comment line
            return new ParseToken(TokenType.ENTRY,
                                  comment.group(1), comment.group(2), null,
                                  lineNumber, source.getName());
        }

        final Matcher start = START_ENTRY.matcher(line);
        if (start.matches()) {
            // block start, the token name is the key without its _START suffix
            return new ParseToken(TokenType.START,
                                  start.group(1), null, null,
                                  lineNumber, source.getName());
        }

        final Matcher stop = STOP_ENTRY.matcher(line);
        if (stop.matches()) {
            // block end, the token name is the key without its _STOP suffix
            return new ParseToken(TokenType.STOP,
                                  stop.group(1), null, null,
                                  lineNumber, source.getName());
        }

        // raw data line
        return new ParseToken(TokenType.RAW_LINE,
                              null, line, null,
                              lineNumber, source.getName());

    }

}