;;
;;
;;      Title : Parsing Routines
;;
;;      Function : Contains basic parsing routines for the
;;                 user files
;;
;;      Author : Alan W Black  26th January 1986
;;               Dept of A.I.  University of Edinburgh
;;
;;      Copyright Graeme Ritchie, Alan Black,
;;                Steve Pulman and Graham Russell  1987
;;
;;         ---------------------------------------------
;;         |    Not to be used for military purposes   |
;;         ---------------------------------------------
;;
;;      2.1  30th July
;;         Changed the parsing of category to accept the GDE/GPSG book
;;         format.  Also updated substitute aliases accordingly. 
;;      2.4  26th January 87
;;         Added compiler directive for switching tracing on and off.
;;
;;      Description :
;;         Basically this provides the lexical analyser and some
;;         of the basic parsing routines like categories and features
;;
;;      Parameters :
;;      Returns :
;;      Side Effect :
;;
;;      External references :
;;
;;      System functions used :
;;        Name     type      Args #  type                 Comment
;;
;;      Restrictions :
;;
;;
(include "readatom")

(declare
   (special
      D-RIGHTPAREN D-LEFTPAREN D-SEMICOLON D-HASH 
      D-LEFTBRACE D-RIGHTBRACE D-LEFTANGLE D-RIGHTANGLE
      D-COLON D-LEFTBRACKET D-RIGHTBRACKET D-COMMA
      ;;D-WHITESPACECHARS D-TERMINATINGCHARS
      D-BANG D-SIMPLEQUOTE
      D-CurrentSym D-CATCONTAINSVAR
      D-TRACEFLAGSWITCH
      D-INCOREFLAGSWITCH
      D-NONINFLECTFLAGSWITCH
   )
   (localf
      D-ParseCategory                 
      D-ParseFeatureBundle
      D-ParseBundleList
      D-ParseFeaturePair              
      D-ParseSimpleList               
      D-ParseSimpleSet
      D-ParseSetElements
      D-GetNextSym                    
      D-InterpretCompilerDirective    
      D-SwallowComment                
      D-NextAtom                      
      D-MustHave                      
      D-CheckAtom
      D-FindCurrentLine
      D-SubsAliasCategory             
      D-ExpandAliasCat
      D-ExpandAliasFeat
   )
)

;;
;;  Global values
;;
(setq D-LEFTPAREN '|(|)
(setq D-RIGHTPAREN '|)|)
(setq D-HASH '|#|)
(setq D-SEMICOLON '|;|)
(setq D-COLON '\:)
(setq D-LEFTBRACE '\{)
(setq D-RIGHTBRACE '\})
(setq D-LEFTANGLE '\<)
(setq D-RIGHTANGLE '\>)
(setq D-LEFTBRACKET '\[)
(setq D-RIGHTBRACKET '\])
(setq D-COMMA '\,)
(setq D-SIMPLEQUOTE '\' )

(defun D-ParseCategory ()
;;
;;  parses a GPSG type category
;;  This can be in two main types (they cannot be mixed)
;;  First the simple naive lisp form with aliases
;;  Secondly the alternative trendy form as used in the book and the 
;;  GDE
;;
   (let ( (category nil) )
      (cond
	 ((eq D-CurrentSym D-LEFTPAREN)
            (D-MustHave D-LEFTPAREN)
            (D-while (not (eq D-CurrentSym D-RIGHTPAREN))
	       (setq category
		  (cons
		     (D-ParseFeaturePair)
		     category))
            )
            (D-MustHave D-RIGHTPAREN)
         )
	 ((eq D-CurrentSym D-LEFTBRACKET)  ;; square bracket form
	    (setq category
	       (D-ParseFeatureBundle))
         )     
	 (t    ;; must be alias or variable (ie atomic)
	    (setq category D-CurrentSym)
	    (D-GetNextSym)
	    (cond
	       ((eq D-CurrentSym D-LEFTBRACKET) ;; a feature bundle too
		  (setq category
		     (cons category (D-ParseFeatureBundle)))))
         )
      )
      category  ;; this will be the reverse of its specification 
   )
)

(defun D-ParseFeatureBundle ()
;;
;;   Parses a feature bundle.  That is a set of features in a
;;   bracketed list separated by commas.  This is the format 
;;   categories are specified in, in the book
;;
   (let ( bundle )
      (D-MustHave D-LEFTBRACKET)
      (setq bundle (D-ParseBundleList))
      (D-MustHave D-RIGHTBRACKET)
      bundle
   )
)

(defun D-ParseBundleList ()
;;
;;   Parses a list of features separated by D-COMMA and terminated by
;;   D-RIGHTBRACKET
;;
   (cond
      ((eq D-CurrentSym D-RIGHTBRACKET)
	 nil            ;; end of bundle list
      )
      (t
	 (let (featname featvalue)
	    (setq featname D-CurrentSym) (D-GetNextSym)
	    (D-CheckAtom featname)   ;; check it isn't a bracket
	    (cond
	       ((eq D-CurrentSym D-COMMA)   ;; featname is an alias
		  (D-MustHave D-COMMA)      ;; skip the comma
		  (cons featname (D-ParseBundleList))
               )
	       ((eq D-CurrentSym D-RIGHTBRACKET)
		  (ncons featname)
               )
	       (t         ;; is feature name and value
		  (cond
		     ((D-CatValFeatP featname)
			(setq featvalue (D-ParseCategory))
                     )
		     (t
		        (setq featvalue D-CurrentSym) (D-GetNextSym)
			(D-CheckAtom featvalue)  ;; check it isn't a bracket
                     ))
		  (cond
		     ((eq D-CurrentSym D-COMMA)
			(D-MustHave D-COMMA)
		        (cons
		           (list featname featvalue)
		           (D-ParseBundleList)))
                     ((eq D-CurrentSym D-RIGHTBRACKET)
			(ncons
			   (list featname featvalue))  ;; last feature pair
                     )
		     (t
	                (D-FindCurrentLine)
	                (error (concat "Bracket or comma expected but "
				       D-CurrentSym " found")))
                  )
               )
            )
         )
      )
   )
)

(defun D-ParseFeaturePair ()
;;
;; parses a feature pair or alias/variable
;;
   (let ( (featvalue nil) (featname nil) )
      (cond
         ((eq D-CurrentSym D-LEFTPAREN)  ;; feature
            (D-MustHave D-LEFTPAREN)
	       (D-CheckAtom D-CurrentSym)
               (setq featname D-CurrentSym)
               (D-GetNextSym)
               (cond
                  ((eq D-CurrentSym D-LEFTPAREN);; cat valued feature
                     (setq featvalue (D-ParseCategory))
                  )
                  (t    ;; atomic valued category
		     (D-CheckAtom D-CurrentSym)
                     (setq featvalue D-CurrentSym)
                     (D-GetNextSym)
                  )
               )
            (D-MustHave D-RIGHTPAREN)
            (list featname featvalue)
         )
         (t       ;; alias or variable
     	    (D-CheckAtom D-CurrentSym)
            (setq featname D-CurrentSym)
	    (D-GetNextSym)
	    featname
         )
      )
   )
)

(defun D-ParseSimpleList ()
;;
;;  parses a list of atoms
;;
   (let  ( (thelist nil) )
      (D-MustHave D-LEFTPAREN)
	 (D-while (not (eq D-CurrentSym D-RIGHTPAREN))
	    (D-CheckAtom D-CurrentSym)
	    (setq thelist (cons D-CurrentSym thelist))
	    (D-GetNextSym)
         )
      (D-MustHave D-RIGHTPAREN)
      (nreverse thelist)  ;; better reverse it and return
   )
)

(defun D-ParseSimpleSet ()
;;
;;   parses a set of atomic values separated by commas
;;
   (let ( (thelist nil) )
      (D-MustHave D-LEFTBRACE)
      (setq thelist (D-ParseSetElements))
      (D-MustHave D-RIGHTBRACE)
      thelist
   )
)

(defun D-ParseSetElements ()
;;
;;   parses a list separated by commas and terminated by a right
;;   brace
;;
   (cond
      ((eq D-CurrentSym D-RIGHTBRACE)
	 nil   ;; needed for case when an empty set is given
      )
      (t
	 (let ( (element D-CurrentSym) )
	    (D-CheckAtom element)
	    (D-GetNextSym)
	    (cond
	      ((eq D-CurrentSym D-COMMA)
		 (D-GetNextSym)
		 (cons element (D-ParseSetElements))
              )
	      ((eq D-CurrentSym D-RIGHTBRACE)
		 (ncons element))
              (t
	          (D-FindCurrentLine)
	          (error (concat "Comma or right brace expected but "
				 D-CurrentSym " found"))))))
   )
)

(defun D-GetNextSym ()
;;
;;  reads D-fileid upto next symbol. (over white space) then
;;  reads in an atom (terminated by white space, a RIGHTPAREN
;;  or LEFTPAREN)
;;  if the symbol is # (the compiler directive) then a possible
;;  command (only include and # (get a real hash) are supported)
;;
;;  This atom is set in global D-CurrentSym and returned
;;
;;  This skips comments and deals with compiler directives
;;
   (setq D-CurrentSym (D-NextAtom))
   ;(print D-CurrentSym) (terpri)
   (cond
      ((eq D-CurrentSym 'EOF)
	 (cond    ;; sorry bad coding here D-PopInclude has side effects
	    ((eq (D-PopInclude) 'EOF) 'EOF)     ;; real end of whole file
	    (t 
	       (D-GetNextSym)))    ;; this is going to restrict include depth
      )
      ((equal D-CurrentSym (DK-COMPILERDIRECTIVE))  ;; compiler directive
	 (D-InterpretCompilerDirective)
	 (D-GetNextSym)            ;; read next symbol after 
      )
      ((eq D-CurrentSym D-SEMICOLON)    ;; comment
	 (D-SwallowComment)
	 (D-GetNextSym)
      )
      (t
	 D-CurrentSym)
   )
)

(defun D-InterpretCompilerDirective ()
;;
;;  Interprets a compiler directive 
;;  at present there is
;;    #include <filename>
;;    #trace {on|off}
;;    #incore {on|off}
;;    #noninflects {on|off}
;;
   (let ( (command (D-GetNextSym)) )
      (cond
	 ((eq command (DK-INCLUDE))
	    (D-PushInclude (D-GetNextSym))
         )
	 ((eq command (DK-TRACE))
	    (let ((switch (D-GetNextSym)))
	       (cond
	          ((eq switch (DK-ON))
		     (setq D-TRACEFLAGSWITCH (DK-ON)))
                  ((eq switch (DK-OFF))
		     (setq D-TRACEFLAGSWITCH (DK-OFF))
                  )
		  (t
		     (D-FindCurrentLine)
		     (error (concat " unknown TRACE setting " switch))))))
	 ((eq command (DK-INCORE))
	    (let ((switch (D-GetNextSym)))
	       (cond
	          ((eq switch (DK-ON))
		     (setq D-INCOREFLAGSWITCH (DK-ON)))
                  ((eq switch (DK-OFF))
		     (setq D-INCOREFLAGSWITCH (DK-OFF))
                  )
		  (t
		     (D-FindCurrentLine)
		     (error (concat " unknown INCORE setting " switch))))))
	 ((eq command (DK-NONINFLECTCD))
	    (let ((switch (D-GetNextSym)))
	       (cond
	          ((eq switch (DK-ON))
		     (setq D-NONINFLECTFLAGSWITCH (DK-ON)))
                  ((eq switch (DK-OFF))
		     (setq D-NONINFLECTFLAGSWITCH (DK-OFF))
                  )
		  (t
		     (D-FindCurrentLine)
		     (error (concat " unknown NONINFLECT setting " switch))))))
         (t
	    (D-FindCurrentLine)
	    (error (concat " unknown compiler directive: " command))
         )
       )
   )
)

(defun D-SwallowComment ()
;;
;;   reads upto and the end of the line
;;
   (let (ch)
      (D-MakeEOLNSymbol)
      (D-while (not (or (eq (setq ch (D-ReadAtom D-fileid 'EOF)) D-NEWLINE)
			(eq ch 'EOF)))
         t
      )
      (D-MakeEOLNNotSymbol)
   )
)

(defun D-NextAtom ()
;;
;;   reads the next atom from the port D-fileid
;;   an atom is a lisp atom, left or right paren or #
;;   Cope with case where EOF is found twice.  If this
;;   routine is called twice when EOF is true (my pascal
;;   background comming through) then an error is given
;;
   (let ((symbol (D-ReadAtom D-fileid 'EOF)))
      (cond 
	 ((and (eq symbol 'EOF) (eq D-LASTSYMBOL 'EOF))
            (D-FindCurrentLine)     ;; prints out current line number
            (error "Unexpected End of File"))
         (t
	    (setq D-LASTSYMBOL symbol)
         )
      )
   )
)

(defun D-MustHave (thing)
;;
;;  checks the next symbol to be processed to see it is the
;;  same as thing.  If not it gives an error.  If is skips
;;  to next symbol
;;
   (D-if (not (eq D-CurrentSym thing))
   then
      (D-FindCurrentLine)     ;; prints out current line number
      (error (concat "\"" thing "\" expected but \"" D-CurrentSym "\" found")))
   (D-GetNextSym)
)

(defun D-CheckAtom (sym)
;;
;;  Checks that the given symbol is a sensible atom,
;;  i.e. not parenthesis 
;;
   (cond
      ((memq sym (list D-LEFTPAREN D-RIGHTPAREN D-LEFTBRACKET
		       D-RIGHTBRACKET D-LEFTBRACE D-RIGHTBRACE))
	 (D-FindCurrentLine)
	 (error (concat "atom name expected but \"" sym "\" found"))
      )
      (t t)
   )
)

(defun D-FindCurrentLine ()
;;
;;   finds the current line number of the current position of 
;;   D-fileid.  This is called when a syntax error is detected
;;   This prints out the line number and the current line upto
;;   the actual error
;;
   (cond
      ((eq D-fileid t)  ;; is it ther terminal I am reading ?
	 (D-ReadToEndOfLine 'EOF) ;; zap the rest of the line
         t)
      (t
	 (let ( (pos (filepos D-fileid))  (lineno 1)
		(sweep 0)  (linestart 0) )
	    (filepos D-fileid 0)     ;; position back to begining of file
	    (D-while (lessp sweep pos)
	       (D-if (eq (readc D-fileid) D-NEWLINE)
	       then
		  (setq linestart (filepos D-fileid))
		  (setq lineno (add1 lineno)))
	       (setq sweep (add1 sweep))
	    )
	    (princ "file : ") (princ (car D-INCLUDEFILES))
	    (princ " line number : ") (princ lineno) (terpri)
	    (filepos D-fileid linestart)
	    (setq sweep linestart)
	    (D-while (lessp sweep pos)
	       (princ (readc D-fileid))
	       (setq sweep (add1 sweep))
	    )
	    (mapcar          ;; be polite to the number of open file ids
	       #'(lambda (fid) (close fid))   
	       (cons D-fileid D-INCLUDESTACK))
	    (terpri))
      )
   )
)

(defun D-SubsAliasCategory (category)
;;
;;   This substitutes the value of aliases in a category
;;
   (D-ExpandAliasCat category nil)
)

(defun D-ExpandAliasCat (category expandedlist)
;;
;;   returns the expanded form of the given category.  A check is
;;   made to ensure no cyclic delcartations are made.  If the give
;;   name is a member of the expandedlist then an error is printed
;;   and nil is returned.
;;   An alias always expands to a category
;;
   (cond
      ((memq category expandedlist)
	 (princ (concat "Cycle found in alias expansion of " category))
	 (terpri)
	 nil        ;; expand this to nil
      )
      ((null category)
	 nil
      )
      ((and (atom category) (D-DeclaredVariableP category))
	 (setq D-CATCONTAINSVAR t)
	 category    ;; a variable ??? This could be over general
      )
      ((and (atom category) (null (D-GetAlias category)))
	 (princ (concat "Unknown alias " category)) (terpri)
	 nil
      )
      ((atom category)   ;; category is an alias
	 (D-ExpandAliasCat
	    (copy (D-GetAlias category))
	    (cons category expandedlist))
      )
      (t           ;; a simple category
	 (mapcan
	    #'(lambda (fpair)
	       (D-ExpandAliasFeat fpair expandedlist))
            category)
      )
   )
)

(defun D-ExpandAliasFeat (fpair expandedlist)
;;
;;   Expands a feature pair (or atomic alias) removing aliases
;;   If a cycle is detected an error is printed and nil is
;;   returned.  Not this returns categories which are spliced
;;   rather than simple feature pairs
;;
   (cond
      ((memq fpair expandedlist)
	 (princ (concat "Cycle found in alias expansion of " fpair))
	 (terpri)
	 nil        ;; expand this to nil
      )
      ((and (atom fpair) (null (D-GetAlias fpair)))
	 (princ (concat "Unknown alias " fpair)) (terpri)
	 nil
      )
      ((atom fpair)     ;; it is an alias
	 (D-ExpandAliasCat
	    (copy (D-GetAlias fpair))
	    (cons fpair expandedlist))
      )
      ((D-CatValFeatP (car fpair))     ;; ;this means it is a cat but an alias
	 (ncons
	    (list
	       (car fpair)
	       (D-ExpandAliasCat (cadr fpair) expandedlist)))
      )
      ((D-DeclaredVariableP (cadr fpair))
	 (setq D-CATCONTAINSVAR t) ;; this is a hack used in mkwgram
	 (ncons fpair)
      )
      (t             ;; simple fpair with no aliases
	 (ncons fpair))
   )
)

