/*
 * This file is part of the Yices SMT Solver.
 * Copyright (C) 2017 SRI International.
 *
 * Yices is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Yices is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Yices.  If not, see <http://www.gnu.org/licenses/>.
 */

Yices Parser

The parser code is in src/yices_parser.c and src/yices_parser.h.
It's an encoding of a recursive descent parser. It relies on
an automaton defined by types and transition tables contained 
in 'src/yices_parse_tables.h'. 

That file is built from 'utils/yices_input_tables.h' and a set of
tables generated by 'utils/table_builder.c'. 


Grammar
-------

 command  ::= 
          ( define-type <symbol> )
        | ( define-type <symbol> <typedef> )
        | ( define <symbol> :: <type> )
        | ( define <symbol> :: <type> <expression> )
        | ( assert <expression> )
        | ( assert <expression> <symbol> )
        | ( exit )
        | ( check )
        | ( check-assuming <assumption-list> )
        | ( push )
        | ( pop )
        | ( reset )
        | ( show-model )
        | ( eval <expression> )
        | ( echo <string> )
        | ( include <string> )
        | ( set-param <symbol> <immediate-value> )
        | ( show-param <symbol> )
        | ( show-params )
        | ( show-stats )
        | ( reset-stats )
        | ( set-timeout <number> )
        | ( show-timeout )
        | ( dump-context )
        | ( help )
        | ( help <string> )
        | ( help <symbol> )
        | ( ef-solve )
        | ( export-to-dimacs <string> )
        | ( show-implicant )
        | ( show-unsat-core )
        | ( show-unsat-assumptions )
	| ( show-reduced-model )
        | EOS

immediate-value :: = 
          true 
        | false
        | rational
        | symbol


typedef ::= 
          <type>
        | ( scalar <symbol-list> )

symbol-list ::=  <symbol> | <symbol> <symbol-list>

assumption-list ::=
        |
        | <symbol> <assumption-list>
        | (not <symbol>) <assumption-list>

type ::= 
          int
        | real
        | bool
        | ( bitvector <rational> )
        | ( tuple <type-list> )
        | ( -> <type> <type-list> )

expression ::=
         true
       | false
       | <rational>
       | <float>
       | <binary bv>
       | <hexa bv>
       | <symbol>
       | ( update <expression> ( <expression-list> ) <expression> )
       | ( forall ( <var-decl-list> ) <expression> )
       | ( exists ( <var-decl-list> ) <expression> )
       | ( lambda ( <var-decl-list> ) <expression> )
       | ( let ( <binding-list> ) <expression> )
       | ( <function> <expression-list> )

function ::=
         <function-keyword>
       | <expression>

function-keyword ::=
        if      ite     =       /=      distinct

        or      and     not     xor     <=>     =>

        mk-tuple        select  tuple-update

        +       -       *       /       ^
        <       <=      >       >=

        abs     div     mod     floor   ceil
        divides         is_int

        mk-bv   bv-add  bv-sub  bv-mul  bv-neg  bv-pow
        bv-not  bv-and  bv-or   bv-xor  
        bv-shift-left0  bv-shift-left1  bv-shift-right0 bv-shift-right1
        bv-ashift-right bv-rotate-left  bv-rotate-right 
        bv-extract      bv-concat       bv-repeat
        bv-sign-extend  bv-zero-extend  bv-ge   bv-gt   bv-le   bv-lt
        bv-sge  bv-sgt  bv-sle  bv-slt  bv-shl  bv-lshr bv-ashr
        bv-div  bv-rem  bv-sdiv bv-srem bv-smod
        bv-redor        bv-redand       bv-comp

        bool-to-bv      bit


Recursive-descent parser
------------------------

We use four deterministic automata to describe parsing procedures.
Edges are labeled by terminals or by a recursive call to a parsing 
procedure. When several transitions are enabled in the same state,
the one listed first has priority (i.e., transitions labeled by 
a terminal symbol have priority over recursive calls).

Example:

c9       )                      DONE
         [parse-expression]     c4

means 

if state=c9 then
  if token=')' then
    state=DONE
  else
    parse-expression();
    state=c4
  endif



 
parse-command:
- initial-state = c0
- transition table
c0       (                      c1
c1       define-type            c2
         echo                   c3
         include                c3
         exit                   c4
         check                  c4
         check-assuming         c16
         assert                 c5
         define                 c6
         show-model             c4
         eval                   c5
         push                   c4
         pop                    c4
         reset                  c4
         show-params            c4
         show-stats             c4
         reset-stats            c4
         show-timeout           c4
         set-param              c11
         show-param             c13
         set-timeout            c14
         help                   c15
         ef-solve               c4
         export-to-dimacs       c3
         show-implicant         c4
         show-unsat-core        c4
         show-unsat-assumptions  c4
	 show-reduced-model     c4
c2       <symbol>               c10
c3       <string>               c4 
c4       )                      DONE
c5       [parse-expression]     c20
c6       <symbol>               c7
c7       ::                     c8
c8       [parse-type]           c9
c9       )                      DONE
         [parse-expression]     c4
c10      )                      DONE
         [parse-typedef]        c4
c11      <symbol>               c12
c12      true                   c4
         false                  c4
         <rational>             c4
         <float>                c4
         <symbol>               c4
c13      <symbol>               c4
c14      <rational>             c4
c15      <symbol>               c4
         <string>               c4
         )                      DONE

list of assumptions after '(check-assuming '

c16      )                      DONE
         <symbol>               c16
         (                      c17
c17      not                    c18
c18      <symbol>               c19
c19      )                      c16

optional label after '(assert [expression] '
c20      )                      DONE
         <symbol>               c4


parse-typedef: share states t2, t3, t4 with parse-type
- initial-state = td0
- transition table
td0      int                   DONE
         real                  DONE
         bool                  DONE
         <symbol>              DONE
         (                     td1
td1      scalar                td2
         bit-vector            t4
         tuple                 t2
         ->                    t3
td2      <symbol>              td3
td3      <symbol>              td3
         )                     DONE
td4      <rational>            td5
td5      )                     DONE


parse-type:
- initial state = t0
- transition table
t0      int                    DONE
        real                   DONE
        bool                   DONE
        <symbol>               DONE
        (                      t1
t1      bitvector              t4
        tuple                  t2
        ->                     t3
t2      [parse-type]           t6
t3      [parse-type]           t2
t4      <rational>             t5
t5      )                      DONE
t6      )                      DONE
        [parse-type]           t6



parse-expression:
- initial state = e0
- transition table
e0      (                      e1
        true                   DONE
        false                  DONE
        <rational>             DONE
        <float>                DONE
        <binary bv>            DONE
        <hexa bv>              DONE
        <symbol>               DONE
e1      <function-keyword>     e2
        update                 e4
        forall                 e10
        exists                 e10
        lambda                 e10
        let                    e15
        [parse-expression]     e2
e2      [parse-expression]     e3
e3      )                      DONE
        [parse-expression]     e3
e4      [parse-expression]     e5
e5      (                      e6
e6      [parse-expression]     e7
e7      )                      e8
        [parse-expression]     e7
e8      [parse-expression]     e9
e9      )                      DONE
e10     (                      e11
e11     <symbol>               e12
e12     ::                     e13
e13     [parse-type]           e14
e14     )                      e8
        <symbol>               e12
e15     (                      e16
e16     (                      e17
e17     <symbol>               e18
e18     [parse-expr]           e19
e19     )                      e20
e20     )                      e8       
        <symbol>               e18


Implementation:

Each recursive procedure can be implemented as

   state = initial state
   while (state != DONE) {
      <transitions>
   }

Each transition with a terminal token is equivalent to 

   if state=<source> and token=<token> then
      token=next-token();
      state=<destination>

For recursive calls, we use a stack of states.
We push the successor state on the stack, and jump
to the initial state of the recursive function.
The current token is not consumed.

Example:

   "e2      [parse-expression]     e3"

is translated to 

   if state=e2 then 
      push e3 on the stack;
      state=e0

Return from a procedure is executed when state=DONE, and is
translated to 
 
   state=state on top of the stack;
   pop stack


Initially, the stack contains a bottom marker, and token is
set by calling next-token() in the lexer, and state is set to
the initial state of whatever needs to be parsed (e.g., c0, e0, 
or t0). Parsing terminates when the bottom marker is popped
from the stack (by a return). To avoid reading past the parsed
sentence, which is bad in interactive mode, we must not call the
next-token() function if state=bottom marker.


Optimizations:

We can merge equivalent states: c4, t5, e9 (to a new state r0)

We can skip intermediate states whose only
transition is a recursive call. For example,

  "e1  <function-symbol>  e2  [parse-expression] e3"

corresponds to executing two transitions:

  if state=e1 and token=<function-symbol> then
     token=next-token();
     state=e2

  if state=e2 then
     push e3 on the stack;
     state=e0

We can remove e2 and the second transition by rewriting 
the first one:

  if state=e1 and token=<function-symbol> then
     token=next-token();
     push e3 on the stack;
     new-state=e0

We can also eliminate the DONE state using the same trick.



Full table
----------

The parser is defined by the following table, which defines a move or
transition for each pair state/token; 'next' means get next token.

The same data is stored in 'utils/yices_input_tables.h' and it's 
used by 'utils/build_tables.c'.

 
state    token              action

c0       (              next; goto c1
         EOS            return
c1       exit           next; goto r0
         check          next; goto r0
         check-assuming next; goto c16
         echo           next; goto c3
         include        next; goto c3
         assert         next; push c20; goto e0
         define-type    next; goto c2
         define         next; goto c6
         show-model     next; goto r0
         eval           next; push r0; goto e0
         push           next; goto r0
         pop            next; goto r0
         reset          next; goto r0
         dump-context   next; goto r0
         set-param      next; goto c11
         show-params    next; goto r0
         show-param     next; goto c13
         show-stats     next; goto r0
         reset-stats    next; goto r0
         show-timeout   next; goto r0
         set-timeout    next; goto c14
         help           next; goto c15
         ef-solve       next; goto r0
         show-implicant next; goto r0
         show-unsat-core        next; goto r0
         show-unsat-assumptions next; goto r0
	 show-reduced-model     next; goto r0
c2       <symbol>       next; goto c10
c3       <string>       next; goto r0
c6       <symbol>       next; goto c7
c7       ::             next; push c9; goto t0
c9       )              return
         _              push r0; goto e0
c10      )              return
         _              push r0; goto td0
c11      <symbol>       next; goto c12
c12      true           next; goto r0
         false          next; goto r0
         <rational>     next; goto r0
         <float>        next; goto r0
         <symbol>       next; goto r0      --> in (set-param .... <symbol>) 
c13      <symbol>       next; goto r0
c14      <rational>     next; goto r0
c15      <symbol>       next; goto r0
         <string>       next; goto r0
         ...            next; goto r0      --> treat all keywords as symbols here
         )              return

c16      )              return
         <symbol>       next; goto c16
         (              next; goto c17
c17      not            next; goto c18
c18      <symbol>       next; goto c19
c19      )              next; goto c16

c20      )              return
         <symbol>       next; goto r0

td0      int            return
         real           return
         bool           return
         <symbol>       return
         (              next; goto td1
td1      scalar         next; goto td2
         bitvector      next; goto t4
         tuple          next; push t6; goto t0
         ->             next; push t6; push t0; goto t0
td2      <symbol>       next; goto td3
td3      )              return
         <symbol>       next;

t0       int            return
         real           return
         bool           return
         <symbol>       return
         (              next; goto t1
t1       bitvector      next; goto t4
         tuple          next; push t6; goto t0
         ->             next; push t6; push t0; goto t0
t4       <rational>     next; goto r0
t6       )              return
         _              push t6; goto t0

e0       true           return
         false          return
         <rational>     return
         <float>        return
         <binary bv>    return
         <hexa bv>      return
         <symbol>       return
         (              next; goto e1
e1       <function-kw>  next; push e3; goto e0
         update         next; push e5; goto e0
         forall         next; goto e10
         exists         next; goto e10
         lambda         next; goto e10
         let            next; goto e15
         _              push e3; push e0; goto e0
e3       )              return
         _              push e3; goto e0
e5       (              next; push e7; goto e0
e7       )              next; push r0; goto e0
         _              push e7; goto e0
e10      (              next; goto e11
e11      <symbol>       next; goto e12
e12      ::             next; push e14; goto t0
e14      )              next; push r0; goto e0
         <symbol>       next; goto e12
e15      (              next; goto e16
e16      (              next; goto e17
e17      <symbol>       next; push e19; goto e0
e19      )              next; goto e20
e20      (              next; goto e17
         )              next; push r0; goto e0

r0       )              return


return means

s := pop-state on top of stack
if s != bottom 
   next; goto s
else
   exit
