Boost logo

Boost :

From: Phil Endecott (spam_from_boost_dev_at_[hidden])
Date: 2008-08-18 11:11:00


Dear All,

Here's another FSM example which I hope is a bit less trivial than the
last one. It's for the escape sequence handling in a terminal
emulator. If you're curious you can see the real code here:

   http://svn.anyterm.org/anyterm/trunk/common/Terminal.cc

The terminal emulator has to understand:

- Normal characters like 'A' that write themselves on the screen at the
current cursor position.
- Normal control characters like tab, carriage return etc. A peculiar
feature is that these characters can occur within escape sequences,
where they have their normal effect.
- Escape sequences, which consist of ESC followed by one character
(normally a letter) and have effects like clearing the screen.
- "CSI sequences", which perform more complex operations that may
require parameters (e.g. scroll lines N to M up). They have the form
ESC [ n1 ; n2 ; n3 ; ... X where the final letter X determines the
operation and the number of numeric parameters is variable.

So the state machine needs to track whether it is in an escape
sequence, in a CSI sequence, or in "normal operation". The transitions
are as follows:

Current state | Condition | Next state
------------------------------------------------
Normal | ESC | ESC
ESC | end of sequence | Normal
ESC | [ | CSI
ESC | char 26 | Normal
CSI | end of sequence | Normal
CSI | char 26 | Normal

Character 26 explicitly abandons any escape sequence that is in progress.

A terminal emulator should behave sensibly when it receives invalid
input. One reasonable behaviour on receiving an invalid character is
to ignore the preceding incomplete input and to treat the new character
as if it had been received in normal mode; or, we could ignore the
preceding incomplete input AND the new character and switch to normal
mode. The following transitions do something along those lines:

Normal | char 26 | Normal
ESC | ESC | ESC
ESC | invalid sequence | Normal
CSI | ESC | ESC
CSI | invalid sequence | Normal

One thing that we don't want to do is to throw an exception when we get
invalid input.

My (stripped-down) ad-hoc implementation of this follows:

class Terminal {

   screen_t screen; // basically a 2D array, with some features
                     // for efficient scrolling
   int cursor_col, cursor_row;
   int params[MAX_PARAMS];
   int nparams;
   enum state_t { normal, esc, csi };
   state_t state;

   Terminal(): cursor_col(0), cursor_row(0), state(normal) {}

   void carriage_return() {
     cursor_col = 0;
   }

   void cursor_line_up() {
     cursor_row--;
   }

   void csi_CUU() {
     cursor_row -= params[0];
   }
   // Lots more functions like those omitted.

   void write_normal_char(char c) {
     if (cursor_col>=cols) {
       cursor_col = 0;
       cursor_line_down();
     }
     screen(cursor_row,cursor_col) = c;
     cursor_col++;
   }

   void process_char(char c) {
     // Control chars like tab, newline etc. work even inside
     // escape sequences:
     if (c <= 31) {
       switch (c) {
         ....
         case 13: carriage_return(); break;
         ....
         case 26: // This code abandons any escape sequence that is in progress
                  state = normal; break;
         case 27: // Escape
                  state = esc; break;
         ....
       }

     } else {
       switch (state) {
         case normal:
           write_normal_char(c);
           break;

         case esc:
           switch (c) {
             ....
             case 'M': cursor_line_up(); state=normal; break;
             ....
             case '[': state=csi; nparams=0; break;
             default: state=normal; break;
           }
           break;

         case csi:
           if (isdigit(c)) {
             if (nparams==0) {
               nparams=1;
               params[0]=0;
             }
             params[nparams-1] = params[nparams-1]*10 + (c-'0');

           } else if (c==';') {
             if (nparams>=MAX_PARAMS) {
               return;
             }
             nparams++;
             params[nparams-1] = 0;

           } else {
             switch (c) {
               ....
               case 'A': csi_CUU(); break;
               ....
             }
             state = normal;
           }
       }
     }
   }

};

OK, so let's try to implement that with the proposed library. As
before, I haven't tried to compile any of this code:

struct Normal;
struct Esc;
struct CSI;

typedef mpl::vector<Normal,Esc,CSI>::type StateList;

struct Common {
   screen_t screen; // BTW, how can I access this from outside?
   int cursor_col, cursor_row;

   void carriage_return() {
     cursor_col = 0;
   }

   void handle_control_chars(char c) {
     switch (c) {
       ....
       case 13: carriage_return(); break;
       ....
       case 26: // This code abandons any escape sequence that is in progress
                this->switch_to<Normal>(); break;
       case 27: // Escape
                this->switch_to<Esc>(); break;
       ....
     }
   }
};

template <typename StateT>
struct TermState: fsm::state<StateT,StateList>, virtual Common {
   // I think I need some using declarations here. Or do they have to
be in each of
   // the derived classes?
}

struct Normal: TermState<Normal> {
   void write_normal_char(char c) {
     if (cursor_col>=cols) {
       cursor_col = 0;
       cursor_line_down();
     }
     screen(cursor_row,cursor_col) = c;
     cursor_col++;
   }

   void on_process(const char& c) {
     if (c<=31) {
       handle_control_char(c);
     } else {
       write_normal_char(c);
     }
   }
};

struct Esc: TermState<Esc> {
   void cursor_line_up() {
     cursor_row--;
   }

   void on_process(const char& c) {
     if (c<=31) {
       handle_control_char(c);
     } else {
       switch (c) {
         ....
         case 'M': cursor_line_up(); switch_to<Normal>(); break;
         ....
         case '[': switch_to<CSI>(); break;
         default: switch_to<Normal>(); break;
       }
     }
   }
};

struct CSI: TermState<CSI> {
   int params[MAX_PARAMS];
   int nparams;

   void on_enter() {
     nparams=0; // doing this here is nice
   }

   void csi_CUU() {
     cursor_row -= params[0];
   }

   void on_process(const char& c) {
     if (c<=31) {
       handle_control_char(c);
     } else {
       if (isdigit(c)) {
         if (nparams==0) {
           nparams=1;
           params[0]=0;
         }
         params[nparams-1] = params[nparams-1]*10 + (c-'0');

       } else if (c==';') {
         if (nparams>=MAX_PARAMS) {
           return;
         }
         nparams++;
         params[nparams-1] = 0;

       } else {
         switch (c) {
           ....
           case 'A': csi_CUU(); break;
           ....
         }
         switch_to<Normal>();
       }
     }
   }
};

My view is that the ad-hoc version of the code is sufficiently readable
and efficient for my needs, and that the Boost.FSM version does not
improve on it.

What do other people think? Am I choosing bad examples? If so, please
can someone post an example that does demonstrate the library's advantages?

(Maybe off-topic, but if any Spirit or Expressive experts would like to
show how their libraries would deal with this problem I'd be interested
to see that.)

Regards,

Phil.


Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk