Hello,
I'm developing a networking library that provides RTMP-like networking on top of the Boost.ASIO library. My problem is that any function that would be a single synchronous routine breaks into many little functions, each of which does part of the job and then starts an async operation with the "next" function as its handler. This makes the code really ugly and hard to follow.
E.g. a handshake that reads N bytes from the peer, responds with those bytes plus the same number of its own bytes, reads N bytes back and checks that they match what was sent. It breaks into:

  void start_handshake_wait();
  void handle_handshake_wait(const boost::system::error_code&, std::size_t, buffer_ptr &);
  void handle_handshake_wait_write_dup(..same as above..);
  void handle_handshake_wait_write_mine(...);
  void handle_handshake_wait_read_mine_check(...);

Every function uses boost::bind, so for some quite simple operation a lot of code is produced. Receiving a chunk of data breaks into:

  void read_header_head(); // header size depends on first byte
  void parse_header_head();
  void parse_header_body(); 
  void parse_chunk_body();

The only idea I came up with is to use coroutine-like tricks. (Sorry for the long listings.)

template<class T>
class Context : public boost::enable_shared_from_this< Context<T> >
{
public:
    typedef boost::shared_ptr<Context<T> > ptr;

private:
    typedef boost::shared_ptr<T> objptr_t;

    typedef void (T::*funcptr_t)
        (const BSYSECode&, std::size_t, Context<T>::ptr, buffer_ptr);

    objptr_t objptr_;
    funcptr_t funcptr_;
public:
    std::map< std::string, buffer_ptr > buffers;
    int state;

    Context(objptr_t op, funcptr_t fp)
      : objptr_(op), funcptr_(fp),
        buffers(), state(0)
    { }

    template<typename AsyncReadStream>
    void async_read(AsyncReadStream &s, buffer_ptr buf)
    {
        boost::asio::async_read(s, *buf, boost::bind(funcptr_, objptr_,
                                BASIOPErr, BASIOPBytes,
                                this->shared_from_this(), buf));
    }

    template<typename AsyncWriteStream>
    void async_write(AsyncWriteStream &s, buffer_ptr buf)
    {
        boost::asio::async_write(s, *buf, boost::bind(funcptr_, objptr_,
                                 BASIOPErr, BASIOPBytes,
                                 this->shared_from_this(), buf));
    }
};

/*
 * Resumable-handler helpers.
 *
 * Together these macros turn one void function into a chain of
 * `if (state == X) { ... } else if (state == Y) { ... }` blocks, so that the
 * function can start an asynchronous operation, return, and continue after
 * the matching step the next time it is called with the same context.
 *
 * Usage rules that follow from the expansion:
 *   - The enclosing function must return void (the macros use bare `return;`).
 *   - `c` must be pointer-like, testable with `!`, and expose `state`,
 *     `async_read(s, b)` and `async_write(s, b)`.
 *   - Start with CTX_PROLOG(c) and finish with CTX_EPILOG.
 *   - __LINE__ is used as the resume marker, so at most ONE CTX_READ or
 *     CTX_WRITE may appear on a given source line.
 *   - Each step opens a new block: local variables declared in one step are
 *     not visible (and not preserved) in the next.
 *
 * Every line of a macro body except the last must end with a backslash; the
 * continuation has to sit on the `#define` line itself, otherwise the macro
 * is defined empty and its body becomes stray code.
 */

/* Bails out on a null context and opens the initial (state == 0) block. */
#define CTX_PROLOG(c)                       \
    if(!(c))                                \
        return;                             \
                                            \
    if((c)->state == 0) {

/* Records the resume point, starts an async read of b from s and returns;
 * the code following the macro runs on the next invocation. */
#define CTX_READ(c, s, b)                   \
        (c)->state = __LINE__;              \
        (c)->async_read(s, b);              \
        return;                             \
    } else if((c)->state == __LINE__) {

/* Records the resume point, starts an async write of b to s and returns;
 * the code following the macro runs on the next invocation. */
#define CTX_WRITE(c, s, b)                  \
        (c)->state = __LINE__;              \
        (c)->async_write(s, b);             \
        return;                             \
    } else if((c)->state == __LINE__) {

/* Closes the block opened by the last step (or by CTX_PROLOG). */
#define CTX_EPILOG                          \
    }

buffer_ptr is actually a shared_ptr to a buffer; it is used to return the buffer to the pool when it goes out of scope. With all this, the handshake becomes somewhat more readable (to me at least, though it is still cryptic; again, sorry for posting so much code):

class Handshaker :
    public boost::enable_shared_from_this<Handshaker>
{
private:
    Connection::ptr conn_;

public:
    typedef boost::shared_ptr<Handshaker> ptr;
    typedef Context<Handshaker> context;

    Handshaker(Connection::ptr conn)
      : conn_(conn)
    { }

    void shake() {
        context::ptr ctx(new context(shared_from_this(), &Handshaker::perform));
        perform(BSYSECode(), 0, ctx, buffer_ptr());
    }

private:
    void perform(const BSYSECode&  err,  std::size_t  sz,
                 context::ptr      ctx,  buffer_ptr   buf)
    {
        if(err)
            return;

        CTX_PROLOG(ctx);
        CTX_READ(ctx, conn_->socket(), conn_->pool()->allocate(HandshakeSize));
        CTX_WRITE(ctx, conn_->socket(), buf);

        ctx->buffers["mybuf"] = conn_->pool()->allocate(HandshakeSize);
        CTX_WRITE(ctx, conn_->socket(), ctx->buffers["mybuf"]);

        CTX_READ(ctx, conn_->socket(), conn_->pool()->allocate(HandshakeSize));

        buffer_ptr mybuf = ctx->buffers["mybuf"];
        buffer_ptr hisbuf = buf;
        if(boost::asio::buffer_size(*mybuf) != boost::asio::buffer_size(*hisbuf))
            return;

        if(memcmp(  boost::asio::buffer_cast<void*>(*mybuf),
                    boost::asio::buffer_cast<void*>(*hisbuf),
                    boost::asio::buffer_size(*mybuf)          ))
            return;

        CTX_EPILOG;
    }
};

Finally, the question: is there any library that provides functionality like this? I would rather rely on such a library, because my own implementation looks really bad: a HUGE bunch of limitations on code written this way, a cryptic look, the use of macros, and any local variable has to be stored in ctx to survive an async call.