IPC error

Valery V. Sedletski _valerius at mail.ru
Tue Jun 8 03:01:57 CEST 2010


Hi all

I am writing a program (an API server that implements the API and support for the LX-format executables found in the OS/2 operating system). It parses an executable, applies 
fixups and starts it. At the moment it loads a simple executable which calls a single API function that prints a string on the screen. It generally works, but I have run into strange IPC problems. 
There is a function in the server, trampoline(), which starts a previously prepared executable in a separate task (it is a kind of wrapper around that executable). After 
the executable terminates, trampoline() must synchronize with the server by passing it a message about the termination. Then the API server can terminate itself.

For synchronization, I tried two ways: 

1)

Sending a message to the main server thread through the DICE interfaces. As I understand it, the problem may be that the client and server stubs are linked into a single program. The 
code of trampoline() looks like this: 

/*
 * Run the prepared executable and then notify the OS/2 server via DICE.
 *
 * Loads esp_data into the stack pointer, calls the code at eip_data, and
 * once that call returns looks up the thread registered as "os2server"
 * and invokes os2server_wakeup_call() on it. If the DICE environment
 * reports an exception, its major and minor codes are printed.
 *
 * esp_data  value loaded into esp before the call
 * eip_data  address that is called
 *
 * NOTE(review): ebp_data is used as an asm operand but is not declared in
 * this function; presumably it is defined at file scope -- confirm. No
 * instruction in the template references %[ebp_data].
 * NOTE(review): result, w0, w1 and err are not used in this variant.
 */
static void
trampoline(unsigned long esp_data, unsigned long eip_data)
{
  CORBA_Environment env = dice_default_environment;
  l4_threadid_t     dest;
  l4_msgdope_t  result;
  l4_umword_t   w0, w1;
  int err;

  /*
   * The two leading pops take two words off the current stack before it
   * is replaced; why exactly two are discarded is not visible here --
   * TODO confirm against the caller/frame layout.
   * NOTE(review): the statement has no outputs and no clobber list even
   * though the template writes eax, ecx, edx and esp, and it is not marked
   * volatile; the compiler is not told about any of these changes.
   */
  asm(
      "pop %%eax\n"
      "pop %%eax\n"
      "movl %[esp_data], %%eax \n"    /* Put old esp in eax */
      "movl %[eip_data], %%ecx \n"

    "movl %%ebp, %%edx \n" /* Copy ebp to edx. Base pointer for this functions local variables.*/
    "movl %%eax, %%esp \n" /* Copy eax to esp. Stack pointer*/
    /* We have changed the stack so it now points to out LX image.*/
    "call *%%ecx \n" /* Call our main() */
      :
      :[esp_data] "m" (esp_data), /* esp+ data_mmap+8+*/
       [ebp_data] "m" (ebp_data), /* esp+ data_mmap+8+*/
       [eip_data] "m" (eip_data));

  LOG("task end");
  // query OS/2 server task id
  // NOTE(review): the return value of names_query_name() is ignored, so a
  // failed lookup would leave dest uninitialized -- confirm and handle.
  names_query_name("os2server", &dest) ;
  LOG("os2server uid=%x.%x", dest.id.task, dest.id.lthread);
  // send a signal about termination to OS/2 server
  // The default environment is overridden with the UTCB pointer returned by
  // l4_utcb_get() before the call is made.
  env.utcb    = l4_utcb_get();
  os2server_wakeup_call (&dest, &env);
  // Report the DICE exception codes if the call failed.
  if (DICE_HAS_EXCEPTION(&env))
      printf("Error: %d.%d", 
  	  DICE_EXCEPTION_MAJOR(&env),
  	  DICE_EXCEPTION_MINOR(&env));
}

Without setting env.utcb to the UTCB pointer, it traps because of a null pointer dereference in the os2server_wakeup_call() function generated by DICE. This is the 
disassembly:

    _os2server_wakeup_wakeup_wakeup_msg_buffer_t *_dice_msg_buffer = (_os2server_wakeup_wakeup_wakeup_msg_buffer_t*)_dice_corba_env->utcb->values;
 1806a19:	8b 45 0c             	mov    0xc(%ebp),%eax
 1806a1c:	8b 40 10             	mov    0x10(%eax),%eax
 1806a1f:	89 45 dc             	mov    %eax,0xffffffdc(%ebp)
    l4_umword_t _exception __attribute__ ((unused));
    l4_umword_t _dummy __attribute__ ((unused));
    l4_msgtag_t tag_dummy __attribute__ ((unused)) = l4_msgtag(0,0,0,0);
    l4_msgdope_t _dice_result = { raw: 0 };
    _dice_msg_buffer->wakeup_wakeup_in._dice_opcode = _OS2SERVER_WAKEUP_WAKEUP_OPCODE;
 1806a22:	c7 00 01 00 10 00    	movl   $0x100001,(%eax)

-- it traps at the last instruction, when assigning the opcode. I thought this was because the UTCB pointer in env points to the UTCB of the OS/2 server's main thread, while trampoline() 
executes in a different task, so its UTCB is a different one. When I set the UTCB pointer, it stopped trapping, but I got a DICE exception with MAJOR code 2 and MINOR code 2 (IPC error).

On the server side, the function os2server_wakeup_component() should be called; it is intended to reset the semaphore on which the main thread waits for trampoline() 
to terminate:

/*
 * Server-side handler for the os2server "wakeup" operation.
 *
 * Logs that it was called and performs an "up" on the semaphore `sem`
 * (declared outside this block). Neither argument is used.
 *
 * _dice_corba_obj  object handle passed in by the DICE server loop (unused)
 * _dice_corba_env  server environment passed in by DICE (unused)
 */
void DICE_CV
os2server_wakeup_component (CORBA_Object _dice_corba_obj,
                                    CORBA_Server_Environment *_dice_corba_env)
{
  LOG("wakeup called");
  /* Release whoever is blocked on sem. */
  l4semaphore_up(&sem);
}

-- It never gets called because of the IPC error in os2server_wakeup_call().

2)

Also, I tried another way -- there is a thread on the server side waiting for a message (I decided to create another thread besides the main one so as not to interfere with DICE):

/*
 * Body of the server-side wakeup thread.
 *
 * Signals its creator that it has started, registers itself under the
 * name "os2server.wakeup", and then loops forever waiting for short IPC
 * messages from any sender. A message whose two words are both zero
 * performs an "up" on the semaphore `sem` (declared outside this block).
 *
 * A failed receive is logged and skipped: w0/w1 are only examined after
 * l4_ipc_wait() reports success, so they are never read uninitialized.
 *
 * NOTE(review): the return value of names_register() is still ignored;
 * if registration can fail, the client will never find this thread.
 */
static void
os2server_wakeup_thread (void)
{
  l4_msgdope_t  result;
  l4_threadid_t src;
  l4_umword_t   w0, w1;
  int           err;

  /* because of l4thread_create(..., L4THREAD_CREATE_SYNC) */
  l4thread_started(0);

  names_register("os2server.wakeup");

  while (1)
  {
    LOG("waiting for message");
    err = l4_ipc_wait (&src,
                       L4_IPC_SHORT_MSG, &w0, &w1,
                       L4_IPC_NEVER, &result);
    if (err)
    {
      /* Nothing valid was received; do not interpret w0/w1. */
      LOG("error: %d", err);
      continue;
    }
    LOG("message received");
    if (w0 == 0 && w1 == 0)
    {
      l4semaphore_up(&sem);
      LOG("semaphore reset");
    }
  }
}

trampoline() code is modified this way: 

static void
trampoline(unsigned long esp_data, unsigned long eip_data)
{
  l4_threadid_t     dest;
  l4_msgdope_t  result;
  l4_umword_t   w0, w1;
  int err;

  asm(
      "pop %%eax\n"
      "pop %%eax\n"
      "movl %[esp_data], %%eax \n"    /* Put old esp in eax */
      "movl %[eip_data], %%ecx \n"

    "movl %%ebp, %%edx \n" /* Copy ebp to edx. Base pointer for this functions local variables.*/
    "movl %%eax, %%esp \n" /* Copy eax to esp. Stack pointer*/
    /* We have changed the stack so it now points to out LX image.*/
    "call *%%ecx \n" /* Call our main() */
      :
      :[esp_data] "m" (esp_data), /* esp+ data_mmap+8+*/
       [ebp_data] "m" (ebp_data), /* esp+ data_mmap+8+*/
       [eip_data] "m" (eip_data));

  LOG("task end");
  if (!names_waitfor_name("os2server.wakeup", &dest, 10000))
    LOG("wakeup thread not found");
  else
    LOG("wakeup thread found, uid=%x.%x", dest.id.task, dest.id.lthread);
  w0 = 0; w1 = 0;
  err = l4_ipc_send(dest,
                    L4_IPC_SHORT_MSG,
                    w0,
                    w1,
                    L4_IPC_NEVER,
                    &result);
  if (err)
    LOG("error: %ld", err);
  else
    LOG("message transmitted");
  LOG("msgdope = %lu", result);
  LOG("mini33 exit");
}

This way I get an IPC error too. The return code from l4_ipc_send() is 16, but the msgdope is clear (all zeroes). I found a description of the errors returned in message 
dopes, but I don't know what a return code of 16 means. (For a message dope, an error code of 0x10 means that the source or destination does not exist, but I am sure it 
exists.) So l4_ipc_send() returns, and l4_ipc_wait() on the other side never returns.

Maybe someone can help me understand why it doesn't work? 
Thanks in advance.





More information about the l4-hackers mailing list