Hello... I'm having a strange page fault in my program. My server has an object called Consensus. This object is responsible in the system for reaching consensus about something... and it has an inner thread that receives some parameters from outside.
Its header file is the following:
##################################################
#ifndef CONSENSUS_H #define CONSENSUS_H
#include <fcntl.h> #include <stdlib.h> #include <netdb.h> #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include <arpa/inet.h>
#include <l4/thread/thread.h> #include <l4/log/l4log.h> #include <l4/lock/lock.h>
#include "WOO_vector.h" #include "WOO_map.h" #include "WOO_string.h"
#include "FailureDetector.h" #include "Consensus_Packet.h" #include "WOO_TMO.h" #include "WOO_Groups.h" #include "WOO_common.h" #include "WOO_member.h"
#define CONSENSUS_PORTNUMBER 30987
// Estimate held by a Consensus object (member `est`): an integer id plus a vector of strings; startConsensus() assigns myEstimate from buildEstimate(). NOTE(review): eid is presumably the id of the member owning the estimate -- confirm.
struct estimate{ int eid; vector<string> myEstimate; };
class Consensus{ private: int sock; int trueFlag; socklen_t addrLength; struct sockaddr_in destinationAddr; struct sockaddr_in myAddr; struct sockaddr_in responseAddr; estimate est; phases current_phase; bool mustRunConsensus; //l4lock_t consensus_lock; l4thread_t consTh; int consensus_id;
//params int myEid; vector<WOO_member> *eids; l4lock_t *group_lock; map<string, CWOO_TMO> *addr_TMO_running; l4lock_t *lock_TMO_running; map<string, CWOO_TMO> *addr_TMO_decided; l4lock_t *lock_TMO_decided; vector<CWOO_Groups> *addr_WOO_groups_table; l4lock_t *lock_WOO_groups_table; public: l4lock_t consensus_lock;
private: void initialize_comm(); bool amICoordinator(vector<WOO_member> v); int coordinator(vector<WOO_member> v); void copyGroup(vector<WOO_member> *toFill); bool deadCoordinator(vector<WOO_member> v, FailureDetector *fd); void cleanFailures(vector<WOO_member> *toClean, FailureDetector *fd); void consensus_broadcast(vector<WOO_member> *toBroadcast, phases phase, packet_type packet, int consensus_round); vector<string> buildEstimate(); bool moreConsensus(); void decide(); public: Consensus(); void start(int eid, vector<WOO_member> *new_eids, l4lock_t *new_group_lock, map<string, CWOO_TMO> *p_addr_TMO_running, l4lock_t *p_lock_TMO_running, map<string, CWOO_TMO> *p_addr_TMO_decided, l4lock_t *p_lock_TMO_decided, vector<CWOO_Groups> *p_addr_WOO_groups_table, l4lock_t *p_lock_WOO_groups_table); void consensus_impl(); void startConsensus(); void stopConsensus(); bool get_mustRunConsensus(); };
// Free-function trampoline: casts args to Consensus* and calls consensus_impl() on it, so it can be passed to l4thread_create as the thread function.
void helper_consensus(void *args);
#endif // CONSENSUS_H
##################################################
When someone wants to start the program, we invoke start(), which has this code...
##################################################
//passar aqui as TMOdecided e as TMOrunning e respectivos locks void Consensus::start (int new_eid, vector < WOO_member > *new_eids, l4lock_t * new_group_lock, map < string, CWOO_TMO > *p_addr_TMO_running, l4lock_t * p_lock_TMO_running, map < string, CWOO_TMO > *p_addr_TMO_decided, l4lock_t * p_lock_TMO_decided, vector < CWOO_Groups > *p_addr_WOO_groups_table, l4lock_t * p_lock_WOO_groups_table) { myEid = new_eid; eids = new_eids; group_lock = new_group_lock; consensus_id = 0; trueFlag = 0x1;
consensus_lock = L4LOCK_UNLOCKED;
addr_TMO_running = p_addr_TMO_running; lock_TMO_running = p_lock_TMO_running; addr_TMO_decided = p_addr_TMO_decided; lock_TMO_decided = p_lock_TMO_decided; addr_WOO_groups_table = p_addr_WOO_groups_table; lock_WOO_groups_table = p_lock_WOO_groups_table;
/*consTh = l4thread_create (helper_consensus, this, L4THREAD_CREATE_ASYNC);*/
this->startConsensus(); this->consensus_impl();
LOG ("estou dps do thread create\n"); }
##################################################
For testing purposes the l4thread_create call is commented out, because it also page-faults :(. It page-faulted when I called the object's method that is inside helper_consensus...
##################################################
void helper_consensus (void *args) { Consensus *cobj = static_cast<Consensus *>(args); cobj->consensus_impl(); }
##################################################
I thought it was page-faulting because of this method... but no... it page-faults even when called normally... but this is not yet the focus of the problem.
When I call the startConsensus() method, it page-faults at the l4lock_lock(...) call... The method looks like this:
##################################################
/**
 * Flags that consensus must run and refreshes the local estimate.
 *
 * Under consensus_lock: sets mustRunConsensus to true and stores the result
 * of buildEstimate() in est.myEstimate.
 */
void
Consensus::startConsensus ()
{
  // <---- reported page-fault site.
  // NOTE(review): in the disassembly the prologue reserves 0xc37c bytes of
  // stack and the faulting store is at -0xc37c(%ebp), near the bottom of
  // that new frame, not an access through the lock pointer. This looks like
  // the thread running out of stack rather than a bad lock -- confirm the
  // stack size of the calling thread.
  l4lock_lock(&consensus_lock);
  LOG("inside\n");

  mustRunConsensus = true;
  est.myEstimate = this->buildEstimate ();

  LOG("out\n");
  l4lock_unlock(&consensus_lock);
}
##################################################
Here is the disassembled output...
##################################################
Consensus::startConsensus(): /home/tiago/l4/pkg/WOO/server/src/Consensus_udp.cc:178 01db6a50 push %ebp 01db6a51 mov %esp,%ebp 01db6a53 push %edi 01db6a54 push %esi 01db6a55 push %ebx 01db6a56 sub $0xc37c,%esp 01db6a5c mov 8(%ebp),%ebx /home/tiago/l4/include/l4/lock/lock.h:97 01db6a5f lea 0xc3c8(%ebx),%eax 01db6a65 mov %eax,0xffff3c84(%ebp) <<--- PAGE FAULT HERE!!!!!!!!!!!! /home/tiago/l4/include/l4/lock/lock.h:98 01db6a6b call 0x1dc0e80 <l4thread_myself> /home/tiago/l4/include/l4/lock/lock.h:101 01db6a70 mov 0xffff3c84(%ebp),%ecx /home/tiago/l4/include/l4/lock/lock.h:98 01db6a76 mov %eax,0xffff3c80(%ebp) /home/tiago/l4/include/l4/lock/lock.h:101 01db6a7c test %ecx,%ecx 01db6a7e je 0x1db6aa6
##################################################
Given my inexperience with debugging on Fiasco... what might be causing this?
Thanks for all your help... (and yes... I'm just about to finish my project)
Tiago Jorge