Implementing the thread pool using pthreads

advertisements

I am trying to understand the below implementation of thread pool using the pthreads. When I comment out the the for loop in the main, the program stucks, upon putting the logs it seems that its getting stuck in the join function in threadpool destructor.

I am unable to understand why this is happening, is there any deadlock scenario happening ?

This may be naive but can someone help me understand why this is happening and how to correct this.

Thanks a lot !!!

#include <stdio.h>
#include <queue>
#include <unistd.h>
#include <pthread.h>
#include <malloc.h>
#include <stdlib.h>

     // Base task for Tasks
     // run() should be overloaded and expensive calculations done there
     // showTask() is for debugging and can be deleted if not used
class Task {
public:
Task() {}
virtual ~Task() {}
virtual void run()=0;
virtual void showTask()=0;
};

// Wrapper around std::queue with some mutex protection
class WorkQueue {
public:
WorkQueue() {
    // Initialize the mutex protecting the queue
    pthread_mutex_init(&qmtx,0);

    // wcond is a condition variable that's signaled
    // when new work arrives
    pthread_cond_init(&wcond, 0);
}

~WorkQueue() {
    // Cleanup pthreads
    pthread_mutex_destroy(&qmtx);
    pthread_cond_destroy(&wcond);
}
// Retrieves the next task from the queue
Task *nextTask() {
    // The return value
    Task *nt = 0;

    // Lock the queue mutex
    pthread_mutex_lock(&qmtx);
    // Check if there's work
    if (finished && tasks.size() == 0) {
        // If not return null (0)
        nt = 0;
    } else {
        // Not finished, but there are no tasks, so wait for
        // wcond to be signalled
        if (tasks.size()==0) {
            pthread_cond_wait(&wcond, &qmtx);
        }
        // get the next task
        nt = tasks.front();
        if(nt){
        tasks.pop();
    }

        // For debugging
        if (nt) nt->showTask();
    }
    // Unlock the mutex and return
    pthread_mutex_unlock(&qmtx);
    return nt;
}
// Add a task
void addTask(Task *nt) {
    // Only add the task if the queue isn't marked finished
    if (!finished) {
        // Lock the queue
        pthread_mutex_lock(&qmtx);
        // Add the task
        tasks.push(nt);
        // signal there's new work
        pthread_cond_signal(&wcond);
        // Unlock the mutex
        pthread_mutex_unlock(&qmtx);
    }
}
// Mark the queue finished
void finish() {
    pthread_mutex_lock(&qmtx);
    finished = true;
    // Signal the condition variable in case any threads are waiting
    pthread_cond_signal(&wcond);
    pthread_mutex_unlock(&qmtx);
}

// Check if there's work
bool hasWork() {
//printf("task queue size is %d\n",tasks.size());
    return (tasks.size()>0);
}

private:
std::queue<Task*> tasks;
bool finished;
pthread_mutex_t qmtx;
pthread_cond_t wcond;
};

// Function that retrieves a task from a queue, runs it and deletes it
void *getWork(void* param) {
Task *mw = 0;
WorkQueue *wq = (WorkQueue*)param;
while (mw = wq->nextTask()) {
    mw->run();
    delete mw;
}
pthread_exit(NULL);
}

class ThreadPool {
public:
// Allocate a thread pool and set them to work trying to get tasks
ThreadPool(int n) : _numThreads(n) {
int rc;
    printf("Creating a thread pool with %d threads\n", n);
    threads = new pthread_t[n];
    for (int i=0; i< n; ++i) {
        rc = pthread_create(&(threads[i]), 0, getWork, &workQueue);
    if (rc){
     printf("ERROR; return code from pthread_create() is %d\n", rc);
     exit(-1);
        }
    }
}

// Wait for the threads to finish, then delete them
~ThreadPool() {
    workQueue.finish();
    //waitForCompletion();
    for (int i=0; i<_numThreads; ++i) {
        pthread_join(threads[i], 0);
    }
    delete [] threads;
}

// Add a task
void addTask(Task *nt) {
    workQueue.addTask(nt);
}
// Tell the tasks to finish and return
void finish() {
    workQueue.finish();
}

// Checks if there is work to do
bool hasWork() {
    return workQueue.hasWork();
}

private:
pthread_t * threads;
int _numThreads;
WorkQueue workQueue;
};

// stdout is a shared resource, so protected it with a mutex
static pthread_mutex_t console_mutex = PTHREAD_MUTEX_INITIALIZER;

// Debugging function
void showTask(int n) {
pthread_mutex_lock(&console_mutex);
pthread_mutex_unlock(&console_mutex);
}

// Task to compute fibonacci numbers
// It's more efficient to use an iterative algorithm, but
// the recursive algorithm takes longer and is more interesting
// than sleeping for X seconds to show parrallelism
class FibTask : public Task {
public:
FibTask(int n) : Task(), _n(n) {}
~FibTask() {
    // Debug prints
    pthread_mutex_lock(&console_mutex);
    printf("tid(%d) - fibd(%d) being deleted\n", pthread_self(), _n);
    pthread_mutex_unlock(&console_mutex);
}
virtual void run() {
    // Note: it's important that this isn't contained in the console mutex lock
    long long val = innerFib(_n);
    // Show results
    pthread_mutex_lock(&console_mutex);
    printf("Fibd %d = %lld\n",_n, val);
    pthread_mutex_unlock(&console_mutex);

    // The following won't work in parrallel:
    //   pthread_mutex_lock(&console_mutex);
    //   printf("Fibd %d = %lld\n",_n, innerFib(_n));
    //   pthread_mutex_unlock(&console_mutex);
}
virtual void showTask() {
    // More debug printing
    pthread_mutex_lock(&console_mutex);
    printf("thread %d computing fibonacci %d\n", pthread_self(), _n);
    pthread_mutex_unlock(&console_mutex);
}
private:
// Slow computation of fibonacci sequence
// To make things interesting, and perhaps imporove load balancing, these
// inner computations could be added to the task queue
// Ideally set a lower limit on when that's done
// (i.e. don't create a task for fib(2)) because thread overhead makes it
// not worth it
long long innerFib(long long n) {
    if (n<=1) { return 1; }
    return innerFib(n-1) + innerFib(n-2);
}
long long _n;
};

int main(int argc, char *argv[])
{
// Create a thread pool
ThreadPool *tp = new ThreadPool(10);

// Create work for it
/*for (int i=0;i<100; ++i) {
    int rv = rand() % 40 + 1;
    showTask(rv);
    tp->addTask(new FibTask(rv));
}*/
delete tp;

printf("\n\n\n\n\nDone with all work!\n");
}


The design is more or less OK-ish but implementationwise it contains several things that are a bit overcomplicated and may introduce instabilities. I guess you prog deadlocks when you comment out the for loop because you should use pthread_cond_broadcast instead of pthread_cond_signal in your WorkQueue::finish() method.

Note: I usually implemented threadpool termination by placing NUM_THREADS number of NULL items into the workqueue and I set a finished flag only to be able to check something in my addTask() method because after finish() I usually don't let adding new tasks and I return with false from addTask() or sometimes I assert.

Another note: Its best to encapsulate threads into classes, that has several benefits and makes proting to multiple platforms easier.

There may be other bugs too as I haven't executed your program, just ran through your code.

EDIT: Here is a reworked version, I issued some modifications to your code but I don't guarantee that it works. Fingers crossed... :-)

#include <stdio.h>
#include <queue>
#include <unistd.h>
#include <pthread.h>
#include <malloc.h>
#include <stdlib.h>
#include <assert.h>

// Reusable thread class
class Thread
{
public:
    Thread()
    {
        state = EState_None;
        handle = 0;
    }

    virtual ~Thread()
    {
        assert(state != EState_Started || joined);
    }

    void start()
    {
        assert(state == EState_None);
        // in case of thread create error I usually FatalExit...
        if (pthread_create(&handle, NULL, threadProc, this))
            abort();
        state = EState_Started;
    }

    void join()
    {
        // A started thread must be joined exactly once!
        // This requirement could be eliminated with an alternative implementation but it isn't needed.
        assert(state == EState_Started);
        pthread_join(handle, NULL);
        state = EState_Joined;
    }

protected:
    virtual void run() = 0;

private:
    static void* threadProc(void* param)
    {
        Thread* thread = reinterpret_cast<Thread*>(param);
        thread->run();
        return NULL;
    }

private:
    enum EState
    {
        EState_None,
        EState_Started,
        EState_Joined
    };

    EState state;
    bool joined;
    pthread_t handle;
};

// Base task for Tasks
// run() should be overloaded and expensive calculations done there
// showTask() is for debugging and can be deleted if not used
class Task {
public:
    Task() {}
    virtual ~Task() {}
    virtual void run()=0;
    virtual void showTask()=0;
};

// Wrapper around std::queue with some mutex protection
class WorkQueue
{
public:
    WorkQueue() {
        pthread_mutex_init(&qmtx,0);

        // wcond is a condition variable that's signaled
        // when new work arrives
        pthread_cond_init(&wcond, 0);
    }

    ~WorkQueue() {
        // Cleanup pthreads
        pthread_mutex_destroy(&qmtx);
        pthread_cond_destroy(&wcond);
    }

    // Retrieves the next task from the queue
    Task *nextTask() {
        // The return value
        Task *nt = 0;

        // Lock the queue mutex
        pthread_mutex_lock(&qmtx);

        while (tasks.empty())
            pthread_cond_wait(&wcond, &qmtx);

        nt = tasks.front();
        tasks.pop();

        // Unlock the mutex and return
        pthread_mutex_unlock(&qmtx);
        return nt;
    }
    // Add a task
    void addTask(Task *nt) {
        // Lock the queue
        pthread_mutex_lock(&qmtx);
        // Add the task
        tasks.push(nt);
        // signal there's new work
        pthread_cond_signal(&wcond);
        // Unlock the mutex
        pthread_mutex_unlock(&qmtx);
    }

private:
    std::queue<Task*> tasks;
    pthread_mutex_t qmtx;
    pthread_cond_t wcond;
};

// Thanks to the reusable thread class implementing threads is
// simple and free of pthread api usage.
class PoolWorkerThread : public Thread
{
public:
    PoolWorkerThread(WorkQueue& _work_queue) : work_queue(_work_queue) {}
protected:
    virtual void run()
    {
        while (Task* task = work_queue.nextTask())
            task->run();
    }
private:
    WorkQueue& work_queue;
};

class ThreadPool {
public:
    // Allocate a thread pool and set them to work trying to get tasks
    ThreadPool(int n) {
        printf("Creating a thread pool with %d threads\n", n);
        for (int i=0; i<n; ++i)
        {
            threads.push_back(new PoolWorkerThread(workQueue));
            threads.back()->start();
        }
    }

    // Wait for the threads to finish, then delete them
    ~ThreadPool() {
        finish();
    }

    // Add a task
    void addTask(Task *nt) {
        workQueue.addTask(nt);
    }

    // Asking the threads to finish, waiting for the task
    // queue to be consumed and then returning.
    void finish() {
        for (size_t i=0,e=threads.size(); i<e; ++i)
            workQueue.addTask(NULL);
        for (size_t i=0,e=threads.size(); i<e; ++i)
        {
            threads[i]->join();
            delete threads[i];
        }
        threads.clear();
    }

private:
    std::vector<PoolWorkerThread*> threads;
    WorkQueue workQueue;
};

// stdout is a shared resource, so protected it with a mutex
static pthread_mutex_t console_mutex = PTHREAD_MUTEX_INITIALIZER;

// Debugging function
void showTask(int n) {
    pthread_mutex_lock(&console_mutex);
    pthread_mutex_unlock(&console_mutex);
}

// Task to compute fibonacci numbers
// It's more efficient to use an iterative algorithm, but
// the recursive algorithm takes longer and is more interesting
// than sleeping for X seconds to show parrallelism
class FibTask : public Task {
public:
    FibTask(int n) : Task(), _n(n) {}
    ~FibTask() {
        // Debug prints
        pthread_mutex_lock(&console_mutex);
        printf("tid(%d) - fibd(%d) being deleted\n", (int)pthread_self(), (int)_n);
        pthread_mutex_unlock(&console_mutex);
    }
    virtual void run() {
        // Note: it's important that this isn't contained in the console mutex lock
        long long val = innerFib(_n);
        // Show results
        pthread_mutex_lock(&console_mutex);
        printf("Fibd %d = %lld\n",(int)_n, val);
        pthread_mutex_unlock(&console_mutex);

        // The following won't work in parrallel:
        //   pthread_mutex_lock(&console_mutex);
        //   printf("Fibd %d = %lld\n",_n, innerFib(_n));
        //   pthread_mutex_unlock(&console_mutex);

        // this thread pool implementation doesn't delete
        // the tasks so we perform the cleanup here
        delete this;
    }
    virtual void showTask() {
        // More debug printing
        pthread_mutex_lock(&console_mutex);
        printf("thread %d computing fibonacci %d\n", (int)pthread_self(), (int)_n);
        pthread_mutex_unlock(&console_mutex);
    }
private:
    // Slow computation of fibonacci sequence
    // To make things interesting, and perhaps imporove load balancing, these
    // inner computations could be added to the task queue
    // Ideally set a lower limit on when that's done
    // (i.e. don't create a task for fib(2)) because thread overhead makes it
    // not worth it
    long long innerFib(long long n) {
        if (n<=1) { return 1; }
        return innerFib(n-1) + innerFib(n-2);
    }
    long long _n;
};

int main(int argc, char *argv[])
{
    // Create a thread pool
    ThreadPool *tp = new ThreadPool(10);

    // Create work for it
    for (int i=0;i<100; ++i) {
        int rv = rand() % 40 + 1;
        showTask(rv);
        tp->addTask(new FibTask(rv));
    }
    delete tp;

    printf("\n\n\n\n\nDone with all work!\n");
}