1

I wrote a kernel module that needs to push messages to user space. The idea is that the kernel module buffers the message and signals the user space program, then the user space program goes and gets the message by requesting it over a netlink socket. My problem is that after buffering 90 messages, the machine locks and I need to restart. I can't figure out what I'm doing wrong, and I'm using linked lists elsewhere in the kernel module successfully.

//
// A message from the kernel space to user space.
//
typedef struct CoreLinkMessage
{
    unsigned int     id;      // identifier assigned when the message is queued
    char            *data;    // heap buffer holding the message bytes
    unsigned int     length;  // number of bytes stored in data
    struct list_head list;    // linkage into the kernel's intrusive list
} CoreLinkMessage;

This function initializes the list and semaphore:

// Constructor: puts the message buffer into its initial state.
//   - cmd_list_.list becomes an empty list head
//   - cmd_sem_ is initialized with a count of 1
void
ctsRtNetlinkSystem_init( void )
{
    INIT_LIST_HEAD(&cmd_list_.list);
    sema_init(&cmd_sem_, 1);
}

This is the function that must be causing the problem. It simply pushes an item onto the tail of the linked list. If I comment out adding items to the linked list and only call a signal, the program runs indefinitely, so I don't think the problem is the signaling.

//
// Allows the kernel module to buffer messages until requested by
// the user space
//
void
ctsRtNetlinkSystem_addMessage(char* data, unsigned int length)
{

    CoreLinkMessage* msg;
    int sem_ret;
    BOOL doSignal = FALSE;

    //
    // LOCK the semaphore
    //
    sem_ret = down_interruptible(&cmd_sem_);

    if ( !sem_ret )
    {

    msg = (CoreLinkMessage*)kmalloc(sizeof(CoreLinkMessage), GFP_KERNEL );
    if ( msg == NULL )
    {
        PRINTF(CTSMSG_INFO
            "ctsRtNetlinkSystem_addMessage failed to allocate memory! \n" );
        goto unlock;
    }
            memset( msg, 0, sizeof(CoreLinkMessage) );
            msg->data = (char*)kmalloc( length, GFP_KERNEL );
    if ( msg->data == NULL )
    {                        
        kfree( msg );
        PRINTF(CTSMSG_INFO
            "ctsRtNetlinkSystem_addMessage failed to allocate data memory!\n" );
        goto unlock;
    }

    memcpy( msg->data, data, length );
    msg->length = length;

    lastMessageId_ += 1;
    msg->id = lastMessageId_;

    list_add_tail(&(msg->list), &cmd_list_.list);   
    doSignal = TRUE;

unlock:

    up( &cmd_sem_ );

    if ( doSignal )
        sendMessageSignal( msg->id );


    }
    else
    {
    PRINTF(CTSMSG_INFO
        "CtsRtNetlinkSystem_addMessage -- failed to get semaphore\n" );
    }


}




//
// Signal the user space that a message is waiting. Pass along the message
// id.
//
// Looks up the task registered in registeredPid_ and sends it
// SIGNAL_MESSAGE with `id` carried in si_int.
//
// Returns TRUE if the signal was sent. Returns FALSE if no task with the
// registered pid exists (registeredPid_ is then reset to 0) or if
// send_sig_info() reports an error.
//
static BOOL
sendMessageSignal( unsigned int id )
{
    int ret;
    struct siginfo info;
    struct task_struct *t;

    memset(&info, 0, sizeof(info));
    info.si_signo = SIGNAL_MESSAGE;
    info.si_code = SI_QUEUE;    // this is bit of a trickery:
                                // SI_QUEUE is normally used by sigqueue
                                // from user space,
                                // and kernel space should use SI_KERNEL.
                                // But if SI_KERNEL is used the real_time data
                                // is not delivered to the user space signal
                                // handler function.

    // tell the user space application the index of the message
    // real time signals may have 32 bits of data.
    info.si_int = id;

    //
    // The task_struct returned by pid_task() is only guaranteed to stay
    // valid inside the RCU read-side critical section, so the lock is
    // held until the signal has been sent.
    //
    rcu_read_lock();

    // find the task_struct associated with this pid
    t = pid_task(find_vpid(registeredPid_), PIDTYPE_PID);
    if (t == NULL)
    {
        rcu_read_unlock();
        PRINTF(CTSMSG_INFO
            "CtsRtNetlinkSystem::sendMessageSignal -- no such pid\n");
        registeredPid_ = 0;
        return FALSE;
    }

    // send the signal
    ret = send_sig_info(SIGNAL_MESSAGE, &info, t);

    rcu_read_unlock();

    if (ret < 0)
    {
        PRINTF(CTSMSG_INFO
            "CtsRtNetlinkSystem::sendMessageSignal -- \n"
            "\t error sending signal %d \n", ret );
        return FALSE;
    }

    return TRUE;
}

I'm currently testing the program on a VM, so I created a timer that ticks every 7 seconds and adds a message to the buffer.

//
// Set up and arm the process timer (high-resolution timer).
//
// The timer runs on CLOCK_MONOTONIC, which only ever moves forward
// (unlike CLOCK_REALTIME, which tracks wall-clock time), and is armed
// in relative mode so the interval is measured from the moment it is
// started. cyclic_task is invoked when the timer expires.
//
static void
createTimer(void)
{
    // Interval: 1 second plus FREQUENCY_NSEC nanoseconds.
    processTimerNs_ = ktime_set(1, FREQUENCY_NSEC);

    hrtimer_init(&processTimer_, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    processTimer_.function = cyclic_task;

    // Arm the timer; the interval is interpreted relative to now.
    hrtimer_start(&processTimer_, processTimerNs_, HRTIMER_MODE_REL);
}

//
// Timer callback: passes a fixed test string to ctsRtNetlink_send and
// re-arms the timer.
//
// The `timer` argument is not used; the global processTimer_ is
// forwarded instead. Returns HRTIMER_RESTART so the timer fires again.
//
static enum hrtimer_restart
cyclic_task(struct hrtimer* timer)
{
    char text[255];

    sprintf(text, "%s", "Testing the buffer.");

    // The length passed excludes the terminating NUL.
    ctsRtNetlink_send( text, strlen(text) );

    // Move the expiry forward by one interval, measured from now.
    hrtimer_forward_now( &processTimer_, processTimerNs_ );

    return HRTIMER_RESTART;
}

Thanks in advance for any help.

4

3 回答 3

0

尽管从问题中看不太清楚您的代码流程,但我觉得向列表添加元素本身可能不是问题所在。您一定还在别处处理这个列表,例如从列表中删除消息等。我怀疑在添加和删除列表元素的操作之间存在某种死锁。此外,请检查您把消息复制到用户空间、从列表中删除并释放它的那部分代码。我想,您应该没有像上面某位评论者所说的那样,直接从用户空间引用内核中的消息。

还,

   memset( msg, 0, sizeof(CoreLinkMessage) );

if ( msg == NULL )
{

这两行的顺序必须颠倒:应先检查 msg 是否为 NULL,再调用 memset。否则一旦分配失败,memset 就会向 NULL 指针写入,系统必然崩溃。

于 2013-10-04T14:25:20.117 回答
0

调用 kmalloc 时用 GFP_ATOMIC 代替 GFP_KERNEL 后,问题解决了。到目前为止已经运行了三天,没有崩溃。我怀疑在由 hrtimer 触发的上下文中不能休眠。

// Allocate with GFP_ATOMIC (instead of GFP_KERNEL) so the allocation never sleeps.
msg = (CoreLinkMessage*)kmalloc(sizeof(CoreLinkMessage), GFP_ATOMIC );

感谢大家的见解!

于 2013-10-07T14:15:57.020 回答
0

分配的内存不足

请确保按"字符串长度 + 1"分配内存,以便容纳字符串末尾的终止符。
发送时,传入的长度可能也需要是 length + 1。

// Before: ctsRtNetlink_send( &msg[0], strlen(msg) );
ctsRtNetlink_send( &msg[0], strlen(msg) + 1);  // +1 so the terminating \0 is included in the length
于 2013-10-04T13:38:46.403 回答