free():有效的指针异常中-使用流读取大文件时

修改: 所以我注意到我的article.txt文件中包含的字符不是ASCII。所以我将MAX_CHAR_COUNT更改为256,现在我遇到了分段错误错误

我正在编写一个C ++(新的解决方案)解决方案,以找到一个巨大的文件中单词word.txt中单词的出现,该文件无法容纳带有后缀树的内存。

我一次从流中读取4行(4只是一个例子,我将在分析之后对行数进行基准测试。)

当我运行带有小文件的程序时,它可以正常工作。但是当我运行带有实际文件的程序时,我遇到了错误:

.....    

building suffix tree 
    text is :       Template      Template talk      Help      Help talk      Category
    free end  -432,71 
    free node 
    free end  -1,71 
    *** Error in `./needleInHaystack': free(): invalid pointer: 0x0000000000606140 ***
    ======= Backtrace: =========
    /lib64/libc.so.6(+0x81679)[0x7ff83d338679]
    ./needleInHaystack[0x4017c9]
    ./needleInHaystack[0x401775]
    ./needleInHaystack[0x401775]
    ./needleInHaystack[0x401775]
    ./needleInHaystack[0x401775]
    ./needleInHaystack[0x401775]
    ./needleInHaystack[0x401775]
    ./needleInHaystack[0x401d88]
    /lib64/libc.so.6(__libc_start_main+0xf5)[0x7ff83d2d9505]
    ./needleInHaystack[0x4010d9]
    ======= Memory map: ========
    00400000-00406000 r-xp 00000000 00:51 16651445                           /usr/local/rms/needleInHaystack
    00605000-00606000 r--p 00005000 00:51 16651445                           /usr/local/rms/needleInHaystack
    00606000-00607000 rw-p 00006000 00:51 16651445                           /usr/local/rms/needleInHaystack
    01f6d000-01faf000 rw-p 00000000 00:00 0                                  [heap]
    7ff838000000-7ff838021000 rw-p 00000000 00:00 0 
    7ff838021000-7ff83c000000 ---p 00000000 00:00 0 
    7ff83d2b7000-7ff83d47a000 r-xp 00000000 08:01 3547654                    /usr/lib64/libc-2.17.so
    7ff83d47a000-7ff83d67a000 ---p 001c3000 08:01 3547654                    /usr/lib64/libc-2.17.so
    7ff83d67a000-7ff83d67e000 r--p 001c3000 08:01 3547654                    /usr/lib64/libc-2.17.so
    7ff83d67e000-7ff83d680000 rw-p 001c7000 08:01 3547654                    /usr/lib64/libc-2.17.so
    7ff83d680000-7ff83d685000 rw-p 00000000 00:00 0 
    7ff83d685000-7ff83d69a000 r-xp 00000000 08:01 3547686                    /usr/lib64/libgcc_s-4.8.5-20150702.so.1
    7ff83d69a000-7ff83d899000 ---p 00015000 08:01 3547686                    /usr/lib64/libgcc_s-4.8.5-20150702.so.1
    7ff83d899000-7ff83d89a000 r--p 00014000 08:01 3547686                    /usr/lib64/libgcc_s-4.8.5-20150702.so.1
    7ff83d89a000-7ff83d89b000 rw-p 00015000 08:01 3547686                    /usr/lib64/libgcc_s-4.8.5-20150702.so.1
    7ff83d89b000-7ff83d99c000 r-xp 00000000 08:01 3547702                    /usr/lib64/libm-2.17.so
    7ff83d99c000-7ff83db9b000 ---p 00101000 08:01 3547702                    /usr/lib64/libm-2.17.so
    7ff83db9b000-7ff83db9c000 r--p 00100000 08:01 3547702                    /usr/lib64/libm-2.17.so
    7ff83db9c000-7ff83db9d000 rw-p 00101000 08:01 3547702                    /usr/lib64/libm-2.17.so
    7ff83db9d000-7ff83dc86000 r-xp 00000000 08:01 3547770                    /usr/lib64/libstdc++.so.6.0.19
    7ff83dc86000-7ff83de85000 ---p 000e9000 08:01 3547770                    /usr/lib64/libstdc++.so.6.0.19
    7ff83de85000-7ff83de8d000 r--p 000e8000 08:01 3547770                    /usr/lib64/libstdc++.so.6.0.19
    7ff83de8d000-7ff83de8f000 rw-p 000f0000 08:01 3547770                    /usr/lib64/libstdc++.so.6.0.19
    7ff83de8f000-7ff83dea4000 rw-p 00000000 00:00 0 
    7ff83dea4000-7ff83dec6000 r-xp 00000000 08:01 3547624                    /usr/lib64/ld-2.17.so
    7ff83e0b7000-7ff83e0bc000 rw-p 00000000 00:00 0 
    7ff83e0c2000-7ff83e0c5000 rw-p 00000000 00:00 0 
    7ff83e0c5000-7ff83e0c6000 r--p 00021000 08:01 3547624                    /usr/lib64/ld-2.17.so
    7ff83e0c6000-7ff83e0c7000 rw-p 00022000 08:01 3547624                    /usr/lib64/ld-2.17.so
    7ff83e0c7000-7ff83e0c8000 rw-p 00000000 00:00 0 
    7ffdfed2f000-7ffdfed50000 rw-p 00000000 00:00 0                          [stack]
    7ffdfedc9000-7ffdfedcb000 r--p 00000000 00:00 0                          [vvar]
    7ffdfedcb000-7ffdfedcd000 r-xp 00000000 00:00 0                          [vdso]
    ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
    Aborted

以下是我的代码,现在我仅通过硬编码(字符串:“ make”)来尝试出现一个单词:

#define MAX_CHAR 256 
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include<cstring>
#include <unordered_map>
#include <fstream>
using namespace std;


struct SuffixTreeNode { 
    struct SuffixTreeNode *children[MAX_CHAR]; 

    struct SuffixTreeNode *suffixLink; 
    int start; 
    int *end; 

    int suffixIndex; 
}; 

typedef struct SuffixTreeNode Node; 
string text;  


Node *root = NULL; //Pointer to root node 

Node *lastNewNode = NULL; 
Node *activeNode = NULL; 

int activeEdge = -1; 
int activeLength = 0; 

int remainingSuffixCount = 0; 
int leafEnd = -1; 
int *rootEnd = NULL; 
int *splitEnd = NULL; 
int size = -1; //Length of input string 

Node *newNode(int start,int *end) 
{ 
    Node *node =(Node*) malloc(sizeof(Node)); 
    int i; 
    for (i = 0; i < MAX_CHAR; i++) 
        node->children[i] = NULL; 


    node->suffixLink = root; 
    node->start = start; 
    node->end = end; 

    node->suffixIndex = -1; 
    return node; 
} 

int edgeLength(Node *n) { 
    if(n == root) 
        return 0; 
    return *(n->end) - (n->start) + 1; 
} 

int walkDown(Node *currNode) 
{ 

    if (activeLength >= edgeLength(currNode)) 
    { 
        activeEdge += edgeLength(currNode); 
        activeLength -= edgeLength(currNode); 
        activeNode = currNode; 
        return 1; 
    } 
    return 0; 
} 

void extendSuffixTree(int pos)
{

leafEnd = pos;

remainingSuffixCount++;

lastNewNode = NULL;

while(remainingSuffixCount > 0) {

if (activeLength == 0)
activeEdge = pos; 


if (activeNode->children[text[activeEdge]] == NULL)
{
activeNode->children[text[activeEdge]] =
newNode(pos,&leafEnd);


if (lastNewNode != NULL)
{
lastNewNode->suffixLink = activeNode;
lastNewNode = NULL;
}
}

else
{

Node *next = activeNode->children[text[activeEdge]];
if (walkDown(next)){
continue;
}

if (text[next->start + activeLength] == text[pos])
{

if(lastNewNode != NULL && activeNode != root)
{
lastNewNode->suffixLink = activeNode;
lastNewNode = NULL;
}

activeLength++;

break;
}

splitEnd = (int*) malloc(sizeof(int));
*splitEnd = next->start + activeLength - 1;

Node *split = newNode(next->start,splitEnd);
activeNode->children[text[activeEdge]] = split;

split->children[text[pos]] = newNode(pos,&leafEnd);
next->start += activeLength;
split->children[text[next->start]] = next;


if (lastNewNode != NULL)
{

lastNewNode->suffixLink = split;
}

lastNewNode = split;
}


remainingSuffixCount--;
if (activeNode == root && activeLength > 0) 
{
activeLength--;
activeEdge = pos - remainingSuffixCount + 1;
}
else if (activeNode != root) 
{
activeNode = activeNode->suffixLink;
}
}
}
void print(int i,int j) 
{ 
    int k; 
    for (k=i; k<=j; k++) 
        printf("%c",text[k]); 
} 

void setSuffixIndexByDFS(Node *n,int labelHeight) 
{ 
    if (n == NULL) return; 
    int leaf = 1; 
    int i; 
    for (i = 0; i < MAX_CHAR; i++) 
    { 
        if (n->children[i] != NULL) 
        { 

            leaf = 0; 
            setSuffixIndexByDFS(n->children[i],labelHeight + 
                                edgeLength(n->children[i])); 
        } 
    } 
    if (leaf == 1) 
    { 
        n->suffixIndex = size - labelHeight; 

    } 
} 

void freeSuffixTreeByPostOrder(Node *n) 
{ 
    if (n == NULL) 
        return; 
    int i; 
    for (i = 0; i < MAX_CHAR; i++) 
    { 
        if (n->children[i] != NULL) 
        { 
            freeSuffixTreeByPostOrder(n->children[i]); 
        } 
    } 

    printf("free end  %d,%d \n",n->suffixIndex,*(n->end));
    if (n->suffixIndex == -1) 
        free(n->end); 

    printf("free node \n");
    free(n); 
} 


void buildSuffixTree() 
{ 
    printf("building suffix tree \n");
    size = text.length(); 
    int i; 
    rootEnd = (int*) malloc(sizeof(int)); 
    *rootEnd = - 1; 


    root = newNode(-1,rootEnd); 

    activeNode = root; 
    for (i=0; i<size; i++) 
        extendSuffixTree(i); 
    int labelHeight = 0; 
    setSuffixIndexByDFS(root,labelHeight); 
} 

int traverseEdge(const char *str,int idx,int start,int end) 
{ 
    int k = 0; 
    for(k=start; k<=end && str[idx] != '\0'; k++,idx++) 
    { 
        if(text[k] != str[idx]) 
            return -1; // mo match 
    } 
    if(str[idx] == '\0') 
        return 1; // match 
    return 0; // more characters yet to match 
} 

int doTraversalToCountLeaf(Node *n) 
{ 
    if(n == NULL) 
        return 0; 
    if(n->suffixIndex > -1) 
    { 
        return 1; 
    } 
    int count = 0; 
    int i = 0; 
    for (i = 0; i < MAX_CHAR; i++) 
    { 
        if(n->children[i] != NULL) 
        { 
            count += doTraversalToCountLeaf(n->children[i]); 
        } 
    } 
    return count; 
} 

int countLeaf(Node *n) 
{ 
    if(n == NULL) 
        return 0; 
    return doTraversalToCountLeaf(n); 
} 

// returns count in the current buffer 
int doTraversal(Node *n,const char* str,int idx) 
{ 
    if(n == NULL) 
    { 
        return -1; // no match 
    } 
    int res = -1; 
    if(n->start != -1) 
    { 
        res = traverseEdge(str,idx,n->start,*(n->end)); 
        if(res == -1) //no match 
            return -1; 
        if(res == 1) //match 
        { 
            if(n->suffixIndex > -1) 
                return 1;
            else
                return countLeaf(n); 

        } 
    } 
    idx = idx + edgeLength(n); 

    if(n->children[str[idx]] != NULL) 
        return doTraversal(n->children[str[idx]],str,idx); 
    else
        return -1; // no match 
} 

int checkForSubString(const char* str) 
{ 
    int res = doTraversal(root,0); 
    if(res != 1) 
        return 0;
    return res;
} 



int main(int argc,char *argv[]) 
{ 


    unordered_map<string,int> map;

    // read file 
    ifstream words;
    string allLines[1000];

        words.open("words.txt");
    int i =0;
        for (string line; getline(words,line);)
    {
            allLines[i] = line;
        map[line] = 0;

    }

    allLines[0] = "make";
    map[allLines[0]] = 0;

    ifstream article;
    string artlines;
        article.open ("article.txt");
    i=0;
    for (string line; getline(article,line);)
    {
        if(line.length() == 0 ){
            continue;
        }

        if(i == 4){
            i=0;
            text = artlines;
            buildSuffixTree();
            printf("text is : %s\n",text.c_str() );
            map[allLines[0]]+= checkForSubString(allLines[0].c_str());
            artlines = line;
            freeSuffixTreeByPostOrder(root); 
        }else{
            printf("counter I is %d\n",i);
            artlines += line;
            ++i;
        }
    }

    printf("out of loop \n");

    if(i != 4 && i != 0){
        text = artlines;
        //buildSuffixTree();
        //map[allLines[0]]+= checkForSubString(allLines[0].c_str());
        freeSuffixTreeByPostOrder(root); 
    }

    printf("make count : %d",map[allLines[0]]);

    return 0; 
} 
wyq_1234 回答:free():有效的指针异常中-使用流读取大文件时

在某些情况下,您呼叫newNode(pos,&leafEnd);,其中&leafEnd不是malloc返回的地址。

在其他情况下,例如

splitEnd = (int*) malloc(sizeof(int));
*splitEnd = some_value;
Node *split = newNode(next->start,splitEnd);

您必须始终动态分配传递给newNode的内存,因为您以后使用free(n->end);。如果n->end是局部变量或全局变量的地址,而不是前一个malloc返回的地址,则不允许这样做。

示例:

代替

split->children[text[pos]] = newNode(pos,&leafEnd);

您可以使用类似的

int *copyOfLeafEnd = malloc(sizeof(int));
/* check for NULL and do error handling */
*copyOfLeafEnd = leafEnd;
split->children[text[pos]] = newNode(pos,copyOfLeafEnd);
本文链接:https://www.f2er.com/3138618.html

大家都在问