#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <time.h>
pthread_mutex_t lock;
//typedef struct for a word
typedef struct {
char word[101];
int frequency;
}Word;
//struct for thread
struct ft{
char* fileName;
int start;
int stop;
};
//compare frequency of 2 words
int compareWords(const void *f1,const void *f2){
Word *a = (Word *)f1;
Word *b = (Word *)f2;
return (b->frequency - a->frequency);
}
//count frequency of a word
void countFrequency(void *arg){
pthread_mutex_lock(&lock);
int i,c;
struct ft* fi = (struct ft*)arg;
FILE *file = fopen(fi->fileName,"r");
fseek(file,fi->start,SEEK_SET);
for(i = 0; i < fi->stop - fi->start; i++){
c = getc(file);
//printf("%d\n",c);
//frequency count
}
fclose(file);
pthread_mutex_unlock(&lock);
}
int main (int argc,char **argv){
//variabies for <time.h>
struct timespec startTime;
struct timespec endTime;
clock_gettime(CLOCK_REALTIME,&startTime);
/*------------main------------------*/
//variables
int nthreads; //number of threads
int chunkSize; //each threas processing size
//if user input is not correct,inform
if(argc < 3){
printf("./a.out text_file #ofthreads \n");
exit(-1);
}
nthreads = atoi(argv[2]);
chunkSize = sizeof(argv[1])/nthreads;
//declare threads and default attributes
pthread_t threads[nthreads];
pthread_attr_t attr;
pthread_attr_init(&attr);
//run threads in parallel
int i;
for (i = 0; i < nthreads; i++){
struct ft data[nthreads];
data[i].start = i*chunkSize;
data[i].stop = data[i].start+chunkSize;
data[i].fileName = argv[1];
// Create a new thread for every segment,and count word frequency for each
pthread_create(&threads[i],&attr,(void*) countFrequency,(void*) &data[i]);
}
//wait for results (all threads)
for (i = 0; i < nthreads; i++){
pthread_join(threads[i],NULL);
}
//func of <time.h>
clock_gettime(CLOCK_REALTIME,&endTime);
time_t sec = endTime.tv_sec - startTime.tv_sec;
long n_sec = endTime.tv_nsec - startTime.tv_nsec;
if (endTime.tv_nsec < startTime.tv_nsec)
{
--sec;
n_sec = n_sec + 1000000000L;
}
printf("Total Time was %ld.%09ld seconds\n",sec,n_sec);
}
我正在使用该程序来使用多个线程读取和处理大型文本文件,并对文本中长度超过6个字符的前10个最常用单词执行单词计数频率。但是我不知道为什么会不断出现细分错误错误,有人知道吗?