#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include<ctype.h>
#define MAXWORD 32
#define NHASH 1000000
#define MULT 37
int n[3]={0},now=0;
struct lnode{
char word[MAXWORD];
int count;
struct lnode* next;
};
typedef struct lnode lNode;
typedef struct lnode* lNodeptr;
lNodeptr hash[1000010],hashstop[1000010];
lNodeptr wl[3],r[3];
struct tnode{
char word[MAXWORD];
int count;
struct tnode* left;
struct tnode* right;
};
typedef struct tnode tNode;
typedef struct tnode* tNodeptr;
int getWord(FILE *fp,char *w)
{
int c;
while(!isalpha(c=fgetc(fp)))
if(c==EOF) return c;
else continue;
do{
*w++=tolower(c);
}while(isalpha(c=fgetc(fp)));
*w='\0';
return 1;
}
int isword(lNodeptr *list,char *w)
{
char *p;
unsigned int h=0;
for (p=w;*p!='\0';p++)
h= MULT*h+*p;
h=h%NHASH;
if (list[h]==NULL) return -1;
else {
lNodeptr p=list[h];
while (p!=NULL&&strcmp(p->word,w)<0)
{
p=p->next;
}
if (p==NULL) return -1;
else if (strcmp(p->word,w)>0) return -1;
else if (strcmp(p->word,w)==0) return 1;
}
return -1;
}
void insert(tNodeptr r,char x[1000],int t)
{
if (strcmp(r->word,x)==0){
r->count++;
}
else if (strcmp(r->word,x)>0) {
if (r->left==NULL) {
n[t]++;
tNodeptr p=(tNodeptr)malloc(sizeof(tNode));
strcpy(p->word,x);
p->count=1;
p->left=NULL;
p->right=NULL;
r->left=p;
}
else insert(r->left,x,t);
}
else {
if (r->right==NULL) {
n[t]++;
tNodeptr p=(tNodeptr)malloc(sizeof(tNode));
p->count=1;
strcpy(p->word,x);
p->left=NULL;
p->right=NULL;
r->right=p;
}
else insert(r->right,x,t);
}
}
void search(tNodeptr r,int n,int t)
{
if (r!=NULL){
search(r->left,n,t);
if (wl[t]==NULL) {
wl[t]=(lNodeptr)malloc(sizeof(lNode));
strcpy(wl[t]->word,r->word);
wl[t]->count=r->count;
wl[t]->next=NULL;
}
else {
int i=0;
lNodeptr p=wl[t],pr;
lNodeptr tmp=(lNodeptr)malloc(sizeof(lNode));
strcpy(tmp->word,r->word);
tmp->count=r->count;
while (p->next!=NULL&&p->count>=r->count&&i<n)
{
pr=p;
p=p->next;
i++;
}
if (p->next==NULL) {
if (p->count>=r->count)
{
p->next=tmp;
tmp->next=NULL;
}
else if (p==wl[t]){
tmp->next=p;
wl[t]=tmp;
}
else {
pr->next=tmp;
tmp->next=p;
}
}
else if (p==wl[t]){
tmp->next=p;
wl[t]=tmp;
}
else {
pr->next=tmp;
tmp->next=p;
}
//printf("%d\n",now);
}
search(r->right,n,t);
}
}
double min1(double x,double y)
{
if (x<y) return x;
else return y;
}
double max1(double x,double y)
{
if (x>y) return x;
else return y;
}
int main()
{
FILE *fp1,*fp2,*fstop,*dic,*res;
fp1=fopen("article1.txt","r");
fp2=fopen("article2.txt","r");
fstop=fopen("stopwords.txt","r");
dic=fopen("dictionary.txt","r");
res=fopen("results.txt","w");
char word[MAXWORD];
int nn;
scanf("%d",&nn);
while (getWord(dic,word)!=EOF)
{
char *p;
unsigned int h=0;
for (p=word;*p!='\0';p++)
h= MULT*h+*p;
h=h%NHASH;
if (hash[h]==NULL)
{
hash[h]=(lNodeptr)malloc(sizeof(lNode));
strcpy(hash[h]->word,word);
hash[h]->next=NULL;
}
else {
lNodeptr p=hash[h];
lNodeptr q=(lNodeptr)malloc(sizeof(lNode));;
strcpy(q->word,word);
if (strcmp(p->word,word)>0)
{
q->next=hash[h];
hash[h]=q;
}
else {
lNodeptr r=p;
while (p!=NULL&&strcmp(p->word,word)<0)
{
r=p;
p=p->next;
}
if (p==NULL) {
r->next=q;
q->next=NULL;
}
else {
q->next=r->next;
r->next=q;
}
}
}
}
while (getWord(fstop,word)!=EOF)
{
char *p;
unsigned int h=0;
for (p=word;*p!='\0';p++)
h= MULT*h+*p;
h=h%NHASH;
if (hashstop[h]==NULL)
{
hashstop[h]=(lNodeptr)malloc(sizeof(lNode));
strcpy(hashstop[h]->word,word);
hashstop[h]->next=NULL;
}
else {
lNodeptr p=hashstop[h];
lNodeptr q=(lNodeptr)malloc(sizeof(lNode));;
strcpy(q->word,word);
if (strcmp(p->word,word)>0)
{
q->next=hashstop[h];
hashstop[h]=q;
}
else {
lNodeptr r=p;
while (p!=NULL&&strcmp(p->word,word)<0)
{
r=p;
p=p->next;
}
if (p==NULL) {
r->next=q;
q->next=NULL;
}
else {
q->next=r->next;
r->next=q;
}
}
}
}
tNodeptr wordlist1,wordlist2;
if(fp1!=NULL)
{
while(getWord(fp1,word)!=EOF){
if(isword(hash,word)==1&&isword(hashstop,word)==-1)
{
if (n[1]==0)
{
wordlist1=(tNodeptr)malloc(sizeof(tNode));
strcpy(wordlist1->word,word);
wordlist1->count=1;
wordlist1->left=NULL;
wordlist1->right=NULL;
n[1]=1;
}
else insert(wordlist1,word,1);
}
}
}
if(fp2!=NULL)
{
while(getWord(fp2,word)!=EOF){
if(isword(hash,word)==1&&isword(hashstop,word)==-1)
{
if (n[2]==0)
{
wordlist2=(tNodeptr)malloc(sizeof(tNode));
strcpy(wordlist2->word,word);
wordlist2->count=1;
wordlist2->left=NULL;
wordlist2->right=NULL;
n[2]=1;
}
else insert(wordlist2,word,2);
}
}
}
int n1=min1(nn,n[1]);
int n2=min1(nn,n[2]);
search(wordlist1,n1,1);
now=0;
search(wordlist2,n2,2);
int freq1=0,freq2=0,freqm1=0,freqm2=0;
double pro1,pro2;
int i=0;
for (lNodeptr r=wl[1];i<n1;r=r->next,i++)
{
freq1=freq1+r->count;
int j=0;
for (lNodeptr rr=wl[2];j<n2;rr=rr->next,j++)
if (strcmp(r->word,rr->word)==0){
freqm1=freqm1+r->count;
freqm2=freqm2+rr->count;
}
}
i=0;
for (lNodeptr r=wl[2];i<n2;r=r->next,i++) freq2=freq2+r->count;
pro1=(double)freqm1/(double)freq1;
pro2=(double)freqm2/(double)freq2;
double sim=min1(pro1,pro2)/max1(pro1,pro2);
printf("%.5lf",sim);
fprintf(res,"%3.5lf\n\n",sim);
i=0;
for (lNodeptr r=wl[1];i<n1;r=r->next,i++) fprintf(res,"%s %d\n",r->word,r->count);
fprintf(res,"\n");
i=0;
for (lNodeptr r=wl[2];i<n2;r=r->next,i++) fprintf(res,"%s %d\n",r->word,r->count);
getchar();
getchar();
return 0;
}