// Java code to study attribute relevance analysis
import java.io.*;
import java.util.*;

/**
 * Attribute relevance analysis by information gain.
 *
 * <p>The data file is plain text: the first line is a header and is skipped;
 * every following line is one record whose columns are separated by
 * whitespace. The class label is read from column index 5 and is expected to
 * be {@code "yes"} or {@code "no"}. Blank lines, rows with too few columns and
 * rows with any other label are ignored.
 */
class InfoGain {

    /** File read by the entry points that do not take a file argument. */
    private static final String DATA_FILE = "data.txt";

    /** Zero-based index of the column that holds the yes/no class label. */
    private static final int LABEL_INDEX = 5;

    /**
     * Computes the entropy of the class label over all records of
     * {@code data.txt} and prints the yes/no/total counts.
     *
     * @return the entropy in bits, or {@code 0.0} when no labelled record was read
     */
    public static double getInitialEntropy() {
        return getInitialEntropy(DATA_FILE);
    }

    /**
     * Computes the entropy of the class label over all records of the given
     * file and prints the yes/no/total counts.
     *
     * <p>An I/O failure is printed to standard output and the entropy of the
     * records read so far is returned (best effort, as before).
     *
     * @param file path of the data file
     * @return the entropy in bits, or {@code 0.0} when no labelled record was read
     */
    public static double getInitialEntropy(String file) {
        int yes = 0;
        int no = 0;
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            br.readLine(); // discard the header row
            String line;
            while ((line = br.readLine()) != null) {
                String[] colData = splitRow(line);
                if (colData.length <= LABEL_INDEX) {
                    continue; // blank or truncated row: nothing to count
                }
                if (colData[LABEL_INDEX].equals("yes")) {
                    yes++;
                } else if (colData[LABEL_INDEX].equals("no")) {
                    no++;
                }
            }
        } catch (IOException e) {
            System.out.println(e);
        }
        int tot = yes + no;
        System.out.println("\nPlay Tennis: \nyes: " + yes + " \nno: " + no + " \ntotal: " + tot + "\n");
        return binaryEntropy(yes, no);
    }

    /**
     * Computes the information gain obtained by splitting the records of
     * {@code data.txt} on one attribute.
     *
     * @param attr_name attribute name, used only in the printed messages
     * @param values    the attribute values that define the subsets
     * @param index     zero-based column index of the attribute
     * @param ie        entropy of the whole data set before the split
     * @return {@code ie} minus the weighted entropy of the subsets
     */
    public static double getInfoGain(String attr_name, String values[], int index, double ie) {
        return getInfoGain(attr_name, values, index, ie, DATA_FILE);
    }

    /**
     * Computes the information gain obtained by splitting the records of the
     * given file on one attribute. Prints the entropy of every subset and the
     * weighted entropy.
     *
     * <p>Only records labelled {@code "yes"} or {@code "no"} are counted, so the
     * weights are consistent with {@link #getInitialEntropy(String)}. A value
     * that matches no record forms an empty subset with entropy 0 and weight 0
     * instead of turning the whole result into NaN.
     *
     * @param attr_name attribute name, used only in the printed messages
     * @param values    the attribute values that define the subsets
     * @param index     zero-based column index of the attribute
     * @param ie        entropy of the whole data set before the split
     * @param file      path of the data file
     * @return {@code ie} minus the weighted entropy of the subsets
     */
    public static double getInfoGain(String attr_name, String values[], int index, double ie,
            String file) {
        System.out.println("Now we split on the attribute: " + attr_name);
        int[] yesCount = new int[values.length];
        int[] noCount = new int[values.length];
        int tot = 0;
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            br.readLine(); // discard the header row
            String line;
            while ((line = br.readLine()) != null) {
                String[] colData = splitRow(line);
                if (index < 0 || colData.length <= Math.max(index, LABEL_INDEX)) {
                    continue; // row does not contain both the attribute and the label
                }
                boolean isYes = colData[LABEL_INDEX].equals("yes");
                boolean isNo = colData[LABEL_INDEX].equals("no");
                if (!isYes && !isNo) {
                    continue; // unlabelled rows carry no information about the class
                }
                tot++;
                for (int j = 0; j < values.length; j++) {
                    if (colData[index].equals(values[j])) {
                        if (isYes) {
                            yesCount[j]++;
                        } else {
                            noCount[j]++;
                        }
                    }
                }
            }
        } catch (IOException e) {
            System.out.println(e);
        }

        double we = 0;
        for (int j = 0; j < values.length; j++) {
            double subsetEntropy = binaryEntropy(yesCount[j], noCount[j]);
            System.out.println("Entropy of the subset where " + attr_name + " is " + values[j]
                    + ": " + subsetEntropy);
            if (tot > 0) {
                // weight of the subset = fraction of labelled records that fall into it
                double weight = (double) (yesCount[j] + noCount[j]) / tot;
                we = we + weight * subsetEntropy;
            }
        }
        System.out.println("Weighted entropy is: " + we);
        return ie - we;
    }

    /** Splits one record into its whitespace-separated columns. */
    private static String[] splitRow(String line) {
        return line.trim().split("\\s+");
    }

    /**
     * Entropy in bits of a two-class distribution given its class counts;
     * 0 for an empty or pure set (avoids 0/0 and 0 * log(1/0)).
     */
    private static double binaryEntropy(int yes, int no) {
        if (yes == 0 || no == 0) {
            return 0;
        }
        int total = yes + no;
        double py = (double) yes / total;
        double pn = (double) no / total;
        return py * Math.log(1.0 / py) / Math.log(2) + pn * Math.log(1.0 / pn) / Math.log(2);
    }

    /**
     * Prints the initial entropy of {@code data.txt} and the information gain
     * of the outlook, temp, humidity and wind attributes (columns 1 to 4).
     */
    public static void main(String args[]) {
        String outlook[] = {"sunny", "rainy", "overcast"};
        String temp[] = {"hot", "mild", "cool"};
        String humidity[] = {"high", "normal"};
        String wind[] = {"weak", "strong"};

        double initial_entropy = getInitialEntropy();
        System.out.println("Initial entropy is: " + initial_entropy);
        System.out.println("");

        double ig_outlook = getInfoGain("outlook", outlook, 1, initial_entropy);
        System.out.println("information gain of outlook is: " + ig_outlook);
        System.out.println("");

        double ig_temp = getInfoGain("temp", temp, 2, initial_entropy);
        System.out.println("information gain of temp is: " + ig_temp);
        System.out.println("");

        double ig_humidity = getInfoGain("humidity", humidity, 3, initial_entropy);
        System.out.println("information gain of humidity is: " + ig_humidity);
        System.out.println("");

        double ig_wind = getInfoGain("wind", wind, 4, initial_entropy);
        System.out.println("information gain of wind is: " + ig_wind);
    }
}
// 0 comments :