Java Code To study Attribute relevance analysis


Java Code To study Attribute relevance analysis















import java.io.*;
import java.util.*;

/**
 * Attribute relevance analysis by information gain.
 *
 * <p>The input is a whitespace-separated text table. Its first line is a header and is
 * ignored; every following line is one record. {@link #main} treats the columns as
 * {@code day, outlook, temp, humidity, wind, play_tennis}; the class label
 * ({@code "yes"} / {@code "no"}) is read from column 5 (zero-based).
 *
 * <p>Blank lines, lines with too few columns and lines whose label is neither
 * {@code "yes"} nor {@code "no"} are skipped, so both the initial entropy and the
 * per-attribute weights are computed over the same set of records.
 */
class InfoGain {

    /** Data file used by the no-path overloads; resolved against the working directory. */
    private static final String DATA_FILE = "data.txt";

    /** Zero-based index of the column that holds the class label. */
    private static final int CLASS_INDEX = 5;

    /** Label counted as the positive class. */
    private static final String POSITIVE = "yes";

    /** Label counted as the negative class. */
    private static final String NEGATIVE = "no";

    /**
     * Computes the entropy of the class label over all records of {@code data.txt}.
     *
     * @return the entropy in bits; {@code 0} when no labelled record could be read
     */
    public static double getInitialEntropy() {
        return getInitialEntropy(DATA_FILE);
    }

    /**
     * Computes the entropy of the class label over all records of the given file and
     * prints the yes/no/total counts to standard output.
     *
     * @param file path of the data file
     * @return the entropy in bits; {@code 0} when no labelled record could be read
     */
    public static double getInitialEntropy(String file) {
        int yes = 0;
        int no = 0;
        for (String[] row : readLabelledRows(file)) {
            if (POSITIVE.equals(row[CLASS_INDEX])) {
                yes++;
            } else {
                no++;
            }
        }
        int tot = yes + no;
        System.out.println("\nPlay Tennis: \nyes: " + yes + " \nno: " + no + " \ntotal: " + tot + "\n");
        return binaryEntropy(yes, no);
    }

    /**
     * Computes the information gain obtained by splitting {@code data.txt} on one attribute.
     *
     * @param attr_name attribute name, used only in the printed messages
     * @param values    the attribute values to split on
     * @param index     zero-based column index of the attribute
     * @param ie        entropy before the split, e.g. from {@link #getInitialEntropy()}
     * @return {@code ie} minus the weighted entropy of the subsets
     */
    public static double getInfoGain(String attr_name, String values[], int index, double ie) {
        return getInfoGain(DATA_FILE, attr_name, values, index, ie);
    }

    /**
     * Computes the information gain obtained by splitting the given file on one attribute.
     * Prints the entropy of every subset and the weighted entropy to standard output.
     *
     * @param file      path of the data file
     * @param attr_name attribute name, used only in the printed messages
     * @param values    the attribute values to split on
     * @param index     zero-based column index of the attribute
     * @param ie        entropy before the split
     * @return {@code ie} minus the weighted entropy of the subsets
     */
    public static double getInfoGain(String file, String attr_name, String values[], int index, double ie) {
        System.out.println("Now we split on the attribute: " + attr_name);

        int[] yes = new int[values.length];
        int[] no = new int[values.length];
        int tot = 0;

        for (String[] row : readLabelledRows(file)) {
            tot++;
            if (index < 0 || index >= row.length) {
                // The attribute column does not exist in this record: it matches no value.
                continue;
            }
            boolean positive = POSITIVE.equals(row[CLASS_INDEX]);
            for (int j = 0; j < values.length; j++) {
                if (row[index].equals(values[j])) {
                    if (positive) {
                        yes[j]++;
                    } else {
                        no[j]++;
                    }
                }
            }
        }

        double[] E = new double[values.length];
        for (int j = 0; j < values.length; j++) {
            E[j] = binaryEntropy(yes[j], no[j]);
        }
        for (int j = 0; j < values.length; j++) {
            System.out.println("Entropy of the subset where " + attr_name + " is " + values[j] + ": " + E[j]);
        }

        double we = 0;
        for (int j = 0; j < values.length; j++) {
            if (tot > 0) {
                // Weight of a subset is its share of all labelled records.
                double weight = (double) (yes[j] + no[j]) / tot;
                we = we + weight * E[j];
            }
        }
        System.out.println("Weighted entropy is: " + we);

        return ie - we;
    }

    /**
     * Reads the data file and returns its usable records, each split into columns.
     * The header line, blank lines, lines with fewer than {@code CLASS_INDEX + 1} columns
     * and lines whose label is neither "yes" nor "no" are left out.
     *
     * <p>An I/O failure is printed and the records read so far are returned, so callers
     * keep the original best-effort behaviour instead of failing.
     */
    private static List<String[]> readLabelledRows(String file) {
        List<String[]> rows = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            boolean header = true;
            String line;
            while ((line = br.readLine()) != null) {
                if (header) {
                    header = false;
                    continue;
                }
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                // Split on any run of whitespace so repeated spaces or tabs do not shift columns.
                String[] cols = trimmed.split("\\s+");
                if (cols.length <= CLASS_INDEX) {
                    continue;
                }
                String label = cols[CLASS_INDEX];
                if (POSITIVE.equals(label) || NEGATIVE.equals(label)) {
                    rows.add(cols);
                }
            }
        } catch (IOException e) {
            System.out.println(e);
        }
        return rows;
    }

    /** Returns the entropy in bits of a two-class sample; an empty sample has entropy 0. */
    private static double binaryEntropy(int yes, int no) {
        int total = yes + no;
        if (total == 0) {
            return 0;
        }
        return entropyTerm((double) yes / total) + entropyTerm((double) no / total);
    }

    /** Returns p * log2(1/p), taking the limit value 0 for p = 0 instead of producing NaN. */
    private static double entropyTerm(double p) {
        if (p <= 0) {
            return 0;
        }
        return p * Math.log(1.0 / p) / Math.log(2);
    }

    /**
     * Prints the initial entropy and the information gain of the outlook, temp,
     * humidity and wind attributes of {@code data.txt}.
     */
    public static void main(String args[]) {
        String outlook[] = {"sunny", "rainy", "overcast"};
        String temp[] = {"hot", "mild", "cool"};
        String humidity[] = {"high", "normal"};
        String wind[] = {"weak", "strong"};

        double initial_entropy = getInitialEntropy();
        System.out.println("Initial entropy is: " + initial_entropy);
        System.out.println("");

        double ig_outlook = getInfoGain("outlook", outlook, 1, initial_entropy);
        System.out.println("information gain of outlook is: " + ig_outlook);
        System.out.println("");

        double ig_temp = getInfoGain("temp", temp, 2, initial_entropy);
        System.out.println("information gain of temp is: " + ig_temp);
        System.out.println("");

        double ig_humidity = getInfoGain("humidity", humidity, 3, initial_entropy);
        System.out.println("information gain of humidity is: " + ig_humidity);
        System.out.println("");

        double ig_wind = getInfoGain("wind", wind, 4, initial_entropy);
        System.out.println("information gain of wind is: " + ig_wind);
    }
}

0 comments :