Commit 6f53e576888f120fd05244c44db145be1cfbd051
1 parent
941ec3b8d7
Exists in
main
first2
Showing 3 changed files with 407 additions and 0 deletions Side-by-side Diff
GenerationData.py
View file @
6f53e57
| 1 | +import numpy as np | |
| 2 | +import pandas as pd | |
| 3 | +import statistics | |
| 4 | +import random | |
| 5 | +#from distfit import distfit | |
| 6 | +import csv | |
| 7 | +#import math | |
| 8 | +from scipy.special import expit | |
| 9 | + | |
class Generator:
    """Synthetic dataset generators and data-augmentation helpers for
    (grade, time) question data.

    base: seed data used by the augmentation methods (generation*,
    generation0/2/3, detection) -- presumably a pandas DataFrame with
    integer column labels (TODO confirm against callers). The
    generationDataset* methods ignore it and write CSV files instead.
    """

    def __init__(self, base):
        # Seed data frame for the augmentation methods; never read by the
        # generationDataset* methods.
        self.base = base

    def logit_Normal(self, x):
        """Logistic sigmoid 1 / (1 + exp(-x)) applied elementwise.

        Delegates to scipy.special.expit (already imported at file top):
        mathematically identical to the original 1/(1+np.exp(-x)) but
        numerically stable -- no overflow warning for large negative x.
        """
        return expit(x)

    def generationDatasetComplexities(self):
        """Write "data.csv": 5 complexity levels x 15 questions, each
        question contributing one grade column and one time column of
        1000 samples (space-separated CSV).

        Grades are sigmoid-squashed normal draws scaled to 0..10. For the
        first 10 questions of every level, 700 of the 1000 samples come
        from a low-mean normal (mu=-1, sigma=0.2) to simulate mistakes;
        the remaining samples use the level's current (mu_grade,
        sigma_grade). After each level the grade mean drops by 0.2 and
        the spread grows by 0.08, so harder levels score lower and more
        variably. Times are N(30, 1.7) draws.
        """
        tbase = pd.DataFrame()
        sigma_grade = 0.5
        mu_grade = 1.5
        sigma_time = 1.7
        mu_time = 30
        for _level in range(5):            # 5 complexity levels
            for question in range(15):     # 15 questions per level
                if question < 10:
                    # Simulated mistakes: low-grade distribution for 700 samples.
                    cgrade2 = self.logit_Normal(np.random.normal(-1, 0.2, 700))
                else:
                    cgrade2 = self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 700))
                # Remaining 300 samples from the level's own distribution.
                cgrade = self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 300))
                cgrade = np.append(cgrade, cgrade2)
                cgrade = cgrade * 10  # sigmoid output in (0, 1) -> grade in (0, 10)
                ctime = np.random.normal(mu_time, sigma_time, 1000)
                tbase[len(tbase.columns)] = cgrade
                tbase[len(tbase.columns)] = ctime
            # Harder levels: lower mean grade, wider spread.
            mu_grade -= 0.2
            sigma_grade += 0.08
        tbase.to_csv("data.csv", sep=" ")

    def generationDatasetMeanSubCompetences(self):
        """Write "dataMean.csv": 1000 rows, each holding the mean grade
        and mean time over 15 questions for 10 sub-competences (20 values
        per row).

        Each sub-competence's distribution parameters are drawn around the
        previous sub-competence's empirical means (a random walk), so
        consecutive sub-competences are correlated; sigma is set to 0.4x
        the distance the drawn mean moved, shrinking when the draw lands
        close to the empirical mean.
        """
        tbase = []
        for _row in range(1000):
            sigma_grade = 1.7
            mu_grade = 5
            sigma_time = 1.7
            mu_time = 30
            tlist = []
            for _ncomp in range(10):       # 10 sub-competences
                vgrade = []
                vtime = []
                for _q in range(15):       # 15 (grade, time) question samples
                    vgrade.append(np.random.normal(mu_grade, sigma_grade, 1)[0])
                    vtime.append(np.random.normal(mu_time, sigma_time, 1)[0])
                nmu_grade = np.mean(vgrade)
                nmu_time = np.mean(vtime)
                tlist += [nmu_grade, nmu_time]
                # Random-walk the parameters for the next sub-competence.
                mu_grade = np.random.normal(nmu_grade, 0.5, 1)[0]
                mu_time = np.random.normal(nmu_time, 0.5, 1)[0]
                sigma_grade = abs(mu_grade - nmu_grade) * 0.4
                sigma_time = abs(mu_time - nmu_time) * 0.4
            tbase.append(tlist)
        # Write the csv file.
        with open("dataMean.csv", "w", newline="") as f:
            csv.writer(f).writerows(tbase)

    def generation(self):
        """Mixup-style augmentation pass (unfinished).

        Each of the 100 iterations samples two rows from self.base and
        forms their 0.5/0.5 convex combination, but the combined row is
        never stored: the method returns self.base unchanged.
        TODO(review): concatenate elementN onto self.base (as the original
        inline comment intended) or delete the dead loop.
        """
        vlambda = 0.5
        lbase = self.base
        for _ in range(100):
            element1 = vlambda * np.array(lbase.sample())
            element2 = (1.0 - vlambda) * np.array(lbase.sample())
            elementN = pd.DataFrame(element1 + element2)  # currently discarded
        return self.base

    # Generation with white noise.
    def generation3(self):
        """Augment self.base with 1000 noisy copies of randomly sampled
        rows (additive white noise, mu=0, sigma=0.1) and return the
        enlarged DataFrame.

        The original also computed row sums over a hard-coded range(21)
        into an unused variable, which crashed for bases with fewer than
        21 rows; that dead code is removed.
        """
        mu, sigma = 0, 0.1
        for _ in range(1000):
            element = self.base.sample()
            noise = np.random.normal(mu, sigma, [1, element.shape[1]])
            self.base = pd.concat([self.base, element + noise])
        return self.base

    def detection(self, data):
        """Fit candidate distributions to `data` and print the ranking.

        NOTE(review): depends on distfit, whose import is commented out at
        the top of the file -- calling this raises NameError until
        `from distfit import distfit` is restored.
        """
        dfit = distfit()
        dfit.fit_transform(data)
        print(dfit.summary)

    # Generation with detection of distribution for each column.
    def generation2(self):
        """Generate one synthetic row by fitting a distribution per
        column, sampling a value from it, and narrowing the candidate
        rows to those within one sigma of the sample before fitting the
        next column; append the row to self.base and print corr().

        NOTE(review): depends on distfit (import commented out at file
        top) -- calling this raises NameError until it is restored.
        """
        dfit = distfit()
        lbase = np.array(self.base)
        newData = []
        for vindex in range(lbase.shape[1]):
            dfit.fit_transform(lbase[:, vindex])
            sigma = dfit.model['scale']
            nrand = dfit.generate(1)
            newData.append(nrand)
            # Keep only rows whose value in this column lies within one
            # sigma of the freshly sampled value, so later columns are
            # sampled conditionally on the earlier ones.
            lbase = lbase[(lbase[:, vindex] < (nrand + (sigma / 1.0)))
                          & (lbase[:, vindex] > (nrand - (sigma / 1.0)))]
        print(newData)
        self.base.loc[len(self.base.index)] = newData
        print(self.base.corr())

    # Generation with normal distribution.
    def generation0(self):
        """Generate 100 synthetic rows assuming each of the 21 columns is
        normally distributed, append them to self.base, and print the
        correlation matrix after each row.

        For each row, columns are drawn left to right; after each draw
        the candidate rows are narrowed to those within sigma/100 of the
        drawn value, so subsequent columns are conditioned on earlier
        draws.  Assumes self.base has integer column labels 0..20 --
        TODO confirm.
        """
        print(self.base.corr())
        for _n in range(100):
            newData = []
            lbase = self.base
            for vindex in range(21):
                mu = statistics.mean(lbase[vindex])
                sigma = statistics.stdev(lbase[vindex])
                nrand = np.random.normal(mu, sigma, 1)[0]
                # Narrow candidates to rows close to the drawn value.
                lbase = lbase[(lbase[vindex].values < (nrand + (sigma / 100.0)))
                              & (lbase[vindex].values > (nrand - (sigma / 100.0)))]
                newData.append(nrand)
            self.base.loc[len(self.base.index)] = newData
            print(self.base.corr())
# Script entry point: regenerate the synthetic complexity dataset
# (writes "data.csv" in the working directory). Guarded so importing
# this module no longer triggers the generation as a side effect.
if __name__ == "__main__":
    g = Generator([])
    #g.detection(data)
    g.generationDatasetComplexities()
    #g.generationDatasetMeanSubCompetences()
Prediction.py
View file @
6f53e57
| 1 | +import statistics | |
| 2 | +import numpy as np | |
| 3 | + | |
class Prediction:
    """Adaptive level selection over graded question data, via Bernoulli
    Thompson sampling (stochastic policy) and a sliding-window scoring
    rule (deterministic policy).

    ilevels / levels are flat arrays of interleaved values that
    Structure() reshapes into per-level lists of [grade, time] pairs;
    betap is one [alpha, beta] Beta-posterior parameter pair per level.
    """

    def __init__(self, ilevels, levels, betap, deltaP, penalisation, gradeT):
        # Warm-up data and main test data (flat; reshaped by
        # CalculateGradePenalization before use).
        self.ilevels = ilevels
        self.levels = levels
        # [alpha, beta] parameters of the per-level Beta posteriors.
        self.betap = betap
        # Time-penalised grades, filled by CalculateGradePenalization().
        self.INLevel = []
        self.NLevel = []

        self.maxGrade = 10
        self.factor = 10

        # Beta-parameter increment applied per observation.
        self.deltaPlus = deltaP
        # Grade at or above which an answer counts as a success.
        self.gradeThreshold = gradeT
        # Scaling of the time penalty applied to grades.
        self.timePenalisation = penalisation

    def Structure(self, base, total, questions):
        """Reshape the flat array `base` into groups of `questions`
        two-element [grade, time] lists.

        Pairs are sliced starting at index 1 with stride 2 (base[1:3],
        base[3:5], ...), i.e. index 0 is skipped and `total` bounds how
        far the flat array is read. Returns the list of groups.
        """
        group = []
        groups = []
        elem = 1
        flat = base.tolist()  # hoisted: the original re-converted every iteration
        for i in range(1, total, 2):
            group.append(flat[i:i + 2])
            if (elem % questions) == 0:
                groups.append(group)
                group = []
            elem += 1
        return groups

    def CalculateGradePenalization(self):
        """Reshape the flat grade/time arrays and apply the time penalty.

        ilevels: 30 values -> 5 levels x 3 pairs (warm-up data);
        levels: 150 values -> 5 levels x 15 pairs (main run).
        Penalised grade = grade * (1 - (time/60) * timePenalisation).
        NOTE(review): the /60 suggests time is in minutes of an hour --
        confirm units against the data generator.
        """
        self.ilevels = self.Structure(self.ilevels, 30, 3)
        self.levels = self.Structure(self.levels, 150, 15)

        def penalise(grade, time):
            # Deduct a fraction of the grade proportional to time spent.
            return grade - grade * ((time / 60) * self.timePenalisation)

        self.INLevel = [[penalise(p[0], p[1]) for p in lev] for lev in self.ilevels]
        self.NLevel = [[penalise(p[0], p[1]) for p in lev] for lev in self.levels]

    # Generalized Thompson sampling (verbose/diagnostic variant over 3 levels).
    def Calculate1(self, NLevel):
        """Play one arm per step: sample the 3 Beta posteriors, pick the
        largest sample (arm 0 on the first step), then shift the winning
        arm's posterior mean by deltaMu = (maxGrade - 2*grade)/10 while
        keeping alpha+beta constant (r=1). Rewards are inverted: low
        grades raise the arm's mean so the sampler revisits weak levels.
        """
        r = 1
        maxGrade = 10
        # BUG FIX: Rlevel was unbound on the first iteration (i == 0),
        # so the diagnostic print below raised NameError immediately.
        Rlevel = []
        for i in range(min(len(self.levels[0]), len(self.levels[1]), len(self.levels[2]))):
            if i == 0:
                IRLevel = 0
            else:
                # One sample from each calculated posterior distribution.
                Rlevel = [np.random.beta(self.betap[0][0], self.betap[0][1]),
                          np.random.beta(self.betap[1][0], self.betap[1][1]),
                          np.random.beta(self.betap[2][0], self.betap[2][1])]
                IRLevel = max(enumerate(Rlevel), key=lambda x: x[1])[0]
            print(Rlevel, self.betap)
            print("Mean 1: ", self.betap[0][0] / (self.betap[0][0] + self.betap[0][1]))
            print("Mean 2: ", self.betap[1][0] / (self.betap[1][0] + self.betap[1][1]))
            print("Mean 3: ", self.betap[2][0] / (self.betap[2][0] + self.betap[2][1]))
            print(IRLevel, NLevel[IRLevel][i])
            grade = NLevel[IRLevel][i]
            deltaMu = (maxGrade - (2 * grade)) / 10
            print("DeltaMu: ", deltaMu)
            # Move alpha by deltaMu * (alpha + beta), clamped to >= 1,
            # and rebalance beta so alpha + beta stays constant (r = 1).
            sumAlphaBeta = self.betap[IRLevel][0] + self.betap[IRLevel][1]
            self.betap[IRLevel][0] = round((self.betap[IRLevel][0] * r) + (deltaMu * sumAlphaBeta), 0)
            self.betap[IRLevel][0] = max(1, self.betap[IRLevel][0])
            self.betap[IRLevel][1] = round((sumAlphaBeta * r) - self.betap[IRLevel][0], 0)
            self.betap[IRLevel][1] = max(1, self.betap[IRLevel][1])
            print(self.betap[IRLevel][0], self.betap[IRLevel][1])
            print(Rlevel, self.betap)

    # Bernoulli Thompson sampling update.
    def UpdateBeta(self, grade, level):
        """Update the Beta posterior of `level` with inverted rewards.

        A grade at/above gradeThreshold (success) increments the level's
        beta parameter (lowering its posterior mean); a failure
        increments alpha -- inverted on purpose so the sampler dwells on
        levels the student fails. Half of deltaPlus is propagated to the
        adjacent levels (correlated Thompson sampling).
        """
        if grade >= self.gradeThreshold:
            self.betap[level][1] += self.deltaPlus
            # Correlated update of the neighbouring levels.
            if level > 0:
                self.betap[level - 1][1] += self.deltaPlus / 2
            if level < len(self.betap) - 1:
                self.betap[level + 1][0] += self.deltaPlus / 2
        else:
            self.betap[level][0] += self.deltaPlus
            # Correlated update of the neighbouring levels.
            if level > 0:
                self.betap[level - 1][0] += self.deltaPlus / 2
            if level < len(self.betap) - 1:
                self.betap[level + 1][1] += self.deltaPlus / 2

    def InitializeBeta(self):
        """Warm up the per-level Beta posteriors from the warm-up grades
        in INLevel (one UpdateBeta call per grade)."""
        for level, grades in enumerate(self.INLevel):
            for grade in grades:
                self.UpdateBeta(grade, level)

    def Calculate(self):
        """Run Bernoulli Thompson sampling over NLevel.

        Warms the posteriors from INLevel, then for each question index
        samples every level's Beta posterior, plays the level with the
        largest sample, logs the choice, and updates that posterior.
        Appends one space-separated line to results_slevel.csv (levels)
        and results_sgrade.csv (level/grade pairs).
        """
        self.InitializeBeta()
        NLevel = self.NLevel
        # with-blocks close the result files even if an iteration raises
        # (the original leaked both handles on error).
        with open('results_slevel.csv', 'a+') as file1, \
             open('results_sgrade.csv', 'a+') as file2:
            for i in range(len(NLevel[0])):
                # One posterior sample per level.
                Rlevel = [np.random.beta(p[0], p[1]) for p in self.betap]
                # Index of the largest sample (ties break to the higher
                # index, matching the original (value, index) max).
                IRLevel = max((v, j) for j, v in enumerate(Rlevel))[1]
                print("Stochastic ", i, " ", IRLevel)
                file1.write(str(IRLevel) + " ")
                file2.write(str(IRLevel) + " " + str(NLevel[IRLevel][i]) + " ")
                # Inverted reward update: see UpdateBeta.
                self.UpdateBeta(NLevel[IRLevel][i], IRLevel)
            file1.write("\n")
            file2.write("\n")

    def CalculateSW(self):
        """Deterministic sliding-window level selection over 15 steps.

        Keeps a window of the 3 most recent penalised grades per level
        (seeded from, and mutated in place on, INLevel), folds the
        per-level window means into a cumulative weighted score mc, and
        maps mc to the next level through fixed thresholds. Appends the
        played levels to results_dlevel.csv and (level, grade) pairs to
        results_dgrade.csv.
        """
        IRLevel = 0
        NIRLevel = 0

        Level = self.INLevel          # aliased: windows slide in place on INLevel
        vindex = [0, 0, 0, 0, 0]      # next unread question index per level

        with open('results_dlevel.csv', 'a+') as file1, \
             open('results_dgrade.csv', 'a+') as file2:
            for i in range(15):
                if i > 0:
                    # Slide the window of the level played on the last step.
                    Level[NIRLevel].append(self.NLevel[NIRLevel][vindex[NIRLevel]])
                    Level[NIRLevel].pop(0)
                    vindex[NIRLevel] += 1
                # Mean of each level's 3-grade window.
                # BUG FIX: the original iterated range(4), so level 4's
                # mean was always 0 and its branch was unreachable.
                wmean = [(Level[IRL][0] + Level[IRL][1] + Level[IRL][2]) / 3
                         for IRL in range(5)]
                mc0 = 10 * wmean[0] / 5   # level 0 rescaled, as in the original
                # Cumulative weighted score: each level either builds on the
                # previous score or stands alone with a larger weight.
                mc1 = max(mc0 + (10 / 5) * wmean[1], (20 / 5) * wmean[1])
                mc2 = max(mc1 + (10 / 5) * wmean[2], (30 / 5) * wmean[2])
                mc3 = max(mc2 + (10 / 5) * wmean[3], (40 / 5) * wmean[3])
                mc = max(mc3 + (10 / 5) * wmean[4], (50 / 5) * wmean[4])
                # Threshold table mc -> level (26-35/36-42 and 43-50/51-75
                # mapped to the same level in the original; merged here).
                # NOTE(review): non-integer scores in the gaps (e.g.
                # 15 < mc < 16) leave NIRLevel unchanged from the last step.
                if 0 <= mc <= 15:
                    NIRLevel = 0
                elif 16 <= mc <= 25:
                    NIRLevel = 1
                elif 26 <= mc <= 42:
                    NIRLevel = 2
                elif 43 <= mc <= 75:
                    NIRLevel = 3
                elif 76 <= mc <= 100:
                    NIRLevel = 4
                print("Deterministic ", i, " ", NIRLevel)
                if NIRLevel != IRLevel:
                    IRLevel = NIRLevel
                file1.write(str(IRLevel) + " ")
                file2.write(str(IRLevel) + " " + str(self.NLevel[IRLevel][vindex[IRLevel]]) + " ")
            file1.write("\n")
            file2.write("\n")
README.md
View file @
6f53e57