Commit 6f53e576888f120fd05244c44db145be1cfbd051
1 parent: 941ec3b8d7
Exists in: main, first2
Showing 3 changed files with 407 additions and 0 deletions
GenerationData.py (new file, view @ 6f53e57)
import numpy as np
import pandas as pd
import statistics
import random
from distfit import distfit  # needed by detection() and generation2()
import csv
from scipy.special import expit

class Generator:
    def __init__(self, base):
        self.base = base

    # Logistic sigmoid (equivalent to scipy.special.expit): maps a normal
    # sample into (0, 1), i.e. produces a logit-normal variable.
    def logit_Normal(self, x):
        return 1/(1+np.exp(-x))
    # Generate a dataset of (grade, time) samples for 15 questions and
    # 5 complexity levels, and write it to data.csv.
    def generationDatasetComplexities(self):
        tbase = pd.DataFrame()
        sigma_grade = 0.5
        mu_grade = 1.5
        sigma_time = 1.7
        mu_time = 30
        # Complexity levels
        for rows in range(5):
            # Questions per level
            for ncomp in range(15):
                if ncomp < 10:
                    # Simulate mistakes in the first 10 questions of each level
                    cgrade2 = self.logit_Normal(np.random.normal(-1, 0.2, 700))
                else:
                    cgrade2 = self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 700))
                # 300 regular samples plus the 700 above: 1000 grades per question
                cgrade = self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 300))
                cgrade = np.append(cgrade, cgrade2)
                cgrade = cgrade*10  # rescale from (0, 1) to (0, 10)
                ctime = np.random.normal(mu_time, sigma_time, 1000)
                tbase[len(tbase.columns)] = cgrade
                tbase[len(tbase.columns)] = ctime
            # Each successive level is harder: lower mean grade, wider spread
            mu_grade -= 0.2
            sigma_grade += 0.08
        tbase.to_csv("data.csv", sep=" ")

    # Generate a dataset with the mean grade and mean time over 15 questions
    # for each of 10 sub-competences, and write it to dataMean.csv.
    def generationDatasetMeanSubCompetences(self):
        tbase = []
        # Number of rows to generate
        for rows in range(1000):
            sigma_grade = 1.7
            mu_grade = 5
            sigma_time = 1.7
            mu_time = 30
            tlist = []
            # Sub-competences
            for ncomp in range(10):
                vgrade = []
                vtime = []
                # Questions (grade, time)
                for i in range(15):
                    vgrade.append(np.random.normal(mu_grade, sigma_grade, 1)[0])
                    vtime.append(np.random.normal(mu_time, sigma_time, 1)[0])
                nmu_grade = np.mean(vgrade)
                nmu_time = np.mean(vtime)
                tlist = tlist + [nmu_grade, nmu_time]
                # Drift the next sub-competence's parameters around the
                # means observed so far
                mu_grade = np.random.normal(nmu_grade, 0.5, 1)[0]
                mu_time = np.random.normal(nmu_time, 0.5, 1)[0]
                sigma_grade = abs(mu_grade-nmu_grade)*0.4
                sigma_time = abs(mu_time-nmu_time)*0.4
            tbase.append(tlist)
        # Write the csv file
        with open("dataMean.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(tbase)

    # Generation by interpolating two random samples (mixup-style).
    # The concatenation step is still a TODO, so the base is returned
    # unchanged for now.
    def generation(self):
        vlambda = 0.5
        lbase = self.base
        for i in range(100):
            element1 = vlambda*np.array(lbase.sample())
            element2 = (1.0-vlambda)*np.array(lbase.sample())
            elementN = pd.DataFrame(element1+element2)
            # TODO: concatenate self.base and elementN
        return self.base

    # Generation with white noise: resample rows from the base and add
    # Gaussian noise to each copy.
    def generation3(self):
        mu, sigma = 0, 0.1
        for i in range(1000):
            element = self.base.sample()
            noise = np.random.normal(mu, sigma, [1, element.shape[1]])
            self.base = pd.concat([self.base, element+noise])
        return self.base

    # Fit candidate distributions to the data and print the ranked fits
    def detection(self, data):
        dfit = distfit()
        dfit.fit_transform(data)
        print(dfit.summary)

    # Generation with detection of the distribution of each column:
    # sample one value per column, then keep only the rows close to it so
    # that later columns remain conditioned on the earlier ones.
    def generation2(self):
        dfit = distfit()
        lbase = np.array(self.base)
        newData = []
        for vindex in range(lbase.shape[1]):
            dfit.fit_transform(lbase[:, vindex])
            sigma = dfit.model['scale']
            nrand = dfit.generate(1)
            newData.append(nrand)
            lbase = lbase[(lbase[:, vindex] < (nrand+sigma)) & (lbase[:, vindex] > (nrand-sigma))]
        print(newData)
        self.base.loc[len(self.base.index)] = newData
        print(self.base.corr())

    # Generation with normal distribution
    def generation0(self):
        lbase = self.base
        print(lbase.corr())
        for n in range(100):
            vindex = 0
            newData = []
            lbase = self.base
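Taken together, the Generator methods both synthesize the CSV datasets from scratch and augment an existing base. A minimal usage sketch, assuming the class is imported from this new GenerationData.py module; the file name and read parameters mirror the to_csv call above, everything else is illustrative:

import pandas as pd
from GenerationData import Generator

# Write the synthetic (grade, time) dataset to data.csv
Generator(None).generationDatasetComplexities()

# Reload it and use it as the base for white-noise augmentation
base = pd.read_csv("data.csv", sep=" ", index_col=0)
gen = Generator(base)
augmented = gen.generation3()  # original rows plus 1000 noisy resamples
print(augmented.shape)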
Prediction.py (new file, view @ 6f53e57)
import statistics
import numpy as np

class Prediction:
    def __init__(self, ilevels, levels, betap, deltaP, penalisation, gradeT):
        # Each level holds the grade and the time for every question
        # Test data
        self.ilevels = ilevels
        self.levels = levels
        # Initial beta-distribution parameters, one [alpha, beta] pair per level
        self.betap = betap
        self.INLevel = []
        self.NLevel = []

        self.maxGrade = 10
        self.factor = 10

        self.deltaPlus = deltaP
        self.gradeThreshold = gradeT
        self.timePenalisation = penalisation

    # Reshape a flat series of alternating values into one list of
    # (grade, time) pairs per block of `questions` questions.
    def Structure(self, base, total, questions):
        v1 = []
        v2 = []
        elem = 1
        for i in range(1, total, 2):
            v1.append(base.tolist()[0+i:2+i])
            if (elem % questions) == 0:
                v2.append(v1)
                v1 = []
            elem += 1
        return v2

    def CalculateGradePenalization(self):
        self.ilevels = self.Structure(self.ilevels, 30, 3)
        self.levels = self.Structure(self.levels, 150, 15)
        # Penalize each grade in proportion to the fraction of a minute used
        self.INLevel = [[g-(g*(t/60)*self.timePenalisation) for g, t in lev] for lev in self.ilevels]
        self.NLevel = [[g-(g*(t/60)*self.timePenalisation) for g, t in lev] for lev in self.levels]

    # Generalized Thompson sampling
    def Calculate1(self, NLevel):
        r = 1
        maxGrade = 10
        Rlevel = []
        # Multi-armed bandit loop: pick the next level by Thompson sampling
        for i in range(min(len(self.levels[0]), len(self.levels[1]), len(self.levels[2]))):
            if i == 0:
                IRLevel = 0
            else:
                # Draw one sample from each posterior distribution
                Rlevel = [np.random.beta(self.betap[0][0], self.betap[0][1]),
                          np.random.beta(self.betap[1][0], self.betap[1][1]),
                          np.random.beta(self.betap[2][0], self.betap[2][1])]
                IRLevel = max(enumerate(Rlevel), key=lambda x: x[1])[0]
            print(Rlevel, self.betap)
            print("Mean 1: ", self.betap[0][0]/(self.betap[0][0]+self.betap[0][1]))
            print("Mean 2: ", self.betap[1][0]/(self.betap[1][0]+self.betap[1][1]))
            print("Mean 3: ", self.betap[2][0]/(self.betap[2][0]+self.betap[2][1]))
            print(IRLevel, NLevel[IRLevel][i])
            # The reward is inverted (success lowers it) because sampling
            # should concentrate on the level with less success; deltaMu is
            # normalized to [-1, 1]: +1 for grade 0, -1 for grade == maxGrade
            grade = NLevel[IRLevel][i]
            deltaMu = (maxGrade-(2*grade))/10

            print("DeltaMu: ", deltaMu)

            # Update the alpha parameter (index 0)
            sumAlphaBeta = self.betap[IRLevel][0]+self.betap[IRLevel][1]
            self.betap[IRLevel][0] = round((self.betap[IRLevel][0]*r)+(deltaMu*sumAlphaBeta), 0)
            # Keep alpha at 1 or above
            self.betap[IRLevel][0] = max(1, self.betap[IRLevel][0])

            # Update the beta parameter (index 1), preserving the total mass
            self.betap[IRLevel][1] = round((sumAlphaBeta*r)-self.betap[IRLevel][0], 0)
            # Keep beta at 1 or above
            self.betap[IRLevel][1] = max(1, self.betap[IRLevel][1])

            print(self.betap[IRLevel][0], self.betap[IRLevel][1])
            print(Rlevel, self.betap)

    # Bernoulli Thompson sampling update; the neighbouring levels receive
    # half-strength correlated updates.
    def UpdateBeta(self, grade, level):
        if grade >= self.gradeThreshold:
            # Success: increase the beta parameter (index 1)
            self.betap[level][1] += self.deltaPlus
            # Correlated Thompson sampling
            if level > 0:
                self.betap[level-1][1] += self.deltaPlus/2
            if level < len(self.betap)-1:
                self.betap[level+1][0] += self.deltaPlus/2
        else:
            # Failure: increase the alpha parameter (index 0)
            self.betap[level][0] += self.deltaPlus
            # Correlated Thompson sampling
            if level > 0:
                self.betap[level-1][0] += self.deltaPlus/2
            if level < len(self.betap)-1:
                self.betap[level+1][1] += self.deltaPlus/2

    def InitializeBeta(self):
        c = 0
        for itemc in self.INLevel:
            for i in range(len(itemc)):
                self.UpdateBeta(itemc[i], c)
            c += 1

    def Calculate(self):
        self.InitializeBeta()
        NLevel = self.NLevel
        file1 = open('results_slevel.csv', 'a+')
        file2 = open('results_sgrade.csv', 'a+')
        # Multi-armed bandit loop: pick the next level by Thompson sampling
        for i in range(len(NLevel[0])):
            # Draw one sample from each posterior distribution
            Rlevel = [np.random.beta(p[0], p[1]) for p in self.betap]
            # Take the index of the largest sampled value
            IRLevel = max((v, j) for j, v in enumerate(Rlevel))[1]
            print("Stochastic ", i, " ", IRLevel)
            file1.write(str(IRLevel)+" ")
            file2.write(str(IRLevel)+" "+str(NLevel[IRLevel][i])+" ")
            # The rewards are inverted: sampling should stay on the level
            # with less success
            self.UpdateBeta(NLevel[IRLevel][i], IRLevel)
        file1.write("\n")
        file2.write("\n")
        file1.close()
        file2.close()

    # Sliding-window variant: the last three penalized grades of each level
    # drive a weighted score that selects the next level.
    def CalculateSW(self):
        file1 = open('results_dlevel.csv', 'a+')
        file2 = open('results_dgrade.csv', 'a+')
        IRLevel = 0
        NIRLevel = 0
        step = 0
        mc0 = 0
        mc1 = 0
        mc2 = 0
        mc3 = 0
        mc4 = 0

        Level = self.INLevel
        clevel = [0, 0, 0, 0, 0]
        vindex = [0, 0, 0, 0, 0]

        for i in range(15):
            if i > 0:
                # Slide the window: append the newest grade of the chosen
                # level and drop its oldest one
                Level[NIRLevel].append(self.NLevel[NIRLevel][vindex[NIRLevel]])
                Level[NIRLevel].pop(0)
                vindex[NIRLevel] += 1
            mc0 = 0
            mc1 = 0
            mc2 = 0
            mc3 = 0
            mc4 = 0
            mc = 0
            # Mean of the last three grades of each of the 5 levels
            for IRL in range(5):
                if IRL == 0:
                    mc0 = (Level[IRL][2]+Level[IRL][1]+Level[IRL][0])/3
                    mc0 = 10*mc0/5
                if IRL == 1:
                    mc1 = (Level[IRL][2]+Level[IRL][1]+Level[IRL][0])/3
                if IRL == 2:
                    mc2 = (Level[IRL][2]+Level[IRL][1]+Level[IRL][0])/3
                if IRL == 3:
                    mc3 = (Level[IRL][2]+Level[IRL][1]+Level[IRL][0])/3
                if IRL == 4:
                    mc4 = (Level[IRL][2]+Level[IRL][1]+Level[IRL][0])/3

            # Accumulate a weighted score across the levels
            mc1 = max(mc0+((10/5)*mc1), (20/5)*mc1)
            mc2 = max(mc1+((10/5)*mc2), (30/5)*mc2)
            mc3 = max(mc2+((10/5)*mc3), (40/5)*mc3)
            mc = max(mc3+((10/5)*mc4), (50/5)*mc4)
            # Map the score to the next level
            if mc >= 0 and mc <= 15:
                NIRLevel = 0
            elif mc >= 16 and mc <= 25:
                NIRLevel = 1
            elif mc >= 26 and mc <= 35:
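The constructor above fixes the expected input shapes: flat series of alternating grade/time values whose lengths match the hard-coded Structure() calls (30 initial values, 150 main ones, each preceded by one extra element, since Structure() starts reading at offset 1), plus one [alpha, beta] pair per level. A hedged usage sketch with purely illustrative random data and parameter values, none of which come from the commit itself:

import numpy as np
from Prediction import Prediction

rng = np.random.default_rng(0)
# Alternating (grade, time) values; the leading 0.0 stands in for the index
# element a CSV row would carry, which Structure() skips
ilevels = np.concatenate([[0.0], np.ravel(np.column_stack(
    [rng.uniform(0, 10, 15), rng.uniform(0, 60, 15)]))])
levels = np.concatenate([[0.0], np.ravel(np.column_stack(
    [rng.uniform(0, 10, 75), rng.uniform(0, 60, 75)]))])
betap = [[1, 1] for _ in range(5)]  # uniform Beta(1, 1) prior per level

p = Prediction(ilevels, levels, betap, deltaP=1, penalisation=0.25, gradeT=5)
p.CalculateGradePenalization()  # builds INLevel and NLevel
p.Calculate()                   # runs the Bernoulli Thompson sampling loop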
README.md (new file, view @ 6f53e57)
# Recommender System AI-VT