Commit 6f53e576888f120fd05244c44db145be1cfbd051
Parent: 941ec3b8d7
Exists in: main, first2
Showing 3 changed files with 407 additions and 0 deletions
GenerationData.py
import numpy as np
import pandas as pd
import statistics
import csv
from scipy.special import expit
from distfit import distfit  #needed by detection() and generation2()

class Generator:
    def __init__(self, base):
        self.base=base

    #Logistic sigmoid 1/(1+exp(-x)); applied to normal samples it yields a
    #logit-normal distribution
    def logit_Normal(self, x):
        return expit(x)

    #Generate a dataset of (grade, time) pairs for 15 questions at 5 complexity levels
    def generationDatasetComplexities(self):
        tbase=pd.DataFrame()
        sigma_grade=0.5
        mu_grade=1.5
        sigma_time=1.7
        mu_time=30
        for rows in range(5):  #complexity levels
            for ncomp in range(15):  #questions per level
                if ncomp < 10:  #simulate mistakes in the first 10 questions of each level
                    cgrade2=self.logit_Normal(np.random.normal(-1, 0.2, 700))
                    #(earlier variants simulated mistakes only at specific levels,
                    #e.g. rows==0 or rows==3, by temporarily lowering mu_grade)
                else:
                    cgrade2=self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 700))
                #1000 samples per question: 300 regular plus the 700 from above
                cgrade=self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 300))
                cgrade=np.append(cgrade, cgrade2)
                cgrade=cgrade*10  #scale grades to [0, 10]
                ctime=np.random.normal(mu_time, sigma_time, 1000)
                tbase[len(tbase.columns)]=cgrade
                tbase[len(tbase.columns)]=ctime
            #Each complexity level is slightly harder: lower mean, higher variance
            mu_grade-=0.2
            sigma_grade+=0.08
        tbase.to_csv("data.csv", sep=" ")

    #Generate a dataset with mean grade and mean time over 15 questions for 10 sub-competences
    def generationDatasetMeanSubCompetences(self):
        tbase=[]
        for rows in range(1000):  #rows to generate
            sigma_grade=1.7
            mu_grade=5
            sigma_time=1.7
            mu_time=30
            tlist=[]
            for ncomp in range(10):  #sub-competences
                vgrade=[]
                vtime=[]
                for i in range(15):  #questions: (grade, time)
                    vgrade.append(np.random.normal(mu_grade, sigma_grade, 1)[0])
                    vtime.append(np.random.normal(mu_time, sigma_time, 1)[0])
                nmu_grade=np.mean(vgrade)
                nmu_time=np.mean(vtime)
                tlist=tlist+[nmu_grade, nmu_time]
                #Drift the parameters of the next sub-competence around the observed means
                mu_grade=np.random.normal(nmu_grade, 0.5, 1)[0]
                mu_time=np.random.normal(nmu_time, 0.5, 1)[0]
                sigma_grade=abs(mu_grade-nmu_grade)*0.4
                sigma_time=abs(mu_time-nmu_time)*0.4
            tbase.append(tlist)
        #Write the csv file
        with open("dataMean.csv", "w", newline="") as f:
            writer=csv.writer(f)
            writer.writerows(tbase)

    #Generate new rows as a convex combination of two sampled rows
    def generation(self):
        vlambda=0.5
        lbase=self.base
        for i in range(100):
            element1=vlambda*np.array(lbase.sample())
            element2=(1.0-vlambda)*np.array(lbase.sample())
            elementN=pd.DataFrame(element1+element2, columns=self.base.columns)
            #Concatenate self.base and elementN (this step was left as a TODO)
            self.base=pd.concat([self.base, elementN], ignore_index=True)
        return self.base

    #Generation with white noise added to sampled rows
    def generation3(self):
        mu, sigma=0, 0.1
        for i in range(1000):
            element=self.base.sample()
            noise=np.random.normal(mu, sigma, [1, element.shape[1]])
            self.base=pd.concat([self.base, element+noise])
        return self.base

    #Detect the best-fitting distribution of the data
    def detection(self, data):
        dfit=distfit()
        dfit.fit_transform(data)
        print(dfit.summary)

    #Generation by detecting the distribution of each column
    def generation2(self):
        dfit=distfit()
        lbase=np.array(self.base)
        newData=[]
        for vindex in range(lbase.shape[1]):
            dfit.fit_transform(lbase[:,vindex])
            sigma=dfit.model['scale']
            nrand=dfit.generate(1)
            newData.append(nrand)
            #Keep only the rows whose value in this column lies within one sigma
            #of the generated value before fitting the next column
            lbase=lbase[(lbase[:,vindex] < (nrand + sigma)) & (lbase[:,vindex] > (nrand - sigma))]
        print(newData)
        self.base.loc[len(self.base.index)]=newData
        print(self.base.corr())

    #Generation assuming each column is normally distributed
    def generation0(self):
        lbase=self.base
        print(lbase.corr())
        for n in range(100):
            newData=[]
            lbase=self.base
            for vindex in range(21):
                mu=statistics.mean(lbase[vindex])
                sigma=statistics.stdev(lbase[vindex])
                nrand=np.random.normal(mu, sigma, 1)[0]
                #Keep only the rows close to the drawn value before estimating
                #the next column, to preserve the correlation structure
                lbase=lbase[(lbase[vindex].values < (nrand + (sigma/100.0))) & (lbase[vindex].values > (nrand - (sigma/100.0)))]
                newData.append(nrand)
            self.base.loc[len(self.base.index)]=newData
        print(self.base.corr())

g=Generator([])  #base is unused by the dataset-generation methods
#g.detection(data)
g.generationDatasetComplexities()
#g.generationDatasetMeanSubCompetences()
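For orientation, a minimal sketch (not part of the commit) of reading back the file written by generationDatasetComplexities(), assuming only the layout implied by the code above: 1000 rows, each with 150 space-separated values that alternate grade and time, 15 questions for each of the 5 complexity levels, preceded by the pandas index column.

import pandas as pd

#Load the dataset written by generationDatasetComplexities()
data = pd.read_csv("data.csv", sep=" ", index_col=0)

vals = data.iloc[0].to_numpy()                     #one simulated student
pairs = list(zip(vals[0::2], vals[1::2]))          #75 (grade, time) pairs
levels = [pairs[15*k:15*(k+1)] for k in range(5)]  #15 questions per level
print(levels[0][:3])  #first three (grade, time) pairs of complexity level 1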
Prediction.py
import numpy as np

class Prediction:
    def __init__(self, ilevels, levels, betap, deltaP, penalisation, gradeT):
        #Each level holds the grade and the time spent on each question
        self.ilevels=ilevels  #initialization (warm-up) data
        self.levels=levels    #test data
        #Beta-distribution parameters [alpha, beta] for each level
        self.betap=betap
        self.INLevel=[]
        self.NLevel=[]

        self.maxGrade=10
        self.factor=10

        self.deltaPlus=deltaP
        self.gradeThreshold=gradeT
        self.timePenalisation=penalisation
    #Regroup a flat row into lists of (grade, time) pairs, one list per level;
    #element 0 (e.g. the CSV index) is skipped
    def Structure(self, base, total, questions):
        v1=[]
        v2=[]
        elem=1
        for i in range(1,total,2):
            v1.append(base.tolist()[0+i:2+i])
            if (elem % questions) == 0:
                v2.append(v1)
                v1=[]
            elem+=1
        #v2 holds one list of (grade, time) pairs per level
        return v2

    def CalculateGradePenalization(self):
        self.ilevels=self.Structure(self.ilevels,30,3)   #5 levels x 3 warm-up questions
        self.levels=self.Structure(self.levels,150,15)   #5 levels x 15 test questions
        #Penalize each grade by the fraction of 60 s used, e.g. a grade of 8
        #answered in 30 s with penalisation 0.5 becomes 8-8*(30/60)*0.5 = 6
        self.INLevel=[[ilist[0]-((ilist[0])*((ilist[1]/60)*self.timePenalisation)) for ilist in lev] for lev in self.ilevels]
        self.NLevel=[[ilist[0]-((ilist[0])*((ilist[1]/60)*self.timePenalisation)) for ilist in lev] for lev in self.levels]
    #Generalized Thompson sampling
    def Calculate1(self, NLevel):
        r=1
        maxGrade=10
        Rlevel=[]  #initialized so the first-iteration print below is defined
        #Multi-armed bandit over the levels: Thompson sampling
        for i in range(min(len(self.levels[0]),len(self.levels[1]),len(self.levels[2]))):
            if i==0:
                IRLevel=0
            else:
                #Draw one sample from each posterior distribution
                Rlevel=[np.random.beta(self.betap[0][0],self.betap[0][1]),
                        np.random.beta(self.betap[1][0],self.betap[1][1]),
                        np.random.beta(self.betap[2][0],self.betap[2][1])]
                IRLevel=max(enumerate(Rlevel),key=lambda x: x[1])[0]
            print(Rlevel, self.betap)
            print("Mean 1: ", self.betap[0][0]/(self.betap[0][0]+self.betap[0][1]))
            print("Mean 2: ", self.betap[1][0]/(self.betap[1][0]+self.betap[1][1]))
            print("Mean 3: ", self.betap[2][0]/(self.betap[2][0]+self.betap[2][1]))
            print(IRLevel,NLevel[IRLevel][i])
            #Reward signal from the grade, inverted (a high grade shrinks alpha)
            #because we want to keep playing the arm (level) with the least
            #success; for grades in [0, 10], deltaMu lies in [-1, 1]
            grade=NLevel[IRLevel][i]
            deltaMu=(maxGrade-(2*grade))/10
            print("DeltaMu: ",deltaMu)
            #Update the alpha parameter (index 0), keeping it at 1 or above
            sumAlphaBeta=self.betap[IRLevel][0]+self.betap[IRLevel][1]
            self.betap[IRLevel][0]=round((self.betap[IRLevel][0]*r)+(deltaMu*sumAlphaBeta),0)
            self.betap[IRLevel][0]=max(1, self.betap[IRLevel][0])
            #Update the beta parameter (index 1) so that alpha+beta is preserved,
            #keeping it at 1 or above
            self.betap[IRLevel][1]=round((sumAlphaBeta*r)-self.betap[IRLevel][0],0)
            self.betap[IRLevel][1]=max(1, self.betap[IRLevel][1])
            print(self.betap[IRLevel][0], self.betap[IRLevel][1])
            print(Rlevel, self.betap)

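    #A hypothetical numeric trace of the Calculate1 update (illustrative, not
    #part of the commit): with betap[IRLevel]=[6, 4] (posterior mean 0.6) and
    #grade 9, deltaMu=(10-2*9)/10=-0.8 and sumAlphaBeta=10, so alpha becomes
    #round(6-0.8*10)=-2, clipped to 1, and beta becomes round(10-1)=9; the
    #posterior mean falls to 1/10, so this level is sampled less often.
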
    #Bernoulli Thompson sampling update, inverted: a success increases beta and
    #a failure increases alpha, so the bandit favors levels with less success
    def UpdateBeta(self, grade, level):
        if grade >= self.gradeThreshold:
            #Success: increase the beta parameter (index 1)
            self.betap[level][1]+=self.deltaPlus
            #Correlated Thompson sampling: half of the update goes to the neighbours
            if level>0:
                self.betap[level-1][1]+=self.deltaPlus/2
            if level<len(self.betap)-1:
                self.betap[level+1][0]+=self.deltaPlus/2
        else:
            #Failure: increase the alpha parameter (index 0)
            self.betap[level][0]+=self.deltaPlus
            #Correlated Thompson sampling: half of the update goes to the neighbours
            if level>0:
                self.betap[level-1][0]+=self.deltaPlus/2
            if level<len(self.betap)-1:
                self.betap[level+1][1]+=self.deltaPlus/2

    def InitializeBeta(self):
        #Warm up the beta parameters with the initialization grades of each level
        c=0
        for itemc in self.INLevel:
            for i in range(len(itemc)):
                self.UpdateBeta(itemc[i],c)
            c+=1

    def Calculate(self):
        self.InitializeBeta()
        NLevel=self.NLevel
        file1=open('results_slevel.csv','a+')
        file2=open('results_sgrade.csv','a+')
        #Multi-armed bandit over the levels: Bernoulli Thompson sampling
        for i in range(len(NLevel[0])):
            #Draw one sample from each posterior distribution
            Rlevel=[np.random.beta(p[0],p[1]) for p in self.betap]
            #Pick the level with the highest sampled value
            IRLevel=max(enumerate(Rlevel),key=lambda x: x[1])[0]
            print("Stochastic ",i," ",IRLevel)
            file1.write(str(IRLevel)+" ")
            file2.write(str(IRLevel)+" "+str(NLevel[IRLevel][i])+" ")
            #Update the posterior; the rewards are inverted so the bandit keeps
            #probing the level with the least success
            self.UpdateBeta(NLevel[IRLevel][i],IRLevel)
        file1.write("\n")
        file2.write("\n")
        file1.close()
        file2.close()

    #Deterministic sliding-window policy over the levels
    def CalculateSW(self):
        file1=open('results_dlevel.csv','a+')
        file2=open('results_dgrade.csv','a+')
        IRLevel=0
        NIRLevel=0
        Level=self.INLevel  #the windows start from the warm-up grades
        vindex=[0,0,0,0,0]
        for i in range(15):
            if i>0:
                #Slide the window of the chosen level: append the newest grade,
                #drop the oldest
                Level[NIRLevel].append(self.NLevel[NIRLevel][vindex[NIRLevel]])
                Level[NIRLevel].pop(0)
                vindex[NIRLevel]+=1
            #Mean of the last three grades of each level; this was range(4),
            #which left the level-4 branch of the original if-chain unreachable
            mwin=[(Level[IRL][0]+Level[IRL][1]+Level[IRL][2])/3 for IRL in range(5)]
            #Cumulative weighted score: each level either adds (10/5) times its
            #mean to the running total or stands alone with weight 10*(level+1)/5
            mc0=(10/5)*mwin[0]
            mc1=max(mc0+((10/5)*mwin[1]), (20/5)*mwin[1])
            mc2=max(mc1+((10/5)*mwin[2]), (30/5)*mwin[2])
            mc3=max(mc2+((10/5)*mwin[3]), (40/5)*mwin[3])
            mc=max(mc3+((10/5)*mwin[4]), (50/5)*mwin[4])
            #Map the score to a level (duplicate ranges of the original chain are
            #merged); fractional scores in the remaining gaps, e.g. 15.5, keep
            #the previous level
            if 0 <= mc <= 15:
                NIRLevel=0
            elif 16 <= mc <= 25:
                NIRLevel=1
            elif 26 <= mc <= 42:
                NIRLevel=2
            elif 43 <= mc <= 75:
                NIRLevel=3
            elif 76 <= mc <= 100:
                NIRLevel=4
            print("Deterministic ",i," ",NIRLevel)
            if NIRLevel != IRLevel:
                IRLevel=NIRLevel
            file1.write(str(IRLevel)+" ")
            file2.write(str(IRLevel)+" "+str(self.NLevel[IRLevel][vindex[IRLevel]])+" ")
        file1.write("\n")
        file2.write("\n")
        file1.close()
        file2.close()
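The commit instantiates Generator but never Prediction, so to make the expected wiring concrete, here is a hypothetical driver sketch; the synthetic rows and the parameter values deltaP=1, penalisation=0.5 and gradeT=5 are illustrative assumptions, not taken from the commit. Structure() skips element 0 of each row (the CSV index), and the warm-up and test rows carry 3 and 15 (grade, time) pairs per level, matching the 30 and 150 totals in CalculateGradePenalization().

import numpy as np
from Prediction import Prediction

rng = np.random.default_rng(0)
#Dummy index value first, then alternating (grade, time) values:
#grades in [0, 10], times around 30 s
ilevels = np.concatenate(([0], rng.uniform(0, 10, 30)))   #3 pairs x 5 levels
levels = np.concatenate(([0], rng.uniform(0, 10, 150)))   #15 pairs x 5 levels
ilevels[2::2] = rng.normal(30, 1.7, 15)                   #overwrite the times
levels[2::2] = rng.normal(30, 1.7, 75)

betap = [[1, 1] for _ in range(5)]  #one [alpha, beta] prior per level
p = Prediction(ilevels, levels, betap, deltaP=1, penalisation=0.5, gradeT=5)
p.CalculateGradePenalization()  #builds INLevel/NLevel with time penalties
p.Calculate()                   #Bernoulli Thompson sampling over the levels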
README.md