Commit 6f53e576888f120fd05244c44db145be1cfbd051
1 parent
941ec3b8d7
Exists in
main
first2
Showing 3 changed files with 407 additions and 0 deletions Inline Diff
GenerationData.py
View file @
6f53e57
File was created | 1 | import numpy as np | ||
2 | import pandas as pd | |||
3 | import statistics | |||
4 | import random | |||
5 | #from distfit import distfit | |||
6 | import csv | |||
7 | #import math | |||
8 | from scipy.special import expit | |||
9 | ||||
10 | class Generator: | |||
    def __init__(self, base):
        # base: seed dataset (a pandas DataFrame) that the generation*
        # methods sample from; generation3 grows it in place.
        self.base=base
13 | ||||
14 | def logit_Normal(self, x): | |||
15 | return 1/(1+np.exp(-x)) | |||
16 | ||||
17 | #Generation of dataset with grade and time for 15 questions and 5 complexity levels | |||
18 | def generationDatasetComplexities(self): | |||
19 | tbase=pd.DataFrame() | |||
20 | #Number of complexity levels | |||
21 | #sigma_grade=1.2#Initial | |||
22 | #mu_grade=0#initial | |||
23 | sigma_grade=0.5 | |||
24 | mu_grade=1.5 | |||
25 | sigma_time=1.7 | |||
26 | mu_time=30 | |||
27 | for rows in range(5):#5 | |||
28 | tlist=[] | |||
29 | #Number of questions | |||
30 | for ncomp in range(15):#15 #3 for initial | |||
31 | ||||
32 | if ncomp < 10:#Simulate mistakes in complexity level 1 first 3 questions | |||
33 | cgrade2=self.logit_Normal(np.random.normal(-1, 0.2, 700)) | |||
34 | #if rows == 0 and ncomp < 10:#Simulate mistakes in complexity level 1 first 3 questions | |||
35 | # omu_grade=mu_grade | |||
36 | # mu_grade=-2 | |||
37 | #if rows == 3 and ncomp < 3 :#Simulate mistakes in complexity level 3 first 3 questions | |||
38 | # omu_grade=mu_grade | |||
39 | # mu_grade=-1 | |||
40 | else: | |||
41 | cgrade2=self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 700)) | |||
42 | ||||
43 | #Number of questions (grade, time) | |||
44 | cgrade=self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 300)) | |||
45 | cgrade=np.append(cgrade, cgrade2) | |||
46 | cgrade=cgrade*10 | |||
47 | ctime=np.random.normal(mu_time, sigma_time, 1000) | |||
48 | #vcomp=np.ones(len(vgrade))*(ncomp+1) | |||
49 | result = [cgrade.tolist(), ctime.tolist()] | |||
50 | ||||
51 | tbase[len(tbase.columns)]=cgrade | |||
52 | tbase[len(tbase.columns)]=ctime | |||
53 | #omu_grade+=0.5 | |||
54 | mu_grade-=0.2 | |||
55 | sigma_grade+=0.08 | |||
56 | tbase.to_csv("data.csv", sep=" ") | |||
57 | ||||
58 | #Generation of dataset with mean of grade and mean of time for 15 questions and 10 sub-competences | |||
59 | def generationDatasetMeanSubCompetences(self): | |||
60 | tbase=[] | |||
61 | #Number of rows to generate | |||
62 | for rows in range(1000): | |||
63 | sigma_grade=1.7 | |||
64 | mu_grade=5 | |||
65 | sigma_time=1.7 | |||
66 | mu_time=30 | |||
67 | tlist=[] | |||
68 | #Number of sub-competences | |||
69 | for ncomp in range(10): | |||
70 | vgrade=[] | |||
71 | vtime=[] | |||
72 | #Number of questions (grade, time) | |||
73 | for i in range(15): | |||
74 | cgrade=np.random.normal(mu_grade, sigma_grade, 1)[0] | |||
75 | vgrade.append(cgrade) | |||
76 | ctime=np.random.normal(mu_time, sigma_time, 1)[0] | |||
77 | vtime.append(ctime) | |||
78 | nmu_grade=np.mean(vgrade) | |||
79 | nmu_time=np.mean(vtime) | |||
80 | vcomp=np.ones(len(vgrade))*(ncomp+1) | |||
81 | result = [np.mean(vgrade), np.mean(vtime)] | |||
82 | tlist=tlist + result | |||
83 | mu_grade=np.random.normal(nmu_grade, 0.5, 1)[0] | |||
84 | mu_time=np.random.normal(nmu_time, 0.5, 1)[0] | |||
85 | sigma_grade=(abs(mu_grade-nmu_grade))*0.4 | |||
86 | sigma_time=(abs(mu_time-nmu_time))*0.4 | |||
87 | #print(tlist) | |||
88 | tbase.append(tlist) | |||
89 | #print(tbase) | |||
90 | #Write the csv file | |||
91 | with open("dataMean.csv", "w", newline="") as f: | |||
92 | writer=csv.writer(f) | |||
93 | writer.writerows(tbase) | |||
94 | ||||
    def generation(self):
        # Mixup-style augmentation: interpolate two sampled rows with weight
        # vlambda, elementN = vlambda*a + (1-vlambda)*b.
        # NOTE(review): this method currently has no effect beyond returning
        # self.base unchanged -- every elementN built in the loop is
        # discarded; the concatenation step flagged below was never
        # implemented.
        vlambda = 0.5
        lbase=self.base
        for i in range(100):
            element1=lbase.sample()
            element1=vlambda*np.array(element1)
            element2=lbase.sample()
            element2=(1.0-vlambda)*np.array(element2)
            # Interpolated synthetic row (currently unused).
            elementN=pd.DataFrame(element1+element2)

            #Concatenate self.base and elementN

            
        return self.base
115 | ||||
116 | ||||
117 | #Generation with white noise | |||
118 | def generation3(self): | |||
119 | mu, sigma = 0, 0.1 | |||
120 | x=[sum(self.base.iloc[i,:]) for i in range(21)] | |||
121 | #print(x) | |||
122 | for i in range(1000): | |||
123 | element=self.base.sample() | |||
124 | noise=np.random.normal(mu, sigma, [1, element.shape[1]]) | |||
125 | nbase=[self.base, element+noise] | |||
126 | self.base=pd.concat(nbase) | |||
127 | x=[sum(self.base.iloc[i,:]) for i in range(21)] | |||
128 | return self.base | |||
129 | #print(x) | |||
130 | ||||
    def detection(self, data):
        # Fit candidate probability distributions to `data` and print the
        # ranked fit summary.
        # NOTE(review): distfit is a third-party package whose import is
        # commented out at the top of this file -- calling this method
        # as-is raises NameError.  Re-enable `from distfit import distfit`
        # before use.
        dfit=distfit()
        dfit.fit_transform(data)
        print(dfit.summary)
135 | ||||
136 | ||||
    #Generation with detection of distribution for each column
    def generation2(self):
        # Generate one synthetic row column-by-column: for each column, fit a
        # distribution (distfit), draw one value, then narrow the candidate
        # rows to those whose value in that column lies within one scale of
        # the draw -- so each subsequent column is generated conditionally on
        # the previous ones.  The finished row is appended to self.base.
        # NOTE(review): requires the commented-out `from distfit import
        # distfit` import at the top of the file; NameError as-is.
        dfit=distfit()
        lbase=np.array(self.base)
        newData=[]
        for vindex in range(lbase.shape[1]):
            dfit.fit_transform(lbase[:,vindex])
            # Scale parameter of the best-fit model, used as the window width.
            sigma=dfit.model['scale']
            nrand=dfit.generate(1)
            newData.append(nrand)
            # Keep only rows within +/- sigma of the generated value.
            lbase=lbase[(lbase[:,vindex] < (nrand + (sigma/1.0))) & (lbase[:,vindex] > (nrand - (sigma/1.0)))]
        print(newData)
        # Append the synthetic row to the base and show how correlations moved.
        self.base.loc[len(self.base.index)]=newData
        print(self.base.corr())
152 | ||||
153 | #Generation with normal distribution | |||
154 | def generation0(self): | |||
155 | lbase=self.base | |||
156 | print(lbase.corr()) | |||
157 | #print(lbase[lbase[20].values==0].corr()) | |||
158 | #print(lbase[lbase[20].values==0].iloc[1:100,:].corr()) | |||
159 | for n in range(100): | |||
160 | vindex=0 | |||
161 | newData=[] | |||
162 | lbase=self.base |
Prediction.py
View file @
6f53e57
File was created | 1 | import statistics | ||
2 | import numpy as np | |||
3 | ||||
4 | class Prediction: | |||
5 | def __init__(self, ilevels, levels, betap, deltaP, penalisation, gradeT): | |||
6 | #The level has the note and the time for each question | |||
7 | #Test data | |||
8 | self.ilevels=ilevels | |||
9 | self.levels=levels | |||
10 | #Initialization of beta distributions for each level | |||
11 | self.betap=betap | |||
12 | self.INLevel=[] | |||
13 | self.NLevel=[] | |||
14 | ||||
15 | self.maxGrade=10 | |||
16 | self.factor=10 | |||
17 | ||||
18 | self.deltaPlus=deltaP | |||
19 | self.gradeThreshold=gradeT | |||
20 | self.timePenalisation=penalisation | |||
21 | ||||
22 | def Structure(self, base, total, questions): | |||
23 | v1=[] | |||
24 | v2=[] | |||
25 | elem=1 | |||
26 | #for i in range(1,150,2): | |||
27 | for i in range(1,total,2): | |||
28 | #print(i," ",dr.tolist()[0+i:2+i]) | |||
29 | v1.append(base.tolist()[0+i:2+i]) | |||
30 | #if (elem % 15) == 0: | |||
31 | if (elem % questions) == 0: | |||
32 | v2.append(v1) | |||
33 | v1=[] | |||
34 | elem+=1 | |||
35 | #In this case, v2 is the last of all rows | |||
36 | return v2 | |||
37 | ||||
38 | def CalculateGradePenalization(self): | |||
39 | self.ilevels=self.Structure(self.ilevels,30,3) | |||
40 | self.levels=self.Structure(self.levels,150,15) | |||
41 | #Calculate the note with penalization for time used | |||
42 | self.INLevel=[[ilist[0]-((ilist[0])*((ilist[1]/60)*self.timePenalisation)) for ilist in lev] for lev in self.ilevels] | |||
43 | self.NLevel=[[ilist[0]-((ilist[0])*((ilist[1]/60)*self.timePenalisation)) for ilist in lev] for lev in self.levels] | |||
    #Generalized Thompson sampling
    def Calculate1(self, NLevel):
        """Generalized Thompson sampling over three difficulty levels.

        NLevel: per-level lists of penalized grades, indexed as
        NLevel[level][question].  Mutates self.betap in place and prints a
        verbose trace.  Hard-coded to exactly 3 levels (indices 0-2).
        NOTE(review): Calculate()/UpdateBeta() implement the same idea for
        any number of levels; this variant appears superseded.
        """
        r=1  # NOTE(review): with r == 1 the `* r` factors below are no-ops
        maxGrade=10  # local scale constant; shadows self.maxGrade
        # Multi-armed bandit loop choosing which level to play each round.
        for i in range(min(len(self.levels[0]),len(self.levels[1]),len(self.levels[2]))):
            if i==0:
                # First round: start at the easiest level.
                # NOTE(review): no posterior update happens on this round.
                IRLevel=0
            else:
                # Draw one sample from each level's posterior Beta.
                Rlevel=[np.random.beta(self.betap[0][0],self.betap[0][1]),
                        np.random.beta(self.betap[1][0],self.betap[1][1]),
                        np.random.beta(self.betap[2][0],self.betap[2][1])]
                # Play the arm with the highest sampled value.
                IRLevel=max(enumerate(Rlevel),key=lambda x: x[1])[0]
                print(Rlevel, self.betap)
                print("Mean 1: ", self.betap[0][0]/(self.betap[0][0]+self.betap[0][1]))
                print("Mean 2: ", self.betap[1][0]/(self.betap[1][0]+self.betap[1][1]))
                print("Mean 3: ", self.betap[2][0]/(self.betap[2][0]+self.betap[2][1]))
                print(IRLevel,NLevel[IRLevel][i])
                # Rewards are inverted: a low grade shifts the posterior so
                # the bandit keeps revisiting the level where the student
                # struggles.  deltaMu is a normalized shift in [-1, 1].
                grade=NLevel[IRLevel][i]
                deltaMu=(maxGrade-(2*grade))/10

                print("DeltaMu: ",deltaMu)

                # Shift alpha (index 0) by deltaMu while keeping the total
                # pseudo-count, then clamp the parameter to >= 1.
                sumAlphaBeta=self.betap[IRLevel][0]+self.betap[IRLevel][1]
                self.betap[IRLevel][0]=round((self.betap[IRLevel][0]*r)+(deltaMu*(sumAlphaBeta)),0)
                self.betap[IRLevel][0]=max(1, self.betap[IRLevel][0])

                # Beta (index 1) takes the remainder of the pseudo-count,
                # clamped to >= 1 as well.
                self.betap[IRLevel][1]=round((sumAlphaBeta*r)-self.betap[IRLevel][0],0)
                self.betap[IRLevel][1]=max(1, self.betap[IRLevel][1])

                print(self.betap[IRLevel][0], self.betap[IRLevel][1])
                print(Rlevel, self.betap)
84 | ||||
85 | #Bernoulli Thompson Sampling | |||
86 | def UpdateBeta(self, grade, level): | |||
87 | if grade >= self.gradeThreshold: | |||
88 | #Change the value of beta parameter index 1 | |||
89 | self.betap[level][1]+=self.deltaPlus | |||
90 | #Correlated Thompson Sampling | |||
91 | if level>0: | |||
92 | self.betap[level-1][1]+=self.deltaPlus/2 | |||
93 | if level<len(self.betap)-1: | |||
94 | self.betap[level+1][0]+=self.deltaPlus/2 | |||
95 | ||||
96 | else: | |||
97 | #Change the value of alpha parameter index 0 | |||
98 | self.betap[level][0]+=self.deltaPlus | |||
99 | #Correlated Thompson Sampling | |||
100 | if level>0: | |||
101 | self.betap[level-1][0]+=self.deltaPlus/2 | |||
102 | if level<len(self.betap)-1: | |||
103 | self.betap[level+1][1]+=self.deltaPlus/2 | |||
104 | ||||
105 | def InitializeBeta(self): | |||
106 | c=0 | |||
107 | for itemc in self.INLevel: | |||
108 | for i in range(len(itemc)): | |||
109 | #print(itemc[i]) | |||
110 | self.UpdateBeta(itemc[i],c) | |||
111 | c+=1 | |||
112 | ||||
113 | def Calculate(self): | |||
114 | self.InitializeBeta() | |||
115 | NLevel=self.NLevel | |||
116 | file1=open('results_slevel.csv','a+') | |||
117 | file2=open('results_sgrade.csv','a+') | |||
118 | #Here start the Multi armed bandits for choose the best level. Thompson Sampling... | |||
119 | #print("NLevel Vector: ",NLevel) | |||
120 | for i in range(len(NLevel[0])): | |||
121 | #if i==0: | |||
122 | # IRLevel=0 | |||
123 | #else: | |||
124 | #Take a sample for all calculated posterior distributions | |||
125 | Rlevel=[np.random.beta(p[0],p[1]) for p in self.betap] | |||
126 | #Take the max probability value | |||
127 | #IRLevel=max(enumerate(Rlevel),key=lambda x: x[1])[0] | |||
128 | IRLevel=max( (v, i) for i, v in enumerate(Rlevel) )[1] | |||
129 | #print(Rlevel, self.betap, [p[0]/(p[0]+p[1]) for p in self.betap]) | |||
130 | print("Stochastic ",i," ",IRLevel) | |||
131 | #print(NLevel[IRLevel][i]) | |||
132 | file1.write(str(IRLevel)+" ") | |||
133 | file2.write(str(IRLevel)+" "+str(NLevel[IRLevel][i])+" ") | |||
134 | #Rewards for succes or not of machine results, here the rewards are inversed because we want to rest into the machine with less success | |||
135 | self.UpdateBeta(NLevel[IRLevel][i],IRLevel) | |||
136 | file1.write("\n") | |||
137 | file2.write("\n") | |||
138 | file1.close() | |||
139 | file2.close() | |||
140 | ||||
141 | def CalculateSW(self): | |||
142 | file1=open('results_dlevel.csv','a+') | |||
143 | file2=open('results_dgrade.csv','a+') | |||
144 | #file2=open('results_dtm.csv','a+') | |||
145 | IRLevel=0 | |||
146 | NIRLevel=0 | |||
147 | step=0 | |||
148 | mc0=0 | |||
149 | mc1=0 | |||
150 | mc2=0 | |||
151 | mc3=0 | |||
152 | mc4=0 | |||
153 | #print("INLevel: ",INLevel) | |||
154 | #for i in range(len(NLevel[0])): | |||
155 | ||||
156 | Level=self.INLevel | |||
157 | clevel=[0,0,0,0,0] | |||
158 | vindex=[0,0,0,0,0] | |||
159 | ||||
160 | for i in range(15): | |||
161 | if i>0: | |||
162 | #print("Value ",Level) | |||
163 | Level[NIRLevel].append(self.NLevel[NIRLevel][vindex[NIRLevel]]) | |||
164 | Level[NIRLevel].pop(0) | |||
165 | #print("Value ",Level) | |||
166 | vindex[NIRLevel]+=1 | |||
167 | #vindex+=1 | |||
168 | mc0=0 | |||
169 | mc1=0 | |||
170 | mc2=0 | |||
171 | mc3=0 | |||
172 | mc4=0 | |||
173 | mc=0 | |||
174 | #print("Level Vector ",Level) | |||
175 | for IRL in range(4): | |||
176 | if IRL == 0: | |||
177 | mc0=Level[IRL][2]+Level[IRL][1]+Level[IRL][0] | |||
178 | mc0=mc0/3 | |||
179 | mc0=10*mc0/5 | |||
180 | if IRL == 1: | |||
181 | mc1=Level[IRL][2]+Level[IRL][1]+Level[IRL][0] | |||
182 | mc1=mc1/3 | |||
183 | if IRL == 2: | |||
184 | mc2=Level[IRL][2]+Level[IRL][1]+Level[IRL][0] | |||
185 | mc2=mc2/3 | |||
186 | if IRL == 3: | |||
187 | mc3=Level[IRL][2]+Level[IRL][1]+Level[IRL][0] | |||
188 | mc3=mc3/3 | |||
189 | if IRL == 4: | |||
190 | mc4=Level[IRL][2]+Level[IRL][1]+Level[IRL][0] | |||
191 | mc4=mc4/3 | |||
192 | ||||
193 | #print(mc0," ",mc1," ",mc2,"",mc3) | |||
194 | mc1=max(mc0+((10/5)*mc1), (20/5)*mc1) | |||
195 | mc2=max(mc1+((10/5)*mc2), (30/5)*mc2) | |||
196 | mc3=max(mc2+((10/5)*mc3), (40/5)*mc3) | |||
197 | mc=max(mc3+((10/5)*mc4), (50/5)*mc4) | |||
198 | #print(mc0," ",mc1," ",mc2," ",mc3," ",mc) | |||
199 | #print(mc) | |||
200 | #file2.write(str(mc)+" ") | |||
201 | if mc >= 0 and mc <= 15: | |||
202 | NIRLevel=0 | |||
203 | elif mc >= 16 and mc <= 25: | |||
204 | NIRLevel=1 | |||
205 | elif mc >= 26 and mc <= 35: |
README.md
View file @
6f53e57
File was created | 1 | # Recommender System AI-VT | ||
2 |