Commit 6f53e576888f120fd05244c44db145be1cfbd051

Authored by dsotofor
1 parent 941ec3b8d7
Exists in main

first2

Showing 3 changed files with 407 additions and 0 deletions

GenerationData.py View file @ 6f53e57
File was created

import numpy as np
import pandas as pd
import statistics
import random
from distfit import distfit #used by detection() and generation2() below
import csv
#import math
from scipy.special import expit

class Generator:
    def __init__(self, base):
        self.base=base

    #Logistic sigmoid, equivalent to scipy.special.expit: maps a normal sample into (0, 1)
    def logit_Normal(self, x):
        return 1/(1+np.exp(-x))
    #Generate a dataset of (grade, time) pairs for 15 questions at 5 complexity levels
    def generationDatasetComplexities(self):
        tbase=pd.DataFrame()
        #Initial parameters of the grade and time distributions
        #sigma_grade=1.2#Initial
        #mu_grade=0#initial
        sigma_grade=0.5
        mu_grade=1.5
        sigma_time=1.7
        mu_time=30
        #Number of complexity levels
        for rows in range(5):
            #Number of questions
            for ncomp in range(15):#3 for the initial tests
                if ncomp < 10:#Simulate mistakes in the first questions of each level
                    cgrade2=self.logit_Normal(np.random.normal(-1, 0.2, 700))
                #if rows == 0 and ncomp < 10:#Simulate mistakes in complexity level 1 first 3 questions
                #    omu_grade=mu_grade
                #    mu_grade=-2
                #if rows == 3 and ncomp < 3:#Simulate mistakes in complexity level 3 first 3 questions
                #    omu_grade=mu_grade
                #    mu_grade=-1
                else:
                    cgrade2=self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 700))

                #Draw the (grade, time) samples for this question
                cgrade=self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 300))
                cgrade=np.append(cgrade, cgrade2)
                cgrade=cgrade*10
                ctime=np.random.normal(mu_time, sigma_time, 1000)
                #vcomp=np.ones(len(vgrade))*(ncomp+1)
                tbase[len(tbase.columns)]=cgrade
                tbase[len(tbase.columns)]=ctime
            #omu_grade+=0.5
            mu_grade-=0.2
            sigma_grade+=0.08
        tbase.to_csv("data.csv", sep=" ")

    #Generate a dataset with the mean grade and mean time over 15 questions for 10 sub-competences
    def generationDatasetMeanSubCompetences(self):
        tbase=[]
        #Number of rows to generate
        for rows in range(1000):
            sigma_grade=1.7
            mu_grade=5
            sigma_time=1.7
            mu_time=30
            tlist=[]
            #Number of sub-competences
            for ncomp in range(10):
                vgrade=[]
                vtime=[]
                #Number of questions (grade, time)
                for i in range(15):
                    cgrade=np.random.normal(mu_grade, sigma_grade, 1)[0]
                    vgrade.append(cgrade)
                    ctime=np.random.normal(mu_time, sigma_time, 1)[0]
                    vtime.append(ctime)
                nmu_grade=np.mean(vgrade)
                nmu_time=np.mean(vtime)
                vcomp=np.ones(len(vgrade))*(ncomp+1)
                result=[np.mean(vgrade), np.mean(vtime)]
                tlist=tlist+result
                #Drift the means for the next sub-competence and tie the spread to the size of the drift
                mu_grade=np.random.normal(nmu_grade, 0.5, 1)[0]
                mu_time=np.random.normal(nmu_time, 0.5, 1)[0]
                sigma_grade=(abs(mu_grade-nmu_grade))*0.4
                sigma_time=(abs(mu_time-nmu_time))*0.4
            #print(tlist)
            tbase.append(tlist)
        #print(tbase)
        #Write the csv file
        with open("dataMean.csv", "w", newline="") as f:
            writer=csv.writer(f)
            writer.writerows(tbase)

    #Generate new rows as convex combinations of two sampled rows
    def generation(self):
        vlambda=0.5
        lbase=self.base
        #print(lbase)
        for i in range(100):
            element1=lbase.sample()
            element1=vlambda*np.array(element1)
            element2=lbase.sample()
            element2=(1.0-vlambda)*np.array(element2)
            #print(element1)
            #print(element2)
            elementN=pd.DataFrame(element1+element2)
            #Concatenate self.base and elementN (assumes elementN keeps the same column labels as self.base)
            self.base=pd.concat([self.base, elementN], ignore_index=True)
        return self.base

    #Generation with white noise
    def generation3(self):
        mu, sigma = 0, 0.1
        x=[sum(self.base.iloc[i,:]) for i in range(21)]
        #print(x)
        for i in range(1000):
            element=self.base.sample()
            noise=np.random.normal(mu, sigma, [1, element.shape[1]])
            nbase=[self.base, element+noise]
            self.base=pd.concat(nbase)
        x=[sum(self.base.iloc[i,:]) for i in range(21)]
        return self.base

    #Fit candidate distributions to the data and print the summary
    def detection(self, data):
        dfit=distfit()
        dfit.fit_transform(data)
        print(dfit.summary)

    #Generation with detection of the distribution for each column
    def generation2(self):
        dfit=distfit()
        lbase=np.array(self.base)
        newData=[]
        for vindex in range(lbase.shape[1]):
            #print("Column: ",lbase[:,vindex])
            dfit.fit_transform(lbase[:,vindex])
            sigma=dfit.model['scale']
            nrand=dfit.generate(1)
            newData.append(nrand)
            #Keep only the rows whose value in this column lies within one sigma of the generated value
            lbase=lbase[(lbase[:,vindex] < (nrand + (sigma/1.0))) & (lbase[:,vindex] > (nrand - (sigma/1.0)))]
        print(newData)
        self.base.loc[len(self.base.index)]=newData
        print(self.base.corr())

    #Generation with normal distribution
    def generation0(self):
        lbase=self.base
        print(lbase.corr())
        #print(lbase[lbase[20].values==0].corr())
        #print(lbase[lbase[20].values==0].iloc[1:100,:].corr())
        for n in range(100):
            vindex=0
            newData=[]
            lbase=self.base
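
A minimal sketch of how the Generator class might be driven, assuming the data.csv file written by generationDatasetComplexities above; the module name GenerationData and the read options mirroring the sep=" " used in to_csv are the only other assumptions:

import pandas as pd
from GenerationData import Generator

#Load the space-separated dataset written by generationDatasetComplexities
base=pd.read_csv("data.csv", sep=" ", index_col=0)
gen=Generator(base)
#Append 1000 noisy copies of sampled rows to the dataset
augmented=gen.generation3()
print(augmented.shape)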
Prediction.py View file @ 6f53e57

File was created

import statistics
import numpy as np
class Prediction:
    def __init__(self, ilevels, levels, betap, deltaP, penalisation, gradeT):
        #Each level holds the grade and the time for each question
        #Test data
        self.ilevels=ilevels
        self.levels=levels
        #Initialization of the beta distributions, one per level
        self.betap=betap
        self.INLevel=[]
        self.NLevel=[]

        self.maxGrade=10
        self.factor=10

        self.deltaPlus=deltaP
        self.gradeThreshold=gradeT
        self.timePenalisation=penalisation

    #Group a flat row of values into (grade, time) pairs, one sub-list per level
    def Structure(self, base, total, questions):
        v1=[]
        v2=[]
        elem=1
        #for i in range(1,150,2):
        for i in range(1,total,2):
            v1.append(base.tolist()[0+i:2+i])
            #if (elem % 15) == 0:
            if (elem % questions) == 0:
                v2.append(v1)
                v1=[]
            elem+=1
        #v2 holds one list of (grade, time) pairs per level
        return v2

    def CalculateGradePenalization(self):
        self.ilevels=self.Structure(self.ilevels,30,3)
        self.levels=self.Structure(self.levels,150,15)
        #Compute the grade with a penalty proportional to the time used
        self.INLevel=[[ilist[0]-((ilist[0])*((ilist[1]/60)*self.timePenalisation)) for ilist in lev] for lev in self.ilevels]
        self.NLevel=[[ilist[0]-((ilist[0])*((ilist[1]/60)*self.timePenalisation)) for ilist in lev] for lev in self.levels]
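    #For example, a grade of 8 obtained in 30 seconds with timePenalisation=0.5
    #becomes 8-(8*(30/60)*0.5)=6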
    #Generalized Thompson sampling
    def Calculate1(self, NLevel):
        r=1
        maxGrade=10
        #Here starts the multi-armed bandit that chooses the best level (Thompson sampling)
        for i in range(min(len(self.levels[0]),len(self.levels[1]),len(self.levels[2]))):
            if i==0:
                #No posterior samples yet on the first pull
                Rlevel=[]
                IRLevel=0
            else:
                #Take a sample from every calculated posterior distribution
                Rlevel=[np.random.beta(self.betap[0][0],self.betap[0][1]),
                        np.random.beta(self.betap[1][0],self.betap[1][1]),
                        np.random.beta(self.betap[2][0],self.betap[2][1])]
                IRLevel=max(enumerate(Rlevel),key=lambda x: x[1])[0]
                print(Rlevel, self.betap)
                print("Mean 1: ", self.betap[0][0]/(self.betap[0][0]+self.betap[0][1]))
                print("Mean 2: ", self.betap[1][0]/(self.betap[1][0]+self.betap[1][1]))
                print("Mean 3: ", self.betap[2][0]/(self.betap[2][0]+self.betap[2][1]))
                print(IRLevel,NLevel[IRLevel][i])
            #Reward for success or failure of the result; the reward is inverted because we want to stay on the level with less success. The reward is a normalized value between 0 and 1
            #NLevel[IRLevel][i]=NLevel[IRLevel][i]/self.maxGrade
            grade=NLevel[IRLevel][i]
            deltaMu=(maxGrade-(2*grade))/10

            print("DeltaMu: ",deltaMu)

            #Update the alpha parameter (index 0)
            sumAlphaBeta=self.betap[IRLevel][0]+self.betap[IRLevel][1]
            self.betap[IRLevel][0]=round((self.betap[IRLevel][0]*r)+(deltaMu*(sumAlphaBeta)),0)
            #Keep the first parameter at 1 or above
            self.betap[IRLevel][0]=max(1, self.betap[IRLevel][0])

            #Update the beta parameter (index 1); with r=1 the sum alpha+beta is preserved
            self.betap[IRLevel][1]=round((sumAlphaBeta*r)-self.betap[IRLevel][0],0)
            #Keep the second parameter at 1 or above
            self.betap[IRLevel][1]=max(1, self.betap[IRLevel][1])

            print(self.betap[IRLevel][0], self.betap[IRLevel][1])
            print(Rlevel, self.betap)

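    #For example, with betap[IRLevel]=[5,5] and grade=8: deltaMu=(10-16)/10=-0.6,
    #sumAlphaBeta=10, alpha=round(5-6)=-1 which is clipped to 1, and beta=round(10-1)=9,
    #so the posterior mean falls from 0.5 to 0.1 and a well-mastered level is drawn less often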
    #Bernoulli Thompson sampling
    def UpdateBeta(self, grade, level):
        if grade >= self.gradeThreshold:
            #Success: increase the beta parameter (index 1) so the level is drawn less often
            self.betap[level][1]+=self.deltaPlus
            #Correlated Thompson sampling: propagate half the update to the neighbouring levels
            if level>0:
                self.betap[level-1][1]+=self.deltaPlus/2
            if level<len(self.betap)-1:
                self.betap[level+1][0]+=self.deltaPlus/2
        else:
            #Failure: increase the alpha parameter (index 0) so the level is drawn more often
            self.betap[level][0]+=self.deltaPlus
            #Correlated Thompson sampling: propagate half the update to the neighbouring levels
            if level>0:
                self.betap[level-1][0]+=self.deltaPlus/2
            if level<len(self.betap)-1:
                self.betap[level+1][1]+=self.deltaPlus/2

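    #For example, with deltaPlus=2, gradeThreshold=6 and a grade of 8 at level 1:
    #betap[1][1]+=2, betap[0][1]+=1 and betap[2][0]+=1, nudging the sampler
    #away from levels 0 and 1 and towards level 2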
    #Warm up the beta parameters with the grades of the initial levels
    def InitializeBeta(self):
        c=0
        for itemc in self.INLevel:
            for i in range(len(itemc)):
                #print(itemc[i])
                self.UpdateBeta(itemc[i],c)
            c+=1

    def Calculate(self):
        self.InitializeBeta()
        NLevel=self.NLevel
        file1=open('results_slevel.csv','a+')
        file2=open('results_sgrade.csv','a+')
        #Here starts the multi-armed bandit that chooses the best level (Thompson sampling)
        #print("NLevel Vector: ",NLevel)
        for i in range(len(NLevel[0])):
            #Take a sample from every calculated posterior distribution
            Rlevel=[np.random.beta(p[0],p[1]) for p in self.betap]
            #Take the index of the maximum sampled value
            IRLevel=max((v, j) for j, v in enumerate(Rlevel))[1]
            #print(Rlevel, self.betap, [p[0]/(p[0]+p[1]) for p in self.betap])
            print("Stochastic ",i," ",IRLevel)
            file1.write(str(IRLevel)+" ")
            file2.write(str(IRLevel)+" "+str(NLevel[IRLevel][i])+" ")
            #Reward for success or failure of the result; the reward is inverted because we want to stay on the level with less success
            self.UpdateBeta(NLevel[IRLevel][i],IRLevel)
        file1.write("\n")
        file2.write("\n")
        file1.close()
        file2.close()

    #Sliding-window strategy over the most recent grades of each level
    def CalculateSW(self):
        file1=open('results_dlevel.csv','a+')
        file2=open('results_dgrade.csv','a+')
        #file2=open('results_dtm.csv','a+')
        IRLevel=0
        NIRLevel=0
        step=0
        mc0=0
        mc1=0
        mc2=0
        mc3=0
        mc4=0

        #Seed the sliding window with the initial levels
        Level=self.INLevel
        clevel=[0,0,0,0,0]
        vindex=[0,0,0,0,0]

        for i in range(15):
            if i>0:
                #Slide the window of the chosen level: append the next grade and drop the oldest
                Level[NIRLevel].append(self.NLevel[NIRLevel][vindex[NIRLevel]])
                Level[NIRLevel].pop(0)
                vindex[NIRLevel]+=1
            mc0=0
            mc1=0
            mc2=0
            mc3=0
            mc4=0
            mc=0
            #Mean of the last three grades for each of the five levels
            for IRL in range(5):
                if IRL == 0:
                    mc0=Level[IRL][2]+Level[IRL][1]+Level[IRL][0]
                    mc0=mc0/3
                    mc0=10*mc0/5
                if IRL == 1:
                    mc1=Level[IRL][2]+Level[IRL][1]+Level[IRL][0]
                    mc1=mc1/3
                if IRL == 2:
                    mc2=Level[IRL][2]+Level[IRL][1]+Level[IRL][0]
                    mc2=mc2/3
                if IRL == 3:
                    mc3=Level[IRL][2]+Level[IRL][1]+Level[IRL][0]
                    mc3=mc3/3
                if IRL == 4:
                    mc4=Level[IRL][2]+Level[IRL][1]+Level[IRL][0]
                    mc4=mc4/3

            #Combine the per-level means into a single cumulative score
            mc1=max(mc0+((10/5)*mc1), (20/5)*mc1)
            mc2=max(mc1+((10/5)*mc2), (30/5)*mc2)
            mc3=max(mc2+((10/5)*mc3), (40/5)*mc3)
            mc=max(mc3+((10/5)*mc4), (50/5)*mc4)
            #Map the cumulative score to the next recommended level
            if mc >= 0 and mc <= 15:
                NIRLevel=0
            elif mc >= 16 and mc <= 25:
                NIRLevel=1
            elif mc >= 26 and mc <= 35:
File was created

# Recommender System AI-VT
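
A minimal sketch of how the Prediction class might be driven with the data generated above; the commit includes no driver script, so the data layout (the index column kept so that the (grade, time) pairs start at offset 1, as Structure() expects) and the hyper-parameter values are assumptions:

import pandas as pd
from Prediction import Prediction

#One row of generated (grade, time) values, keeping the index column
row=pd.read_csv("data.csv", sep=" ").iloc[0,:]

#One Beta(1, 1) prior per level, plus assumed hyper-parameter values
betap=[[1,1],[1,1],[1,1],[1,1],[1,1]]
pred=Prediction(row, row, betap, deltaP=2, penalisation=0.5, gradeT=6)
pred.CalculateGradePenalization()
pred.Calculate()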