Commit 6f53e576888f120fd05244c44db145be1cfbd051

Authored by dsotofor
1 parent 941ec3b8d7
Exists in main

first2

Showing 3 changed files with 407 additions and 0 deletions

GenerationData.py View file @ 6f53e57
import numpy as np
import pandas as pd
import statistics
import random
from distfit import distfit #needed by detection() and generation2() below
import csv
from scipy.special import expit

class Generator:
    def __init__(self, base):
        self.base=base

    #Logistic sigmoid: mapping normal samples through it yields a
    #logit-normal distribution
    def logit_Normal(self, x):
        return 1/(1+np.exp(-x))

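    #A quick equivalence check (expit, imported above, is the same
    #logistic function):
    # >>> g=Generator([])
    # >>> bool(np.isclose(g.logit_Normal(0.0), expit(0.0)))
    # True
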
    #Generation of dataset with grade and time for 15 questions and 5 complexity levels
    def generationDatasetComplexities(self):
        tbase=pd.DataFrame()
        sigma_grade=0.5
        mu_grade=1.5
        sigma_time=1.7
        mu_time=30
        #Number of complexity levels
        for rows in range(5):
            #Number of questions
            for ncomp in range(15):
                if ncomp < 10:
                    #Simulate mistakes in the first 10 questions of each level
                    cgrade2=self.logit_Normal(np.random.normal(-1, 0.2, 700))
                else:
                    cgrade2=self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 700))
                #Grades (300 samples plus the 700 above) and times for one question
                cgrade=self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 300))
                cgrade=np.append(cgrade, cgrade2)
                cgrade=cgrade*10 #scale the logit-normal values to grades in [0, 10]
                ctime=np.random.normal(mu_time, sigma_time, 1000)
                tbase[len(tbase.columns)]=cgrade
                tbase[len(tbase.columns)]=ctime
            #Each successive complexity level has a lower mean grade and more spread
            mu_grade-=0.2
            sigma_grade+=0.08
        tbase.to_csv("data.csv", sep=" ")

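    #A minimal read-back sketch (an assumption about using the file written
    #above: even columns hold grades, odd columns hold times):
    # df=pd.read_csv("data.csv", sep=" ", index_col=0)
    # grades, times=df.iloc[:, 0::2], df.iloc[:, 1::2]
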
    #Generation of dataset with mean grade and mean time for 15 questions and 10 sub-competences
    def generationDatasetMeanSubCompetences(self):
        tbase=[]
        #Number of rows to generate
        for rows in range(1000):
            sigma_grade=1.7
            mu_grade=5
            sigma_time=1.7
            mu_time=30
            tlist=[]
            #Number of sub-competences
            for ncomp in range(10):
                vgrade=[]
                vtime=[]
                #Number of questions (grade, time)
                for i in range(15):
                    vgrade.append(np.random.normal(mu_grade, sigma_grade, 1)[0])
                    vtime.append(np.random.normal(mu_time, sigma_time, 1)[0])
                nmu_grade=np.mean(vgrade)
                nmu_time=np.mean(vtime)
                tlist=tlist+[nmu_grade, nmu_time]
                #Random walk: the next sub-competence is centred on the current means
                mu_grade=np.random.normal(nmu_grade, 0.5, 1)[0]
                mu_time=np.random.normal(nmu_time, 0.5, 1)[0]
                sigma_grade=abs(mu_grade-nmu_grade)*0.4
                sigma_time=abs(mu_time-nmu_time)*0.4
            tbase.append(tlist)
        #Write the csv file
        with open("dataMean.csv", "w", newline="") as f:
            writer=csv.writer(f)
            writer.writerows(tbase)

    #Generation of new rows by linear interpolation of two sampled rows
    def generation(self):
        vlambda=0.5
        lbase=self.base
        for i in range(100):
            element1=vlambda*np.array(lbase.sample())
            element2=(1.0-vlambda)*np.array(lbase.sample())
            elementN=pd.DataFrame(element1+element2)
            #Concatenate self.base and elementN
            self.base=pd.concat([self.base, elementN], ignore_index=True)
        return self.base

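    #Worked example of the interpolation above: with vlambda=0.5 the new
    #row is the midpoint of the two sampled rows, e.g. [2, 4] and [6, 8]
    #combine into [4, 6].
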
    #Generation with white noise
    def generation3(self):
        mu, sigma=0, 0.1
        for i in range(1000):
            element=self.base.sample()
            noise=np.random.normal(mu, sigma, [1, element.shape[1]])
            self.base=pd.concat([self.base, element+noise])
        return self.base

    def detection(self, data):
        dfit=distfit()
        dfit.fit_transform(data)
        print(dfit.summary)

    #Generation with detection of the distribution for each column
    def generation2(self):
        dfit=distfit()
        lbase=np.array(self.base)
        newData=[]
        for vindex in range(lbase.shape[1]):
            dfit.fit_transform(lbase[:,vindex])
            sigma=dfit.model['scale']
            nrand=dfit.generate(1)[0]
            newData.append(nrand)
            #Keep only the rows whose value in this column is within one sigma of the draw
            lbase=lbase[(lbase[:,vindex] < (nrand+sigma)) & (lbase[:,vindex] > (nrand-sigma))]
        print(newData)
        self.base.loc[len(self.base.index)]=newData
        print(self.base.corr())

    #Generation with a normal distribution per column
    def generation0(self):
        lbase=self.base
        print(lbase.corr())
        for n in range(100):
            newData=[]
            lbase=self.base
            for vindex in range(21):
                mu=statistics.mean(lbase[vindex])
                sigma=statistics.stdev(lbase[vindex])
                nrand=np.random.normal(mu, sigma, 1)[0]
                #Keep only the rows close to the drawn value in this column
                lbase=lbase[(lbase[vindex].values < (nrand+(sigma/100.0))) & (lbase[vindex].values > (nrand-(sigma/100.0)))]
                newData.append(nrand)
            self.base.loc[len(self.base.index)]=newData
        print(self.base.corr())

g=Generator([])
#g.detection(data)
g.generationDatasetComplexities()
#g.generationDatasetMeanSubCompetences()
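#Running this module writes data.csv (grades and times for 15 questions
#across 5 complexity levels); calling generationDatasetMeanSubCompetences()
#instead would write dataMean.csv with per-sub-competence means.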
Prediction.py View file @ 6f53e57
import statistics
import numpy as np

class Prediction:
    def __init__(self, ilevels, levels, betap, deltaP, penalisation, gradeT):
        #Each level holds the grade and the time for each question
        #Test data
        self.ilevels=ilevels
        self.levels=levels
        #Initialization of the beta distributions for each level
        self.betap=betap
        self.INLevel=[]
        self.NLevel=[]

        self.maxGrade=10
        self.factor=10

        self.deltaPlus=deltaP
        self.gradeThreshold=gradeT
        self.timePenalisation=penalisation

    #Group a flat row of interleaved (grade, time) values into levels
    def Structure(self, base, total, questions):
        v1=[]
        v2=[]
        elem=1
        for i in range(1, total, 2):
            v1.append(base.tolist()[i:i+2])
            if (elem % questions) == 0:
                v2.append(v1)
                v1=[]
            elem+=1
        #v2 holds one list of [grade, time] pairs per level
        return v2

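    #Worked example for Structure: with total=150 and questions=15, the 75
    #value pairs at indices 1..149 of the row become 5 levels of 15
    #[grade, time] pairs each (index 0 is skipped, presumably the row label).
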
    def CalculateGradePenalization(self):
        self.ilevels=self.Structure(self.ilevels, 30, 3)
        self.levels=self.Structure(self.levels, 150, 15)
        #Compute the grade with a penalization for the time used
        self.INLevel=[[ilist[0]-((ilist[0])*((ilist[1]/60)*self.timePenalisation)) for ilist in lev] for lev in self.ilevels]
        self.NLevel=[[ilist[0]-((ilist[0])*((ilist[1]/60)*self.timePenalisation)) for ilist in lev] for lev in self.levels]
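
    #Worked example of the penalization above: grade=8, time=30 and
    #timePenalisation=0.5 give 8-(8*(30/60)*0.5)=6.
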
    #Generalized Thompson sampling
    def Calculate1(self, NLevel):
        r=1
        maxGrade=10
        #Multi-armed bandit loop to choose the best level (Thompson sampling)
        for i in range(min(len(self.levels[0]), len(self.levels[1]), len(self.levels[2]))):
            if i == 0:
                Rlevel=[]
                IRLevel=0
            else:
                #Draw a sample from each calculated posterior distribution
                Rlevel=[np.random.beta(self.betap[0][0], self.betap[0][1]),
                        np.random.beta(self.betap[1][0], self.betap[1][1]),
                        np.random.beta(self.betap[2][0], self.betap[2][1])]
                IRLevel=max(enumerate(Rlevel), key=lambda x: x[1])[0]
            print(Rlevel, self.betap)
            print("Mean 1: ", self.betap[0][0]/(self.betap[0][0]+self.betap[0][1]))
            print("Mean 2: ", self.betap[1][0]/(self.betap[1][0]+self.betap[1][1]))
            print("Mean 3: ", self.betap[2][0]/(self.betap[2][0]+self.betap[2][1]))
            print(IRLevel, NLevel[IRLevel][i])
            #Rewards for success or failure of the machine results; the rewards
            #are inverted because we want to stay on the machine with less
            #success. The reward is a value normalized between 0 and 1.
            grade=NLevel[IRLevel][i]
            deltaMu=(maxGrade-(2*grade))/10
            print("DeltaMu: ", deltaMu)
            #Update the alpha parameter (index 0)
            sumAlphaBeta=self.betap[IRLevel][0]+self.betap[IRLevel][1]
            self.betap[IRLevel][0]=round((self.betap[IRLevel][0]*r)+(deltaMu*sumAlphaBeta), 0)
            #Keep the first parameter at least 1
            self.betap[IRLevel][0]=max(1, self.betap[IRLevel][0])
            #Update the beta parameter (index 1) so that alpha+beta is scaled by r
            self.betap[IRLevel][1]=round((sumAlphaBeta*r)-self.betap[IRLevel][0], 0)
            #Keep the second parameter at least 1
            self.betap[IRLevel][1]=max(1, self.betap[IRLevel][1])
            print(self.betap[IRLevel][0], self.betap[IRLevel][1])
            print(Rlevel, self.betap)

    #Bernoulli Thompson sampling
    def UpdateBeta(self, grade, level):
        if grade >= self.gradeThreshold:
            #Success: increase the beta parameter (index 1); the rewards are
            #inverted so that well-mastered levels are sampled less often
            self.betap[level][1]+=self.deltaPlus
            #Correlated Thompson sampling: propagate half the update to the neighbours
            if level > 0:
                self.betap[level-1][1]+=self.deltaPlus/2
            if level < len(self.betap)-1:
                self.betap[level+1][0]+=self.deltaPlus/2
        else:
            #Failure: increase the alpha parameter (index 0)
            self.betap[level][0]+=self.deltaPlus
            #Correlated Thompson sampling
            if level > 0:
                self.betap[level-1][0]+=self.deltaPlus/2
            if level < len(self.betap)-1:
                self.betap[level+1][1]+=self.deltaPlus/2

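    #Worked example: with deltaPlus=1, a passing grade on level 1 adds 1 to
    #betap[1][1], 0.5 to betap[0][1] and 0.5 to betap[2][0], lowering the
    #sampled mean of levels 0 and 1 and raising that of level 2.
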
    def InitializeBeta(self):
        c=0
        for itemc in self.INLevel:
            for i in range(len(itemc)):
                self.UpdateBeta(itemc[i], c)
            c+=1

    def Calculate(self):
        self.InitializeBeta()
        NLevel=self.NLevel
        file1=open('results_slevel.csv', 'a+')
        file2=open('results_sgrade.csv', 'a+')
        #Multi-armed bandit loop to choose the best level (Thompson sampling)
        for i in range(len(NLevel[0])):
            #Draw a sample from each calculated posterior distribution
            Rlevel=[np.random.beta(p[0], p[1]) for p in self.betap]
            #Take the level with the highest sampled value
            IRLevel=max((v, j) for j, v in enumerate(Rlevel))[1]
            print("Stochastic ", i, " ", IRLevel)
            file1.write(str(IRLevel)+" ")
            file2.write(str(IRLevel)+" "+str(NLevel[IRLevel][i])+" ")
            #Rewards for success or failure; they are inverted because we want
            #to stay on the level with less success
            self.UpdateBeta(NLevel[IRLevel][i], IRLevel)
        file1.write("\n")
        file2.write("\n")
        file1.close()
        file2.close()

    #Deterministic recommendation with a sliding window over the last grades
    def CalculateSW(self):
        file1=open('results_dlevel.csv', 'a+')
        file2=open('results_dgrade.csv', 'a+')
        IRLevel=0
        NIRLevel=0

        Level=self.INLevel
        vindex=[0, 0, 0, 0, 0]

        for i in range(15):
            if i > 0:
                #Slide the window: append the newest grade of the chosen level and drop its oldest
                Level[NIRLevel].append(self.NLevel[NIRLevel][vindex[NIRLevel]])
                Level[NIRLevel].pop(0)
                vindex[NIRLevel]+=1
            #Mean of the last three grades of each level
            mc0=mc1=mc2=mc3=mc4=mc=0
            for IRL in range(5):
                if IRL == 0:
                    mc0=(Level[IRL][2]+Level[IRL][1]+Level[IRL][0])/3
                    mc0=10*mc0/5
                if IRL == 1:
                    mc1=(Level[IRL][2]+Level[IRL][1]+Level[IRL][0])/3
                if IRL == 2:
                    mc2=(Level[IRL][2]+Level[IRL][1]+Level[IRL][0])/3
                if IRL == 3:
                    mc3=(Level[IRL][2]+Level[IRL][1]+Level[IRL][0])/3
                if IRL == 4:
                    mc4=(Level[IRL][2]+Level[IRL][1]+Level[IRL][0])/3

            #Accumulate the weighted level means into a single score mc
            mc1=max(mc0+((10/5)*mc1), (20/5)*mc1)
            mc2=max(mc1+((10/5)*mc2), (30/5)*mc2)
            mc3=max(mc2+((10/5)*mc3), (40/5)*mc3)
            mc=max(mc3+((10/5)*mc4), (50/5)*mc4)
            #Map the cumulative score to a recommended level
            if 0 <= mc <= 15:
                NIRLevel=0
            elif 16 <= mc <= 25:
                NIRLevel=1
            elif 26 <= mc <= 42:
                NIRLevel=2
            elif 43 <= mc <= 75:
                NIRLevel=3
            elif 76 <= mc <= 100:
                NIRLevel=4
            print("Deterministic ", i, " ", NIRLevel)
            if NIRLevel != IRLevel:
                IRLevel=NIRLevel
            file1.write(str(IRLevel)+" ")
            file2.write(str(IRLevel)+" "+str(self.NLevel[IRLevel][vindex[IRLevel]])+" ")
        file1.write("\n")
        file2.write("\n")
        file1.close()
        file2.close()
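
#A minimal usage sketch (the parameter values and the row0/row inputs are
#illustrative assumptions, not part of this commit): row0 is a flat row with
#3 initial (grade, time) pairs per level, row one with 15 pairs per level.
# p=Prediction(row0, row, [[1, 1] for _ in range(5)], 1, 0.5, 5)
# p.CalculateGradePenalization()
# p.Calculate()    #stochastic (Thompson sampling) recommendation
# p.CalculateSW()  #deterministic (sliding window) recommendation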
README.md View file @ 6f53e57
# Recommender System AI-VT

This project contains the source code and results of the deterministic and stochastic recommendation models.
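
A minimal quick start (a sketch based on the module-level calls in `GenerationData.py`; importing the module also triggers the generation):

```python
# Generate the synthetic dataset (writes data.csv in the working directory)
from GenerationData import Generator

g = Generator([])
g.generationDatasetComplexities()
```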