From 6f53e576888f120fd05244c44db145be1cfbd051 Mon Sep 17 00:00:00 2001
From: dsotofor
Date: Mon, 20 Nov 2023 11:41:15 +0100
Subject: [PATCH] first2

---
 GenerationData.py | 181 +++++++++++++++++++++++++++++++++++++++++++
 Prediction.py     | 223 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 README.md         |   3 +
 3 files changed, 407 insertions(+)
 create mode 100644 GenerationData.py
 create mode 100644 Prediction.py
 create mode 100644 README.md

diff --git a/GenerationData.py b/GenerationData.py
new file mode 100644
index 0000000..21d907a
--- /dev/null
+++ b/GenerationData.py
@@ -0,0 +1,181 @@
+import numpy as np
+import pandas as pd
+import statistics
+import random
+#from distfit import distfit
+import csv
+#import math
+from scipy.special import expit
+
+class Generator:
+    def __init__(self, base):
+        self.base=base
+
+    def logit_Normal(self, x):
+        return 1/(1+np.exp(-x))
+
+    #Generation of a dataset with grade and time for 15 questions and 5 complexity levels
+    def generationDatasetComplexities(self):
+        tbase=pd.DataFrame()
+        #Number of complexity levels
+        #sigma_grade=1.2#Initial
+        #mu_grade=0#initial
+        sigma_grade=0.5
+        mu_grade=1.5
+        sigma_time=1.7
+        mu_time=30
+        for rows in range(5):#5
+            tlist=[]
+            #Number of questions
+            for ncomp in range(15):#15 #3 for initial
+                if ncomp < 10:#Simulate mistakes in the first 10 questions of each complexity level
+                    cgrade2=self.logit_Normal(np.random.normal(-1, 0.2, 700))
+                    #if rows == 0 and ncomp < 10:#Simulate mistakes in complexity level 1 first 3 questions
+                    #    omu_grade=mu_grade
+                    #    mu_grade=-2
+                    #if rows == 3 and ncomp < 3:#Simulate mistakes in complexity level 3 first 3 questions
+                    #    omu_grade=mu_grade
+                    #    mu_grade=-1
+                else:
+                    cgrade2=self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 700))
+                #Number of questions (grade, time)
+                cgrade=self.logit_Normal(np.random.normal(mu_grade, sigma_grade, 300))
+                cgrade=np.append(cgrade, cgrade2)
+                cgrade=cgrade*10
+                ctime=np.random.normal(mu_time, sigma_time, 1000)
+                #vcomp=np.ones(len(vgrade))*(ncomp+1)
+                result = [cgrade.tolist(), ctime.tolist()]
+                tbase[len(tbase.columns)]=cgrade
+                tbase[len(tbase.columns)]=ctime
+                #omu_grade+=0.5
+                mu_grade-=0.2
+                sigma_grade+=0.08
+        tbase.to_csv("data.csv", sep=" ")
+
+    #Generation of a dataset with mean grade and mean time for 15 questions and 10 sub-competences
+    def generationDatasetMeanSubCompetences(self):
+        tbase=[]
+        #Number of rows to generate
+        for rows in range(1000):
+            sigma_grade=1.7
+            mu_grade=5
+            sigma_time=1.7
+            mu_time=30
+            tlist=[]
+            #Number of sub-competences
+            for ncomp in range(10):
+                vgrade=[]
+                vtime=[]
+                #Number of questions (grade, time)
+                for i in range(15):
+                    cgrade=np.random.normal(mu_grade, sigma_grade, 1)[0]
+                    vgrade.append(cgrade)
+                    ctime=np.random.normal(mu_time, sigma_time, 1)[0]
+                    vtime.append(ctime)
+                nmu_grade=np.mean(vgrade)
+                nmu_time=np.mean(vtime)
+                vcomp=np.ones(len(vgrade))*(ncomp+1)
+                result = [np.mean(vgrade), np.mean(vtime)]
+                tlist=tlist + result
+                mu_grade=np.random.normal(nmu_grade, 0.5, 1)[0]
+                mu_time=np.random.normal(nmu_time, 0.5, 1)[0]
+                sigma_grade=(abs(mu_grade-nmu_grade))*0.4
+                sigma_time=(abs(mu_time-nmu_time))*0.4
+            #print(tlist)
+            tbase.append(tlist)
+        #print(tbase)
+        #Write the csv file
+        with open("dataMean.csv", "w", newline="") as f:
+            writer=csv.writer(f)
+            writer.writerows(tbase)
+
+    #Generation by interpolation between two sampled rows
+    def generation(self):
+        vlambda = 0.5
+        lbase=self.base
+        #print(lbase)
+        for i in range(100):
+            element1=lbase.sample()
+            element1=vlambda*np.array(element1)
+            element2=lbase.sample()
+            element2=(1.0-vlambda)*np.array(element2)
+            #print(element1)
+            #print(element2)
+            #print(element1[0]+element2[0])
+            elementN=pd.DataFrame(element1+element2, columns=lbase.columns)
+            #print(elementN)
+            #Concatenate self.base and elementN
+            self.base=pd.concat([self.base, elementN], ignore_index=True)
+        return self.base
+        #print(x)
+
+    #Generation with white noise
+    def generation3(self):
+        mu, sigma = 0, 0.1
+        x=[sum(self.base.iloc[i,:]) for i in range(21)]
+        #print(x)
+        for i in range(1000):
+            element=self.base.sample()
+            noise=np.random.normal(mu, sigma, [1, element.shape[1]])
+            nbase=[self.base, element+noise]
+            self.base=pd.concat(nbase)
+        x=[sum(self.base.iloc[i,:]) for i in range(21)]
+        return self.base
+        #print(x)
+
+    def detection(self, data):
+        from distfit import distfit#distfit is only needed by this method and generation2
+        dfit=distfit()
+        dfit.fit_transform(data)
+        print(dfit.summary)
+
+    #Generation with detection of the distribution of each column
+    def generation2(self):
+        from distfit import distfit#distfit is only needed by this method and detection
+        dfit=distfit()
+        lbase=np.array(self.base)
+        newData=[]
+        for vindex in range(lbase.shape[1]):
+            #print("Column: ",lbase[:,vindex])
+            dfit.fit_transform(lbase[:,vindex])
+            sigma=dfit.model['scale']
+            nrand=dfit.generate(1)
+            newData.append(nrand)
+            lbase=lbase[(lbase[:,vindex] < (nrand + (sigma/1.0))) & (lbase[:,vindex] > (nrand - (sigma/1.0)))]
+        print(newData)
+        self.base.loc[len(self.base.index)]=newData
+        print(self.base.corr())
+
+    #Generation with a normal distribution per column
+    def generation0(self):
+        lbase=self.base
+        print(lbase.corr())
+        #print(lbase[lbase[20].values==0].corr())
+        #print(lbase[lbase[20].values==0].iloc[1:100,:].corr())
+        for n in range(100):
+            vindex=0
+            newData=[]
+            lbase=self.base
+            for vindex in range(21):
+                #mu=statistics.median(self.base[vindex])
+                mu=statistics.mean(lbase[vindex])
+                sigma=statistics.stdev(lbase[vindex])
+                nrand=np.random.normal(mu, sigma, 1)[0]
+                #print(mu, " ", sigma, nrand)
+                #print(self.base.head())
+                lbase=lbase[(lbase[vindex].values < (nrand + (sigma/100.0))) & (lbase[vindex].values > (nrand - (sigma/100.0)))]
+                newData.append(nrand)
+            #print(lbase)
+            #print(newData)
+            self.base.loc[len(self.base.index)]=newData
+        print(self.base.corr())
+
+
+g=Generator([])
+#g.detection(data)
+g.generationDatasetComplexities()
+#g.generationDatasetMeanSubCompetences()
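+
+#Illustrative usage (assumed, kept commented out): read the generated file back and
+#augment it with white noise; the file name dataAugmented.csv is only an example.
+#g2=Generator(pd.read_csv("data.csv", sep=" ", index_col=0))
+#augmented=g2.generation3()
+#augmented.to_csv("dataAugmented.csv", sep=" ")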
diff --git a/Prediction.py b/Prediction.py
new file mode 100644
index 0000000..f19969f
--- /dev/null
+++ b/Prediction.py
@@ -0,0 +1,223 @@
+import statistics
+import numpy as np
+
+class Prediction:
+    def __init__(self, ilevels, levels, betap, deltaP, penalisation, gradeT):
+        #Each level holds the grade and the time for each question
+        #Test data
+        self.ilevels=ilevels
+        self.levels=levels
+        #Initialization of the beta distributions for each level
+        self.betap=betap
+        self.INLevel=[]
+        self.NLevel=[]
+
+        self.maxGrade=10
+        self.factor=10
+
+        self.deltaPlus=deltaP
+        self.gradeThreshold=gradeT
+        self.timePenalisation=penalisation
+
+    #Reshape a flat row of alternating (grade, time) values into blocks of `questions` pairs
+    def Structure(self, base, total, questions):
+        v1=[]
+        v2=[]
+        elem=1
+        #for i in range(1,150,2):
+        for i in range(1,total,2):
+            #print(i," ",dr.tolist()[0+i:2+i])
+            v1.append(base.tolist()[0+i:2+i])
+            #if (elem % 15) == 0:
+            if (elem % questions) == 0:
+                v2.append(v1)
+                v1=[]
+            elem+=1
+        #v2 holds one block of (grade, time) pairs per complexity level
+        return v2
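+
+    #Illustration (assumed row layout, with index 0 holding the row identifier): for
+    #r.tolist()==[id, g1, t1, g2, t2, g3, t3, g4, t4, g5, t5, g6, t6], Structure(r, 12, 2)
+    #returns [[[g1, t1], [g2, t2]], [[g3, t3], [g4, t4]], [[g5, t5], [g6, t6]]], i.e. the
+    #(grade, time) pairs grouped into blocks of `questions` pairs per complexity level.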
+
+    def CalculateGradePenalization(self):
+        self.ilevels=self.Structure(self.ilevels,30,3)
+        self.levels=self.Structure(self.levels,150,15)
+        #Calculate the grade with a penalisation for the time used
+        self.INLevel=[[ilist[0]-((ilist[0])*((ilist[1]/60)*self.timePenalisation)) for ilist in lev] for lev in self.ilevels]
+        self.NLevel=[[ilist[0]-((ilist[0])*((ilist[1]/60)*self.timePenalisation)) for ilist in lev] for lev in self.levels]
+
+    #Generalized Thompson Sampling
+    def Calculate1(self, NLevel):
+        r=1
+        maxGrade=10
+        Rlevel=[]
+        #Here the multi-armed bandit chooses the best level (Thompson Sampling)
+        for i in range(min(len(self.levels[0]),len(self.levels[1]),len(self.levels[2]))):
+            if i==0:
+                IRLevel=0
+            else:
+                #Take a sample from every calculated posterior distribution
+                Rlevel=[np.random.beta(self.betap[0][0],self.betap[0][1]),
+                        np.random.beta(self.betap[1][0],self.betap[1][1]),
+                        np.random.beta(self.betap[2][0],self.betap[2][1])]
+                IRLevel=max(enumerate(Rlevel),key=lambda x: x[1])[0]
+                print(Rlevel, self.betap)
+                print("Mean 1: ", self.betap[0][0]/(self.betap[0][0]+self.betap[0][1]))
+                print("Mean 2: ", self.betap[1][0]/(self.betap[1][0]+self.betap[1][1]))
+                print("Mean 3: ", self.betap[2][0]/(self.betap[2][0]+self.betap[2][1]))
+            print(IRLevel,NLevel[IRLevel][i])
+            #Rewards for the success or failure of each machine; the rewards are inverted because we want to stay on the machine with less success. The reward is a normalized value between 0 and 1
+            #NLevel[IRLevel][i]=NLevel[IRLevel][i]/self.maxGrade
+            grade=NLevel[IRLevel][i]
+            deltaMu=(maxGrade-(2*grade))/10
+
+            print("DeltaMu: ",deltaMu)
+
+            #Change the value of the beta parameter at index 0
+            sumAlphaBeta=self.betap[IRLevel][0]+self.betap[IRLevel][1]
+            self.betap[IRLevel][0]=round((self.betap[IRLevel][0]*r)+(deltaMu*(sumAlphaBeta)),0)
+            #Control the limits of the value of the first parameter
+            self.betap[IRLevel][0]=max(1, self.betap[IRLevel][0])
+
+            #Change the value of the beta parameter at index 1
+            self.betap[IRLevel][1]=round((sumAlphaBeta*r)-self.betap[IRLevel][0],0)
+            #Control the limits of the value of the second parameter
+            self.betap[IRLevel][1]=max(1, self.betap[IRLevel][1])
+
+            print(self.betap[IRLevel][0], self.betap[IRLevel][1])
+            print(Rlevel, self.betap)
+
+    #Bernoulli Thompson Sampling
+    def UpdateBeta(self, grade, level):
+        if grade >= self.gradeThreshold:
+            #Change the value of the beta parameter at index 1
+            self.betap[level][1]+=self.deltaPlus
+            #Correlated Thompson Sampling: propagate half of the update to the neighbouring levels
+            if level>0:
+                self.betap[level-1][1]+=self.deltaPlus/2
+            if level<len(self.betap)-1:
+                self.betap[level+1][1]+=self.deltaPlus/2
+        else:
+            #Symmetric update of the beta parameter at index 0 when the grade is below the threshold
+            self.betap[level][0]+=self.deltaPlus
+            if level>0:
+                self.betap[level-1][0]+=self.deltaPlus/2
+            if level<len(self.betap)-1:
+                self.betap[level+1][0]+=self.deltaPlus/2
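+
+    #Worked examples (deltaPlus=1 and gradeThreshold=5 are illustrative values only):
+    #Calculate1: with betap[IRLevel]=[5,5] and grade=8, deltaMu=(10-16)/10=-0.6, so the pair
+    #becomes [1,9] and its mean drops from 0.5 to 0.1, making a well-mastered level less
+    #likely to be sampled again.
+    #UpdateBeta: with betap=[[1,1],[1,1],[1,1]], UpdateBeta(8, 1) gives [[1,1.5],[1,2],[1,1.5]]
+    #and UpdateBeta(2, 1) gives [[1.5,1],[2,1],[1.5,1]]: the chosen level moves by deltaPlus and
+    #its neighbours by deltaPlus/2, steering the recommendation towards levels the learner
+    #still fails.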
+
+    #Deterministic recommendation of the complexity level
+            #print("Value ",Level)
+            Level[NIRLevel].append(self.NLevel[NIRLevel][vindex[NIRLevel]])
+            Level[NIRLevel].pop(0)
+            #print("Value ",Level)
+            vindex[NIRLevel]+=1
+            #vindex+=1
+            mc0=0
+            mc1=0
+            mc2=0
+            mc3=0
+            mc4=0
+            mc=0
+            #print("Level Vector ",Level)
+            for IRL in range(5):
+                if IRL == 0:
+                    mc0=Level[IRL][2]+Level[IRL][1]+Level[IRL][0]
+                    mc0=mc0/3
+                    mc0=10*mc0/5
+                if IRL == 1:
+                    mc1=Level[IRL][2]+Level[IRL][1]+Level[IRL][0]
+                    mc1=mc1/3
+                if IRL == 2:
+                    mc2=Level[IRL][2]+Level[IRL][1]+Level[IRL][0]
+                    mc2=mc2/3
+                if IRL == 3:
+                    mc3=Level[IRL][2]+Level[IRL][1]+Level[IRL][0]
+                    mc3=mc3/3
+                if IRL == 4:
+                    mc4=Level[IRL][2]+Level[IRL][1]+Level[IRL][0]
+                    mc4=mc4/3
+            #print(mc0," ",mc1," ",mc2,"",mc3)
+            #Accumulate the per-level means into a single mastery score mc between 0 and 100
+            mc1=max(mc0+((10/5)*mc1), (20/5)*mc1)
+            mc2=max(mc1+((10/5)*mc2), (30/5)*mc2)
+            mc3=max(mc2+((10/5)*mc3), (40/5)*mc3)
+            mc=max(mc3+((10/5)*mc4), (50/5)*mc4)
+            #print(mc0," ",mc1," ",mc2," ",mc3," ",mc)
+            #print(mc)
+            #file2.write(str(mc)+" ")
+            #Map the score mc to the next recommended level
+            if mc >= 0 and mc <= 15:
+                NIRLevel=0
+            elif mc >= 16 and mc <= 25:
+                NIRLevel=1
+            elif mc >= 26 and mc <= 35:
+                NIRLevel=2
+            elif mc >= 36 and mc <= 42:
+                NIRLevel=2
+            elif mc >= 43 and mc <= 50:
+                NIRLevel=3
+            elif mc >= 51 and mc <= 75:
+                NIRLevel=3
+            elif mc >= 76 and mc <= 100:
+                NIRLevel=4
+            print("Deterministic ",i," ",NIRLevel)
+            if NIRLevel != IRLevel:
+                IRLevel=NIRLevel
+            file1.write(str(IRLevel)+" ")
+            file2.write(str(IRLevel)+" "+str(self.NLevel[IRLevel][vindex[IRLevel]])+" ")
+        file1.write("\n")
+        file2.write("\n")
+        file1.close()
+        file2.close()
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..780d204
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# Recommender System AI-VT
+
+This project contains the source code and results of the deterministic and stochastic recommendation models.
-- 
1.7.10.4