# ItemCF--Python
# 5,407 reads
# coding = utf-8
# Please feel free to contact me if you have any questions about the code.
__author__ = 'wangjinkun@mail.hfut.edu.cn'
import numpy as np
import time
def load_matrix(filename, num_users, num_items):
    """Load an interaction file into a dense binary user-item matrix.

    Each non-blank line must contain exactly four whitespace-separated
    fields. The first two are the user id and the item id; the remaining
    two are ignored. Ids are converted to zero-based indices by
    subtracting 1, and the matching cell is set to 1.0 regardless of the
    ignored fields.

    Args:
        filename: Path of the text file to read.
        num_users: Number of rows of the returned matrix.
        num_items: Number of columns of the returned matrix.

    Returns:
        A ``(num_users, num_items)`` NumPy array of 0.0 / 1.0 values.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields
            or an id is not an integer.
        IndexError: If an id falls outside the matrix bounds.
    """
    t0 = time.time()
    matrix = np.zeros((num_users, num_items))
    # The context manager closes the file even if parsing raises.
    with open(filename) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            user, item, _, _ = fields
            matrix[int(user) - 1, int(item) - 1] = 1.0
    t1 = time.time()
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Finished loading matrix in %f seconds' % (t1 - t0))
    return matrix
class ItemCF:
    """Item-based collaborative filtering on dense user-item matrices.

    Rows of the matrices are users and columns are items. Call
    ``ItemSimilarity`` once before ``Recommendation`` or ``Evaluate``;
    it populates ``item_similarity`` and ``item_neighbor``.
    """

    def __init__(self, traindata, testdata):
        """Store the train/test matrices and derive their dimensions.

        Args:
            traindata: 2-D array (users x items) used to build similarities
                and to score recommendations.
            testdata: 2-D array of held-out interactions used by
                ``Evaluate``; indexed by the same user/item indices.
        """
        self.traindata = traindata
        self.testdata = testdata
        self.num_users = traindata.shape[0]
        self.num_items = traindata.shape[1]

    def ItemSimilarity(self):
        """Compute pairwise cosine similarity between item columns.

        Sets ``self.item_similarity`` (items x items, symmetric) and
        ``self.item_neighbor``, whose row ``i`` lists item indices ordered
        by decreasing similarity to item ``i``.
        """
        t0 = time.time()
        train = self.traindata
        num_items = self.num_items
        self.item_similarity = np.zeros((num_items, num_items))
        # Each column norm is needed for every pair it takes part in, so
        # compute it once here instead of inside the O(n^2) pair loop.
        norms = [np.linalg.norm(train[:, i]) for i in range(num_items)]
        for i in range(num_items):
            r_i = train[:, i]
            # NOTE(review): the diagonal literal was lost in the pasted
            # source; 1.0 (an item is fully similar to itself) is assumed.
            self.item_similarity[i, i] = 1.0
            for j in range(i + 1, num_items):
                r_j = train[:, j]
                num = np.dot(r_i.T, r_j)
                denom = norms[i] * norms[j]
                # An all-zero column has no direction; define its
                # similarity as 0 instead of dividing by zero.
                if denom == 0:
                    cos = 0
                else:
                    cos = num / denom
                self.item_similarity[i, j] = cos
                self.item_similarity[j, i] = cos
        # Negating turns argsort's ascending order into "most similar first".
        self.item_neighbor = np.argsort(-self.item_similarity)
        t1 = time.time()
        # NOTE(review): the original message text was lost in the pasted
        # source; this wording mirrors the log line in load_matrix.
        print('Finished calculating similarity matrix in %f seconds' % (t1 - t0))

    def Recommendation(self, user_id, kNN, top_N):
        """Return the indices of the ``top_N`` best-scoring items for a user.

        Only items the user has not interacted with in the training data
        are scored. An item's score is the sum, over its first ``kNN``
        entries in ``item_neighbor``, of the user's training value for the
        neighbour times the item-neighbour similarity.

        Args:
            user_id: Zero-based row index of the user.
            kNN: Number of nearest neighbours consulted per candidate item.
            top_N: Length of the returned recommendation list.

        Returns:
            1-D array with at most ``top_N`` item indices, best score first.
        """
        train = self.traindata
        similarity = self.item_similarity
        r_u = train[user_id]
        rated_items_idx = np.nonzero(r_u)[0]
        predict_items_idx = np.setdiff1d(np.arange(0, self.num_items),
                                         rated_items_idx)
        # Kept 2-D (1 x num_items); already-rated items stay at score 0.
        pred_score = np.zeros((1, self.num_items))
        for i in predict_items_idx:
            neighbor_ordered = self.item_neighbor[i]
            # With a diagonal of 1.0 the item itself normally ranks first
            # in its own neighbour list; it adds nothing here because the
            # user's training value for a candidate item is 0.
            for neigh in neighbor_ordered[:kNN]:
                pred_score[0, i] = (pred_score[0, i]
                                    + train[user_id, neigh] * similarity[i, neigh])
        rec_candidate = np.argsort(-pred_score)
        rec_candidate_X = rec_candidate[0]
        rec_list = rec_candidate_X[:top_N]
        return rec_list

    def Evaluate(self, kNN, top_N):
        """Average precision and recall of top-N lists over test users.

        Users with no non-zero entry in the test matrix are skipped.

        Args:
            kNN: Neighbour count forwarded to ``Recommendation``.
            top_N: Recommendation list length; also the precision divisor.

        Returns:
            Tuple ``(precision, recall)`` averaged over the evaluated
            users, or ``(0.0, 0.0)`` when no user has any test item.
        """
        test = self.testdata
        num_users = self.num_users
        precision = 0
        recall = 0
        user_count = 0
        for i in range(num_users):
            test_items_idx = np.nonzero(test[i])[0]
            if len(test_items_idx) == 0:
                continue
            rec_of_i = self.Recommendation(i, kNN, top_N)
            hit_set = np.intersect1d(rec_of_i, test_items_idx)
            # "* 1.0" forces true division under Python 2 as well.
            precision = precision + len(hit_set) / (top_N * 1.0)
            recall = recall + len(hit_set) / (len(test_items_idx) * 1.0)
            user_count = user_count + 1
        if user_count == 0:
            # Nothing to average; avoid dividing by zero.
            return 0.0, 0.0
        precision = precision / (user_count * 1.0)
        recall = recall / (user_count * 1.0)
        return precision, recall
# Driver routine; the bottom of the file invokes it as `test()`.
# NOTE(review): this block was damaged when the code was pasted: the `def`
# header, the `for`/`in`/`print` keywords, every numeric literal and every
# string literal (file names, format strings) are missing. The lost values
# cannot be recovered from this file and must be restored from the original
# source before the script can run.
():
# Neighbourhood sizes and recommendation-list lengths to sweep over
# (list contents lost).
kNN = []
top_N = []
# Load the train and test matrices; each call takes
# (filename, num_users, num_items) - all three arguments lost.
train = load_matrix(,,)
test = load_matrix(,,)
# Build the model and precompute item-item similarities once, before the sweep.
kNNItemCF = ItemCF(train,test)
kNNItemCF.ItemSimilarity()
# Presumably prints a header row for the results table - TODO confirm
# (format string and its four arguments lost).
% (,,,)
# Evaluate every (k, N) combination.
k kNN:
N top_N:
precision,recall = kNNItemCF.Evaluate(k,N)
# Reports k, N and the scaled precision/recall; the scale factors are lost
# (presumably 100 for percentages - TODO confirm).
% (k,N,precision*,recall*)
# Run the evaluation driver only when executed as a script, not on import.
if __name__ == '__main__':
    test()
