推荐系统实践 代码实现

匿名 (未验证) 提交于 2019-12-03 00:22:01
import random
import math
from operator import itemgetter


def Splitdata(data, M, k, seed):
    """Split (user, item) pairs into train and test dicts of user -> item set.

    For every pair a random integer is drawn; pairs whose draw equals ``k``
    go to the test set, all others go to the training set. The generator is
    seeded with ``seed`` so the split is reproducible.

    Returns:
        A ``(train, test)`` tuple of plain dicts mapping user -> set of items.
    """
    test = {}
    train = {}
    random.seed(seed)
    for user, item in data:
        # NOTE(review): random.randint(0, M) is inclusive on both ends, so
        # there are M + 1 possible buckets, not M. Kept unchanged so existing
        # splits stay reproducible; confirm whether randrange(M) was intended.
        bucket = random.randint(0, M)
        target = test if bucket == k else train
        target.setdefault(user, set()).add(item)
    return train, test


def _top_n(rank, N):
    """Return the N highest-scored (item, score) pairs of a rank dict."""
    return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:N]


def Recall(train, test, N, K):
    """Return hits / total test items for top-N UserCF recommendations.

    Only users present in both ``train`` and ``test`` are evaluated.
    Returns 0.0 when there are no test items to recall.
    """
    hit = 0
    total = 0
    W = UserSimilarity(train)

    for user in train:
        if user not in test:
            continue
        tu = test[user]
        for item, _score in _top_n(Recommend(user, train, W, K), N):
            if item in tu:
                hit += 1
        total += len(tu)
    if total == 0:
        return 0.0
    return hit / (total * 1.0)


def Precision(train, test, N, K):
    """Return hits / (N * evaluated users) for top-N UserCF recommendations.

    Users without an entry in ``test`` are skipped (same rule as ``Recall``)
    instead of raising ``KeyError``. Returns 0.0 when no user is evaluated.
    """
    hit = 0
    total = 0
    W = UserSimilarity(train)

    for user in train:
        if user not in test:
            continue
        tu = test[user]
        for item, _score in _top_n(Recommend(user, train, W, K), N):
            if item in tu:
                hit += 1
        total += N
    if total == 0:
        return 0.0
    return hit / (total * 1.0)


def Coverage(train, test, N, K):
    """Return the fraction of training items that get recommended to anyone.

    ``test`` is accepted for signature parity with the other metrics but is
    not read. Returns 0.0 when the training set contains no items.
    """
    recommend_items = set()
    all_items = set()
    W = UserSimilarity(train)
    for user in train:
        all_items.update(train[user])
        for item, _score in _top_n(Recommend(user, train, W, K), N):
            recommend_items.add(item)
    if not all_items:
        return 0.0
    return len(recommend_items) / (len(all_items) * 1.0)


def popularity(train, test, N, K):
    """Return the mean of log(1 + popularity) over all recommended items.

    An item's popularity is the number of training users who have it.
    ``test`` is not read. Returns 0.0 when nothing is recommended.
    """
    item_popularity = {}
    for items in train.values():
        for item in items:
            item_popularity[item] = item_popularity.get(item, 0) + 1

    ret = 0
    n = 0
    W = UserSimilarity(train)
    for user in train:
        for item, _score in _top_n(Recommend(user, train, W, K), N):
            ret += math.log(1 + item_popularity[item])
            n += 1
    if n == 0:
        return 0.0
    ret /= n * 1.0
    return ret


def UserSimilarity(train):
    """Build the user-user similarity matrix from a user -> items dict.

    Each item shared by users u and v contributes 1 / log(1 + |users of item|)
    to their co-occurrence score; the score is then divided by
    sqrt(N[u] * N[v]) where N[x] is the number of items of user x.

    Returns:
        Dict ``W`` with ``W[u][v]`` = similarity. Users that share no item
        with anyone have no entry in ``W``.
    """
    # Invert the table: item -> set of users who have it.
    item_users = {}
    for u, items in train.items():
        for i in items:
            item_users.setdefault(i, set()).add(u)

    C = {}
    N = {}
    for users in item_users.values():
        # Loop-invariant per item; len(users) >= 1 so the log is positive.
        weight = 1 / math.log(1 + len(users))
        for u in users:
            N[u] = N.get(u, 0) + 1
            for v in users:
                if u == v:
                    continue
                row = C.setdefault(u, {})
                row[v] = row.get(v, 0) + weight

    W = {}
    for u, related_users in C.items():
        W[u] = {
            v: cuv / math.sqrt(N[u] * N[v])
            for v, cuv in related_users.items()
        }
    return W


def Recommend(user, train, W, K):
    """Score unseen items for ``user`` from the K most similar users.

    Returns a dict item -> summed similarity of the neighbours who have it.
    A user with no row in ``W`` (no similar users) gets an empty dict
    instead of a ``KeyError``.
    """
    rank = {}
    interacted_items = train[user]
    neighbours = sorted(
        W.get(user, {}).items(), key=itemgetter(1), reverse=True
    )[0:K]
    for v, wuv in neighbours:
        for i in train[v]:
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0) + wuv
    return rank


def ItemSimilarity(train):
    """Build the item-item similarity matrix from a user -> items dict.

    Each user who has both items i and j contributes
    1 / log(1 + |items of user|) to their co-occurrence score; the score is
    then divided by sqrt(N[i] * N[j]) where N[x] is the number of users who
    have item x.

    Returns:
        Dict ``W`` with ``W[i][j]`` = similarity. Items that never co-occur
        with another item have no entry in ``W``.
    """
    C = {}
    N = {}
    for items in train.values():
        if not items:
            continue
        # Loop-invariant per user; len(items) >= 1 here so the log is > 0.
        weight = 1 / math.log(1 + len(items) * 1.0)
        for i in items:
            N[i] = N.get(i, 0) + 1
            for j in items:
                if i == j:
                    continue
                row = C.setdefault(i, {})
                row[j] = row.get(j, 0) + weight

    W = {}
    for i, related_items in C.items():
        W[i] = {
            j: cij / math.sqrt(N[i] * N[j])
            for j, cij in related_items.items()
        }
    return W


def ItemCFRecommend(train, user_id, W, K):
    """Score unseen items for ``user_id`` from the K nearest items of each
    item the user already has.

    Returns a dict item -> summed similarity. Items of the user that have no
    row in ``W`` are skipped instead of raising ``KeyError``.
    """
    rank = {}
    ru = train[user_id]
    for i in ru:
        nearest = sorted(
            W.get(i, {}).items(), key=itemgetter(1), reverse=True
        )[0:K]
        for j, wj in nearest:
            if j in ru:
                continue
            rank[j] = rank.get(j, 0) + wj
    return rank


def RandomSelectNegativeSample(self, items):
    # NOTE(review): this function is truncated in the source listing. The
    # name `items_` below is undefined, so any call with a non-empty `items`
    # raises NameError, and `ret` is never returned. Left as found rather
    # than guessing the missing part; complete it before use.
    ret = dict()
    for i in items.keys():
        ret[i] = 1
    n = 0
    for i in range(0, len(items) * 3):
        item = items_


def main():
    """Load the ratings file, split it, and print ItemCF results for user '1'."""
    path = 'F:\\Project\\python\\ml-100k\\u.data'
    data = []
    # Context manager so the file handle is closed after reading.
    with open(path) as datalines:
        for line in datalines:
            arr = line.split('\t')
            data.append((arr[0], arr[1]))

    trn, tst = Splitdata(data, 10, 1, 10)
    print(len(trn))
    print(len(tst))
    itemW = ItemSimilarity(trn)
    rk = ItemCFRecommend(trn, '1', itemW, 5)
    print(rk)

    # Other experiments from the original listing:
    # recall = Recall(trn, tst, 100, 80)
    # print('recall: ', recall)
    # Wtmp = UserSimilarity(trn)
    # rk = Recommend('1', trn, Wtmp, 3)
    # print(rk)


if __name__ == "__main__":
    main()

易学教程内所有资源均来自网络或用户发布的内容，如有违反法律规定的内容，欢迎反馈。
该文章没有解决你所遇到的问题？点击提问，说说你的问题，让更多的人一起探讨吧！