Added the recommendation method interface and updated the README
Holy-Shine committed Jun 17, 2018
1 parent 1726ae8 commit 953efd1
Showing 4 changed files with 132 additions and 3 deletions.
6 changes: 5 additions & 1 deletion README.md
@@ -1,3 +1,5 @@
- 2018-6-17: updated the recommendation method interface

A simple movie recommendation system.

## 1. Glimpse
@@ -28,7 +30,7 @@

Before describing the concrete implementation, let's first go over how the code is organized.

The code is simple: 4 files in total (3 code files + 1 data file)
The code is simple: 5 files in total (4 code files + 1 data file)

- **data.p**: a pickle file holding the input data; once loaded it is a pandas (>=0.22.0) DataFrame object (as shown in the figure; see the loading sketch after this list)

@@ -40,6 +42,8 @@

- **main.py**: the main file; provides model training and related operations

- **recInterface.py**: the recommendation method interface, including KNN and other recommendation methods
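A minimal, illustrative sketch of loading data.p as described above (not part of this commit; it assumes data.p sits in the repository root and pandas >= 0.22.0 is installed):

```python
import pickle

# data.p is a pickled pandas DataFrame holding the input data (see the note above)
with open('data.p', 'rb') as f:
    df = pickle.load(f)

print(df.head())  # inspect the first few records
```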



## 4. Model Implementation
5 changes: 4 additions & 1 deletion main.py
@@ -44,7 +44,7 @@ def train(model,num_epochs=5, lr=0.0001):

model.zero_grad()

tag_rank = model(user_inputs, movie_inputs)
tag_rank , _ , _ = model(user_inputs, movie_inputs)

loss = loss_function(tag_rank, target)
if i_batch%20 ==0:
@@ -57,6 +57,9 @@ def train(model,num_epochs=5, lr=0.0001):
print('Epoch {}:\t loss:{}'.format(epoch,loss_all))
writer.export_scalars_to_json("./test.json")
writer.close()



if __name__=='__main__':
model = rec_model(user_max_dict=user_max_dict, movie_max_dict=movie_max_dict, convParams=convParams)
model=model.to(device)
4 changes: 3 additions & 1 deletion model.py
@@ -85,6 +85,7 @@ def forward(self, user_input, movie_input):
feature_age = self.BN(F.relu(self.fc_age(self.embedding_age(age))))
feature_job = self.BN(F.relu(self.fc_job(self.embedding_job(job))))

# feature_user B x 1 x 200
feature_user = F.tanh(self.fc_user_combine(
torch.cat([feature_uid, feature_gender, feature_age, feature_job], 3)
)).view(-1,1,200)
@@ -103,10 +104,11 @@ def forward(self, user_input, movie_input):

feature_flattern_dropout = F.dropout(torch.cat(flattern_tensors,2), p=0.5) # to B x 32

# feature_movie B x 1 x 200
feature_movie = F.tanh(self.fc_movie_combine(
torch.cat([feature_mid.view(-1,1,32), feature_mtype.view(-1,1,32), feature_flattern_dropout], 2)
))

output = torch.sum(feature_user * feature_movie, 2) # B x rank
return output
return output, feature_user, feature_movie
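The edited forward pass now returns the score together with both combined feature vectors. A small illustrative sketch of consuming that return (not from the commit; it assumes `model`, `user_inputs`, and `movie_inputs` are prepared as in main.py and dataset.py):

```python
import torch
import torch.nn.functional as F

with torch.no_grad():
    # the forward pass now yields the score plus both 200-d feature tensors
    rank, feature_user, feature_movie = model(user_inputs, movie_inputs)

    u = feature_user.view(-1, 200)   # B x 1 x 200 -> B x 200
    m = feature_movie.view(-1, 200)

    # cosine similarity between each user/movie pair in the batch
    sim = F.cosine_similarity(u, m, dim=1)
```

recInterface.py relies on exactly this extra output to cache per-id feature vectors for KNN.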

120 changes: 120 additions & 0 deletions recInterface.py
@@ -0,0 +1,120 @@
# Recommendation Interface

import torch
from torch.utils.data import DataLoader
from dataset import MovieRankDataset

import numpy as np
import pickle as pkl

def saveMovieAndUserFeature(model):
'''
Save movie and user feature vectors to disk
'''

datasets = MovieRankDataset(pkl_file='data.p')
dataloader = DataLoader(datasets, batch_size=1, shuffle=False)

# format: {id(int) : feature(numpy array)}
user_feature_dict = {}
movie_feature_dict = {}
movies=set()
users = set()
with torch.no_grad():
for i_batch, sample_batch in enumerate(dataloader):
user_inputs = sample_batch['user_inputs']
movie_inputs = sample_batch['movie_inputs']

# B x 1 x 1 = 1 x 1 x 1
uid = user_inputs['uid'].item() # uid
mid = movie_inputs['mid'].item() # mid

movies.add(mid)
users.add(uid)

# B x 1 x 200 = 1 x 1 x 200
_, feature_user, feature_movie = model(user_inputs, movie_inputs)

# 1 x 200
feature_user = feature_user.view(-1,200).numpy()
feature_movie = feature_movie.view(-1,200).numpy()

if uid not in user_feature_dict.keys():
user_feature_dict[uid]=feature_user
if mid not in movie_feature_dict.keys():
movie_feature_dict[mid]=feature_movie

feature_data = {'user': user_feature_dict, 'movie': movie_feature_dict}  # per-id feature dicts
ids_user_movie={'user': users, 'movie':movies}
pkl.dump(feature_data,open('Params/feature_data.pkl','wb'))
pkl.dump(ids_user_movie, open('Params/user_movie_ids.pkl','wb'))



def getKNNitem(itemID,itemName='movie',K=1):
'''
Run KNN on the saved feature data to get the K nearest neighbors
Args:
itemID: target item's id
itemName: 'movie' or 'user'
K: K-neighbors
return:
a list of the K item ids closest to itemID
'''
assert K>=1, 'Expect K bigger than 0 but get K<1'

# get cosine similarity between vec1 and vec2
def getCosineSimilarity(vec1, vec2):
cosine_sim = float(vec1.dot(vec2.T).item()) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
return cosine_sim

feature_data = pkl.load(open('Params/feature_data.pkl','rb'))

feature_items = feature_data[itemName]
feature_current = feature_items[itemID]

id_sim = [(item_id,getCosineSimilarity(feature_current,vec2)) for item_id,vec2 in feature_items.items()]
id_sim = sorted(id_sim,key=lambda x:x[1],reverse=True)

return [id_sim[i][0] for i in range(K+1)][1:]


def getUserMostLike(model,uid):
'''
Get the movie that user (uid) is predicted to like most
Args:
model: net model
uid: target user's id
return:
the movie id with the highest predicted score
'''

user_movie_ids = pkl.load(open('Params/user_movie_ids.pkl','rb'))
movie_ids = user_movie_ids['movie']

mid_rank={}

# Step 1. Go through net to get user_movie score
user_inputs = torch.LongTensor([uid]).view(-1,1,1)
with torch.no_grad():
for mid in movie_ids:
movie_inputs = torch.LongTensor([mid]).view(-1,1,1)

rank, _, _ = model(user_inputs,movie_inputs)

if mid not in mid_rank.keys():
mid_rank[mid]=rank.item()

mid_rank = [(mid, rank) for mid, rank in mid_rank.items()]
mids = [mid[0] for mid in sorted(mid_rank, key=lambda x: x[1], reverse=True)]

return mids[0]
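Finally, an end-to-end usage sketch of the new interface (illustrative, not part of the commit; it assumes a trained `rec_model` instance named `model`, as built in main.py, and that the `Params/` directory exists for the pickled features):

```python
from recInterface import saveMovieAndUserFeature, getKNNitem, getUserMostLike

# `model` is assumed to be a trained rec_model (see main.py); switch to eval mode first
model.eval()

# 1. run the whole dataset through the net and cache per-id features under Params/
saveMovieAndUserFeature(model)

# 2. the 5 movies whose cached features are closest (cosine similarity) to movie id 1
print(getKNNitem(itemID=1, itemName='movie', K=5))

# 3. the movie that user id 1 is predicted to rate highest
print(getUserMostLike(model, uid=1))
```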



