admin 管理员组文章数量: 1184232
2024年1月10日发(作者:xml文件名用什么程序打开)
def load_data(dataset_zip):
    """Load the MovieLens-1M tables from a zip archive and merge them.

    Reads ``ml-1m/users.dat``, ``ml-1m/movies.dat`` and ``ml-1m/ratings.dat``
    from the archive, derives index/encoded columns for users and movies,
    and joins the three tables into one frame.

    Args:
        dataset_zip: Path (or file-like object) accepted by
            ``zipfile.ZipFile`` that contains the ``ml-1m/`` directory.

    Returns:
        A tuple ``(features, targets, age_map, gender_map, genre_int_map,
        word_int_map, users, movies)`` where ``features`` is the merged
        table without the ``ratings`` column and ``targets`` is a
        single-column frame holding ``ratings``.

    Note:
        ``genres_multi_hot`` and ``title_encode`` are helpers defined
        elsewhere; each is called with a mapping and its result is passed
        to ``Series.map``.
    """
    # Imported locally so the block is self-contained; the module-level
    # imports are not visible in this listing.
    import re
    import zipfile

    import pandas as pd

    with zipfile.ZipFile(dataset_zip) as zf:
        # --- Users -------------------------------------------------------
        with zf.open('ml-1m/users.dat') as users_raw_data:
            users_title = ['UserID', 'Gender', 'Age', 'JobID', 'Zip-code']
            # NOTE(review): the bytes separator and the bytes keys below
            # assume the parser yields bytes values when fed a binary
            # handle -- confirm against the pandas version in use.
            users = pd.read_table(users_raw_data, sep=b'::', header=None,
                                  names=users_title, engine='python')
        # Keep only the columns used downstream (drops Zip-code).
        users = users.filter(regex='UserID|Gender|Age|JobID')

        # Map gender and age to integer indices.
        gender_map = {b'F': 0, b'M': 1}
        users['GenderIndex'] = users['Gender'].map(gender_map)
        # Index assignment follows set iteration order, so the concrete
        # indices are not guaranteed to be the same from run to run.
        age_map = {val: ii for ii, val in enumerate(set(users['Age']))}
        users['AgeIndex'] = users['Age'].map(age_map)

        # --- Movies ------------------------------------------------------
        with zf.open('ml-1m/movies.dat') as movies_raw_data:
            movies_title = ['MovieID', 'Title', 'Genres']
            movies = pd.read_table(movies_raw_data, sep=b'::', header=None,
                                   names=movies_title, engine='python')

        # Strip the trailing "(year)" from each title.
        pattern = re.compile(rb'^(.*)\((\d+)\)$')
        movies['TitleWithoutYear'] = movies['Title'].map(
            lambda x: pattern.match(x).group(1))

        # Multi-hot encode the genres.
        genre_set = set()
        for val in movies['Genres'].str.split(b'|'):
            genre_set.update(val)
        genre_int_map = {val: ii for ii, val in enumerate(genre_set)}
        movies['GenresMultiHot'] = movies['Genres'].map(
            genres_multi_hot(genre_int_map))

        # Encode titles as lists of word indices. Word indices start at 1;
        # 0 is reserved as the padding value.
        word_set = set()
        for val in movies['TitleWithoutYear'].str.split():
            word_set.update(val)
        word_int_map = {val: ii for ii, val in enumerate(word_set, start=1)}
        movies['TitleIndex'] = movies['TitleWithoutYear'].map(
            title_encode(word_int_map))

        # --- Ratings -----------------------------------------------------
        with zf.open('ml-1m/ratings.dat') as ratings_raw_data:
            ratings_title = ['UserID', 'MovieID', 'ratings', 'timestamps']
            ratings = pd.read_table(ratings_raw_data, sep=b'::', header=None,
                                    names=ratings_title, engine='python')
        # Keep only the columns used downstream (drops timestamps).
        ratings = ratings.filter(regex='UserID|MovieID|ratings')

    # Merge the three tables (joined on their shared column names).
    data = pd.merge(pd.merge(ratings, users), movies)

    # Split into the feature table X and the target table y.
    features, targets = data.drop(['ratings'], axis=1), data[['ratings']]
    return (features, targets, age_map, gender_map, genre_int_map,
            word_int_map, users, movies)

# 模型设计 (heading of the article section that follows: "Model design")
版权声明:本文标题:在MovieLens1M数据集上使用深度学习进行评分预测 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.roclinux.cn/p/1704886716a465666.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论