admin 管理员组

文章数量: 1184232


2024年1月10日发(作者:xml文件名用什么程序打开)

def load_data(dataset_zip):
    """Load the users, movies and ratings tables from a zip archive.

    The three tables are read from the archive, lightly encoded, merged
    into a single frame and split into model features and targets.

    Args:
        dataset_zip: Path (or file-like object) of the dataset zip archive,
            passed straight to ``zipfile.ZipFile``.

    Returns:
        A tuple ``(features, targets, age_map, gender_map, genre_int_map,
        word_int_map, users, movies)`` where:

        * ``features`` is the merged frame without the ``ratings`` column,
        * ``targets`` is a one-column frame holding ``ratings``,
        * ``age_map`` / ``gender_map`` map raw values to integer indices,
        * ``genre_int_map`` maps each genre token to an integer index,
        * ``word_int_map`` maps each title word to an index starting at 1
          (0 is reserved as the padding value),
        * ``users`` / ``movies`` are the processed per-table frames.
    """
    # Function-scope imports keep the listing self-contained; no module-level
    # import block is visible alongside this function.
    import re
    import zipfile

    import pandas as pd

    # NOTE(review): the separators, gender keys and regex below are bytes
    # literals, which assumes pandas hands back undecoded bytes when reading
    # from the binary handles returned by ``ZipFile.open``. Confirm against
    # the pandas version in use; newer releases may need the str equivalents.
    with zipfile.ZipFile(dataset_zip) as zf:
        # Read the user table.
        # Member name assumed from the standard MovieLens-1M layout -- TODO confirm.
        with zf.open('ml-1m/users.dat') as users_raw_data:
            users_title = ['UserID', 'Gender', 'Age', 'JobID', 'Zip-code']
            users = pd.read_table(users_raw_data, sep=b'::', header=None,
                                  names=users_title, engine='python')
        # Keep only the columns matching the regex (drops 'Zip-code').
        users = users.filter(regex='UserID|Gender|Age|JobID')

        # Encode gender and age as integer indices.
        gender_map = {b'F': 0, b'M': 1}
        users['GenderIndex'] = users['Gender'].map(gender_map)
        age_map = {val: ii for ii, val in enumerate(set(users['Age']))}
        users['AgeIndex'] = users['Age'].map(age_map)

        # Read the movie table.
        # Member name assumed from the standard MovieLens-1M layout -- TODO confirm.
        with zf.open('ml-1m/movies.dat') as movies_raw_data:
            movies_title = ['MovieID', 'Title', 'Genres']
            movies = pd.read_table(movies_raw_data, sep=b'::', header=None,
                                   names=movies_title, engine='python')

        # Strip the trailing "(year)" from each title.
        pattern = re.compile(rb'^(.*)\((\d+)\)$')
        movies['TitleWithoutYear'] = movies['Title'].map(
            lambda x: pattern.match(x).group(1))

        # Multi-hot encode the movie genres.
        genre_set = set()
        for val in movies['Genres'].str.split(b'|'):
            genre_set.update(val)
        genre_int_map = {val: ii for ii, val in enumerate(genre_set)}
        # genres_multi_hot is defined elsewhere; presumably it returns a
        # per-row encoder built from the mapping -- verify against its definition.
        movies['GenresMultiHot'] = movies['Genres'].map(
            genres_multi_hot(genre_int_map))

        # Convert titles to lists of word indices. Word indices start at 1
        # so that 0 can be used as the padding value.
        word_set = set()
        for val in movies['TitleWithoutYear'].str.split():
            word_set.update(val)
        word_int_map = {val: ii for ii, val in enumerate(word_set, start=1)}
        # title_encode is defined elsewhere; presumably it returns a per-row
        # encoder built from the mapping -- verify against its definition.
        movies['TitleIndex'] = movies['TitleWithoutYear'].map(
            title_encode(word_int_map))

        # Read the ratings table.
        # Member name assumed from the standard MovieLens-1M layout -- TODO confirm.
        with zf.open('ml-1m/ratings.dat') as ratings_raw_data:
            ratings_title = ['UserID', 'MovieID', 'ratings', 'timestamps']
            ratings = pd.read_table(ratings_raw_data, sep=b'::', header=None,
                                    names=ratings_title, engine='python')
        # Keep only the columns matching the regex (drops 'timestamps').
        ratings = ratings.filter(regex='UserID|MovieID|ratings')

    # Merge the three tables; pd.merge joins on the shared column names.
    data = pd.merge(pd.merge(ratings, users), movies)

    # Split the merged data into the feature table X and the target table y.
    features, targets = data.drop(['ratings'], axis=1), data[['ratings']]
    return (features, targets, age_map, gender_map, genre_int_map,
            word_int_map, users, movies)


# Article section heading that followed this listing: "Model design".


本文标签: 数据 文件名 电影 开始