imdb-Linux大棚

admin 管理员组

文章数量: 1184232

imdb

代码及解析：

import glob
import os
import random
import re
import shutil
import time
from datetime import datetime, timedelta
from multiprocessing import Pool
from shutil import copyfile

import cv2
import numpy as np
import scipy.io

from config import config, parser
from align_faces import FaceAligner

# Narrow the imported parser down to its 'DATA' section; the lookups in this
# file (age bounds, train/val ratio) all read from that section.
parser = parser['DATA']


def parse_matlab_date(x):
    """Convert a MATLAB serial date number to a calendar year.

    :param x: date in MATLAB format (anything ``int()`` accepts)
    :return: int, the year, or -1 when the value cannot be converted
    """
    try:
        # MATLAB serial dates run 366 days ahead of Python's ordinals.
        # The fractional day is irrelevant because only the year is kept.
        return (datetime.fromordinal(int(x)) - timedelta(days=366)).year
    except (TypeError, ValueError, OverflowError):
        # Covers NaN/inf input, non-numeric input and out-of-range ordinals.
        print("[convertMatlabDate] Failed to parse string {}".format(x))
        return -1


def clear_dir(path):
    """Reset ``path`` to an empty directory.

    Any existing tree at ``path`` is removed, then the directory is created
    again (including missing parent directories).

    :param path: path to dir
    :return: None
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)


def addlabels(data='wiki', clean=False):
    """Copy usable pictures into the labeled dir as [Age]_[Gender]_[Name].jpg.

    A record is skipped when it has no detected face, more than one detected
    face, an age outside the configured range, or a gender other than 0/1.
    A summary of the counters is printed at the end.

    :param data: 'wiki' or 'imdb'; selects the raw directory and the name of
        the ``.mat`` metadata file inside it
    :param clean: if set, empty the labeled directory first
    :return: None
    """
    # 1, clean previous
    origin_dir = config.wiki_raw if data == 'wiki' else config.imdb_raw
    if clean:
        clear_dir(config.labeled)

    # 2, read meta data
    mat = scipy.io.loadmat(origin_dir + data + '.mat')[data][0][0]

    # Hoisted: the bounds do not change while iterating.
    age_lower = int(parser['age_lower'])
    age_upper = int(parser['age_upper'])

    # records
    no_face_image = 0
    multiple_face_image = 0
    wrong_age = 0
    wrong_gender = 0
    successful = 0

    # dob: date of birth
    # dop: date the photo was taken
    # path: file path
    # gender: gender
    # name: name of the celebrity
    # mat[5][0]: face location, IMG(face_location(2):face_location(4),
    #            face_location(1):face_location(3), :)
    # face_score: score of the detected face
    # face_score2: score of the second detected face
    records = zip(mat[0][0], mat[1][0], mat[2][0], mat[3][0],
                  mat[4][0], mat[6][0], mat[7][0])
    for dob, dop, path, gender, name, face_score, face_score2 in records:
        if face_score < 0 or not np.isnan(face_score2):
            if face_score < 0:
                no_face_image += 1  # no face found
            if not np.isnan(face_score2):
                multiple_face_image += 1  # several faces: skipped
            continue

        # A failed date parse yields -1, which pushes the age out of range.
        age = dop - parse_matlab_date(dob)
        if age < age_lower or age > age_upper:
            wrong_age += 1  # age outside the configured range
            continue

        if gender not in (1.0, 0.0):
            wrong_gender += 1  # invalid gender label
            continue

        person = name[0].replace(' ', '').replace('/', '').replace(':', '')
        newName = "{}_{}_{}.jpg".format(age, int(gender), person)

        # 2.1 check duplicate
        # 2.1 if a duplicate exists, append a random number to its name
        newNameNoDupli = newName
        while os.path.exists(config.labeled + newNameNoDupli):
            newNameNoDupli = "{}{}{}".format(
                newName[:-4], random.randint(1, 9999), newName[-4:])

        # 2.2 save as a new file
        copyfile(origin_dir + path[0], config.labeled + newNameNoDupli)
        successful += 1

    print("{} Successful, {} no_face_image, {} multiple_face_image, {} wrong_age, {} wrong_gender".format(
        successful, no_face_image, multiple_face_image, wrong_age, wrong_gender))


# sort photos by their names
def sort_out_by_name(clean=False):
    """Copy aligned photos into one sub-folder per person name.

    The name is the non-digit run that follows the second underscore of the
    file name, lower-cased; files that do not match the pattern are skipped.

    :param clean: if set, empty the named directory first
    :return: None
    """
    name_pattern = re.compile(r'[^_]*_[^_]*_([\D]*)[0-9]*.jpg')
    pwd = os.getcwd()
    if clean:
        clear_dir(config.named)
    os.chdir(config.aligned)
    try:
        for img in glob.glob("*.jpg"):
            match = name_pattern.search(img)
            if match is None:
                continue
            person_dir = config.named + match.group(1).lower() + '/'
            os.makedirs(person_dir, exist_ok=True)
            copyfile(img, person_dir + img)
    finally:
        # Restore the caller's working directory even when a copy fails.
        os.chdir(pwd)


# TODO: any other ways to get around this public variable?
# Module-level aligner instance. sub_align_face is dispatched through
# Pool.map and receives only a file name, so it reaches the aligner through
# this global.
FL = FaceAligner()
def sub_align_face(picname):
    """Worker function: align one picture and store the first aligned face.

    Nothing is written when the aligner returns no face.

    :param picname: picture file name, relative to the current directory
    :return: None
    """
    aligned = FL.getAligns(picname)
    if len(aligned) == 0:
        return
    cv2.imwrite(config.aligned + picname, aligned[0])


def creat_fgnet_val(clean=False):
    """Copy FG-NET pictures into the val dir as [Age]_1_[SubjectId].jpg.

    File names are expected to look like ``<digits>A<digits>...``: the digits
    before ``A`` become the name part and the digits after it the age. Files
    that do not follow this scheme are skipped. The gender field is always
    written as 1.

    :param clean: if set, empty the val directory first
    :return: None
    """
    if clean:
        clear_dir(config.val)
    name_pattern = re.compile(r'(\d+)A(\d+)')
    pwd = os.getcwd()
    os.chdir(config.fgnet_raw)
    try:
        for pic in glob.glob("*"):
            match = name_pattern.search(pic)
            if match is None:
                continue
            name, age = match.groups()
            newName = "{}_1_{}.jpg".format(age, name)

            # 2.1 check duplicate in the directory that is written to
            # 2.1 if a duplicate exists, append a random number to its name
            newNameNoDupli = newName
            while os.path.exists(config.val + newNameNoDupli):
                newNameNoDupli = "{}{}{}".format(
                    newName[:-4], random.randint(1, 9999), newName[-4:])

            # 2.2 save as a new file
            copyfile(config.fgnet_raw + pic, config.val + newNameNoDupli)
    finally:
        # Restore the caller's working directory even when a copy fails.
        os.chdir(pwd)


def align_faces(clean=False):
    """Align every labeled picture and store the result in the aligned dir.

    The work is spread over a process pool, one picture per task.

    :param clean: if set, clean aligned folder, else append or rewrite to it
    :return: None
    """
    pwd = os.getcwd()
    if clean:
        clear_dir(config.aligned)
    # Workers open pictures by bare file name, so the pool has to be created
    # while the labeled directory is the working directory.
    os.chdir(config.labeled)
    try:
        jobs = glob.glob("*.jpg")
        with Pool() as pool:
            pool.map(sub_align_face, jobs)
    finally:
        os.chdir(pwd)


def sub_divideTrainVal(img):
    """Copy one aligned image to either the train or the val directory.

    The image goes to train with probability ``parser['train_test_div']``,
    otherwise to val.

    :param img: image file name inside the aligned directory
    :return: None
    """
    # Uses the stdlib generator rather than NumPy's global one: forked pool
    # workers start with identical copies of NumPy's global state, whereas
    # the stdlib generator is reseeded in forked children (CPython 3.7+).
    if random.random() < float(parser['train_test_div']):
        copyfile(config.aligned + img, config.train + img)
    else:
        copyfile(config.aligned + img, config.val + img)


def divideTrainVal():
    """Empty the train and val dirs, then split the aligned images randomly.

    Each image is assigned independently by sub_divideTrainVal inside a
    process pool.

    :return: None
    """
    pwt = os.getcwd()
    os.chdir(config.aligned)
    try:
        # clean
        clear_dir(config.train)
        clear_dir(config.val)
        # parallel
        with Pool() as pool:
            pool.map(sub_divideTrainVal, glob.glob("*.jpg"))
    finally:
        os.chdir(pwt)


if __name__ == "__main__":
    print("labeling..")
    addlabels(data='wiki', clean=True)
    print("aligning..")
    align_faces(clean=True)
    print("dividing..")
    divideTrainVal()
    # Optional step, disabled here:
    #   creat_fgnet_val(clean=True)

参考：

本文标签： imdb

版权声明：本文标题：imdb 内容由网友自发贡献，该文观点仅代表作者本人，转载请联系作者并注明出处：http://www.roclinux.cn/p/1698335690a294038.html，本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容，一经查实，本站将立刻删除。

Linux大棚 – 不忘初心的技术博客，浮躁时代的安静角落

imdb

imdb

更多相关文章

imdb

电影评论分类：二分类问题（IMDB数据集）

【深度学习】IMDB数据集上电影评论二分类

发表评论

推荐文章

怎样在电脑上设置路由器的WiFi密码_电脑怎么设置wifi密码

解决LazyVim在Windows系统下Ctrl+Space键映射失效问题

基于YOLOv5、YOLOv8的烟雾报警检测（超实用项目）_python烟雾报警系统

一个可以很好解决IE8按钮和字体变小的办法_ie8上如何获取字体的fontsize

易用EasyRecovery，你的安卓照片恢复专家，误删不再怕

热门文章

树莓派进阶教程：静态IP设置轻松上手，网络流畅不卡顿

Flash中心提示未知错误？全面排查解决策略

网络基础-网关，DNS，MAC地址，子网掩码，网段_网关地址

斐讯 K2 路由器 无线中继 无线扩展设置教程图文_斐讯k2无线中继教程

《英雄联盟》游戏启动时闪退提示“缺少common.dll文件”该怎么处理？英雄联盟LOL游戏崩溃提示“找不到common.dll”的修复方法_wegame找不到common.dll

PS选区工具和羽化的运用_选区边缘羽化

降低电脑屏幕对眼睛伤害的绿色设置教程

R3nzSkin皮肤异常故障排除指南：修复多玩家皮肤重置问题

构建高效企业协作模式：SWF与Flash中心的实战应用

一文读懂Dism命令行，Adobe Flash Player安装不再难！

最新文章

一文教会你AIX系统备份：mksysb实用指南

SWF文件备份失败？这些步骤让你轻松搞定

Win10系统备份轻松搞定：掌握captureimage命令的关键技巧

Linux系统安全小贴士：掌握备份与恢复，安心每一天

省时省心！三步完成电脑系统高效备份！

Ubuntu系统维护秘籍：备份步骤详解，保护你的劳动成果！

Linux系统不哭：高效备份与快速恢复方案

Ubuntu系统安全大计，备份技巧大公开

GHOST教程：系统备份和还原，小白也能变成高手！

Linux备份与恢复必修课：SWF文件安全策略从入门到精通

Exploring the Finest Accommodations: A Comprehensive Guide to Ruston LA Hotels

The Enchanting Experience of ScaliniTella NYC: A Culinary Gem in the Heart of Manhattan

Exploring the Exquisite Aloft Chicago O'Hare: A Blend of Modern Luxury and Convenience

A Culinary Journey: Discovering the Finest Dining Experiences in Waco, TX

A Culinary Journey: Discovering the Finest Dining Experiences in Athens, GA

电脑设备管理器在哪里？一次让我抓狂又兴奋的寻找经历

与GWX的持久战：一段关于Windows10升级弹窗的私人记忆

以管理员身份运行：那些年我们追过的权限与踩过的坑

斐讯 K2 路由器无线中继无线扩展设置教程图文_斐讯k2无线中继教程