admin 管理员组文章数量: 1184232
imdb
代码及解析:
import glob
import os
import random
import re
import shutil
import time
from datetime import datetime, timedelta
from multiprocessing import Pool
from shutil import copyfile

import cv2
import numpy as np
import scipy.io

from config import config, parser
from align_faces import FaceAligner

# From here on `parser` refers only to the DATA section of the imported configuration.
parser = parser['DATA']


def parse_matlab_date(x):
    """Convert a MATLAB serial date number into a calendar year.

    :param x: date in matlab format (a day count); anything ``int()`` accepts
    :return: int, the year, or -1 when the value cannot be converted
    """
    try:
        days = int(x)
        # Shift by 366 days to move from MATLAB's day count to Python's ordinal.
        # The fractional part of the day is irrelevant for the year, so it is
        # not added back (after the int() cast it was always zero anyway).
        return (datetime.fromordinal(days) - timedelta(days=366)).year
    except (TypeError, ValueError, OverflowError):
        # Best effort: report the bad value and let the caller see -1.
        print("[convertMatlabDate] Failed to parse string {}".format(x))
        return -1


def clear_dir(path):
    """Leave ``path`` as an existing, empty directory.

    Any existing directory tree at ``path`` is deleted, then the directory is
    created again.

    :param path: path to dir
    :return: None
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)


def addlabels(data='wiki', clean=False):
    """Copy usable raw pictures to the labeled dir as [Age]_[Gender]_[Name].jpg.

    Records are read from ``<raw dir>/<data>.mat``. A record is skipped when it
    has no face, has a second face, has an age outside the configured
    ``age_lower``/``age_upper`` range, or has a gender other than 0 or 1.
    A summary of the counters is printed at the end.

    :param data: 'wiki' or 'imdb'; selects the raw folder and the .mat file
    :param clean: if set, empty the labeled folder before copying
    :return: None
    """
    # 1, pick the raw folder and optionally clean the previous output
    origin_dir = config.wiki_raw if data == 'wiki' else config.imdb_raw
    if clean:
        clear_dir(config.labeled)

    # 2, read meta data
    mat = scipy.io.loadmat(origin_dir + data + '.mat')[data][0][0]

    # The age limits do not change during the loop, so convert them once.
    age_lower = int(parser['age_lower'])
    age_upper = int(parser['age_upper'])

    # records
    no_face_image = 0
    multiple_face_image = 0
    wrong_age = 0
    wrong_gender = 0
    successful = 0

    # dob: date of birth (matlab date number)
    # dop: when the photo was taken (used below as a year)
    # path: file path
    # gender: gender
    # name: name of the celebrity
    # mat[5][0]: face location, unused here
    #   IMG(face_location(2):face_location(4),face_location(1):face_location(3),:)
    # face_score: score of the detected face
    # face_score2: score of the second face, NaN when there is none
    records = zip(mat[0][0], mat[1][0], mat[2][0], mat[3][0],
                  mat[4][0], mat[6][0], mat[7][0])
    for dob, dop, path, gender, name, face_score, face_score2 in records:
        no_face = face_score < 0
        extra_face = not np.isnan(face_score2)
        if no_face or extra_face:
            # Both counters may be bumped for the same record.
            if no_face:
                no_face_image += 1
            if extra_face:
                multiple_face_image += 1
            continue

        age = dop - parse_matlab_date(dob)
        if age < age_lower or age > age_upper:
            # age outside the accepted range
            wrong_age += 1
            continue

        if gender not in [1.0, 0.0]:
            # invalid gender value (NaN ends up here as well)
            wrong_gender += 1
            continue

        newName = "{}_{}_{}.jpg".format(
            age,
            int(gender),
            name[0].replace(' ', '').replace('/', '').replace(':', ''))

        # 2.1 if the name is already taken, insert a random number before ".jpg"
        newNameNoDupli = newName
        while os.path.exists(config.labeled + newNameNoDupli):
            newNameNoDupli = "{}{}{}".format(newName[:-4],
                                             random.randint(1, 9999),
                                             newName[-4:])

        # 2.2 save as a new file
        copyfile(origin_dir + path[0], config.labeled + newNameNoDupli)
        successful += 1

    print("{} Successful, {} no_face_image, {} multiple_face_image, {} wrong_age, {} wrong_gender".format(
        successful, no_face_image, multiple_face_image, wrong_age, wrong_gender))


# sort photos by their names
def sort_out_by_name(clean=False):
    """Copy aligned pictures into one sub-folder of ``config.named`` per name.

    The name is taken from the third "_"-separated field of the file name
    (non-digit characters only) and lower-cased. Files whose name does not
    match the expected pattern are skipped.

    :param clean: if set, empty the named folder before copying
    :return: None
    """
    if clean:
        clear_dir(config.named)

    pwd = os.getcwd()
    os.chdir(config.aligned)
    try:
        for img in glob.glob("*.jpg"):
            # The dot is escaped so only a literal ".jpg" suffix is accepted.
            name = re.findall(r'[^_]*_[^_]*_([\D]*)[0-9]*\.jpg', img)
            if not name:
                continue
            target_dir = config.named + name[0].lower() + '/'
            if not os.path.exists(target_dir):
                os.mkdir(target_dir)
            copyfile(img, target_dir + img)
    finally:
        # Always go back, even when a copy fails half way through.
        os.chdir(pwd)


# TODO: any other ways to get around this public variable?
# Module-level FaceAligner instance; sub_align_face() reads this global to call getAligns().
FL = FaceAligner()
def sub_align_face(picname):
    """Pool worker: align one picture and store the first aligned face.

    Nothing is written when the aligner returns no face.

    :param picname: picture file name, relative to the current working directory
    :return: None
    """
    aligned = FL.getAligns(picname)
    # len() rather than truthiness: the result may not be a plain list.
    if len(aligned) == 0:
        return
    cv2.imwrite(config.aligned + picname, aligned[0])


def creat_fgnet_val(clean=False):
    """Copy FG-NET pictures into the validation folder as [Age]_1_[Id].jpg.

    File names are expected to look like ``<digits>A<digits>...``, where the
    digits before "A" become the id and the digits after it the age. The gender
    field is always written as 1. Files that do not match are skipped.

    :param clean: if set, empty the validation folder before copying
    :return: None
    """
    if clean:
        clear_dir(config.val)

    pwd = os.getcwd()
    os.chdir(config.fgnet_raw)
    try:
        for pic in glob.glob("*"):
            # Capture the whole id and age; skip anything that is not a picture
            # following the naming scheme instead of crashing on it.
            found = re.findall(r'(\d+)A(\d+)', pic)
            if not found:
                continue
            name, age = found[0]
            newName = "{}_1_{}.jpg".format(age, name)

            # 2.1 if the name is already taken in the destination folder,
            # insert a random number before ".jpg"
            newNameNoDupli = newName
            while os.path.exists(config.val + newNameNoDupli):
                newNameNoDupli = "{}{}{}".format(newName[:-4],
                                                 random.randint(1, 9999),
                                                 newName[-4:])

            # 2.2 save as a new file
            copyfile(config.fgnet_raw + pic, config.val + newNameNoDupli)
    finally:
        os.chdir(pwd)


def align_faces(clean=False):
    """Align every labeled picture and store the result in the aligned folder.

    The work is spread over a process pool; each picture is handled by
    sub_align_face().

    :param clean: if set, clean aligned folder, else append or rewrite to it
    :return: None
    """
    if clean:
        clear_dir(config.aligned)

    pwd = os.getcwd()
    # Workers open pictures by bare file name, so run from the labeled folder.
    os.chdir(config.labeled)
    try:
        jobs = glob.glob("*.jpg")
        # Sequential alternative: call sub_align_face(picname) for each job.
        with Pool() as pool:
            pool.map(sub_align_face, jobs)
    finally:
        # Restore the caller's working directory, like the sibling functions do.
        os.chdir(pwd)


def sub_divideTrainVal(img):
    """Pool worker: copy one aligned picture to the train or the val folder.

    The picture goes to train with probability ``parser['train_test_div']``,
    otherwise to val.

    :param img: image file name inside the aligned folder
    :return: None
    """
    # The stdlib generator is used instead of NumPy's global one: forked pool
    # workers start with identical copies of NumPy's global state and would
    # repeat each other's draws.
    if random.random() < float(parser['train_test_div']):
        copyfile(config.aligned + img, config.train + img)
    else:
        copyfile(config.aligned + img, config.val + img)


def divideTrainVal():
    """Randomly distribute all aligned pictures over the train and val folders.

    Both destination folders are emptied first; the split itself is done per
    picture by sub_divideTrainVal() in a process pool.

    :return: None
    """
    pwt = os.getcwd()
    os.chdir(config.aligned)
    try:
        # clean
        clear_dir(config.train)
        clear_dir(config.val)

        # parallel
        with Pool() as pool:
            pool.map(sub_divideTrainVal, glob.glob("*.jpg"))
    finally:
        os.chdir(pwt)


if __name__ == "__main__":
    print("labeling..")
    addlabels(data='wiki', clean=True)
    print("aligning..")
    align_faces(clean=True)
    print("dividing..")
    divideTrainVal()
    # Optional extra step, disabled by default:
    # creat_fgnet_val(clean=True)
参考:
本文标签: imdb
版权声明:本文标题:imdb 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.roclinux.cn/p/1698335690a294038.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论