From 495ffc4777522e40941753e3b1b79c02f84b25b4 Mon Sep 17 00:00:00 2001 From: Grafting Rayman <156515434+GraftingRayman@users.noreply.github.com> Date: Fri, 17 Jan 2025 11:00:30 +0000 Subject: Add files via upload --- r_basicsr/data/ffhq_dataset.py | 80 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 r_basicsr/data/ffhq_dataset.py (limited to 'r_basicsr/data/ffhq_dataset.py') diff --git a/r_basicsr/data/ffhq_dataset.py b/r_basicsr/data/ffhq_dataset.py new file mode 100644 index 0000000..03dc72c --- /dev/null +++ b/r_basicsr/data/ffhq_dataset.py @@ -0,0 +1,80 @@ +import random +import time +from os import path as osp +from torch.utils import data as data +from torchvision.transforms.functional import normalize + +from r_basicsr.data.transforms import augment +from r_basicsr.utils import FileClient, get_root_logger, imfrombytes, img2tensor +from r_basicsr.utils.registry import DATASET_REGISTRY + + +@DATASET_REGISTRY.register() +class FFHQDataset(data.Dataset): + """FFHQ dataset for StyleGAN. + + Args: + opt (dict): Config for train datasets. It contains the following keys: + dataroot_gt (str): Data root path for gt. + io_backend (dict): IO backend type and other kwarg. + mean (list | tuple): Image mean. + std (list | tuple): Image std. + use_hflip (bool): Whether to horizontally flip. + + """ + + def __init__(self, opt): + super(FFHQDataset, self).__init__() + self.opt = opt + # file client (io backend) + self.file_client = None + self.io_backend_opt = opt['io_backend'] + + self.gt_folder = opt['dataroot_gt'] + self.mean = opt['mean'] + self.std = opt['std'] + + if self.io_backend_opt['type'] == 'lmdb': + self.io_backend_opt['db_paths'] = self.gt_folder + if not self.gt_folder.endswith('.lmdb'): + raise ValueError("'dataroot_gt' should end with '.lmdb', but received {self.gt_folder}") + with open(osp.join(self.gt_folder, 'meta_info.txt')) as fin: + self.paths = [line.split('.')[0] for line in fin] + else: + # FFHQ has 70000 images in total + self.paths = [osp.join(self.gt_folder, f'{v:08d}.png') for v in range(70000)] + + def __getitem__(self, index): + if self.file_client is None: + self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt) + + # load gt image + gt_path = self.paths[index] + # avoid errors caused by high latency in reading files + retry = 3 + while retry > 0: + try: + img_bytes = self.file_client.get(gt_path) + except Exception as e: + logger = get_root_logger() + logger.warning(f'File client error: {e}, remaining retry times: {retry - 1}') + # change another file to read + index = random.randint(0, self.__len__()) + gt_path = self.paths[index] + time.sleep(1) # sleep 1s for occasional server congestion + else: + break + finally: + retry -= 1 + img_gt = imfrombytes(img_bytes, float32=True) + + # random horizontal flip + img_gt = augment(img_gt, hflip=self.opt['use_hflip'], rotation=False) + # BGR to RGB, HWC to CHW, numpy to tensor + img_gt = img2tensor(img_gt, bgr2rgb=True, float32=True) + # normalize + normalize(img_gt, self.mean, self.std, inplace=True) + return {'gt': img_gt, 'gt_path': gt_path} + + def __len__(self): + return len(self.paths) -- cgit v1.2.3