Skip to content

Commit 2b780a5

Browse files
committed
[add] ego4d RLDS dataset builder
1 parent 56eab98 commit 2b780a5

14 files changed

Lines changed: 1155 additions & 1 deletion
Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 Karl Pertsch
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
import numpy as np
2+
from PIL import Image
3+
import os
4+
import json
5+
from tqdm import tqdm
6+
from multiprocessing import Pool
7+
from functools import partial
8+
import argparse
9+
10+
def parse_arguments():
    """Build and parse the CLI options for the Ego4D fake-episode pipeline.

    Returns:
        argparse.Namespace: parsed arguments with fields ``source_dir``,
        ``target_dir``, ``annotation_file``, ``processes``, ``target_size``
        (a 2-element list), and ``verify``.
    """
    cli = argparse.ArgumentParser(
        description='Process Ego4D data to create fake episodes.')

    # Required paths.
    cli.add_argument('--source_dir', type=str, required=True,
                     help='Directory containing the source video clips')
    cli.add_argument('--target_dir', type=str, required=True,
                     help='Directory to save the processed episodes')
    cli.add_argument('--annotation_file', type=str, required=True,
                     help='Path to the annotation JSON file')

    # Tuning knobs.
    cli.add_argument('--processes', type=int, default=96,
                     help='Number of worker processes to use (default: 96)')
    cli.add_argument('--target_size', type=int, nargs=2, default=[224, 224],
                     help='Target size for resizing images as "height width" (default: 224 224)')
    cli.add_argument('--verify', action='store_true',
                     help='Verify saved episodes by loading them after creation')

    return cli.parse_args()
def center_crop_and_resize(image, target_size=(224, 224)):
    """
    Center crop and resize the input image while maintaining aspect ratio.

    Args:
        image (numpy.ndarray): Input image array with shape (H, W, C)
        target_size (tuple): Desired output size as (height, width)

    Returns:
        numpy.ndarray: Resized image array with shape (target_height, target_width, C)
    """
    height, width, _ = image.shape

    # Crop the longer side so the result is a centered square.
    if height < width:
        # Wide image - crop width
        crop_size = height
        start_x = (width - crop_size) // 2
        start_y = 0
    else:
        # Tall image - crop height
        crop_size = width
        start_x = 0
        start_y = (height - crop_size) // 2

    # Perform center crop
    cropped_image = image[start_y:start_y + crop_size, start_x:start_x + crop_size, :]

    # ascontiguousarray: callers pass a negative-stride view (BGR->RGB flip
    # via `[:, :, ::-1]`), which some Pillow versions reject in fromarray.
    pil_image = Image.fromarray(np.ascontiguousarray(cropped_image))
    # BUG FIX: PIL's resize() expects (width, height), while target_size is
    # documented as (height, width).  The original passed target_size through
    # unchanged, which swapped the axes for any non-square target.
    resized_image = pil_image.resize((target_size[1], target_size[0]), Image.BILINEAR)

    return np.array(resized_image)
def create_fake_episode(clip_dir, save_dir, annotation, target_size, verify=False):
    """
    Create a fake episode from a video clip by processing all frames.

    Args:
        clip_dir (str): Path to directory containing clip frames (.npy files)
        save_dir (str): Directory to save the output episode
        annotation (list): List of annotation dictionaries; each entry is
            expected to carry 'video_name', 'action_name', 'language', 'id'
        target_size (tuple): Target size for frame resizing as (height, width)
        verify (bool): Whether to re-load the saved episode as a sanity check

    Returns:
        None (saves episode to disk as .npy file, or returns early if no
        annotation matches or no frame could be processed)
    """
    episode_data = []
    clip_name = os.path.basename(clip_dir)
    video_name = os.path.basename(os.path.dirname(clip_dir))

    # Find the annotation entry that matches this video/clip pair.
    caption = None
    episode_id = None
    for anno in annotation:
        if anno['video_name'] == video_name and anno['action_name'] == clip_name:
            caption = anno['language'][5:]  # Remove first 5 characters '#C C '
            # NOTE(review): assumes annotation ids are 1-based so episodes
            # become 0-based on disk — confirm against the annotation file.
            episode_id = anno['id'] - 1
            break

    if caption is None or episode_id is None:
        print(f"No matching annotation found for {video_name}/{clip_name}")
        return

    save_path = os.path.join(save_dir, f'episode_{episode_id}.npy')

    # Process each frame in the clip.  NOTE(review): lexicographic sort —
    # assumes frame filenames are zero-padded so they sort chronologically.
    for frame_name in sorted(os.listdir(clip_dir)):
        frame_path = os.path.join(clip_dir, frame_name)
        try:
            frame = np.load(frame_path)
            frame = frame[:, :, ::-1]  # Convert BGR to RGB
            frame = center_crop_and_resize(frame, target_size)

            episode_data.append({
                'image': np.asarray(frame, dtype=np.uint8),
                # Placeholder wrist camera / proprio / action fields so the
                # episode matches the RLDS schema expected downstream.
                'wrist_image': np.asarray(np.zeros([1, 1, 1]), dtype=np.uint8),
                'state': np.asarray(np.zeros(7), dtype=np.float32),
                'action': np.asarray(np.zeros(7), dtype=np.float32),
                'language_instruction': caption,
            })
        except Exception as e:
            # Best-effort per frame: a single corrupt frame should not kill
            # the whole episode.
            print(f"Error processing frame {frame_path}: {str(e)}")
            continue

    # BUG FIX: the original saved the episode even when every frame failed,
    # leaving empty .npy files that the verify step then flags.  Skip instead.
    if not episode_data:
        print(f"No frames could be processed for {video_name}/{clip_name}; "
              f"skipping episode {episode_id}")
        return

    # Save the episode data (list of dicts is pickled into an object array).
    np.save(save_path, episode_data)

    # Optional verification step
    if verify:
        try:
            loaded_data = np.load(save_path, allow_pickle=True)
            if len(loaded_data) == 0:
                print(f"Warning: Empty episode saved at {save_path}")
        except Exception as e:
            print(f"Failed to verify saved episode {episode_id}: {str(e)}")
def process_video(video_dir, target_dir, annotation, target_size, verify=False):
    """
    Process all clips within a single video directory.

    Args:
        video_dir (str): Path to video directory containing clip sub-directories
        target_dir (str): Directory to save processed episodes
        annotation (list): List of annotation dictionaries
        target_size (tuple): Target size for frame resizing as (height, width)
        verify (bool): Whether to verify saved episodes
    """
    for clip_name in sorted(os.listdir(video_dir)):
        clip_dir = os.path.join(video_dir, clip_name)
        # ROBUSTNESS FIX: a stray file next to the clip directories (e.g. a
        # metadata JSON) would crash the worker when create_fake_episode
        # calls os.listdir() on it — only recurse into real directories.
        if not os.path.isdir(clip_dir):
            continue
        create_fake_episode(
            clip_dir=clip_dir,
            save_dir=target_dir,
            annotation=annotation,
            target_size=target_size,
            verify=verify
        )
def main():
    """Entry point: parse CLI args, load annotations, fan videos out to a pool."""
    args = parse_arguments()

    # Create target directory if it doesn't exist
    os.makedirs(args.target_dir, exist_ok=True)

    # Load annotation file
    print("Loading annotation file...")
    try:
        with open(args.annotation_file) as f:
            annotation = json.load(f)
    except Exception as e:
        print(f"Failed to load annotation file: {str(e)}")
        return

    # One entry per video directory; stray files in source_dir are skipped.
    video_dirs = [
        os.path.join(args.source_dir, d)
        for d in sorted(os.listdir(args.source_dir))
        if os.path.isdir(os.path.join(args.source_dir, d))
    ]

    print(f"Processing {len(video_dirs)} videos using {args.processes} workers...")

    # Process videos in parallel.  partial() freezes everything except the
    # video directory, so each pool task ships a single string argument.
    with Pool(processes=args.processes) as pool:
        process_func = partial(
            process_video,
            target_dir=args.target_dir,
            annotation=annotation,
            target_size=tuple(args.target_size),
            verify=args.verify
        )

        # Drain the iterator to drive the workers and the progress bar.
        # IDIOM FIX: the original materialized an unused `results` list of
        # None values; iterating is enough.
        for _ in tqdm(
            pool.imap_unordered(process_func, video_dirs),
            total=len(video_dirs),
            desc='Processing videos'
        ):
            pass

    print('Ego4D data processing completed successfully!')

if __name__ == "__main__":
    main()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
// TODO(ego4d): BibTeX citation
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
## Converting Ego4D dataset to RLDS
2+
3+
4+
#### Step.0 Prepare Pre-training Dataset
5+
Download [Ego4D](https://ego4d-data.org/docs/start-here/) Hand-and-Object dataset:
6+
```
7+
# Download the CLI
8+
pip install ego4d
9+
# Select Subset Of Hand-and-Object
10+
python -m ego4d.cli.cli --output_directory=<path-to-save-dir> --datasets clips annotations --metadata --version v2 --benchmarks FHO
11+
```
12+
13+
Your directory tree should look like this:
14+
```
15+
$<path-to-ego4d-save-dir>
16+
├── ego4d.json
17+
└── v2
18+
├── annotations
19+
└── clips
20+
```
21+
22+
23+
#### :one: Install necessary dependencies
24+
25+
First create a conda environment using the provided environment.yml file (use `environment_ubuntu.yml` or `environment_macos.yml` depending on the operating system you're using):
26+
```
27+
conda env create -f environment_ubuntu.yml
28+
```
29+
30+
Then activate the environment using:
31+
```
32+
conda activate rlds_env
33+
cd vla-scripts/extern/ego4d_rlds_dataset_builder/ego4d
34+
pip install -e .
35+
```
36+
37+
Then, download all necessary dependencies from [huggingface](https://huggingface.co/datasets/qwbu/univla-ego4d-rlds-dependencies) and put them under ```vla-scripts/extern/ego4d_rlds_dataset_builder```.
38+
39+
40+
#### :two: We first extract the interaction frames (video clips within ```pre_frame``` and ```post_frame```) with a FPS of 2 and save them as ```.npy``` files.
41+
42+
We first process the critical information about the interaction clips and key frames (```pre_frame```, ```pnr_frame```, and ```post_frame```) into a json file (```info_clips.json```) with [this script](https://github.com/OpenDriveLab/MPI/blob/79798d0d6c40919adcf3263c6df7e86758fdd59a/prepare_dataset.py), or you can directly download the json file from [here](https://huggingface.co/datasets/qwbu/univla-ego4d-rlds-dependencies).
43+
44+
```bash
45+
python preprocess_ego4d.py \
46+
--denseclips_dir /path/to/output/denseclips \ # output dir for processed clips
47+
--info_clips_json /path/to/info_clips.json \ # metadata of keyframes
48+
--source_videos_dir <path-to-ego4d-save-dir>/v2/clips \ # ego4d videos path
49+
--frame_interval 15 # downsample Ego4D to 2 fps
50+
```
51+
52+
53+
#### :three: We then create episodes in the desired format with:
54+
55+
```bash
56+
mkdir ../ego4d_rlds_dataset_builder/ego4d/data
57+
mkdir ../ego4d_rlds_dataset_builder/ego4d/data/train
58+
59+
python create_episode_ego4d.py \
60+
--source_dir /path/to/output/denseclips \ # processed clips from the step.2
61+
--target_dir ../ego4d_rlds_dataset_builder/ego4d/data/train \ # path to save episodes
62+
--annotation_file /path/to/output/denseclips/annotations.json \ # processed meta-info from step.2
63+
--processes 64 # multi-processing
64+
```
65+
66+
#### :four: Create ego4d rlds dataset
67+
68+
```bash
69+
cd vla-scripts/extern/ego4d_rlds_dataset_builder/ego4d
70+
tfds build --overwrite --beam_pipeline_options="direct_running_mode=multi_processing,direct_num_workers=16"
71+
```
72+
73+
The default save path for the dataset is `/root/tensorflow_datasets/ego4d_dataset`. Directly processing the whole dataset may run into memory limits, so we can split the dataset into several parts and process them separately:
74+
75+
```bash
76+
cd vla-scripts/extern/ego4d_rlds_dataset_builder/ego4d
77+
mkdir data/val
78+
rsync -av --files-from=<(printf "episode_%d.npy\n" {0000..9999}) data/train/ data/val/
79+
tfds build --overwrite --beam_pipeline_options="direct_running_mode=multi_processing,direct_num_workers=4"
80+
mkdir /root/tensorflow_datasets/ego4d_dataset/ego4d_split_1
81+
mv /root/tensorflow_datasets/ego4d_dataset/1.0.0 /root/tensorflow_datasets/ego4d_dataset/ego4d_split_1/1.0.0
82+
rm -r data/val
83+
84+
rsync -av --files-from=<(printf "episode_%d.npy\n" {10000..19999}) data/train/ data/val/
85+
tfds build --overwrite --beam_pipeline_options="direct_running_mode=multi_processing,direct_num_workers=4"
86+
mkdir /root/tensorflow_datasets/ego4d_dataset/ego4d_split_2
87+
mv /root/tensorflow_datasets/ego4d_dataset/1.0.0 /root/tensorflow_datasets/ego4d_dataset/ego4d_split_2/1.0.0
88+
rm -r data/val
89+
90+
# repeat until all data is processed
91+
```

0 commit comments

Comments
 (0)