benchmark_tf.py
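"""Benchmark audio loading back-ends inside a tf.data pipeline.

For each loader listed in `libs`, the script walks the AUDIO/<duration>/
directories, wraps every file in a tf.data.Dataset, iterates the dataset
`repeat` times and stores the average decoding time per file through
utils.DF_writer.
"""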
# Select a non-interactive matplotlib backend before anything imports pyplot.
import matplotlib
matplotlib.use('Agg')
import os
import os.path
import random
import time
import soundfile as sf
import argparse
import librosa
import utils
import loaders
import tensorflow as tf


def get_files(dir, extension):
    """Recursively collect all files below `dir` that end with `extension`."""
    audio_files = []
    dir = os.path.expanduser(dir)
    for root, _, fnames in sorted(os.walk(dir)):
        for fname in fnames:
            if fname.endswith(extension):
                audio_files.append(os.path.join(root, fname))
    return audio_files
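

# For example (hypothetical filenames), get_files("AUDIO/1", "wav") might
# return ["AUDIO/1/sample_00.wav", "AUDIO/1/sample_01.wav", ...].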


def _make_py_loader_function(func):
    """Wrap an eager Python loader so it can be driven by tf.py_function."""
    def _py_loader_function(fp):
        # `fp` arrives as a scalar string tensor; decode it to a Python path.
        return func(fp.numpy().decode())
    return _py_loader_function


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Benchmark audio loading in tensorflow'
    )
    parser.add_argument('--ext', type=str, default="wav")
    args = parser.parse_args()

    repeat = 3

    columns = [
        'ext',
        'lib',
        'duration',
        'time',
    ]
    store = utils.DF_writer(columns)

    libs = [
        'tfio_fromaudio',
        'stempeg',
        'soxbindings',
        'ar_ffmpeg',
        'aubio',
        'pydub',
        'soundfile',
        'librosa',
        'scipy',
        'scipy_mmap',
        'tf_decode_wav',
    ]

    for lib in libs:
        print("Testing: %s" % lib)
        for root, dirs, fnames in sorted(os.walk('AUDIO')):
            for audio_dir in dirs:
                append = False
                duration = int(audio_dir)
                audio_files = get_files(
                    dir=os.path.join(root, audio_dir),
                    extension=args.ext
                )

                dataset = tf.data.Dataset.from_tensor_slices(audio_files)

                if lib in ["tf_decode_wav"]:
                    dataset = dataset.map(
                        lambda x: loaders.load_tf_decode_wav(x),
                        num_parallel_calls=1
                    )
                elif lib in ["tfio_fromaudio"]:
                    dataset = dataset.map(
                        lambda x: loaders.load_tfio_fromaudio(x, args.ext),
                        num_parallel_calls=1
                    )
                elif lib in ["tfio_fromffmpeg"]:
                    dataset = dataset.map(
                        lambda x: loaders.load_tfio_fromffmpeg(x),
                        num_parallel_calls=1
                    )
                else:
                    # All remaining loaders are plain Python functions and are
                    # run through tf.py_function.
                    loader_function = getattr(loaders, 'load_' + lib)
                    dataset = dataset.map(
                        lambda filename: tf.py_function(
                            _make_py_loader_function(loader_function),
                            [filename],
                            [tf.float32]
                        ),
                        num_parallel_calls=1
                    )

                # Drop files that the current loader fails to decode.
                dataset = dataset.apply(tf.data.experimental.ignore_errors())

                start = time.time()
                for i in range(repeat):
                    for audio in dataset:
                        # Consume the decoded audio; a truthy max means at
                        # least one file was loaded successfully.
                        value = tf.reduce_max(audio)
                        if value:
                            append = True
                end = time.time()

                if append:
                    # Average wall-clock time per file over all repeats.
                    store.append(
                        ext=args.ext,
                        lib=lib,
                        duration=duration,
                        time=float(end - start) / (len(audio_files) * repeat),
                    )

    store.df.to_pickle("results/benchmark_%s_%s.pickle" % ("tf", args.ext))
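
# Usage sketch (an assumption based on the code above, not project docs):
# the script expects an AUDIO/ tree with one sub-directory per clip duration
# in seconds and an existing results/ directory for the output pickle, e.g.:
#
#   python benchmark_tf.py --ext wav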