Skip to content

Commit cfe4b6b

Browse files
authored
Merge pull request #263 from texadactyl/master
Improve CPU utilisation in turbo_seti use of blank_dc function
2 parents 148e41c + 24c5cc2 commit cfe4b6b

File tree

7 files changed

+68
-15
lines changed

7 files changed

+68
-15
lines changed

VERSION-HISTORY.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ This file is a version history of turbo_seti amendments, beginning with version
44

55
| `YYYY_MM_DD` | `Version` | `Contents` |
66
| :--: | :--: | :-- |
7+
| 2021-07-18 | 2.1.7 | Create a turbo_seti clone of blank_dc that is optional and uses a different strategy (issue #262). |
78
| 2021-07-15 | 2.1.6 | Calculate normalized value inside hitsearch kernel on GPU-mode. |
8-
| 2021-07-16 | 2.1.5 | Failed to pass the gpu_id from find_doppler.py to data_handler.py. (issue #254). |
9+
| 2021-07-16 | 2.1.5 | Failed to pass the gpu_id from find_doppler.py to data_handler.py (issue #254). |
910
| 2021-07-15 | 2.1.4 | Add GPU device selection with cli argument gpu_id. (issue #254). |
1011
| 2021-07-15 | 2.1.3 | Diagnose out of range time steps with correct messages (issue #256). |
1112
| | | Also, stop catching exceptions in seti_event.py which causes a cascade in tracebacks. |

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from setuptools import setup, find_packages
1111

12-
__version__ = "2.1.6"
12+
__version__ = "2.1.7"
1313

1414
with open("turbo_seti/find_doppler/turbo_seti_version.py", "w") as fh:
1515
fh.write("TURBO_SETI_VERSION = '{}'\n".format(__version__))

test/test_turbo_seti.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,14 @@ def test_turboSETI_entry_point():
218218
h5_5 = os.path.join(TESTDIR, OFFNIL_H5)
219219
args = [h5_5, "-P", "y", "-s", str(MIN_SNR), "-M", str(MAX_DRIFT), "-o", TESTDIR, ]
220220
seti_event.main(args)
221+
print("\n===== test_turboSETI_entry_point 6 =====")
222+
h5_5 = os.path.join(TESTDIR, VOYAH5)
223+
args = [h5_5, "--blank_dc", "y", "-s", str(MIN_SNR), "-M", str(MAX_DRIFT), "-o", TESTDIR, ]
224+
seti_event.main(args)
225+
print("\n===== test_turboSETI_entry_point 7 =====")
226+
h5_5 = os.path.join(TESTDIR, VOYAH5)
227+
args = [h5_5, "--blank_dc", "n", "-s", str(MIN_SNR), "-M", str(MAX_DRIFT), "-o", TESTDIR, ]
228+
seti_event.main(args)
221229

222230

223231
def test_make_waterfall_plots():
@@ -350,3 +358,4 @@ def test_flipx_kernel(kernels):
350358
if __name__ == "__main__":
351359
print("Please run: pytest test_turbo_seti.py")
352360
test_find_doppler_voyager()
361+
test_turboSETI_entry_point()

turbo_seti/find_doppler/data_handler.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from blimpy import Waterfall
1212
from blimpy.io import sigproc
1313

14+
from .helper_functions import cut_the_mid_spike
1415
from .kernels import Kernels
1516

1617
logger = logging.getLogger('data_handler')
@@ -249,7 +250,7 @@ def __init__(self, filename, f_start=None, f_stop=None, t_start=None, t_stop=Non
249250
self.shoulder_size = 0
250251
self.tdwidth = self.fftlen + self.shoulder_size * self.tsteps
251252

252-
def load_data(self):
253+
def load_data(self, flag_blank_dc=True):
253254
r"""
254255
Read the spectra and drift indices from file.
255256
@@ -261,11 +262,16 @@ def load_data(self):
261262
self.fil_file.read_data(f_start=self.f_start, f_stop=self.f_stop)
262263

263264
#Blanking DC bin.
264-
if self.n_coarse_chan is not None:
265-
n_coarse_chan = self.n_coarse_chan
265+
if flag_blank_dc:
266+
logger.debug("blank_dc is enabled.")
267+
if self.n_coarse_chan is not None:
268+
n_coarse_chan = self.n_coarse_chan
269+
else:
270+
n_coarse_chan = int(self.fil_file.calc_n_coarse_chan())
271+
cut_the_mid_spike(self.fil_file.data, n_coarse_chan)
266272
else:
267-
n_coarse_chan = int(self.fil_file.calc_n_coarse_chan())
268-
self.fil_file.blank_dc(n_coarse_chan)
273+
logger.debug("blank_dc is disabled.")
274+
269275

270276
dim_time = self.fil_file.data.shape[0]
271277
if dim_time < 2:

turbo_seti/find_doppler/find_doppler.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,13 @@ class FindDoppler:
7373
Append output DAT & LOG files? (True/False)
7474
log_level_int : int, optional
7575
Python logging threshold level (INFO, DEBUG, or WARNING)
76+
blank_dc : bool, optional
77+
Use blank_dc() for spike smoothing.
7678
7779
"""
7880
def __init__(self, datafile, max_drift=4.0, min_drift=0.00001, snr=25.0, out_dir='./', coarse_chans=None,
7981
obs_info=None, flagging=False, n_coarse_chan=None, kernels=None, gpu_backend=False, gpu_id=0,
80-
precision=2, append_output=False, log_level_int=logging.INFO):
82+
precision=2, append_output=False, log_level_int=logging.INFO, blank_dc=True):
8183

8284
print(version_announcements)
8385

@@ -113,10 +115,11 @@ def __init__(self, datafile, max_drift=4.0, min_drift=0.00001, snr=25.0, out_dir
113115
self.status = True
114116
self.flagging = flagging
115117
self.append_output = append_output
118+
self.flag_blank_dc = blank_dc
116119
self.parms = 'datafile={}, max_drift={}, min_drift={}, snr={}, out_dir={}, coarse_chans={}' \
117120
.format(datafile, max_drift, min_drift, snr, out_dir, coarse_chans) \
118-
+ ', flagging={}, n_coarse_chan={}, kernels={}, gpu_id={}, gpu_backend={}' \
119-
.format(flagging, self.n_coarse_chan, kernels, gpu_id, gpu_backend) \
121+
+ ', flagging={}, n_coarse_chan={}, kernels={}, gpu_id={}, gpu_backend={}, blank_dc={}' \
122+
.format(flagging, self.n_coarse_chan, kernels, gpu_id, gpu_backend, blank_dc) \
120123
+ ', precision={}, append_output={}, log_level_int={}, obs_info={}' \
121124
.format(precision, append_output, log_level_int, obs_info)
122125
if min_drift < 0 or max_drift < 0:
@@ -166,7 +169,6 @@ def search(self, n_partitions=1, progress_bar='n'):
166169

167170
# As of 2.1.0, add max_drift_rate and obs_length to FileWriter header input
168171
header_in['max_drift_rate'] = self.max_drift
169-
#header_in['obs_length'] was already set in data_handler.py DATAH __init__
170172

171173
wfilename = filename_in.split('/')[-1].replace('.h5', '').replace('.fits', '').replace('.fil', '')
172174
path_log = '{}/{}.log'.format(self.out_dir.rstrip('/'), wfilename)
@@ -198,7 +200,7 @@ def search(self, n_partitions=1, progress_bar='n'):
198200

199201
# Run serial version
200202
if n_partitions == 1:
201-
sched = Scheduler(load_the_data, [ (l, self.kernels.precision) for l in self.data_handle.data_list ])
203+
sched = Scheduler(load_the_data, [ (l, self.kernels.precision, self.flag_blank_dc) for l in self.data_handle.data_list ])
202204
for dl in self.data_handle.data_list:
203205
search_coarse_channel(dl, self, dataloader=sched, filewriter=filewriter, logwriter=logwriter)
204206
# Run Parallel version via dask
@@ -216,15 +218,15 @@ def search(self, n_partitions=1, progress_bar='n'):
216218
t1 = time.time()
217219
self.last_logwriter(path_log, '\n===== Search time: {:.2f} minutes'.format((t1 - t0)/60.0))
218220

219-
def load_the_data(d, precision):
221+
def load_the_data(d, precision, flag_blank_dc):
220222
data_obj = DATAH5(d['filename'],
221223
f_start=d['f_start'],
222224
f_stop=d['f_stop'],
223225
coarse_chan=d['coarse_chan'],
224226
n_coarse_chan=d['n_coarse_chan'],
225227
gpu_backend=False,
226228
precision=precision)
227-
spectra, drift_indices = data_obj.load_data()
229+
spectra, drift_indices = data_obj.load_data(flag_blank_dc=flag_blank_dc)
228230
data_obj.close()
229231

230232
return (data_obj, spectra, drift_indices)
@@ -277,7 +279,7 @@ def search_coarse_channel(data_dict, find_doppler_instance, dataloader=None, log
277279
if dataloader:
278280
data_obj, spectra, drift_indices = dataloader.get()
279281
else:
280-
data_obj, spectra, drift_indices = load_the_data(d, fd.kernels.precision)
282+
data_obj, spectra, drift_indices = load_the_data(d, fd.kernels.precision, fd.flag_blank_dc)
281283

282284
fileroot_out = filename_in.split('/')[-1].replace('.h5', '').replace('.fits', '').replace('.fil', '')
283285
if logwriter is None:

turbo_seti/find_doppler/helper_functions.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,38 @@
11
#!/usr/bin/env python
22

33
import numpy as np
4+
import logging
5+
6+
7+
def cut_the_mid_spike(data_array, n_coarse_chan):
8+
""" Cut the mid-point spike in coarse channels.
9+
10+
Removes the DC spike in the centre of each coarse channel bin.
11+
12+
Parameters
13+
----------
14+
data_array : ndarray
15+
Full data array.
16+
n_coarse_chan : int
17+
Number of coarse channels.
18+
"""
19+
logger = logging.getLogger('cut_the_mid_spike')
20+
if not type(n_coarse_chan) != "int":
21+
logger.error("Number of coarse channels is not an integer, no action taken!")
22+
return
23+
if n_coarse_chan < 1:
24+
logger.error = "Number of coarse channels < 1, no action taken!"
25+
return
26+
27+
n_fine_chan = data_array.shape[-1]
28+
n_fine_chan_per_coarse_chan = int(n_fine_chan / n_coarse_chan) # ratio of fine channels to coarse channels
29+
30+
mid_chan = int(n_fine_chan_per_coarse_chan / 2)
31+
32+
for ii in range(n_coarse_chan):
33+
ss = ii * n_fine_chan_per_coarse_chan
34+
# Replace the mid point value with the neighbour's value.
35+
data_array[..., ss + mid_chan] = data_array[..., ss + mid_chan + 1]
436

537

638
def chan_freq(header, fine_channel, tdwidth, ref_frame):

turbo_seti/find_doppler/seti_event.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ def main(args=None):
5454
help='Use a progress bar with dask? (y/n)')
5555
p.add_argument('-g', '--gpu', dest='flag_gpu', type=str, default='n',
5656
help='Compute on the GPU? (y/n)')
57+
p.add_argument('-z', '--blank_dc', dest='flag_blank_dc', type=str, default='y',
58+
help='Smooth out the DC spike? (y/n)')
5759
p.add_argument('-d', '--gpu_id', dest='gpu_id', type=int, default=0,
5860
help='Use which GPU device? (0,1,...)')
5961
p.add_argument('-P', '--profile', dest='flag_profile', type=str, default='n',
@@ -132,6 +134,7 @@ def exec_proc(args):
132134
n_coarse_chan=args.n_coarse_chan,
133135
gpu_backend=(args.flag_gpu == "y"),
134136
gpu_id=args.gpu_id,
137+
blank_dc=(args.flag_blank_dc == "y"),
135138
precision=1 if args.flag_single_precision == "y" else 2,
136139
log_level_int=log_level_int)
137140

0 commit comments

Comments
 (0)