Skip to content
This repository was archived by the owner on Feb 8, 2024. It is now read-only.

Commit 2f41afd

Browse files
Addressed review comments.
Signed-off-by: Shriya Deshmukh <[email protected]>
1 parent 0cc8c50 commit 2f41afd

File tree

2 files changed

+55
-14
lines changed

2 files changed

+55
-14
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import mmap
2+
import os
3+
4+
5+
def binarySearch(f, pos, total_size, start_time):
    """Binary-search a seekable, line-oriented file for ``start_time``.

    The file is probed at offsets between ``pos`` and ``total_size``
    (both inclusive). Each probe is aligned to the beginning of a line,
    and the first ``len(start_time)`` characters of that line are compared
    with ``start_time``.

    The search is only meaningful when the lines inside the window are
    sorted in ascending order of that leading key (e.g. a timestamp
    prefix); an unsorted file may yield -1 even if a match exists.

    Args:
        f: Open file-like object supporting ``seek``, ``tell`` and
            ``readline``.
        pos: Offset where the search window starts. It must be the
            beginning of a line (0 for a whole-file search).
        total_size: Offset where the search window ends, typically the
            file size.
        start_time: Key to look for. It must be of the same type that
            ``f.readline()`` returns (``str`` for text files, ``bytes``
            for binary files).

    Returns:
        The offset of the start of a line whose prefix equals
        ``start_time`` (any one of them if several match), or -1 when no
        such line is found.

    NOTE(review): for files opened in text mode the values returned by
    ``tell()`` are opaque; treating them as plain offsets works for
    single-byte data but should be confirmed for multi-byte encodings.
    Opening the file in binary mode avoids the question.
    """
    lo, hi = pos, total_size
    key_len = len(start_time)

    while lo <= hi:
        mid = lo + (hi - lo) // 2

        # Align the probe to the first line that starts at or after `mid`.
        # Seeking to `mid - 1` and discarding one readline() means that a
        # `mid` which already sits on a line start is kept, not skipped.
        if mid > pos:
            f.seek(mid - 1, 0)
            f.readline()
        else:
            f.seek(pos, 0)
        line_start = f.tell()
        line = f.readline()

        # No line starts inside the window at or after `mid`.
        if not line or line_start > total_size:
            hi = mid - 1
            continue

        mid_slice = line[:key_len]
        if mid_slice == start_time:
            return line_start
        elif mid_slice < start_time:
            # Every offset up to `line_start` maps to this line or an
            # earlier one, so the match can only lie beyond it.
            lo = line_start + 1
        else:
            # Every offset from `mid` onwards maps to this line or a later
            # one, so the match can only lie before `mid`.
            hi = mid - 1

    # The window is exhausted: no line carries the requested key.
    return -1
29+
30+
# Ad-hoc driver: search the sample log 'rgw-1.log' for ``start_time``
# using binarySearch and report the outcome on stdout.
# Both bounds are empty placeholders; ``end_time`` is not used below.
start_time = end_time = ''
with open('rgw-1.log', 'r') as f:
    # Move to the end of the file so that tell() yields the upper bound
    # of the search window.
    f.seek(0, os.SEEK_END)
    size = f.tell()
    result = binarySearch(f, 0, size, start_time)
    # binarySearch signals "not found" with -1.
    if result == -1:
        print("Element is not present in array")
    else:
        print("Element is present at index % d" % result)

py-utils/src/utils/support_framework/log_filters.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -183,32 +183,30 @@ def limit_time(src_dir, dest_dir, duration, file_name_reg_ex,
183183
+ f"unexpected characters: {invalid_chars}")
184184

185185
include_lines_without_timestamp = False
186-
supported_file_types = ['.log', '.gz']
186+
supported_file_types = ['.log', '.txt']
187187
start_time, end_time = FilterLog._parse_duration(duration)
188188
Log.info(f'start_time = {start_time}, end_time = {end_time}')
189189
# sort files based on timestamp
190-
list_of_files = filter(lambda f: os.path.isfile(os.path.join(src_dir, f)),
191-
os.listdir(src_dir))
190+
list_of_files = filter(lambda f: os.path.isfile(
191+
os.path.join(src_dir, f)), os.listdir(src_dir))
192192
# sort the files based on last modification time in descending order
193193
list_of_files = sorted(list_of_files,
194194
key=lambda f: os.path.getmtime(os.path.join(src_dir, f)),
195195
reverse=True)
196196
for file in list_of_files:
197197
log_scope_exceeded = False
198-
is_log_written_to_file = False
199-
file_extension = pathlib.Path(file).suffix
200-
# Ignore processing of other file format.
201-
if file_extension not in supported_file_types:
202-
Log.warn(f'{file} file is skipped..')
203-
continue
198+
log_written_to_file = False
204199
op_file = os.path.join(dest_dir, 'tmp_' + file)
205200
if file.startswith(file_name_reg_ex):
206201
in_file = os.path.join(src_dir, file)
207-
if file.endswith('.gz'):
208-
# File modification time is in
202+
file_extension = pathlib.Path(file).suffix
203+
if file_extension not in supported_file_types:
209204
FilterLog._collect_rotated_log_file(
210205
in_file, dest_dir, start_time, end_time)
211206
continue
207+
# TODO: Instead of processing the file line by line,
208+
# apply binary search on file so that we can get log lines
209+
# in O(log N) time.
212210
with open(in_file, 'r') as fd_in, open(op_file, 'a') as fd_out:
213211
line = fd_in.readline()
214212
while (line):
@@ -220,7 +218,7 @@ def limit_time(src_dir, dest_dir, duration, file_name_reg_ex,
220218
log_time = datetime.strptime(log_duration, datetime_format)
221219
if start_time <= log_time and log_time <= end_time:
222220
include_lines_without_timestamp = True
223-
is_log_written_to_file = True
221+
log_written_to_file = True
224222
fd_out.write(line)
225223
elif log_time > end_time and include_lines_without_timestamp:
226224
include_lines_without_timestamp = False
@@ -233,11 +231,11 @@ def limit_time(src_dir, dest_dir, duration, file_name_reg_ex,
233231
# flag include_lines_without_timestamp = True
234232
except ValueError:
235233
if include_lines_without_timestamp:
236-
is_log_written_to_file = True
234+
log_written_to_file = True
237235
fd_out.write(line)
238236
line = fd_in.readline()
239237
try:
240-
if is_log_written_to_file:
238+
if log_written_to_file:
241239
final_op_file = os.path.join(dest_dir, file)
242240
os.rename(op_file, final_op_file)
243241
else:

0 commit comments

Comments
 (0)