Skip to content

Commit ce5ec74

Browse files
authored
Merge pull request #3 from ling6614/feature/add-file-processing-tests
Feature/add file processing tests
2 parents 81b6400 + c0d4047 commit ce5ec74

File tree

3 files changed

+98
-2
lines changed

3 files changed

+98
-2
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,39 @@
11
# Excel parser module
2+
import pandas as pd
3+
import json
4+
import os
5+
6+
class ExcelConverter:
    """Load an Excel workbook into a DataFrame and export its rows as JSON."""

    def __init__(self, file_path):
        """Read the workbook at ``file_path`` using the openpyxl engine.

        Args:
            file_path: Path handed to ``pandas.read_excel``.
        """
        self.file_path = file_path
        self.data_frame = pd.read_excel(file_path, engine='openpyxl')

    @staticmethod
    def _to_json_value(value):
        """Return a JSON-serializable stand-in for a single cell value."""
        if isinstance(value, pd.Timestamp):
            return value.strftime('%Y-%m-%d %H:%M:%S')
        # Empty cells surface as NaT / NA / NaN. json.dumps raises on the
        # first two and writes the non-standard token ``NaN`` for the third,
        # so map all of them to None (JSON ``null``).
        if value is pd.NaT or value is pd.NA:
            return None
        if isinstance(value, float) and value != value:  # NaN is the only float != itself
            return None
        return value

    def to_json(self, chunk_size=5):
        """Serialize the rows as a list of JSON array strings.

        Each row becomes a JSON object keyed by column name. Rows are grouped
        into consecutive chunks of at most ``chunk_size`` rows, and every
        chunk is dumped as one JSON array string (non-ASCII text is kept
        as-is).

        Args:
            chunk_size: Maximum number of rows per JSON string.

        Returns:
            A list of JSON strings, one per chunk; empty when there are no rows.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if chunk_size < 1:
            raise ValueError(
                f"chunk_size must be a positive integer, got {chunk_size!r}"
            )

        json_list = [
            {key: self._to_json_value(value) for key, value in row.to_dict().items()}
            for _, row in self.data_frame.iterrows()
        ]

        chunked_list = [
            json_list[i:i + chunk_size]
            for i in range(0, len(json_list), chunk_size)
        ]
        return [json.dumps(chunk, ensure_ascii=False) for chunk in chunked_list]
23+
24+
# TODO: Implement to_yaml method
25+
# def to_yaml(self):
26+
# data = self.data_frame.to_dict(orient='records')
27+
# return yaml.dump(data, allow_unicode=True)
28+
29+
# TODO: Implement to_xml method
30+
# def to_xml(self):
31+
# root = ET.Element("root")
32+
# for _, row in self.data_frame.iterrows():
33+
# item = ET.SubElement(root, "item")
34+
# for key, value in row.items():
35+
# if isinstance(value, pd.Timestamp):
36+
# value = value.strftime('%Y-%m-%d %H:%M:%S')
37+
# child = ET.SubElement(item, key)
38+
# child.text = str(value)
39+
# return ET.tostring(root, encoding='unicode')
Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,17 @@
1-
# JSON validator module
1+
# JSON validator module
2+
3+
import json
4+
5+
def remove_keys_from_json(json_string, keys_to_remove):
    """Drop the given keys from every object in a JSON array string.

    Args:
        json_string: JSON text whose top-level value is an array of objects.
        keys_to_remove: Keys to delete. Entries may be groups of keys
            (``[["a"], ["b", "c"]]``), plain key names (``["a", "b"]``),
            or a mix of both.

    Returns:
        The array re-serialized as JSON text with non-ASCII characters kept
        as-is. Keys that are absent from an object are ignored.
    """
    # Flatten the requested keys once. A bare string is a single key; iterating
    # it would yield its characters and silently remove nothing.
    doomed = set()
    for entry in keys_to_remove:
        if isinstance(entry, str):
            doomed.add(entry)
        else:
            doomed.update(entry)

    # Parse the JSON string into a list of objects.
    data_list = json.loads(json_string)

    # Strip the unwanted keys from each object in place.
    for data in data_list:
        for key in doomed:
            data.pop(key, None)

    # Serialize the list back to a JSON string.
    return json.dumps(data_list, ensure_ascii=False)

tests/test_file_processing.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,43 @@
1-
# Test file processing
1+
import unittest
2+
import json
3+
import os
4+
import pandas as pd
5+
from ai_commons.file_processing.excel_parser import ExcelConverter
6+
from ai_commons.file_processing.json_validator import remove_keys_from_json
7+
8+
class TestExcelConverter(unittest.TestCase):
    """Round-trip a small workbook through ``ExcelConverter.to_json``."""

    def setUp(self):
        """Write a two-row sample workbook into a private temporary directory."""
        # Function-scope import: the module header does not import tempfile.
        import tempfile

        # A fixed 'test.xlsx' in the current directory could overwrite (and
        # later delete) an unrelated file, so the fixture lives in its own
        # temporary directory. addCleanup also runs when setUp itself fails.
        tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(tmp_dir.cleanup)
        self.test_file = os.path.join(tmp_dir.name, 'test.xlsx')

        # Create the sample Excel file.
        df = pd.DataFrame({
            'Timestamp': [pd.Timestamp('2024-04-07 20:50:30'), pd.Timestamp('2024-05-08 15:30:45')],
            'User ID': ['U12345', 'U67890'],
            'Gift': ['Flower', 'Chocolate']
        })
        df.to_excel(self.test_file, index=False)

    def test_to_json(self):
        """Both rows fit in one chunk and timestamps are rendered as text."""
        converter = ExcelConverter(self.test_file)
        json_output = converter.to_json(chunk_size=2)
        expected_output = [
            '[{"Timestamp": "2024-04-07 20:50:30", "User ID": "U12345", "Gift": "Flower"}, {"Timestamp": "2024-05-08 15:30:45", "User ID": "U67890", "Gift": "Chocolate"}]'
        ]
        self.assertEqual(json_output, expected_output)
32+
33+
class TestJsonValidator(unittest.TestCase):
    """Checks for ``remove_keys_from_json``."""

    def test_remove_keys_from_json(self):
        """Keys named in any group are stripped from every object."""
        source = '[{"Timestamp": "2024-04-07 20:50:30", "User ID": "U12345", "Gift": "Flower"}, {"Timestamp": "2024-05-08 15:30:45", "User ID": "U67890", "Gift": "Chocolate"}]'
        stripped = remove_keys_from_json(source, [["Timestamp"], ["User ID"]])
        self.assertEqual(
            stripped,
            '[{"Gift": "Flower"}, {"Gift": "Chocolate"}]',
        )
41+
42+
# Run the suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)