Skip to content

Commit 9c85b7b

Browse files
committed
[update] - thêm các tệp LocFileFcg.py và thongkeNodeAndEdge.py, cập nhật hàm data_vizualization trong output.py để hỗ trợ vẽ PCA cho dữ liệu benign và malware
Fix by: pikconlonton
1 parent fadb83d commit 9c85b7b

File tree

4 files changed

+150
-9
lines changed

4 files changed

+150
-9
lines changed

core/addition/LocFileFcg.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import os
2+
3+
# Locations of the inputs and of the output, relative to the working directory.
folder_b = '../../data_storage/processed/fcg_prune'  # folder holding the .fcg files
file_a = '../../data_storage/processed/constant/hash_benign2.txt'  # extension-less file names, one per line
output_file = '../../data_storage/metric_pruning/fcg_prun_benign.txt'  # receives the matched paths


def _read_names(names_file):
    """Return the set of non-empty, whitespace-stripped lines of *names_file*."""
    with open(names_file, 'r') as f:
        # A set gives O(1) membership tests; blank lines can never name a file.
        return {line.strip() for line in f if line.strip()}


def _matching_fcg_paths(folder, names):
    """Return the paths of the ``.fcg`` files in *folder* whose stem is in *names*.

    Each path is ``os.path.join(folder, entry)``; entries keep the order in
    which ``os.listdir`` reports them.
    """
    return [
        os.path.join(folder, entry)
        for entry in os.listdir(folder)
        if entry.endswith('.fcg') and os.path.splitext(entry)[0] in names
    ]


def main(folder=folder_b, names_file=file_a, out_path=output_file):
    """Write the paths of the listed ``.fcg`` files to *out_path*, one per line.

    Args:
        folder: Directory scanned for ``.fcg`` files.
        names_file: Text file with one extension-less file name per line.
        out_path: File that receives the matching paths; its parent
            directory is created when missing.

    Returns:
        The list of matched paths that was written.

    Raises:
        OSError: If *folder* or *names_file* cannot be read, or *out_path*
            cannot be written.
    """
    matched_files = _matching_fcg_paths(folder, _read_names(names_file))

    # Create the destination directory so a fresh checkout does not fail on open().
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(out_path, 'w') as f:
        f.writelines(path + '\n' for path in matched_files)

    print(f"Đã lưu các đường dẫn file vào tệp: {out_path}")
    return matched_files


if __name__ == "__main__":
    main()
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import dgl
2+
import networkx as nx
3+
import numpy as np
4+
5+
# Directory that holds the text files listing the graph paths (one path per line).
METRIC_DIR = '/workspace/Graph_reduce_Contrastive_Learning_ADM-master/core/metric_pruning'


def _read_path_list(list_name):
    """Return the non-empty, whitespace-stripped lines of ``METRIC_DIR/list_name``.

    Blank lines are skipped so that a trailing newline in a list file does
    not turn into an empty graph path.
    """
    with open(f"{METRIC_DIR}/{list_name}", 'r') as f:
        return [line.strip() for line in f if line.strip()]


# Read the graph path lists: original and pruned graphs for each class.
malware_fcg = _read_path_list('fcg_malware.txt')
benign_fcg = _read_path_list('fcg_benign.txt')
malware_fcg_prune = _read_path_list('fcg_prun_malware.txt')
benign_fcg_prune = _read_path_list('fcg_prun_benign.txt')
22+
23+
# Read a .fcg file
def loadGraph(path):
    """Return element ``[0][0]`` of what ``dgl.load_graphs(path)`` yields.

    NOTE(review): presumably this is the first graph stored in the file;
    confirm against the DGL ``load_graphs`` documentation.
    """
    loaded = dgl.load_graphs(path)
    graphs = loaded[0]
    return graphs[0]
27+
28+
def edge_node_count(graph):
    """Count the nodes and edges of *graph*.

    The node count is ``len(graph.nodes())``. The edge count is the length
    of the first row obtained after converting ``graph.edges()`` to a NumPy
    array and then to nested lists.

    Returns:
        A pair ``([node_count], [edge_count])``; each count is wrapped in a
        one-element list.
    """
    node_total = len(graph.nodes())
    endpoint_rows = np.array(graph.edges()).tolist()
    return [node_total], [len(endpoint_rows[0])]
36+
37+
def _average_counts(paths):
    """Return ``(avg_nodes, avg_edges)`` over the graphs stored at *paths*.

    Every path is loaded with ``loadGraph`` and measured with
    ``edge_node_count``. An empty *paths* yields ``(nan, nan)`` instead of
    raising ``ZeroDivisionError``.
    """
    if not paths:
        return float('nan'), float('nan')

    node_total = 0
    edge_total = 0
    for path in paths:
        nodes, edges = edge_node_count(loadGraph(path))
        # edge_node_count wraps each count in a one-element list.
        node_total += nodes[0]
        edge_total += edges[0]
    return node_total / len(paths), edge_total / len(paths)


# Compute every average before opening the report, so that a graph that fails
# to load does not leave a truncated report behind.
averages = [
    (label, _average_counts(paths))
    for label, paths in (
        ('malware', malware_fcg),
        ('benign', benign_fcg),
        ('malware_prune', malware_fcg_prune),
        ('benign_prune', benign_fcg_prune),
    )
]

# Write the results to LD.txt
with open('/workspace/Graph_reduce_Contrastive_Learning_ADM-master/core/metric_pruning/LD.txt', 'w') as f:
    for label, (avg_nodes, avg_edges) in averages:
        f.write(f"Avg node {label}: {avg_nodes}\n")
        f.write(f"Avg edge {label}: {avg_edges}\n")
81+
82+

core/output/output.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -87,25 +87,52 @@ def list_score(types: List[str], y_preds, y_trues, path_save_metrics):
8787
write_txt(paths["metrics"], metrics)
8888

8989
@staticmethod
90-
def data_vizualization(preds, y_trues, path_save_metrics):
91-
90+
def data_vizualization(preds, y_trues, path_save_metrics, name_img_visual):
9291
os.makedirs(path_save_metrics, exist_ok=True)
92+
9393
pca = PCA(n_components=2)
9494
vectors_2d = pca.fit_transform(preds)
95+
96+
y_trues = np.array(y_trues)
97+
9598
plt.figure(figsize=(8, 6))
96-
97-
plt.scatter(vectors_2d[:, 0], vectors_2d[:, 1], c='blue', edgecolors='black')
98-
for i, (x, y) in enumerate(vectors_2d):
99-
plt.text(x, y, f"{y_trues[i]}", fontsize=8, color='red', ha='right')
10099

100+
# Benign (label 0)
101+
idx_0 = y_trues == 0
102+
plt.scatter(
103+
vectors_2d[idx_0, 0],
104+
vectors_2d[idx_0, 1],
105+
c='steelblue',
106+
label='Benign (0)',
107+
edgecolors='black',
108+
alpha=0.7
109+
)
110+
111+
# Malware (label 1)
112+
idx_1 = y_trues == 1
113+
plt.scatter(
114+
vectors_2d[idx_1, 0],
115+
vectors_2d[idx_1, 1],
116+
c='darkorange',
117+
label='Malware (1)',
118+
edgecolors='black',
119+
alpha=0.7
120+
)
121+
122+
# Trục & tiêu đề
101123
plt.axhline(0, color='gray', linewidth=0.5)
102124
plt.axvline(0, color='gray', linewidth=0.5)
103125
plt.xlabel("PCA Component 1")
104126
plt.ylabel("PCA Component 2")
105-
plt.title("2D Visualization of High-Dimensional Vectors")
127+
plt.title("PCA Visualization of Benign and Malware Samples")
106128
plt.grid(True)
107-
path_save_image = f"{path_save_metrics}/pca_cl.png"
129+
130+
# Chú thích ở góc trên bên phải
131+
plt.legend(loc='upper right', title='Class Labels')
132+
133+
path_save_image = f"{path_save_metrics}/{name_img_visual}.png"
108134
plt.savefig(path_save_image, dpi=300, bbox_inches='tight')
135+
plt.close()
109136

110137
# Mã thử nghiệm
111138
# pred = [1, 1, 0]

core/trainer/trainer.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33
import pathlib
44
import time
5+
import numpy as np
56
import torch
67
import logging
78
from omegaconf import OmegaConf
@@ -130,6 +131,7 @@ def test_engine(config, test_dataloader):
130131
progress_loader = tqdm(test_dataloader, file=open(os.devnull, "w"))
131132
preds = []
132133
trusts = []
134+
h_list = []
133135

134136
# time start
135137
start_time = time.time()
@@ -146,7 +148,8 @@ def test_engine(config, test_dataloader):
146148
ans_batch = ans.argmax(dim=1).long()
147149
preds.extend(ans_batch.cpu().tolist())
148150
trusts.extend(labels.cpu().tolist())
149-
151+
h_list.extend(h.cpu().numpy()) # lưu embedding
152+
embeddings = np.array(h_list, dtype=np.float32)
150153
end_time = time.time()
151154
elapsed_time = end_time - start_time
152155

@@ -163,6 +166,8 @@ def test_engine(config, test_dataloader):
163166
y_preds=preds,
164167
y_trues=trusts,
165168
path_save_metrics=config.training.path_save_metrics)
169+
logger.info(f"Plot points of data")
170+
ScoreOutput.data_vizualization(embeddings, trusts, config.training.path_save_metrics, "pca_benign_malware")
166171
logger.info(f"Completed Testing model with output:\n\
167172
\tPath to model: {config.training.path_save_metrics}")
168173

0 commit comments

Comments
 (0)