Monday, January 05, 2026
Python code of Leiden Communities on Japanese eel ovarian transcriptome
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import seaborn as sns
import requests
import time
from pathlib import Path
from typing import List, Tuple, Optional, Dict
from gseapy import enrichr
import warnings
warnings.filterwarnings('ignore')
# --- 1. CNA 核心分析類別 ---
class CNA_Dual_Analyzer:
def __init__(self, species: int = 9606, max_api_genes: int = 200):
self.species = species
self.max_api_genes = max_api_genes
self.api_url = "https://string-db.org/api/json/network"
def _safe_string_api(self, genes: List[str]) -> pd.DataFrame:
if len(genes) < 2: return pd.DataFrame()
batch_genes = genes[:self.max_api_genes]
params = {
"identifiers": "%0d".join(batch_genes),
"species": self.species,
"caller_identity": "cna_v2_1"
}
try:
resp = requests.post(self.api_url, data=params, timeout=30)
resp.raise_for_status()
data = resp.json()
if not data: return pd.DataFrame()
df = pd.DataFrame(data)
return df[['preferredName_A', 'preferredName_B', 'score']].dropna()
except Exception as e:
print(f"⚠️ STRING API失敗: {e}")
return pd.DataFrame()
def compute_cna_metrics(self, cluster_data: pd.DataFrame, at_cols: List[str], mt_cols: List[str]) -> pd.DataFrame:
metrics = []
for _, row in cluster_data.iterrows():
gene = str(row.get('Gene_symbol', row.index[0])).strip().upper()
if not gene or gene in ['NAN', 'NONE', '']: continue
mean_at = row[at_cols].mean() if at_cols else 0.0
mean_mt = row[mt_cols].mean() if mt_cols else 0.0
asinh_diff = np.arcsinh(mean_mt) - np.arcsinh(mean_at)
log2fc = np.log2((mean_mt + 1e-6) / (mean_at + 1e-6))
metrics.append({
'Gene': gene, 'Mean_AT': float(mean_at), 'Mean_MT': float(mean_mt),
'asinh_Diff': float(asinh_diff), 'Log2FC': float(log2fc),
'Expr_Change': 'UP' if asinh_diff > 0 else 'DOWN'
})
return pd.DataFrame(metrics)
def build_control_network(self, genes: List[str], metrics_df: pd.DataFrame) -> Tuple[Optional[nx.Graph], pd.DataFrame]:
ppi_df = self._safe_string_api(genes)
if ppi_df.empty: return None, pd.DataFrame()
G = nx.Graph()
for _, row in ppi_df.iterrows():
G.add_edge(row['preferredName_A'], row['preferredName_B'], weight=float(row['score']) / 1000)
centrality = nx.degree_centrality(G)
results = []
for node in G.nodes():
gene_metrics = metrics_df[metrics_df['Gene'] == node]
asinh_diff = gene_metrics['asinh_Diff'].iloc[0] if not gene_metrics.empty else 0.0
ctrl_score = centrality.get(node, 0) * (abs(asinh_diff) + 1)
results.append({
'Gene': node, 'Control_Score': ctrl_score, 'Centrality': centrality.get(node, 0),
'asinh_Diff': asinh_diff, 'Log2FC': gene_metrics['Log2FC'].iloc[0] if not gene_metrics.empty else 0.0,
'Expr_Change': gene_metrics['Expr_Change'].iloc[0] if not gene_metrics.empty else 'NONE'
})
stats_df = pd.DataFrame(results).sort_values('Control_Score', ascending=False)
return G, stats_df
# --- 2. 視覺化類別 (整合 CNetplot) ---
class NetworkVisualizer:
@staticmethod
def create_cna_network(G, stats_df, output_path, cluster_id):
"""核心 PPI 控制網絡圖"""
if G is None or stats_df.empty: return
plt.figure(figsize=(12, 10), dpi=100)
pos = nx.spring_layout(G, k=0.3, iterations=50)
sizes = 100 + stats_df.set_index('Gene').reindex(G.nodes())['Control_Score'] * 2000
colors = stats_df.set_index('Gene').reindex(G.nodes())['asinh_Diff']
nx.draw_networkx_nodes(G, pos, node_size=sizes, node_color=colors, cmap='RdYlBu_r', alpha=0.8)
nx.draw_networkx_edges(G, pos, alpha=0.2)
top_genes = stats_df.head(20)['Gene'].tolist()
nx.draw_networkx_labels(G, pos, {n: n for n in G.nodes() if n in top_genes}, font_size=8)
plt.title(f"CNA Network: Cluster {cluster_id}")
plt.savefig(output_path, bbox_inches='tight')
plt.close()
@staticmethod
def create_cnet_plot(stats_df: pd.DataFrame, output_path: Path, cluster_id: str):
"""生成 CNetplot (Gene-Pathway Network)"""
print(f"🧬 正在為 Cluster {cluster_id} 生成 CNetplot...")
genes = stats_df.head(100)['Gene'].tolist() # 取控制力前100的基因
try:
enr = enrichr(gene_list=genes, gene_sets='KEGG_2021_Human', organism='Human', outdir=None)
res = enr.results.nsmallest(5, 'Adjusted P-value') # 取前5個通路
if res.empty: return
G = nx.Graph()
# 建立基因與通路的連接
gene_colors = dict(zip(stats_df['Gene'], stats_df['asinh_Diff']))
for _, row in res.iterrows():
pathway = row['Term'].split('__')[-1]
G.add_node(pathway, type='pathway')
path_genes = row['Genes'].split(';')
for g in path_genes:
if g in genes:
G.add_node(g, type='gene')
G.add_edge(pathway, g)
plt.figure(figsize=(14, 10))
pos = nx.spring_layout(G, k=0.5)
# 繪製路徑節點 (大橘色)
path_nodes = [n for n, d in G.nodes(data=True) if d['type'] == 'pathway']
nx.draw_networkx_nodes(G, pos, nodelist=path_nodes, node_color='#FF7F0E',
node_size=1200, node_shape='p', label='Pathways')
# 繪製基因節點 (顏色代表 asinh_Diff)
gene_nodes = [n for n, d in G.nodes(data=True) if d['type'] == 'gene']
g_colors = [gene_colors.get(n, 0) for n in gene_nodes]
g_sizes = [stats_df[stats_df['Gene']==n]['Control_Score'].iloc[0]*800 + 100 for n in gene_nodes]
nodes = nx.draw_networkx_nodes(G, pos, nodelist=gene_nodes, node_color=g_colors,
node_size=g_sizes, cmap='RdYlBu_r', label='Genes')
nx.draw_networkx_edges(G, pos, alpha=0.3, edge_color='gray')
nx.draw_networkx_labels(G, pos, font_size=9, font_weight='bold')
plt.colorbar(nodes, label='asinh Difference')
plt.title(f"CNetplot: Top Pathways & Control Genes (Cluster {cluster_id})")
plt.legend(scatterpoints=1)
plt.savefig(output_path, dpi=300, bbox_inches='tight')
plt.close()
except Exception as e:
print(f"⚠️ CNetplot失敗: {e}")
# --- 3. 主程序 ---
def main_pipeline(excel_path: Path):
output_dir = Path.home() / "Desktop" / "CNA_Analysis_CNet"
output_dir.mkdir(exist_ok=True)
df = pd.read_excel(excel_path)
at_cols = [c for c in df.columns if 'AT' in str(c).upper()]
mt_cols = [c for c in df.columns if 'MT' in str(c).upper()]
gene_col = next(c for c in ['Gene_symbol', 'GeneID'] if c in df.columns)
analyzer = CNA_Dual_Analyzer()
visualizer = NetworkVisualizer()
clusters = sorted(df['leiden'].unique().astype(str))
for cid in clusters:
print(f"\n🚀 處理 Cluster {cid}...")
cluster_df = df[df['leiden'].astype(str) == cid]
cluster_dir = output_dir / f"Cluster_{cid}"
cluster_dir.mkdir(exist_ok=True)
# 1. 計算指標
metrics_df = analyzer.compute_cna_metrics(cluster_df, at_cols, mt_cols)
# 2. 構建 CNA 網絡
genes = metrics_df['Gene'].tolist()
G, stats_df = analyzer.build_control_network(genes, metrics_df)
if not stats_df.empty:
stats_df.to_excel(cluster_dir / "CNA_Report.xlsx", index=False)
# 3. 繪製 PPI 控制圖
visualizer.create_cna_network(G, stats_df, cluster_dir / "CNA_Network.png", cid)
# 4. 繪製 CNetplot (路徑-基因交互圖)
visualizer.create_cnet_plot(stats_df, cluster_dir / "CNetplot.png", cid)
time.sleep(1)
if __name__ == "__main__":
file_path = Path.home() / "Desktop" / "Aja 21649_LeidCom_clustered.xlsx"
main_pipeline(file_path)
Python coding of AUTOENCODER on Japanese eel ovarian transcriptome features-selection
import scanpy as sc
import anndata as ad
import numpy as np0
import torch
import torch.nn as nn
import torch.optim as optim
# 1. 讀取原始檔案
AData = sc.read_h5ad("6ovaries_analysis_results.h5ad")
adata = sc.read_h5ad("6ovaries_analysis_results.h5ad")
# 2. 檢查目前結構
# 如果讀進來已經是 6 x 20899,就不要執行 .T
# 如果讀進來是 20899 x 6,才執行一次 .T 把它轉正
if adata.n_obs > adata.n_vars:
adata = adata
print(f"數據維度: {adata.n_obs} 樣本, {adata.n_vars} 基因")
print("目前的 obs 欄位:", adata.obs.columns)
# 標準化與對數轉換 (深度學習對數值範圍敏感)
sc.pp.log1p(adata)
sc.pp.scale(adata, max_value=10) # 建議增加 scale 步驟
# 轉換為 Tensor
X = adata.X.toarray() if hasattr(adata.X, "toarray") else adata.X
X_tensor = torch.FloatTensor(X)
# 2. 定義自編碼器模型
class Autoencoder(nn.Module):
def __init__(self, input_dim, latent_dim=2):
super(Autoencoder, self).__init__()
# 編碼器:降低維度
self.encoder = nn.Sequential(
nn.Linear(input_dim, 128),
nn.ReLU(),
nn.Linear(128, latent_dim),
)
# 解碼器:重建原始維度
self.decoder = nn.Sequential(
nn.Linear(latent_dim, 128),
nn.ReLU(),
nn.Linear(128, input_dim),
)
def forward(self, x):
latent = self.encoder(x)
reconstructed = self.decoder(latent)
return reconstructed, latent
# 3. 初始化模型、損失函數與優化器
input_dim = X.shape[1]
latent_dim = 1000 # 只有 6 個樣本,建議將隱藏層設小 (如 2),方便可視化
model = Autoencoder(input_dim, latent_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# 4. 訓練模型
epochs = 1000
model.train()
for epoch in range(epochs):
# 前向傳播
reconstructed, latent = model(X_tensor)
loss = criterion(reconstructed, X_tensor)
# 反向傳播與優化
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (epoch + 1) % 20 == 0:
print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")
# 5. 提取隱藏層特徵 (Latent Space)
model.eval()
with torch.no_grad():
_, latent_features = model(X_tensor)
# 將訓練好的特徵存回 AnnData 的 obsm 中,方便 Scanpy 繪圖
adata.obsm['X_ae'] = latent_features.numpy()
# 6. 可視化
sc.pl.scatter(adata, basis='ae', color='treatment', title="Autoencoder Latent Space")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
# 1. 提取編碼器第一層的權重 (Shape: 64 x 20899)
# model.encoder[0] 對應 nn.Linear(input_dim, 64)
weights = model.encoder[0].weight.detach().numpy()
# 2. 計算每個基因(column)的綜合影響力得分
# 我們取絕對值的總和,因為正向和負向權重都代表該基因具有資訊量
gene_importance = np.sum(np.abs(weights), axis=0)
# 3. 建立 DataFrame 並與基因名稱對應
importance_df = pd.DataFrame({
'gene_symbol': adata.var_names,
'importance_score': gene_importance
})
print("-^-" * 30)
# --- 1. 計算統計基準 ---
mean_score = gene_importance.mean()
std_score = gene_importance.std()
suggested_cutoff = mean_score + 2 * std_score
print(f"\n[統計資訊]")
print(f"平均得分 (Mean): {mean_score:.4f}")
print(f"標準差 (Std): {std_score:.4f}")
print(f"建議門檻值 (Mean + 2SD): {suggested_cutoff:.4f}")
print("-" * 30)
# --- 2. 暫停並等待輸入 ---
user_input = input(f"請輸入您想要的 Cutoff 門檻值 (直接按 Enter 則使用建議值 {suggested_cutoff:.4f}): ")
# 判斷輸入邏輯
try:
if user_input.strip() == "":
cutoff = suggested_cutoff
else:
cutoff = float(user_input)
except ValueError:
print("輸入格式錯誤,將自動使用建議門檻值。")
cutoff = suggested_cutoff
print(f"\n確認使用 Cutoff = {cutoff:.4f} 進行篩選...")
plt.figure(figsize=(8, 4))
sns.histplot(importance_df['importance_score'], bins=100, kde=True)
plt.axvline(cutoff, color='red', linestyle='--', label='Mean + 2SD')
plt.title("Distribution of Gene Importance Scores")
plt.legend()
plt.show()
# --- 3. 執行篩選 ---
# 標記是否顯著
importance_df['is_significant'] = importance_df['importance_score'] > cutoff
# 挑選出符合條件的基因並排序
top_genes_df = importance_df[importance_df['is_significant']].sort_values(
by='importance_score', ascending=False
)
# --- 4. 顯示結果 ---
num_sig = len(top_genes_df)
print(f"篩選完成!共找到 {num_sig} 個顯著基因。")
print('============================'*2)
print("--- Autoencoder 識別出的最具影響力基因 --- 基因數目 =", len(top_genes_df))
print(top_genes_df)
import matplotlib.pyplot as plt
# 5. 可視化:將這些重要基因的得分畫出來
import seaborn as sns
plt.figure(figsize=(10, 6))
sns.barplot(data=top_genes_df, x='importance_score', y='gene_symbol', palette='viridis')
plt.title(f"Influential Genes identified by AE, n = {num_sig}")
plt.xlabel("Cumulative Absolute Weights")
plt.show()
results_dict = {}
for i in range(1, 3): # 跑兩次
print(f"\n>>> 開始第 {i} 輪訓練...")
# 這裡放入你原本的代碼:定義模型、訓練、計算 importance...
# (為了簡潔,假設你已將 AE 訓練邏輯包裝成一個函數或直接放在這裡)
# 訓練與篩選邏輯結束後:
current_genes = set(importance_df[importance_df['importance_score'] > cutoff]['gene_symbol'])
results_dict[f'run_{i}'] = current_genes
print(f"第 {i} 輪完成,篩選出 {len(current_genes)} 個基因")
# 最後提取出來
genes_run1 = results_dict['run_1']
genes_run2 = results_dict['run_2']
# 假設第一次篩選出的基因清單為 genes_run1,第二次為 genes_run2
# 這裡根據你提供的 top_genes_df 邏輯提取基因名稱
# 1. 計算交集 (Intersection)
stable_genes = genes_run1.intersection(genes_run2)
# 2. 計算聯集 (Union)
all_detected_genes = genes_run1.union(genes_run2)
# 3. 計算重合率 (Jaccard Index)
jaccard_index = len(stable_genes) / len(all_detected_genes)
print("\n" + "="*40)
print("基因穩定性分析結果")
print("="*40)
print(f"第一次運行基因數: {len(genes_run1)}")
print(f"第二次運行基因數: {len(genes_run2)}")
print(f"真正穩定的基因數 (交集): {len(stable_genes)}")
print(f"重合率 (Jaccard Index): {jaccard_index:.2%}")
# 4. 顯示前 10 個最穩定的基因
print("\n前 10 個穩定基因範例:")
print(list(stable_genes)[:10])
# 5. 存檔:將穩定基因導出
pd.DataFrame({'stable_genes': list(stable_genes)}).to_csv("stable_genes_intersection.csv", index=False)
print("\n已將穩定基因清單存至: stable_genes_intersection.csv")
import pandas as pd
import gseapy as gp
import networkx as nx
import matplotlib.pyplot as plt
from pathlib import Path
# --- 1. 路徑設定 ---
desktop = Path.home() / "Desktop"
stable_csv = Path("stable_genes_intersection.csv") # AE 產出的檔案
excel_source = desktop / "Aja 21649_LeidCom_clustered.xlsx"
output_dir = desktop / "Autoncoder_Enrichment_Results"
output_dir.mkdir(exist_ok=True)
# --- 2. 數據讀取與映射 (Mapping) ---
print("🔍 正在進行基因映射...")
try:
# 讀取 AE 篩選出的穩定基因 (假設欄位名稱為 stable_genes)
ae_genes = pd.read_csv(stable_csv)['stable_genes'].tolist()
# 讀取原始含有註解的 Excel
df_ref = pd.read_excel(excel_source)
# 進行映射:在 Excel 中找尋 Transcriptid 存在於 ae_genes 清單中的行
# 注意:請確保兩者的名稱格式一致
mapped_df = df_ref[df_ref['Transcriptid'].isin(ae_genes)].copy()
# 提取 Gene_symbol 用於富集分析 (去除空值與重複)
gene_list = mapped_df['Gene_symbol'].replace('#N/A', pd.NA).dropna().unique().tolist()
print(f"✅ 映射完成!AE 篩選 {len(ae_genes)} 個 ID,成功對應到 {len(gene_list)} 個有效基因。")
except Exception as e:
print(f"❌ 讀取或映射失敗: {e}")
exit()
# --- 3. 基因富集分析 (Enrichr) ---
print("🚀 執行 KEGG 富集分析...")
try:
enr = gp.enrichr(gene_list=gene_list,
gene_sets=['KEGG_2021_Human'], # 或選取其他適合的物種資料庫
organism='Human',
cutoff=0.5)
res = enr.results
# 儲存完整富集結果
res.to_excel(output_dir / "AE_Stable_Genes_Enrichment.xlsx", index=False)
# 篩選出顯著的通路 (Adj P < 0.05) 作為繪圖對象
top_paths = res[res['Adjusted P-value'] < 0.1].head(10) # 取前 10 名
if top_paths.empty:
print("⚠️ 無顯著富集通路,調整門檻取前 5 名展示...")
top_paths = res.nsmallest(10, 'Adjusted P-value')
except Exception as e:
print(f"❌ 富集分析失敗: {e}")
exit()
# --- 4. 繪製 Cnetplot (Gene-Concept Network) ---
print("🎨 正在生成 Cnetplot...")
try:
G = nx.Graph()
pathway_label_map = {}
for _, row in top_paths.iterrows():
pathway = row['Term']
p_val = row['Adjusted P-value']
pathway_label_map[pathway] = f"{pathway}\n(p={p_val:.2e})"
# 獲取該通路中的基因並與我們的清單取交集
path_genes = str(row['Genes']).split(';')
valid_genes = [g for g in path_genes if g in gene_list]
# 加入通路節點
G.add_node(pathway, node_type='pathway', size=len(valid_genes))
# 加入基因節點與邊 (限制顯示數量避免畫面太亂)
for gene in valid_genes[:15]:
G.add_node(gene, node_type='gene')
G.add_edge(pathway, gene)
# 繪圖佈局
plt.figure(figsize=(15, 12))
pos = nx.spring_layout(G, k=0.5, iterations=50, seed=42)
# 分別繪製通路與基因
path_nodes = [n for n, a in G.nodes(data=True) if a.get('node_type') == 'pathway']
gene_nodes = [n for n, a in G.nodes(data=True) if a.get('node_type') == 'gene']
# 繪製邊
nx.draw_networkx_edges(G, pos, edge_color='#dfe6e9', alpha=0.5)
# 繪製 Pathway 節點 (紅圈)
sizes = [G.nodes[n]['size'] * 500 for n in path_nodes]
nx.draw_networkx_nodes(G, pos, nodelist=path_nodes, node_color='#ff7675',
node_size=sizes, edgecolors='black', label='Pathways')
# 繪製 Gene 節點 (藍圈)
nx.draw_networkx_nodes(G, pos, nodelist=gene_nodes, node_color='#74b9ff',
node_size=150, alpha=0.8, label='Genes')
# 加入標籤 (Pathway)
path_labels = {n: pathway_label_map[n] for n in path_nodes}
nx.draw_networkx_labels(G, pos, labels=path_labels, font_size=10, font_weight='bold')
# 加入標籤 (Gene)
gene_labels = {n: n for n in gene_nodes}
nx.draw_networkx_labels(G, pos, labels=gene_labels, font_size=8)
plt.title("Cnetplot: AE Stable Genes & Top KEGG Pathways", fontsize=15)
plt.axis('off')
# 存檔
plt.savefig(output_dir / "AE_Stable_Cnetplot.png", dpi=300, bbox_inches='tight')
plt.show()
print(f"✅ 分析完成!結果已存至: {output_dir}")
except Exception as e:
print(f"❌ 繪圖失敗: {e}")
import pandas as pd
import gseapy as gp
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import traceback
import sys # 修正 exit() 問題
# --- 1. 路徑與參數設定 ---
desktop = Path.home() / "Desktop"
stable_csv = Path("stable_genes_intersection.csv")
excel_source = desktop / "Aja 21649_LeidCom_clustered.xlsx"
output_dir = desktop / "AE_Stability_Network_Analysis"
output_dir.mkdir(exist_ok=True)
# 你的目標數值欄位
value_col = 'avg_diff'
# --- 2. 數據映射與預處理 ---
print("🔍 正在進行基因數據映射...")
try:
ae_ids = pd.read_csv(stable_csv)['stable_genes'].tolist()
df_ref = pd.read_excel(excel_source)
# --- 自動欄位檢查機制 ---
if value_col not in df_ref.columns:
print(f"⚠️ 找不到欄位 '{value_col}'。")
print(f"現有欄位清單: {df_ref.columns.tolist()}")
# 嘗試找尋備用欄位
alternatives = ['log2FoldChange', 'logFC', 'p_val_adj', 'cluster']
found_alt = next((alt for alt in alternatives if alt in df_ref.columns), None)
if found_alt:
value_col = found_alt
print(f"💡 自動切換至備用欄位: '{value_col}'")
else:
print("❗ 找不到任何數值欄位,將建立預設數值 (1.0) 以維持繪圖。")
df_ref['default_value'] = 1.0
value_col = 'default_value'
mapped_df = df_ref[df_ref['Transcriptid'].isin(ae_ids)].copy()
if mapped_df.empty:
print("❌ 映射後沒有任何基因!請檢查 Transcriptid 是否匹配。")
sys.exit()
# 建立對照字典
gene_value_map = mapped_df.groupby('Gene_symbol')[value_col].mean().to_dict()
gene_list = list(gene_value_map.keys())
print(f"✅ 映射完成:{len(gene_list)} 個獨特基因符號。")
except Exception as e:
print(f"❌ 映射出錯: {e}")
traceback.print_exc()
sys.exit() # 修正後的安全退出
# --- 3. 基因富集分析與長條圖繪製 ---
print("🚀 執行 KEGG 富集分析...")
try:
enr = gp.enrichr(gene_list=gene_list,
gene_sets=['KEGG_2021_Human'],
organism='Human',
cutoff=0.5)
res = enr.results
if res.empty or 'Adjusted P-value' not in res.columns:
print("⚠️ 富集分析沒有結果,可能基因清單太少或不匹配。")
else:
res['-log10_Adj_P'] = -np.log10(res['Adjusted P-value'].replace(0, 1e-10))
# 繪製 Pathway Bar Plot
# 修正:確保 n 是整數
n_top = 20
top_10 = res.nsmallest(n_top, 'Adjusted P-value').iloc[::-1]
plt.figure(figsize=(10, 8)) # 稍微增加高度以容納 20 條
colors = plt.cm.viridis(np.linspace(0.2, 0.8, len(top_10)))
bars = plt.barh(top_10['Term'], top_10['-log10_Adj_P'], color=colors)
# 加入顯著性參考線 (P < 0.05 -> -log10(P) > 1.3)
# plt.axvline(x=-np.log10(0.1), color='red', linestyle='--', alpha=0.5, label='P=0.1')
plt.xlabel('-log10(Adjusted P-value)', fontsize=12)
plt.ylabel('Pathway Term', fontsize=12)
plt.title('Top Enriched KEGG Pathways (AE Stable Genes)', fontsize=14)
plt.legend()
plt.tight_layout()
plt.savefig(output_dir / "Pathway_Enrichment_Barplot.png", dpi=300)
plt.show()
# --- 4. 繪製純基因網路圖 (Network Graph) ---
print("🎨 生成穩定基因網路圖...")
G = nx.Graph()
top_paths = res.nsmallest(5, 'Adjusted P-value')
for _, row in top_paths.iterrows():
path_genes = [g for g in str(row['Genes']).split(';') if g in gene_list]
for i in range(len(path_genes)):
for j in range(i + 1, len(path_genes)):
G.add_edge(path_genes[i], path_genes[j])
if len(G.nodes) == 0:
print("⚠️ 基因間無共同通路聯繫,跳過網路圖製作。")
else:
plt.figure(figsize=(12, 12))
pos = nx.kamada_kawai_layout(G)
node_colors = [gene_value_map.get(n, 0) for n in G.nodes()]
nx.draw_networkx_edges(G, pos, alpha=0.2, edge_color='gray')
nodes = nx.draw_networkx_nodes(G, pos, node_size=500, node_color=node_colors,
cmap=plt.cm.RdBu_r, edgecolors='black', linewidths=0.5)
nx.draw_networkx_labels(G, pos, font_size=8, font_weight='bold')
plt.colorbar(nodes, label=f'Value ({value_col})', shrink=0.7)
plt.title("Gene Interaction Network")
plt.axis('off')
plt.savefig(output_dir / "Gene_Interaction_Network.png", dpi=300)
plt.show()
print(f"✨ 分析完成!結果已儲存至: {output_dir}")
except Exception as e:
traceback.print_exc()