"""Simulate ITS1/ITS4 amplification on a sequence and BLAST the amplicons.

The script reads two primers and one target sequence from FASTA files,
locates exact primer matches, logs every region spanning an ITS1 site and the
next ITS4 site, submits those regions to NCBI BLAST (blastn against nt) and
writes a plain-text summary of the returned HSPs.
"""

import os
import statistics
import time

from Bio import Entrez, SearchIO, SeqIO
from Bio.Blast import NCBIWWW, NCBIXML

# Contact e-mail address for the NCBI API.
Entrez.email = "baiobelfer@gmail.com"

# Working directories.
data_dir = "data"
log_dir = "_log"
out_dir = "out"
os.makedirs(log_dir, exist_ok=True)
os.makedirs(out_dir, exist_ok=True)

# Primer and target-sequence input files.
primer_ITS1_file = os.path.join(data_dir, "ITS1.fasta")
primer_ITS4_file = os.path.join(data_dir, "ITS4.fasta")
sequence_file = os.path.join(data_dir, "sequence.fasta")

# File that receives the raw BLAST XML output.
blast_results_file = os.path.join(data_dir, "blast_results.xml")


def load_primer(primer_file: str) -> str:
    """Return the sequence of the single FASTA record in *primer_file*.

    ``SeqIO.read`` is used, so the file must contain exactly one record.
    """
    record = SeqIO.read(primer_file, "fasta")
    return str(record.seq)


def find_primer_sites(genome_seq: str, primer_seq: str) -> list:
    """Return the 0-based start index of every exact match of the primer.

    Matching is exact and case-sensitive. Overlapping matches are all
    reported, in ascending order. An empty primer yields no sites (a naive
    scan would otherwise "match" at every position).
    """
    if not primer_seq:
        return []

    sites = []
    position = genome_seq.find(primer_seq)
    while position != -1:
        sites.append(position)
        # Restart one character further so overlapping hits are kept.
        position = genome_seq.find(primer_seq, position + 1)
    return sites


def _write_length_statistics(log_file, lengths: list) -> None:
    """Write mean/median/min/max of *lengths* to *log_file*, or a notice if empty."""
    if not lengths:
        log_file.write("No amplification regions found.\n")
        return

    avg_length = statistics.mean(lengths)
    median_length = statistics.median(lengths)
    min_length = min(lengths)
    max_length = max(lengths)

    log_file.write("\nAmplification Statistics:\n")
    log_file.write(f"Average Length: {avg_length} bp\n")
    log_file.write(f"Median Length: {median_length} bp\n")
    log_file.write(f"Min Length: {min_length} bp\n")
    log_file.write(f"Max Length: {max_length} bp\n")


def analyze_amplification(sequence_file: str, primer_its1: str,
                          primer_its4: str, log_path: str) -> list:
    """Find the regions bounded by ITS1 and ITS4 primer sites and log them.

    For every ITS1 site, the first ITS4 site located after it is used as the
    end of the region; the region runs from the start of the ITS1 match to
    the end of the ITS4 match. Each region and summary length statistics are
    written to *log_path* (overwritten on every call).

    Args:
        sequence_file: FASTA file holding exactly one target sequence.
        primer_its1: Sequence searched for as the region start.
        primer_its4: Sequence searched for as the region end.
        log_path: Destination of the text log.

    Returns:
        The amplified regions as strings, in order of their ITS1 site.

    NOTE(review): primer_its4 is searched verbatim on the same strand as
    primer_its1 (no reverse complement is taken). Presumably ITS4.fasta is
    expected to hold the sequence as it appears on that strand - confirm.
    """
    amplified_lengths = []
    amplified_sequences = []
    its4_len = len(primer_its4)

    with open(log_path, "w", encoding="utf-8") as log_file:
        log_file.write("Amplified Regions:\n")

        # Load the target sequence.
        genome = SeqIO.read(sequence_file, "fasta")
        genome_seq = str(genome.seq)

        # Locate primer matches (both lists are in ascending order).
        its1_sites = find_primer_sites(genome_seq, primer_its1)
        its4_sites = find_primer_sites(genome_seq, primer_its4)

        for start in its1_sites:
            # Nearest ITS4 site downstream of this ITS1 site, if any.
            end = next((site for site in its4_sites if site > start), None)
            if end is None:
                continue

            region_end = end + its4_len
            amplified_region = genome_seq[start:region_end]
            amplified_len = len(amplified_region)
            amplified_lengths.append(amplified_len)
            amplified_sequences.append(amplified_region)
            log_file.write(
                f"Position {start}-{region_end} | Length: {amplified_len} bp\n"
            )

        # Summary statistics of the amplified-region lengths.
        _write_length_statistics(log_file, amplified_lengths)

    return amplified_sequences


def perform_blast(sequences, blast_output_file):
    """Submit *sequences* to NCBI BLAST and save the XML result.

    The sequences are sent as one multi-FASTA query (records named
    ``amplicon_1``, ``amplicon_2``, ...) so that each amplicon is a separate
    query; joining bare sequences with newlines would presumably have them
    read as a single concatenated sequence. The search is ``blastn`` against
    the ``nt`` database.

    Args:
        sequences: Iterable of nucleotide sequences as strings.
        blast_output_file: Path the raw BLAST XML is written to.
    """
    query_sequence = "\n".join(
        f">amplicon_{index}\n{sequence}"
        for index, sequence in enumerate(sequences, start=1)
    )

    print("Wysyłanie zapytania BLAST do NCBI...")
    result_handle = NCBIWWW.qblast("blastn", "nt", query_sequence)

    # Close the result handle even if writing the output file fails.
    try:
        with open(blast_output_file, "w", encoding="utf-8") as out_handle:
            out_handle.write(result_handle.read())
    finally:
        result_handle.close()

    print(f"Wyniki BLAST zapisane do pliku {blast_output_file}")


def analyze_blast(blast_output_file, analysis_log_path):
    """Write a text summary of every HSP in a BLAST XML file.

    Args:
        blast_output_file: BLAST XML file to parse.
        analysis_log_path: Destination of the text summary (overwritten).
    """
    print("Analiza wyników BLAST...")

    with open(blast_output_file) as result_handle, \
            open(analysis_log_path, "w", encoding="utf-8") as log_file:
        # One record per query; each alignment may hold several HSPs.
        for record in NCBIXML.parse(result_handle):
            for alignment in record.alignments:
                for hsp in alignment.hsps:
                    log_file.write("****HIT****\n")
                    log_file.write(f"Sequence: {alignment.hit_def}\n")
                    log_file.write(f"Length: {hsp.align_length}\n")
                    log_file.write(f"E-value: {hsp.expect}\n")
                    log_file.write(f"Score: {hsp.score}\n")
                    log_file.write(
                        f"Identities: {hsp.identities}/{hsp.align_length}\n"
                    )
                    log_file.write("Query sequence:\n")
                    log_file.write(f"{hsp.query}\n")
                    log_file.write("Match:\n")
                    log_file.write(f"{hsp.match}\n")
                    log_file.write("Subject sequence:\n")
                    log_file.write(f"{hsp.sbjct}\n\n")

    print(f"Analiza BLAST zakończona. Wyniki zapisane do {analysis_log_path}")


def main():
    """Run the amplification analysis and, if regions were found, BLAST them."""
    # Load the primers.
    primer_ITS1 = load_primer(primer_ITS1_file)
    primer_ITS4 = load_primer(primer_ITS4_file)

    # Amplification analysis and its log.
    log_path = os.path.join(log_dir, "amplification_analysis_log.txt")
    amplified_sequences = analyze_amplification(
        sequence_file, primer_ITS1, primer_ITS4, log_path
    )
    print(f"Amplification analysis completed. Results saved to {log_path}")

    if amplified_sequences:
        # BLAST the amplified regions.
        perform_blast(amplified_sequences, blast_results_file)

        # Summarise the BLAST results.
        blast_analysis_log = os.path.join(log_dir, "blast_analysis_log.txt")
        analyze_blast(blast_results_file, blast_analysis_log)
        print(f"BLAST analysis completed. Results saved to {blast_analysis_log}")
    else:
        print("Brak amplifikowanych regionów do analizy BLAST.")


if __name__ == "__main__":
    main()