"""In-silico PCR: extract genome regions flanked by the ITS1 and ITS4 primers.

For every genome FASTA file listed in ``genome_files`` the script locates all
exact occurrences of both primers, pairs each ITS1 site with the nearest ITS4
site downstream of it, and writes the resulting regions to a text report in
``output_dir``.
"""
import os
from bisect import bisect_right

from Bio import SeqIO

# Paths to the data and output directories (relative to the working directory).
data_dir = "../data"
output_dir = "../out"
os.makedirs(output_dir, exist_ok=True)

# Primer FASTA files.
primer_ITS1_file = os.path.join(data_dir, "ITS1.fasta")
primer_ITS4_file = os.path.join(data_dir, "ITS4.fasta")


def load_primer(primer_file):
    """Return the primer sequence stored in *primer_file* as a plain string.

    Args:
        primer_file: Path to a FASTA file holding the primer.

    Returns:
        The sequence of the file's record as ``str``.

    Raises:
        ValueError: Raised by ``SeqIO.read`` when the file does not contain
            exactly one record.
    """
    record = SeqIO.read(primer_file, "fasta")
    return str(record.seq)


def _find_all(haystack, needle):
    """Return the start index of every occurrence of *needle*, ascending.

    Overlapping occurrences are included because the search resumes one
    character after each hit rather than after the end of the match.
    """
    positions = []
    pos = haystack.find(needle)
    while pos != -1:
        positions.append(pos)
        pos = haystack.find(needle, pos + 1)
    return positions


def find_amplified_regions(genome_seq, primer_its1, primer_its4):
    """Find the regions spanning from an ITS1 site to the next ITS4 site.

    Each ITS1 occurrence is paired with the first ITS4 occurrence that starts
    strictly after it; the region runs from the first base of ITS1 through the
    last base of ITS4. Matching is exact but case-insensitive, so soft-masked
    (lowercase) stretches of the genome are searched too.

    NOTE(review): ``primer_its4`` is searched as given, on the same strand as
    ``primer_its1``. If ITS4.fasta stores the reverse primer in its own 5'->3'
    orientation, its site on this strand is the reverse complement -- confirm
    which orientation the file uses.

    Args:
        genome_seq: Genome sequence as a string.
        primer_its1: Sequence marking the start of a region.
        primer_its4: Sequence marking the end of a region.

    Returns:
        A list of ``(start, end, region)`` tuples in ascending ``start`` order,
        where ``start`` is the 0-based index of the ITS1 match, ``end`` is the
        exclusive index just past the ITS4 match, and ``region`` is
        ``genome_seq[start:end]`` (original letter case preserved).

    Raises:
        ValueError: If either primer is empty. An empty string "matches" at
            every index and would yield one meaningless region per position.
    """
    if not primer_its1 or not primer_its4:
        raise ValueError("primer sequences must not be empty")

    # Search an upper-cased copy; nucleotide codes are ASCII, so indices in
    # the copy line up with indices in the original string.
    searchable = genome_seq.upper()
    its1_positions = _find_all(searchable, primer_its1.upper())
    its4_positions = _find_all(searchable, primer_its4.upper())
    its4_len = len(primer_its4)

    amplified_regions = []
    for start in its1_positions:
        # Both lists are ascending, so the first ITS4 site strictly after
        # `start` can be located by binary search instead of a linear scan.
        idx = bisect_right(its4_positions, start)
        if idx == len(its4_positions):
            # No ITS4 site lies after this ITS1 site, hence none lies after
            # any later ITS1 site either.
            break
        end = its4_positions[idx] + its4_len
        amplified_regions.append((start, end, genome_seq[start:end]))

    return amplified_regions


# NOTE(review): everything below runs at import time, as in the original
# script; module-level names are kept so existing usage is unaffected.

# Load the ITS1 and ITS4 primers.
primer_ITS1 = load_primer(primer_ITS1_file)
primer_ITS4 = load_primer(primer_ITS4_file)

# Genome files to analyse.
genome_files = [
    os.path.join(data_dir, filename)
    for filename in [
        "217314860.fasta",
        "2187833333.fasta",
        "2813891763.fasta",
        "2813891767.fasta",
        "599088294.fasta",
    ]
]

# Run the amplification analysis for every genome file.
for genome_file in genome_files:
    with open(genome_file) as f:
        genome_record = SeqIO.read(f, "fasta")
    genome_seq = str(genome_record.seq)

    # Locate the amplified regions.
    amplified_regions = find_amplified_regions(genome_seq, primer_ITS1, primer_ITS4)

    # Write one report per genome file into the output directory.
    genome_name = os.path.basename(genome_file)
    output_filename = f"amplified_regions_{genome_name}.txt"
    output_path = os.path.join(output_dir, output_filename)
    with open(output_path, "w") as output_file:
        output_file.write(f"Amplified Regions for {genome_name}:\n")
        for start, end, region in amplified_regions:
            output_file.write(f"Start: {start}, End: {end}, Length: {len(region)}\n")
            output_file.write(f"Region sequence: {region}\n\n")

    print(f"Amplified regions for {genome_name} saved to {output_path}")