64 lines
2.3 KiB
Python
64 lines
2.3 KiB
Python
|
import bisect
import os

from Bio import SeqIO
|
||
|
|
||
|
# Locations of the FASTA input files, all kept in the ../data directory.
data_dir = "../data"

# FASTA file holding the Fusarium solani genome.
genome_file = os.path.join(data_dir, "genome.fasta")

# One FASTA file per primer.
primer_ITS1_file, primer_ITS4_file = (
    os.path.join(data_dir, fasta_name)
    for fasta_name in ("ITS1.fasta", "ITS4.fasta")
)
|
||
|
|
||
|
def load_primer(primer_file):
    """Return the primer sequence stored in a FASTA file as a plain string.

    Args:
        primer_file: Path of the FASTA file to parse with ``SeqIO.read``.

    Returns:
        The sequence of the parsed record, converted with ``str``.
    """
    # SeqIO.read parses the file as a single FASTA record.
    return str(SeqIO.read(primer_file, "fasta").seq)
|
||
|
|
||
|
def _find_all(haystack, needle):
    """Return the start index of every occurrence of needle, overlaps included."""
    hits = []
    pos = haystack.find(needle)
    while pos != -1:
        hits.append(pos)
        # Advance by one character only, so overlapping matches are kept.
        pos = haystack.find(needle, pos + 1)
    return hits


def find_amplified_regions(genome_seq, primer_its1, primer_its4):
    """Locate the regions spanning an ITS1 match to the next ITS4 match.

    For every occurrence of ``primer_its1`` in ``genome_seq`` the nearest
    occurrence of ``primer_its4`` that starts at a strictly greater index is
    taken, and the region from the start of the ITS1 match to the end of that
    ITS4 match is reported. Matching ignores letter case, so lowercase
    stretches of the genome or lowercase primers still match.

    NOTE(review): both primers are matched literally on the given strand.
    A reverse primer normally anneals to the opposite strand, so the ITS4
    input is presumably expected to be in forward-strand form already --
    confirm against the primer files.

    Args:
        genome_seq: Genome sequence as a string.
        primer_its1: Sequence marking the start of a region.
        primer_its4: Sequence marking the end of a region.

    Returns:
        A list of ``(start, end, region)`` tuples ordered by ``start``, where
        ``region == genome_seq[start:end]`` (original letter case) and ``end``
        is the index just past the ITS4 match. The list is empty when either
        primer is empty or when no ITS1 match is followed by an ITS4 match.
    """
    # An empty primer would "match" at every index and yield meaningless
    # regions, so treat it as having no binding sites at all.
    if not primer_its1 or not primer_its4:
        return []

    # Search an upper-cased copy. upper() keeps the length of nucleotide
    # letters, so indices found here are valid for genome_seq as well.
    searchable = genome_seq.upper()
    its1_positions = _find_all(searchable, primer_its1.upper())
    its4_positions = _find_all(searchable, primer_its4.upper())
    its4_len = len(primer_its4)

    amplified_regions = []
    for start in its1_positions:
        # Both lists are ascending, so bisect gives the first ITS4 site that
        # begins strictly after this ITS1 site.
        idx = bisect.bisect_right(its4_positions, start)
        if idx == len(its4_positions):
            # No ITS4 site to the right; later ITS1 sites cannot have one either.
            break
        end = its4_positions[idx] + its4_len
        amplified_regions.append((start, end, genome_seq[start:end]))
    return amplified_regions
|
||
|
|
||
|
# Load both primer sequences (ITS1 first, then ITS4).
primer_ITS1, primer_ITS4 = map(load_primer, (primer_ITS1_file, primer_ITS4_file))

# Read the genome from its FASTA file and keep the sequence as a plain string.
with open(genome_file) as genome_handle:
    genome_record = SeqIO.read(genome_handle, "fasta")
genome_seq = str(genome_record.seq)

# Find the regions spanning an ITS1 match to the following ITS4 match.
amplified_regions = find_amplified_regions(genome_seq, primer_ITS1, primer_ITS4)

# Write the report: a header line, then two lines and a blank line per region.
output_path = os.path.join(data_dir, "amplified_regions.txt")
with open(output_path, "w") as output_file:
    output_file.write("Amplified Regions:\n")
    output_file.writelines(
        f"Start: {start}, End: {end}, Length: {len(region)}\n"
        f"Region sequence: {region}\n\n"
        for start, end, region in amplified_regions
    )

print(f"Amplified regions saved to {output_path}")
|
||
|
|