This commit is contained in:
email 2024-11-07 13:39:46 +01:00
parent cb1d872eed
commit 42ce34b748
11 changed files with 195 additions and 1445 deletions

34
_l Normal file
View File

@ -0,0 +1,34 @@
.
├── alg
│   ├── ampf.py
│   ├── analysis.py
│   ├── blast.py
│   ├── _log
│   ├── out
│   └── startery.py
├── cmd
│   ├── gen.py
│   └── seq2fasta.py
├── data
│   ├── 217314860.fasta
│   ├── 2187833333.fasta
│   ├── 2813891763.fasta
│   ├── 2813891767.fasta
│   ├── 599088294.fasta
│   ├── ITS1.fasta
│   ├── ITS4.fasta
│   └── sequence.fasta
├── doc
│   ├── main.pdf
│   ├── main.tex
│   └── references.bib
├── _l
├── _log
│   └── amplification_analysis_log.txt
├── log
│   ├── amplification_analysis_log.txt
│   └── __log.txt
└── out
    └── blast_results.xml
10 directories, 22 files

View File

@ -0,0 +1,2 @@
Amplified Regions:
No amplification regions found.

63
alg/ampf.py Normal file
View File

@ -0,0 +1,63 @@
from Bio import SeqIO
import os
# Input FASTA files; every path below is built under ../data, which is
# resolved against the current working directory.
data_dir = "../data"
genome_file, primer_ITS1_file, primer_ITS4_file = (
    os.path.join(data_dir, fasta_name)
    for fasta_name in (
        "genome.fasta",  # FASTA file holding the Fusarium solani genome
        "ITS1.fasta",
        "ITS4.fasta",
    )
)
# Loading primer sequences from FASTA files
def load_primer(primer_file):
    """Return the sequence stored in *primer_file* as a plain string.

    The file is parsed with ``SeqIO.read`` in ``"fasta"`` format, so it is
    expected to hold a single record; any error raised by the parser
    propagates to the caller.
    """
    return str(SeqIO.read(primer_file, "fasta").seq)
# Locating the regions amplified between the ITS1 and ITS4 primers
_DNA_COMPLEMENT = str.maketrans("ACGTRYKMBDHV", "TGCAYRMKVHDB")


def _reverse_complement(seq):
    """Return the reverse complement of an upper-case DNA string."""
    return seq.translate(_DNA_COMPLEMENT)[::-1]


def _find_all(haystack, needle):
    """Return the start index of every (possibly overlapping) match, ascending."""
    positions = []
    if not needle:
        # str.find("") matches at every index; an empty primer is no primer.
        return positions
    pos = haystack.find(needle)
    while pos != -1:
        positions.append(pos)
        pos = haystack.find(needle, pos + 1)
    return positions


def find_amplified_regions(genome_seq, primer_its1, primer_its4):
    """Find the regions of *genome_seq* bracketed by the two primers.

    For every ITS1 match, the first ITS4 site that starts after it closes
    the region; the region runs from the ITS1 start through the end of the
    ITS4 site.

    The ITS4 site is searched in both orientations: the primer exactly as
    given and its reverse complement. A reverse primer anneals to the
    opposite strand, so on the strand held in *genome_seq* its site reads
    as the reverse complement; searching only the literal primer text
    misses it.
    NOTE(review): confirm which orientation ITS4.fasta actually stores.

    Matching ignores letter case (soft-masked FASTA uses lower case); the
    returned region keeps the original casing of *genome_seq*.

    Args:
        genome_seq: Template sequence as a string.
        primer_its1: Forward primer sequence.
        primer_its4: Reverse primer sequence (either orientation).

    Returns:
        A list of ``(start, end, region)`` tuples where
        ``region == genome_seq[start:end]``. Empty when either primer is
        empty or no ITS4 site follows an ITS1 site.
    """
    # Sequences are expected to be plain ASCII nucleotide codes, so
    # upper-casing does not shift any index relative to genome_seq.
    template = genome_seq.upper()
    forward = primer_its1.upper()
    reverse = primer_its4.upper()

    its1_positions = _find_all(template, forward)
    # A set collapses the two orientations when the primer is palindromic.
    its4_sites = {reverse, _reverse_complement(reverse)}
    its4_positions = sorted(
        pos for site in its4_sites for pos in _find_all(template, site)
    )

    amplified_regions = []
    for start in its1_positions:
        end = next((pos for pos in its4_positions if pos > start), None)
        if end is None:
            # Both lists are ascending: no later ITS1 match can have a
            # downstream ITS4 site either.
            break
        stop = end + len(primer_its4)
        amplified_regions.append((start, stop, genome_seq[start:stop]))
    return amplified_regions
# Read both primer sequences (ITS1 first, then ITS4).
primer_ITS1, primer_ITS4 = (
    load_primer(primer_path)
    for primer_path in (primer_ITS1_file, primer_ITS4_file)
)

# Read the genome record from its FASTA file.
with open(genome_file) as f:
    genome_record = SeqIO.read(f, "fasta")
genome_seq = str(genome_record.seq)

# Locate the regions bracketed by the two primers.
amplified_regions = find_amplified_regions(genome_seq, primer_ITS1, primer_ITS4)

# Assemble the report lines first, then write them out in one call.
output_path = os.path.join(data_dir, "amplified_regions.txt")
report_lines = ["Amplified Regions:\n"]
for start, end, region in amplified_regions:
    report_lines.append(f"Start: {start}, End: {end}, Length: {len(region)}\n")
    report_lines.append(f"Region sequence: {region}\n\n")
with open(output_path, "w") as output_file:
    output_file.writelines(report_lines)
print(f"Amplified regions saved to {output_path}")

View File

@ -8,9 +8,9 @@ import time
Entrez.email = "baiobelfer@gmail.com" Entrez.email = "baiobelfer@gmail.com"
# Ścieżki do plików # Ścieżki do plików
data_dir = "data" data_dir = "../data"
log_dir = "_log" log_dir = "../_log"
out_dir = "out" out_dir = "../out"
os.makedirs(log_dir, exist_ok=True) os.makedirs(log_dir, exist_ok=True)
os.makedirs(out_dir, exist_ok=True) os.makedirs(out_dir, exist_ok=True)

38
cmd/gen.py Normal file
View File

@ -0,0 +1,38 @@
from Bio import Entrez, SeqIO
import os
# Contact e-mail for Entrez (required by NCBI).
Entrez.email = "baiobelfer@gmail.com"

# Directory that receives the downloaded FASTA files; created if missing.
data_dir = "../data"
os.makedirs(data_dir, exist_ok=True)

# GenBank identifiers of the selected Fusarium solani isolates.
genbank_ids = [
    "217314860",   # Fusarium solani isolate T03
    "2813891763",  # Fusarium solani isolate Fso2
    "599088294",   # Uncultured Fusarium clone TTRK-10
    "2813891767",  # Fusarium solani isolate Fso6
    "2187833333",  # Fusarium solani isolate CBG103
]
# Downloading records and saving them as FASTA files
def download_genome(genbank_id):
    """Fetch one nucleotide record from NCBI and store it as FASTA.

    The record is requested from the ``nucleotide`` database in FASTA text
    format, parsed as a single record, and written to
    ``<data_dir>/<genbank_id>.fasta``. This is best-effort: any exception
    raised while fetching, parsing or writing is caught and reported on
    stdout, so one failing identifier does not stop the caller.

    Args:
        genbank_id: Identifier passed to ``Entrez.efetch`` as ``id``.

    Returns:
        None.
    """
    fasta_path = os.path.join(data_dir, f"{genbank_id}.fasta")
    fetch_options = {
        "db": "nucleotide",
        "id": genbank_id,
        "rettype": "fasta",
        "retmode": "text",
    }
    try:
        # The handle is closed as soon as the record has been parsed.
        with Entrez.efetch(**fetch_options) as handle:
            record = SeqIO.read(handle, "fasta")
        SeqIO.write(record, fasta_path, "fasta")
        print(f"Pobrano i zapisano {genbank_id} do {fasta_path}")
    except Exception as e:
        print(f"Nie udało się pobrać {genbank_id}: {e}")
# Download every configured identifier in list order, then report completion.
for genbank_id in genbank_ids:
    download_genome(genbank_id)

print("Pobieranie zakończone.")

11
data/217314860.fasta Normal file
View File

@ -0,0 +1,11 @@
>FJ459973.1 Fusarium solani isolate T03 18S ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA gene, partial sequence
TCCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAA
CATACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCA
GAGGACCCCCTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTA
AAACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGAT
AAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCG
CCAGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCT
GGCGTTGGGGATCGGCGGAGCCCCCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGG
TCCCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCC
ACGCCGTAAAACACCCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCT
GAACTTAAGCATATCAATAAGCGGAGGA

11
data/2187833333.fasta Normal file
View File

@ -0,0 +1,11 @@
>OM502955.1 Fusarium solani isolate CBG103 small subunit ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and large subunit ribosomal RNA gene, partial sequence
CCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAAC
ATACCTAAACGTTGCTTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCAG
AGGACCCCTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTAAA
ACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGATAA
GTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCGCC
AGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCTGG
CGTTGGGGATCGGCGGAGCCCCCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGGTC
CCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCCAC
GCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTGAA
CTTAAGCATATCAATAAGCGGAG

11
data/2813891763.fasta Normal file
View File

@ -0,0 +1,11 @@
>PQ432857.1 Fusarium solani isolate Fso2 small subunit ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and large subunit ribosomal RNA gene, partial sequence
CGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAACA
TACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCAGA
GGACCCTTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTAAAA
CTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGATAAG
TAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCGCCA
GTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCTGGC
GTTGGGGATCGGCGGAGCCCCTCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGGTC
CCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCCAC
GCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTGAA
CTTAAGCATATCAATAAGCGG

11
data/2813891767.fasta Normal file
View File

@ -0,0 +1,11 @@
>PQ432861.1 Fusarium solani isolate Fso6 small subunit ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and large subunit ribosomal RNA gene, partial sequence
TCCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAA
CATACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCA
GAGGACCCTTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTAA
AACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGATA
AGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCGC
CAGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCTG
GCGTTGGGGATCGGCGGAGCCCCTCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGG
TCCCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCC
ACGCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTG
AACTTAAGCATATCAATAAGCGGAGGA

11
data/599088294.fasta Normal file
View File

@ -0,0 +1,11 @@
>KJ400965.1 Uncultured Fusarium clone TTRK-10 18S ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA gene, partial sequence
TCCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAA
CATACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCA
GAGGACCCCCTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTA
AAACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGAT
AAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCG
CCAGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCT
GGCGTTGGGGATCGGCGGAGCCCCCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGG
TCCCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCC
ACGCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTG
AACTTAAGCATATCAATAAGCGGAGGA

File diff suppressed because it is too large Load Diff