This commit is contained in:
parent
cb1d872eed
commit
42ce34b748
|
@ -0,0 +1,34 @@
|
||||||
|
.
|
||||||
|
├── alg
|
||||||
|
│ ├── ampf.py
|
||||||
|
│ ├── analysis.py
|
||||||
|
│ ├── blast.py
|
||||||
|
│ ├── _log
|
||||||
|
│ ├── out
|
||||||
|
│ └── startery.py
|
||||||
|
├── cmd
|
||||||
|
│ ├── gen.py
|
||||||
|
│ └── seq2fasta.py
|
||||||
|
├── data
|
||||||
|
│ ├── 217314860.fasta
|
||||||
|
│ ├── 2187833333.fasta
|
||||||
|
│ ├── 2813891763.fasta
|
||||||
|
│ ├── 2813891767.fasta
|
||||||
|
│ ├── 599088294.fasta
|
||||||
|
│ ├── ITS1.fasta
|
||||||
|
│ ├── ITS4.fasta
|
||||||
|
│ └── sequence.fasta
|
||||||
|
├── doc
|
||||||
|
│ ├── main.pdf
|
||||||
|
│ ├── main.tex
|
||||||
|
│ └── references.bib
|
||||||
|
├── _l
|
||||||
|
├── _log
|
||||||
|
│ └── amplification_analysis_log.txt
|
||||||
|
├── log
|
||||||
|
│ ├── amplification_analysis_log.txt
|
||||||
|
│ └── __log.txt
|
||||||
|
└── out
|
||||||
|
└── blast_results.xml
|
||||||
|
|
||||||
|
10 directories, 22 files
|
|
@ -0,0 +1,2 @@
|
||||||
|
Amplified Regions:
|
||||||
|
No amplification regions found.
|
|
@ -0,0 +1,63 @@
|
||||||
|
from Bio import SeqIO
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Locations of the FASTA inputs inside the ../data directory.
# "genome.fasta" is the FASTA file holding the Fusarium solani genome.
# NOTE(review): confirm that genome.fasta is actually present in ../data
# before running; the path is resolved relative to the working directory.
data_dir = "../data"
genome_file, primer_ITS1_file, primer_ITS4_file = (
    os.path.join(data_dir, fasta_name)
    for fasta_name in ("genome.fasta", "ITS1.fasta", "ITS4.fasta")
)
|
||||||
|
|
||||||
|
# Loading primer sequences from FASTA files
|
||||||
|
def load_primer(primer_file):
    """Return the primer sequence stored in a FASTA file as a plain string.

    Args:
        primer_file: Path (or handle) passed straight to ``SeqIO.read`` with
            the ``"fasta"`` format.

    Returns:
        The ``seq`` of the record that ``SeqIO.read`` returns, converted
        with ``str``.
    """
    return str(SeqIO.read(primer_file, "fasta").seq)
|
||||||
|
|
||||||
|
# Function that finds the positions of the amplified regions
|
||||||
|
# Maps ASCII lower-case letters to upper-case without ever changing the
# string length, so match offsets stay valid for the original sequence.
_ASCII_UPPER = str.maketrans(
    "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
)

# Complement table for upper-case IUPAC nucleotide codes.
_COMPLEMENT = str.maketrans("ACGTURYKMSWBDHVN", "TGCAAYRMKSWVHDBN")


def _find_all(text, pattern):
    """Return the ascending start offsets of every (overlapping) match."""
    hits = []
    pos = text.find(pattern)
    while pos != -1:
        hits.append(pos)
        pos = text.find(pattern, pos + 1)
    return hits


def _reverse_complement(seq):
    """Return the reverse complement of an upper-case nucleotide string."""
    return seq.translate(_COMPLEMENT)[::-1]


def find_amplified_regions(genome_seq, primer_its1, primer_its4):
    """Locate the regions delimited by a forward and a reverse primer.

    Every occurrence of ``primer_its1`` is paired with the first reverse
    primer site that starts after it, and the span from the start of the
    forward site to the end of that reverse site is reported.

    The reverse primer is looked up both as written and as its reverse
    complement: a reverse primer anneals to the opposite strand, so on the
    strand being scanned its binding site reads as the reverse complement.
    Searching only for the literal primer silently finds nothing on real
    templates. The literal form is still accepted so that inputs which
    already store the site in template orientation keep working.

    Matching ignores ASCII letter case (FASTA files may be soft-masked in
    lower case); the returned sequence is sliced from ``genome_seq``
    unchanged.

    Args:
        genome_seq: Template sequence to scan, as a string.
        primer_its1: Forward primer sequence.
        primer_its4: Reverse primer sequence.

    Returns:
        A list of ``(start, end, sequence)`` tuples ordered by ``start``,
        where ``sequence == genome_seq[start:end]``. The list is empty when
        either primer is empty or no primer pair is found.
    """
    # An empty pattern "matches" at every offset with str.find, which would
    # yield meaningless regions, so treat empty inputs as "nothing found".
    if not genome_seq or not primer_its1 or not primer_its4:
        return []

    template = genome_seq.translate(_ASCII_UPPER)
    forward = primer_its1.translate(_ASCII_UPPER)
    reverse = primer_its4.translate(_ASCII_UPPER)

    forward_sites = _find_all(template, forward)
    # The set union removes duplicates when the primer is its own reverse
    # complement.
    reverse_sites = sorted(
        set(_find_all(template, reverse))
        | set(_find_all(template, _reverse_complement(reverse)))
    )

    amplified_regions = []
    reverse_len = len(primer_its4)
    cursor = 0
    # Both site lists are ascending, so one forward sweep pairs each forward
    # site with the first reverse site located after it.
    for start in forward_sites:
        while cursor < len(reverse_sites) and reverse_sites[cursor] <= start:
            cursor += 1
        if cursor == len(reverse_sites):
            break  # no reverse site remains for this or any later start
        end = reverse_sites[cursor] + reverse_len
        amplified_regions.append((start, end, genome_seq[start:end]))
    return amplified_regions
|
||||||
|
|
||||||
|
# Load both primer sequences.
primer_ITS1, primer_ITS4 = (
    load_primer(primer_path)
    for primer_path in (primer_ITS1_file, primer_ITS4_file)
)

# Read the genome record from its FASTA file.
with open(genome_file) as genome_handle:
    genome_record = SeqIO.read(genome_handle, "fasta")
genome_seq = str(genome_record.seq)

# Find the regions delimited by the two primers.
amplified_regions = find_amplified_regions(genome_seq, primer_ITS1, primer_ITS4)

# Build the report first, then write it out in a single call.
output_path = os.path.join(data_dir, "amplified_regions.txt")
report_lines = ["Amplified Regions:\n"]
for start, end, region in amplified_regions:
    report_lines.append(f"Start: {start}, End: {end}, Length: {len(region)}\n")
    report_lines.append(f"Region sequence: {region}\n\n")

with open(output_path, "w") as output_file:
    output_file.writelines(report_lines)

print(f"Amplified regions saved to {output_path}")
|
||||||
|
|
|
@ -8,9 +8,9 @@ import time
|
||||||
Entrez.email = "baiobelfer@gmail.com"
|
Entrez.email = "baiobelfer@gmail.com"
|
||||||
|
|
||||||
# Ścieżki do plików
|
# Ścieżki do plików
|
||||||
data_dir = "data"
|
data_dir = "../data"
|
||||||
log_dir = "_log"
|
log_dir = "../_log"
|
||||||
out_dir = "out"
|
out_dir = "../out"
|
||||||
os.makedirs(log_dir, exist_ok=True)
|
os.makedirs(log_dir, exist_ok=True)
|
||||||
os.makedirs(out_dir, exist_ok=True)
|
os.makedirs(out_dir, exist_ok=True)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
from Bio import Entrez, SeqIO
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Directory that receives the downloaded FASTA files; created if missing.
data_dir = "../data"
os.makedirs(data_dir, exist_ok=True)

# Contact e-mail configured for Entrez (required by NCBI).
Entrez.email = "baiobelfer@gmail.com"

# GenBank identifiers of the selected Fusarium solani isolates.
genbank_ids = [
    "217314860",   # Fusarium solani isolate T03
    "2813891763",  # Fusarium solani isolate Fso2
    "599088294",   # Uncultured Fusarium clone TTRK-10
    "2813891767",  # Fusarium solani isolate Fso6
    "2187833333",  # Fusarium solani isolate CBG103
]
|
||||||
|
|
||||||
|
# Function that downloads genomes and saves them as FASTA files
|
||||||
|
def download_genome(genbank_id):
    """Fetch one record from NCBI and save it as ``<genbank_id>.fasta``.

    The record is requested through ``Entrez.efetch`` from the
    ``nucleotide`` database as FASTA text, parsed with ``SeqIO.read`` and
    written into ``data_dir``. Progress is reported on stdout.

    Any ``Exception`` raised while fetching, parsing or writing is caught
    and reported with a printed message instead of being propagated, so one
    failed identifier does not abort the caller's loop.

    Args:
        genbank_id: Identifier passed to ``Entrez.efetch`` as ``id`` and
            used as the output file's base name.
    """
    try:
        # Request the record from NCBI in FASTA text form and parse it.
        fetch_options = {
            "db": "nucleotide",
            "id": genbank_id,
            "rettype": "fasta",
            "retmode": "text",
        }
        with Entrez.efetch(**fetch_options) as response:
            fasta_record = SeqIO.read(response, "fasta")

        # Store the record as a FASTA file inside data_dir.
        fasta_path = os.path.join(data_dir, f"{genbank_id}.fasta")
        SeqIO.write(fasta_record, fasta_path, "fasta")
        print(f"Pobrano i zapisano {genbank_id} do {fasta_path}")
    except Exception as e:
        print(f"Nie udało się pobrać {genbank_id}: {e}")
|
||||||
|
|
||||||
|
# Download and store the record for every configured identifier.
for accession_id in genbank_ids:
    download_genome(accession_id)

print("Pobieranie zakończone.")
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
>FJ459973.1 Fusarium solani isolate T03 18S ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA gene, partial sequence
|
||||||
|
TCCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAA
|
||||||
|
CATACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCA
|
||||||
|
GAGGACCCCCTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTA
|
||||||
|
AAACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGAT
|
||||||
|
AAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCG
|
||||||
|
CCAGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCT
|
||||||
|
GGCGTTGGGGATCGGCGGAGCCCCCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGG
|
||||||
|
TCCCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCC
|
||||||
|
ACGCCGTAAAACACCCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCT
|
||||||
|
GAACTTAAGCATATCAATAAGCGGAGGA
|
|
@ -0,0 +1,11 @@
|
||||||
|
>OM502955.1 Fusarium solani isolate CBG103 small subunit ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and large subunit ribosomal RNA gene, partial sequence
|
||||||
|
CCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAAC
|
||||||
|
ATACCTAAACGTTGCTTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCAG
|
||||||
|
AGGACCCCTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTAAA
|
||||||
|
ACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGATAA
|
||||||
|
GTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCGCC
|
||||||
|
AGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCTGG
|
||||||
|
CGTTGGGGATCGGCGGAGCCCCCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGGTC
|
||||||
|
CCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCCAC
|
||||||
|
GCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTGAA
|
||||||
|
CTTAAGCATATCAATAAGCGGAG
|
|
@ -0,0 +1,11 @@
|
||||||
|
>PQ432857.1 Fusarium solani isolate Fso2 small subunit ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and large subunit ribosomal RNA gene, partial sequence
|
||||||
|
CGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAACA
|
||||||
|
TACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCAGA
|
||||||
|
GGACCCTTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTAAAA
|
||||||
|
CTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGATAAG
|
||||||
|
TAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCGCCA
|
||||||
|
GTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCTGGC
|
||||||
|
GTTGGGGATCGGCGGAGCCCCTCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGGTC
|
||||||
|
CCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCCAC
|
||||||
|
GCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTGAA
|
||||||
|
CTTAAGCATATCAATAAGCGG
|
|
@ -0,0 +1,11 @@
|
||||||
|
>PQ432861.1 Fusarium solani isolate Fso6 small subunit ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and large subunit ribosomal RNA gene, partial sequence
|
||||||
|
TCCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAA
|
||||||
|
CATACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCA
|
||||||
|
GAGGACCCTTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTAA
|
||||||
|
AACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGATA
|
||||||
|
AGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCGC
|
||||||
|
CAGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCTG
|
||||||
|
GCGTTGGGGATCGGCGGAGCCCCTCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGG
|
||||||
|
TCCCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCC
|
||||||
|
ACGCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTG
|
||||||
|
AACTTAAGCATATCAATAAGCGGAGGA
|
|
@ -0,0 +1,11 @@
|
||||||
|
>KJ400965.1 Uncultured Fusarium clone TTRK-10 18S ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA gene, partial sequence
|
||||||
|
TCCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAA
|
||||||
|
CATACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCA
|
||||||
|
GAGGACCCCCTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTA
|
||||||
|
AAACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGAT
|
||||||
|
AAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCG
|
||||||
|
CCAGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCT
|
||||||
|
GGCGTTGGGGATCGGCGGAGCCCCCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGG
|
||||||
|
TCCCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCC
|
||||||
|
ACGCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTG
|
||||||
|
AACTTAAGCATATCAATAAGCGGAGGA
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue