This commit is contained in:
parent
cb1d872eed
commit
42ce34b748
|
@ -0,0 +1,34 @@
|
|||
.
|
||||
├── alg
|
||||
│ ├── ampf.py
|
||||
│ ├── analysis.py
|
||||
│ ├── blast.py
|
||||
│ ├── _log
|
||||
│ ├── out
|
||||
│ └── startery.py
|
||||
├── cmd
|
||||
│ ├── gen.py
|
||||
│ └── seq2fasta.py
|
||||
├── data
|
||||
│ ├── 217314860.fasta
|
||||
│ ├── 2187833333.fasta
|
||||
│ ├── 2813891763.fasta
|
||||
│ ├── 2813891767.fasta
|
||||
│ ├── 599088294.fasta
|
||||
│ ├── ITS1.fasta
|
||||
│ ├── ITS4.fasta
|
||||
│ └── sequence.fasta
|
||||
├── doc
|
||||
│ ├── main.pdf
|
||||
│ ├── main.tex
|
||||
│ └── references.bib
|
||||
├── _l
|
||||
├── _log
|
||||
│ └── amplification_analysis_log.txt
|
||||
├── log
|
||||
│ ├── amplification_analysis_log.txt
|
||||
│ └── __log.txt
|
||||
└── out
|
||||
└── blast_results.xml
|
||||
|
||||
10 directories, 22 files
|
|
@ -0,0 +1,2 @@
|
|||
Amplified Regions:
|
||||
No amplification regions found.
|
|
@ -0,0 +1,63 @@
|
|||
from Bio import SeqIO
|
||||
import os
|
||||
|
||||
# Input FASTA files are looked up in ../data, relative to the current
# working directory.
data_dir = "../data"

# Primer files (one per primer).
primer_ITS1_file = os.path.join(data_dir, "ITS1.fasta")
primer_ITS4_file = os.path.join(data_dir, "ITS4.fasta")

# FASTA file holding the genome to be searched (the original comment names
# Fusarium solani).
genome_file = os.path.join(data_dir, "genome.fasta")
|
||||
|
||||
# Wczytywanie sekwencji primerów z plików FASTA
|
||||
def load_primer(primer_file):
    """Return the sequence stored in *primer_file* as a plain string.

    The file is parsed with ``SeqIO.read`` in ``"fasta"`` format and the
    ``seq`` attribute of the resulting record is converted with ``str``.
    """
    return str(SeqIO.read(primer_file, "fasta").seq)
|
||||
|
||||
# Funkcja do znajdowania pozycji amplifikowanych regionów
|
||||
def _find_all_positions(sequence, pattern):
    """Return the start index of every match of *pattern* in *sequence*.

    Overlapping matches are reported (the search resumes one character
    after each hit).  An empty pattern yields no positions: ``str.find("")``
    would otherwise "match" at every index.
    """
    if not pattern:
        return []
    positions = []
    pos = sequence.find(pattern)
    while pos != -1:
        positions.append(pos)
        pos = sequence.find(pattern, pos + 1)
    return positions


def _reverse_complement(sequence):
    """Return the reverse complement of a DNA string.

    IUPAC nucleotide codes are complemented and letter case is kept;
    any other character is passed through unchanged.
    """
    table = str.maketrans(
        "ACGTRYSWKMBDHVNacgtryswkmbdhvn",
        "TGCAYRSWMKVHDBNtgcayrswmkvhdbn",
    )
    return sequence.translate(table)[::-1]


def find_amplified_regions(genome_seq, primer_its1, primer_its4, *,
                           its4_reverse_complement=False):
    """Locate regions delimited by an ITS1 match and the next ITS4 match.

    Every exact occurrence of ``primer_its1`` is paired with the first
    occurrence of the ITS4 target that starts at a strictly greater index.
    ITS1 matches with no later ITS4 match are skipped.

    Args:
        genome_seq: Sequence to search (plain string).
        primer_its1: Forward primer, matched literally.
        primer_its4: Second primer.  Matched literally by default.
        its4_reverse_complement: When true, search for the reverse
            complement of ``primer_its4`` instead of the literal string.
            NOTE(review): a reverse primer written 5'->3' can only appear
            on the forward strand as its reverse complement -- confirm the
            orientation stored in the ITS4 primer file before enabling.

    Returns:
        A list of ``(start, end, region)`` tuples where ``start`` is the
        index of the ITS1 match, ``end`` is the index just past the ITS4
        match, and ``region`` is ``genome_seq[start:end]``.  Matching is
        exact and case-sensitive.  An empty primer produces no regions.
    """
    if its4_reverse_complement:
        its4_target = _reverse_complement(primer_its4)
    else:
        its4_target = primer_its4

    its1_positions = _find_all_positions(genome_seq, primer_its1)
    its4_positions = _find_all_positions(genome_seq, its4_target)

    amplified_regions = []
    # Both position lists are ascending, so one forward scan pairs each ITS1
    # hit with the first ITS4 hit located after it.
    next_its4 = 0
    for start in its1_positions:
        while (next_its4 < len(its4_positions)
               and its4_positions[next_its4] <= start):
            next_its4 += 1
        if next_its4 == len(its4_positions):
            break  # no ITS4 match remains after this or any later ITS1 hit
        end = its4_positions[next_its4] + len(its4_target)
        amplified_regions.append((start, end, genome_seq[start:end]))
    return amplified_regions
|
||||
|
||||
# Load both primers.
primer_ITS1 = load_primer(primer_ITS1_file)
primer_ITS4 = load_primer(primer_ITS4_file)

# Load the genome from its FASTA file.
with open(genome_file) as genome_handle:
    genome_record = SeqIO.read(genome_handle, "fasta")
genome_seq = str(genome_record.seq)

# Search the genome for regions delimited by the two primers.
amplified_regions = find_amplified_regions(genome_seq, primer_ITS1, primer_ITS4)

# Build the report first, then write it in one go.
output_path = os.path.join(data_dir, "amplified_regions.txt")
report_lines = ["Amplified Regions:\n"]
for start, end, region in amplified_regions:
    report_lines.append(f"Start: {start}, End: {end}, Length: {len(region)}\n")
    report_lines.append(f"Region sequence: {region}\n\n")

with open(output_path, "w") as output_file:
    output_file.writelines(report_lines)

print(f"Amplified regions saved to {output_path}")
|
||||
|
|
@ -8,9 +8,9 @@ import time
|
|||
Entrez.email = "baiobelfer@gmail.com"
|
||||
|
||||
# Ścieżki do plików
|
||||
data_dir = "data"
|
||||
log_dir = "_log"
|
||||
out_dir = "out"
|
||||
data_dir = "../data"
|
||||
log_dir = "../_log"
|
||||
out_dir = "../out"
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
os.makedirs(out_dir, exist_ok=True)
|
||||
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
from Bio import Entrez, SeqIO
|
||||
import os
|
||||
|
||||
# Contact e-mail for Entrez requests (the original comment notes that NCBI
# requires it).
Entrez.email = "baiobelfer@gmail.com"

# GenBank identifiers of the selected Fusarium solani isolates.
genbank_ids = [
    "217314860",   # Fusarium solani isolate T03
    "2813891763",  # Fusarium solani isolate Fso2
    "599088294",   # Uncultured Fusarium clone TTRK-10
    "2813891767",  # Fusarium solani isolate Fso6
    "2187833333",  # Fusarium solani isolate CBG103
]

# Directory that receives the downloaded FASTA files; created if missing.
data_dir = "../data"
os.makedirs(data_dir, exist_ok=True)
|
||||
|
||||
# Funkcja do pobierania genomów i zapisywania ich w plikach FASTA
|
||||
def download_genome(genbank_id):
    """Fetch one nucleotide record from NCBI and save it as ``<id>.fasta``.

    The record is requested with ``Entrez.efetch`` as FASTA text, parsed
    with ``SeqIO.read`` and written into ``data_dir``.  This is best-effort:
    any exception raised along the way is caught and reported on stdout,
    so the function returns ``None`` whether or not the download succeeded.
    """
    query = {
        "db": "nucleotide",
        "id": genbank_id,
        "rettype": "fasta",
        "retmode": "text",
    }
    try:
        # Download the record from NCBI in FASTA format.
        with Entrez.efetch(**query) as response:
            fetched = SeqIO.read(response, "fasta")
        # Store the record as a FASTA file inside data_dir.
        destination = os.path.join(data_dir, f"{genbank_id}.fasta")
        SeqIO.write(fetched, destination, "fasta")
        print(f"Pobrano i zapisano {genbank_id} do {destination}")
    except Exception as error:
        print(f"Nie udało się pobrać {genbank_id}: {error}")
|
||||
|
||||
# Download and store the record for every configured identifier, then
# report completion.
for genbank_id in genbank_ids:
    download_genome(genbank_id)

print("Pobieranie zakończone.")
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
>FJ459973.1 Fusarium solani isolate T03 18S ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA gene, partial sequence
|
||||
TCCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAA
|
||||
CATACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCA
|
||||
GAGGACCCCCTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTA
|
||||
AAACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGAT
|
||||
AAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCG
|
||||
CCAGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCT
|
||||
GGCGTTGGGGATCGGCGGAGCCCCCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGG
|
||||
TCCCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCC
|
||||
ACGCCGTAAAACACCCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCT
|
||||
GAACTTAAGCATATCAATAAGCGGAGGA
|
|
@ -0,0 +1,11 @@
|
|||
>OM502955.1 Fusarium solani isolate CBG103 small subunit ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and large subunit ribosomal RNA gene, partial sequence
|
||||
CCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAAC
|
||||
ATACCTAAACGTTGCTTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCAG
|
||||
AGGACCCCTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTAAA
|
||||
ACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGATAA
|
||||
GTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCGCC
|
||||
AGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCTGG
|
||||
CGTTGGGGATCGGCGGAGCCCCCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGGTC
|
||||
CCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCCAC
|
||||
GCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTGAA
|
||||
CTTAAGCATATCAATAAGCGGAG
|
|
@ -0,0 +1,11 @@
|
|||
>PQ432857.1 Fusarium solani isolate Fso2 small subunit ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and large subunit ribosomal RNA gene, partial sequence
|
||||
CGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAACA
|
||||
TACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCAGA
|
||||
GGACCCTTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTAAAA
|
||||
CTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGATAAG
|
||||
TAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCGCCA
|
||||
GTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCTGGC
|
||||
GTTGGGGATCGGCGGAGCCCCTCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGGTC
|
||||
CCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCCAC
|
||||
GCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTGAA
|
||||
CTTAAGCATATCAATAAGCGG
|
|
@ -0,0 +1,11 @@
|
|||
>PQ432861.1 Fusarium solani isolate Fso6 small subunit ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and large subunit ribosomal RNA gene, partial sequence
|
||||
TCCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAA
|
||||
CATACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCA
|
||||
GAGGACCCTTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTAA
|
||||
AACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGATA
|
||||
AGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCGC
|
||||
CAGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCTG
|
||||
GCGTTGGGGATCGGCGGAGCCCCTCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGG
|
||||
TCCCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCC
|
||||
ACGCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTG
|
||||
AACTTAAGCATATCAATAAGCGGAGGA
|
|
@ -0,0 +1,11 @@
|
|||
>KJ400965.1 Uncultured Fusarium clone TTRK-10 18S ribosomal RNA gene, partial sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA gene, partial sequence
|
||||
TCCGTAGGTGAACCTGCGGAGGGATCATTACCGAGTTATACAACTCATCAACCCTGTGAA
|
||||
CATACCTAAACGTTGCCTCGGCGGGAACAGACGGCCCCGTGAAACGGGCCGCCCCCGCCA
|
||||
GAGGACCCCCTAACTCTGTTTCTATAATGTTTCTTCTGAGTAAAACAAGCAAATAAATTA
|
||||
AAACTTTCAACAACGGATCTCTTGGCTCTGGCATCGATGAAGAACGCAGCGAAATGCGAT
|
||||
AAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCG
|
||||
CCAGTATTCTGGCGGGCATGCCTGTTCGAGCGTCATTACAACCCTCAGGCCCCCGGGCCT
|
||||
GGCGTTGGGGATCGGCGGAGCCCCCCGTGGGCACACGCCGTCCCCCAAATACAGTGGCGG
|
||||
TCCCGCCGCAGCTTCCATCGCGTAGTAGCTAACACCTCGCGACTGGAGAGCGGCGCGGCC
|
||||
ACGCCGTAAAACACCCAACTCTTCTGAAGTTGACCTCGAATCAGGTAGGAATACCCGCTG
|
||||
AACTTAAGCATATCAATAAGCGGAGGA
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue