import json
from typing import Iterator

from Bio import SeqIO
from sqlalchemy.orm import Session

from app.database import engine, SessionLocal, Base
from app.models import Sequence, Feature

# GenBank feature types that are persisted; features of any other type are skipped.
_STORED_FEATURE_TYPES = frozenset(
    {
        "promoter",
        "RBS",
        "CDS",
        "protein_bind",
        "misc_feature",
        "rep_origin",
        "terminator",
    }
)

# Placeholder Biopython puts in SeqRecord.description when none was provided.
# NOTE(review): value taken from the Biopython SeqRecord documentation - confirm
# against the installed Biopython version.
_UNKNOWN_DESCRIPTION = "<unknown description>"

# Create the tables in the database.
Base.metadata.create_all(bind=engine)


def get_db() -> Iterator[Session]:
    """Yield a database session and close it when the generator is finalized."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


def _build_qualifiers(feature) -> dict:
    """Return the type-specific qualifiers that are stored for *feature*.

    Only the first value of each GenBank qualifier is kept; a fixed fallback
    string is used when the qualifier is absent. Feature types without a
    dedicated rule get an empty dict.
    """
    source = feature.qualifiers
    if feature.type == "CDS":
        return {
            'gene_name': source.get('gene', ['Unknown gene'])[0],
            'product': source.get('product', ['Unknown product'])[0],
        }
    if feature.type == "protein_bind":
        return {
            'binding_site': source.get('bound_moiety', ['Unknown binding site'])[0],
        }
    if feature.type == "misc_feature":
        return {'note': source.get('note', ['No additional information'])[0]}
    if feature.type == "rep_origin":
        return {'note': "This is a replication origin."}
    return {}


def load_genbank_to_db(file_path: str) -> None:
    """Load a single-record GenBank file into the database.

    The record is stored as one ``Sequence`` row, plus one ``Feature`` row for
    every feature whose type is in ``_STORED_FEATURE_TYPES``. Everything is
    written in a single transaction: if anything fails, nothing is persisted.

    Args:
        file_path: Path to a GenBank file containing exactly one record
            (``SeqIO.read`` is used to parse it).

    Raises:
        OSError: If the file cannot be opened.
        Exception: Any parsing or database error is re-raised after the
            transaction has been rolled back.
    """
    # Parse first, so a missing/invalid file never opens a database session.
    with open(file_path, 'r') as file:
        record = SeqIO.read(file, "genbank")

    # The description lives on the record itself, not in record.annotations,
    # so test the attribute directly.
    description = record.description
    if not description or description == _UNKNOWN_DESCRIPTION:
        description = 'No description'

    # Own the session explicitly: next(get_db()) would leave the generator
    # suspended and the session unclosed until garbage collection.
    db = SessionLocal()
    try:
        sequence_data = Sequence(
            name=record.name,
            description=description,
            sequence=str(record.seq),
        )
        db.add(sequence_data)
        # Flush (instead of commit) to obtain the sequence ID while keeping the
        # sequence and its features in the same transaction.
        db.flush()

        for feature in record.features:
            if feature.type not in _STORED_FEATURE_TYPES:
                continue
            location = feature.location
            # NOTE(review): this slice ignores strand and spans the gaps of
            # compound (join) locations; feature.extract(record.seq) would be
            # strand/join aware. Kept as-is because consumers may rely on the
            # forward-strand slice together with the stored location string.
            feature_sequence = record.seq[location.start:location.end]
            db.add(
                Feature(
                    type=feature.type,
                    location=str(location),
                    sequence=str(feature_sequence),
                    qualifiers=json.dumps(_build_qualifiers(feature)),
                    sequence_id=sequence_data.id,
                )
            )

        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()

    print(f"Loaded {file_path} to database.")


if __name__ == "__main__":
    load_genbank_to_db("data/plasmid.gb")