from Bio import Entrez

# Always tell NCBI who you are
Entrez.email = "A.N.Other@example.com"

# Call EInfo with no arguments and print the raw response body.
# The handle wraps a network response, so the `with` block closes it as
# soon as the body has been read instead of leaving it open.
with Entrez.einfo() as handle:
    result = handle.read()
print(result)
根据 protein_id 取信息
from Bio import Entrez

Entrez.email = "Your.Name.Here@example.org"

# Fetch a single protein record by accession as GenBank-format text and
# print it. The `with` block closes the network handle once it is read.
with Entrez.efetch(db="protein", id="WP_190432046.1",
                   rettype="gb", retmode="text") as handle:
    print(handle.read())
根据关键词(term)取id
from Bio import Entrez

Entrez.email = "Your.Name.Here@example.org"

# Search the protein database for the term "cas12a" (retmax=10,
# idtype="acc") and print the parsed search result.
# Entrez.read consumes the whole response, so the handle can be closed
# right after parsing; the `with` block takes care of that.
with Entrez.esearch(db="protein", retmax=10,
                    term="cas12a", idtype="acc") as handle:
    record = Entrez.read(handle)
print(record)
根据 id 取得详情信息
from Bio import SeqIO
from Bio import Entrez

Entrez.email = 'A.N.Other@example.com'

# Fetch one protein record as GenBank text, parse it with SeqIO and dump
# the main attributes of every parsed record.
# The loop runs inside the `with` block so the handle is still open while
# the records are being read, and is closed afterwards.
with Entrez.efetch(db="protein", id='WP_190432046.1',
                   rettype="gb", retmode="text") as handle:
    records = SeqIO.parse(handle, "genbank")
    for record in records:
        # dir() lists every attribute available on the record object.
        print(dir(record))
        print(record.id)
        # print(record.seq)
        # print(record.format)
        print(record.name)
        print(record.annotations)
        print(record.features)
根据 pmid 取详细信息
from Bio import Entrez, Medline

# NOTE(review): unlike the other snippets, this one does not set
# Entrez.email itself; it relies on an earlier snippet having set it.

# Fetch one PubMed entry in MEDLINE text format and take the first parsed
# record. Parsing happens inside the `with` block so the handle is still
# open while it is read, and is closed afterwards.
with Entrez.efetch(db="pubmed", id='10021369',
                   retmode="text", rettype="medline") as handle:
    records = Medline.parse(handle)
    record = next(records)

# Print selected MEDLINE fields, one per line, in a fixed order.
# Tag names follow the NLM MEDLINE element list: PMID, title (TI),
# abstract (AB), authors (AU), publication date (DP), journal title (JT),
# MeSH terms (MH) and source (SO).
for tag in ('PMID', 'TI', 'AB', 'AU', 'DP', 'JT', 'MH', 'SO'):
    print(record[tag])
常用字段
from Bio import SeqIO
from Bio import Entrez

Entrez.email = 'A.N.Other@example.com'

# Fetch one protein record as GenBank text and keep only the first parsed
# record. The record is pulled from the parser inside the `with` block so
# the handle is still open while it is read, and is closed afterwards.
with Entrez.efetch(db="protein", id='7EU9_A',
                   rettype="gb", retmode="text") as handle:
    records = SeqIO.parse(handle, "genbank")
    record = next(records)

# Public attributes/methods seen on the record:
# ['annotations', 'dbxrefs', 'description', 'features', 'format', 'id', 'letter_annotations', 'lower', 'name', 'reverse_complement', 'seq', 'translate', 'upper']
print(record.id)
print(record.name)
print(record.seq)
# Render the same record as FASTA text.
print(record.format('fasta'))
from Bio import Entrez

# Always tell NCBI who you are
Entrez.email = "A.N.Other@example.com"

# Search the protein database for "cas12" and print the returned ID list
# together with its length.
# NOTE(review): NCBI documents an upper bound on retmax for ESearch, so
# this request may return fewer IDs than asked for - confirm against the
# current E-utilities documentation.
with Entrez.esearch(db="protein", term="cas12", retmax=1000000) as handle:
    # Entrez.read consumes the whole response; the `with` block then
    # closes the network handle.
    record = Entrez.read(handle)
print(record["IdList"], len(record["IdList"]))
得到 genbank 的数据
from Bio import Entrez

# Always tell NCBI who you are
Entrez.email = "A.N.Other@example.com"

# Fetch the record in GenBank format and print it.
# retmode="text" is passed explicitly, matching the other GenBank fetches
# in this file, so the body printed below is plain text.
# NOTE(review): "EU490707.1" has the shape of a nucleotide accession while
# db is "protein" - confirm the intended database.
with Entrez.efetch(db="protein", id="EU490707.1",
                   rettype="gb", retmode="text") as handle:
    # The `with` block closes the network handle once it has been read.
    print(handle.read())
根据第一步的 id 取数据
from Bio import Entrez

# Always tell NCBI who you are
Entrez.email = "A.N.Other@example.com"

# Example IDs taken from an earlier search result:
# ['2222680928', '2222680927', '2222680926', '2222680924']
# Fetch the first of them as GenBank text and print it; the `with` block
# closes the network handle once it has been read.
with Entrez.efetch(db="protein", id="2222680928",
                   rettype="gb", retmode="text") as handle:
    print(handle.read())
得到 fasta 的数据
from Bio import Entrez

# Always tell NCBI who you are
Entrez.email = "A.N.Other@example.com"

# Fetch the record in FASTA format and print it.
# retmode="text" is passed explicitly, as in the other text fetches in
# this file, so the body printed below is plain text.
# NOTE(review): "EU490707.1" has the shape of a nucleotide accession while
# db is "protein" - confirm the intended database.
with Entrez.efetch(db="protein", id="EU490707.1",
                   rettype="fasta", retmode="text") as handle:
    # The `with` block closes the network handle once it has been read.
    print(handle.read())
自己实现了一个方便获取 accids 的工具
只适用于较少的ids的情况
较大的会考虑生成文件(已经实现)
import jsw_bio as bio

# Run the project helper for the search term "cas12", passing a target
# file name. NOTE(review): presumably the helper writes the accession IDs
# to that file - confirm against jsw_bio.
bio.ncbi_download_accids(
    term='cas12',
    filename="./test.list",
)
# Sample accessions: ['VEJ66715.1', 'SUY72866.1', 'SUY81473.1', ...