#=GF ID crAss_CARG1
#=GF AC PF25725.2
#=GF DE Crassvirus cargo protein 1 domain
#=GF AU Bateman A;0000-0002-6982-4660
#=GF SE UniProtKB:A0A385DV85
#=GF GA 27.00 27.00;
#=GF TC 27.30 40.70;
#=GF NC 26.20 25.80;
#=GF BM hmmbuild HMM.ann SEED.ann
#=GF SM hmmsearch -E 1000 -Z 90746521 --cpu 8 HMM pfamseq
#=GF TP Family
#=GF RN [1]
#=GF RM 37138077
#=GF RT Structural atlas of a human gut crassvirus.
#=GF RA Bayfield OW, Shkoporov AN, Yutin N, Khokhlova EV, Smith JLR,
#=GF RA Hawkins DEDP, Koonin EV, Hill C, Antson AA;
#=GF RL Nature. 2023;617:409-416.
#=GF DR SO; 0100021; polypeptide_conserved_region;
#=GF CC This entry represents a domain of cargo protein 1 (C1) found in
#=GF CC crassviruses. This protein is unique in that different portions
#=GF CC of it are found in both the capsid and the tail of the virion.
#=GF CC This entry covers residues 425-552 (UniProtKB:A0A385DV85
#=GF CC numbering), which fold into a compact
#=GF CC domain that is bound to the portal protein wing domain inside
#=GF CC the capsid, with 12 such segments interdigitating with wing
#=GF CC loops (SH3-like domains) of the portal. Additionally, 12 copies
#=GF CC of an alpha-helical segment of the same protein (residues
#=GF CC 214-245) are located at the inner surface of the first and
#=GF CC second rings of the tail barrel. A transmembrane helix is
#=GF CC predicted in the N-terminal region (residues 136-158), which may
#=GF CC insert into the host cell membrane. The presence of cargo
#=GF CC protein 1 in both the capsid and tail suggests a mechanism for
#=GF CC protein ejection prior to complete ejection of genomic DNA,
#=GF CC likely involving partial unfolding of the protein as it is
#=GF CC extruded through the tail. Cargo protein 1 is believed to be
#=GF CC ejected in the N to C direction, with the N-terminal part
#=GF CC located in the tail exiting first, followed by the C-terminal
#=GF CC domain located in the capsid. This protein likely contributes to
#=GF CC the formation of a transmembrane channel for DNA ejection. The
#=GF CC protein is broadly conserved among crassviruses, although
#=GF CC members of the epsilon group lack a detectable homologue [1].
#=GF SQ 15
#=GS CARG1_BPCA1/424-556 AC A0A385DV85.1
#=GS A0AAE7RU91_9CAUD/480-611 AC A0AAE7RU91.1
#=GS A0AAF0ILU8_9CAUD/377-507 AC A0AAF0ILU8.1
#=GS A0AAE7RUY1_9CAUD/434-569 AC A0AAE7RUY1.1
#=GS A0A7M1RYG1_9CAUD/207-340 AC A0A7M1RYG1.1
#=GS S0A053_9CAUD/60-161 AC S0A053.1
#=GS A0AAE7V2U3_9CAUD/285-414 AC A0AAE7V2U3.1
#=GS A0A7M1RXU8_9CAUD/152-287 AC A0A7M1RXU8.1
#=GS A0A7M1S008_9CAUD/434-569 AC A0A7M1S008.1
#=GS A0A7M1RXZ2_9CAUD/22-153 AC A0A7M1RXZ2.1
#=GS A0A7M1RVA6_9CAUD/294-425 AC A0A7M1RVA6.1
#=GS A0A7M1RZ43_9CAUD/486-618 AC A0A7M1RZ43.1
#=GS A0A7M1S1F0_9CAUD/258-394 AC A0A7M1S1F0.1
#=GS A0AAF0BFF5_9CAUD/424-556 AC A0AAF0BFF5.1
#=GS A0AA36DUI4_CYLNA/1002-1136 AC A0AA36DUI4.1
CARG1_BPCA1/424-556 ..........n-AFGGWLNTQGGDFTNGVTFINEGGSHEENPYQGIQIGVDPEGAPNLVEQGEVVYD.D...YVFSDRMEIPDDIRKEY....KLRGK...TFAKAAKSAQRESEERPNDPLSTKGLQAAMERIATAQEEARQRKEAHREGNEY-p........................
A0AAE7RU91_9CAUD/480-611 ........dal--------TNGGVFSDGVTVVGEGGSHEENPLTGVPMGVAPDGQPNLVEEGEVIFN.D...YVFSNRLHPSEELLKSVnlpsKYKDN...TFASIAEKINKEPKERPYDPIARRGLLANMSKLMQAQEEVKAIKEAKTEGRQF-a........................
A0AAF0ILU8_9CAUD/377-507 ........dal--------THGGVFSDGVTVVGEGGSHEENPLSGVPMGLAPDGQPNLVEEGEVVFN.D...YVFSNRLHPTEKMLKQYniplKYKDH...TFASIAEKFNKEPKERPNDPIAKRGLLANMGKLMQAQEEVRAKKEAR-QGTQ--fa.......................
A0AAE7RUY1_9CAUD/434-569 .........fg--DGGLMGTHGADFTNGVTVINNGGYHEENPHEGVQIGVDYNGIPNLVEEGEVIYN.D...YVFSNRIKVPDSVKQKY....KLKGGkglTFADAAKKIQKESEERPNDPISKRGLEDGLIKLMQEQEALRGQGQYGLEGVQYA.........................
A0A7M1RYG1_9CAUD/207-340 ..........a-AFGGELNTNGADFPTGLMFIDEGGTHESNPYEGVPMGIAPDGKPNLVEEGETIFN.D...YVFSRRLLVPKTIRNKY....KLGDK..lTFAEASKKLAKVSEERPNDPISQETLHEIMSDLATVQEEVRQKKQARQEGVQYA.........................
S0A053_9CAUD/60-161 ........lat---GGSIISDTFNKPFKFNEFNEGGKHETNSLGGIPQGVGANGKMNTVEEDETSMTtDngkFIFSHRLGVKGDINQY-....-----...------------------------------------------------------vgggmvdpptkltkeavvgdskdfv
A0AAE7V2U3_9CAUD/285-414 ...........HSFGGWLNTQGGDFSNGITMIGNGGTHEQNPFEGVQMGLDPQGVPNLVEEGEVVFN.D...YVFSNRLKVPKKDRKRL....RLKEG...TFANAAEKLQKESAERPNDPISRRGLEASMNRLAGIQEGVRERKSG--------nkfed....................
A0A7M1RXU8_9CAUD/152-287 ..........v-AFGGWVGTHGGDYPTGFSEFNSGGSHETNANGGIPQGIGPNGNPNLVEEGETKWD.D...YVFSQRLKVPKGFGKAYd.lgRVDKK...SYADASKKLSKESEERPFDPISKRGRDAMLGRLQQAQEAQKSIDKADEAMNEI-f........................
A0A7M1S008_9CAUD/434-569 .........fg--DGGLMGTHGADFTNGVTVINNGGYHEENPHEGVQIGVDYNGIPNLVEEGEVIYN.D...YVFSNRIKVPDSVKQKY....KLKGGkglTFADAAKRIQKESEERPNDPISKRGLEDGLIKLMQEQEALREQGQYGLEGVQYA.........................
A0A7M1RXZ2_9CAUD/22-153 ...........HAFGGWLNTHGGDFNNGVTIIDEGGTHEQNPNEGVQIGVDQQGVPNLVEQGEVIYN.D...YVFSNRIKLPESIKKKY....KLKGD...TFADAAKYAQLESQERPNDPISKRGLEANMSRLAEAQEGIKNRRGKG-DTNKF-e........................
A0A7M1RVA6_9CAUD/294-425 ...........HDFGGWLNTHGGDFNNGVTIIDEGGTHEQNPNEGVQMGVDQQGVPNLVEQGEVIYN.D...YVFSNRIKLPESIKKKY....KLKGD...TFADAAKYAQLESQERPNDPISKRGLEANMSRLAEAQEGIKNRRGQG-DSNKF-e........................
A0A7M1RZ43_9CAUD/486-618 ..........y-ELGGPINTHGGVFDNGVTIVGNGGTHEENPLEGVQMGVDEQGIPNLVEEGEVIFN.D...YVFSNRMKAPKNLKKRY....KFKGK...TFADVAKSIQKESEERPNDPISKAGLDVNMARLAMSQEEVRNSKMKKRTSNKYA.........................
A0A7M1S1F0_9CAUD/258-394 ...........HAFGGELGTNGTDWTNGVTIFGEGGTHEENPNQGIPQGVDQNGVPNLVEEGEVKYQ.D...YIFSNRLKADKEILEMVglpdKYKRK...KYSDLAEKASKESEERPNDPISKAGLEDSMLRLQVAQEVQRAKKKGN-------kfskggr..................
A0AAF0BFF5_9CAUD/424-556 ..........n-AFGGWLNTQGGDFTNGVTFIDEGGSHEENPYQGIQIGVDPEGAPNLVEQGEVVYD.D...YVFSDRMEIPDDIRKEY....KLRGK...TFAKAAKSAQRESEERPNDPLSTRGLQAAMERIATAQEEARERKEAHREGNEY-p........................
A0AA36DUI4_CYLNA/1002-1136 kpvgtgvyhnh--------------CKGGTLVNTGGSHQENPYGGVQVGVDPNGIPNLLEEGEPVYD.D...YVFSDNIYADGGILDKFiipkKYAGM...LYSEAADKILSEAELRPNDPISNNGLRVMLHRLADAQEAQKQKKEAAKLK----rqink....................
#=GC seq_cons ..........h.thGG.lsTpGGDFsNGVTllsEGGoHEENPhpGVQhGVDssGhPNLVEEGEVlaN.D...YVFSNRl+lPcsl+KcY....KlKsc...TFA-AAK+hpKESEERPNDPIS+RGLcssMtRLtpAQEtlRp+Kput.puspa..........................
//